Commit e4932b70 authored by LielinJiang

Merge branch 'master' of https://github.com/PaddlePaddle/PaddleGAN into readme

import os
import datetime
import argparse
import numpy
import networks
modelnames = networks.__all__
# import datasets
datasetNames = ('Vimeo_90K_interp', )  # datasets.__all__
parser = argparse.ArgumentParser(description='DAIN')
parser.add_argument('--debug', action='store_true', help='Enable debug mode')
parser.add_argument('--netName',
type=str,
default='DAIN',
choices=modelnames,
help='model architecture: ' + ' | '.join(modelnames) +
' (default: DAIN)')
parser.add_argument('--datasetName',
default='Vimeo_90K_interp',
choices=datasetNames,
nargs='+',
help='dataset type : ' + ' | '.join(datasetNames) +
' (default: Vimeo_90K_interp)')
parser.add_argument('--video_path',
default='',
help='the path of selected videos')
parser.add_argument('--output_path', default='', help='the output root path')
parser.add_argument('--seed',
type=int,
default=1,
help='random seed (default: 1)')
parser.add_argument('--batch_size',
'-b',
type=int,
default=1,
help='batch size (default:1)')
parser.add_argument('--channels',
'-c',
type=int,
default=3,
choices=[1, 3],
help='channels of images (default:3)')
parser.add_argument('--filter_size',
'-f',
type=int,
default=4,
help='the size of filters used (default: 4)',
choices=[2, 4, 6, 5, 51])
parser.add_argument('--time_step',
type=float,
default=0.5,
help='time step of the interpolation (e.g. 0.5 doubles the frame rate)')
parser.add_argument(
'--alpha',
type=float,
nargs='+',
default=[0.0, 1.0],
help=
'the ratio of losses for the interpolated and rectified results (default: [0.0, 1.0])'
)
parser.add_argument('--frame_rate',
type=int,
default=None,
help='frame rate of the input video')
parser.add_argument('--patience',
type=int,
default=5,
help='the patience of reduce-on-plateau')
parser.add_argument('--factor',
type=float,
default=0.2,
help='the factor of reduce-on-plateau')
parser.add_argument('--saved_model',
type=str,
default='',
help='path to the model weights')
parser.add_argument('--no-date',
action='store_true',
help='don\'t append date timestamp to folder')
parser.add_argument('--use_cuda',
default=True,
type=bool,
help='use cuda or not')
parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not')
parser.add_argument('--remove_duplicates',
default=True,
type=bool,
help='remove duplicate frames or not')
from .dain import DAIN
from .dain_slowmotion import DAIN_slowmotion
__all__ = ('DAIN', 'DAIN_slowmotion')
import paddle.fluid as fluid
import resblock
import pwcnet
class DAIN(fluid.dygraph.Layer):
def __init__(self, channel=3, filter_size=4, timestep=0.5, training=True):
# base class initialization
super(DAIN, self).__init__()
self.filter_size = filter_size
self.training = training
self.timestep = timestep
assert (timestep == 0.5)
self.numFrames = int(1.0 / timestep) - 1
ctx_ch = 3 * 64 + 3
inplanes = 3 + 3 + 3 + 2 * 1 + 2 * 2 + 16 * 2 + 2 * ctx_ch
self.rectifyNet = resblock.__dict__['MultipleBasicBlock_4'](inplanes,
64)
self.flownets = pwcnet.__dict__['pwc_dc_net']()
self.div_flow = 20.0
def forward(self, input):
"""
Parameters
----------
input: shape (3, batch, 3, width, height)
-----------
"""
losses = []
offsets = []
'''
STEP 1: squeeze the input
'''
if self.training == True:
assert input.shape[0] == 3
input_0 = input[0]
input_1 = input[1]
input_2 = input[2]
else:
# print(input.shape[0])
assert input.shape[0] == 2
input_0 = input[0]
input_2 = input[1]
#prepare the input data of current scale
cur_input_0 = input_0
if self.training == True:
cur_input_1 = input_1
cur_input_2 = input_2
'''
STEP 3.2: concatenating the inputs.
'''
cur_offset_input = fluid.layers.concat([cur_input_0, cur_input_2],
axis=1)
'''
STEP 3.3: perform the estimation
'''
time_offsets = [
kk * self.timestep for kk in range(1, 1 + self.numFrames, 1)
]
cur_offset_outputs = [
self.forward_flownets(self.flownets,
cur_offset_input,
time_offsets=time_offsets),
self.forward_flownets(self.flownets,
fluid.layers.concat(
[cur_input_2, cur_input_0], axis=1),
time_offsets=time_offsets[::-1])
]
cur_offset_output = [cur_offset_outputs[0][0], cur_offset_outputs[1][0]]
# Warp image use warp-op in PWC-Net
ref0 = self.flownets.warp_nomask(cur_input_0, cur_offset_output[0])
ref2 = self.flownets.warp_nomask(cur_input_2, cur_offset_output[1])
cur_output = (ref0 + ref2) / 2.0
rectify_input = fluid.layers.concat([
cur_output, ref0, ref2, cur_offset_output[0], cur_offset_output[1]
],
axis=1)
cur_output_rectified = self.rectifyNet(rectify_input) + cur_output
'''
STEP 3.5: for training phase, we collect the variables to be penalized.
'''
if self.training == True:
losses += [cur_output - cur_input_1]
losses += [cur_output_rectified - cur_input_1]
offsets += [cur_offset_output]
'''
STEP 4: return the results
'''
if self.training == True:
# if in the training phase, we output the losses to be minimized.
# return losses, loss_occlusion
return losses, offsets
else:
cur_outputs = [cur_output, cur_output_rectified]
return cur_outputs, cur_offset_output
def forward_flownets(self, model, input, time_offsets=None):
if time_offsets == None:
time_offsets = [0.5]
elif type(time_offsets) == float:
time_offsets = [time_offsets]
elif type(time_offsets) == list:
pass
# the flow network predicts motion in a single direction only, not bidirectionally
temp = model(input)
# scale the single-direction flow by each time offset to get the flow for every intermediate frame
temps = [
self.div_flow * temp * time_offset for time_offset in time_offsets
]
# upsample the flow with bilinear interpolation (nearest-neighbor would not be better)
temps = [fluid.layers.resize_bilinear(temp, scale=4) for temp in temps]
return temps
import paddle.fluid as fluid
import resblock
import time
import pwcnet
class DAIN_slowmotion(fluid.dygraph.Layer):
def __init__(self, channel=3, filter_size=4, timestep=0.5, training=True):
# base class initialization
super(DAIN_slowmotion, self).__init__()
self.filter_size = filter_size
self.training = training
self.timestep = timestep
self.num_frames = int(1.0 / timestep) - 1
ctx_ch = 3 * 64 + 3
# inplanes = 3 + 3 + 3 + 2*1 + 2*2 + 2
inplanes = 13
self.flownets = pwcnet.__dict__['pwc_dc_net']()
self.rectifyNet = resblock.__dict__['MultipleBasicBlock_4'](inplanes,
64)
self.div_flow = 20.0
def forward(self, input):
"""
Parameters
----------
input: shape (3, batch, 3, width, height)
-----------
"""
losses = []
offsets = []
'''
STEP 1: squeeze the input
'''
if self.training == True:
assert input.shape[0] == 3
input_0 = input[0]
input_1 = input[1]
input_2 = input[2]
else:
assert input.shape[0] == 2
input_0 = input[0]
input_2 = input[1]
#prepare the input data of current scale
cur_input_0 = input_0
if self.training == True:
cur_input_1 = input_1
cur_input_2 = input_2
'''
STEP 3.2: concatenating the inputs.
'''
cur_offset_input = fluid.layers.concat([cur_input_0, cur_input_2],
axis=1)
'''
STEP 3.3: perform the estimation
'''
time_offsets = [
kk * self.timestep for kk in range(1, 1 + self.num_frames, 1)
]
cur_offset_outputs = [
self.forward_flownets(self.flownets,
cur_offset_input,
time_offsets=time_offsets),
self.forward_flownets(self.flownets,
fluid.layers.concat(
[cur_input_2, cur_input_0], axis=1),
time_offsets=time_offsets[::-1])
]
'''
STEP 3.4: perform the frame interpolation process
'''
count = 0
for temp_0, temp_1, timeoffset in zip(cur_offset_outputs[0],
cur_offset_outputs[1],
time_offsets):
cur_offset_output = [temp_0, temp_1]
ref0 = self.flownets.warp_nomask(cur_input_0, cur_offset_output[0])
ref2 = self.flownets.warp_nomask(cur_input_2, cur_offset_output[1])
cur_output_temp = (ref0 + ref2) / 2.0
if count == 0:
cur_output = fluid.layers.unsqueeze(cur_output_temp, axes=0)
else:
cur_output_ = fluid.layers.unsqueeze(cur_output_temp, axes=0)
cur_output = fluid.layers.concat([cur_output, cur_output_],
axis=0)
rectify_input = fluid.layers.concat([
cur_output_temp, ref0, ref2, cur_offset_output[0],
cur_offset_output[1]
],
axis=1)
cur_output_rectified_temp = self.rectifyNet(
rectify_input) + cur_output_temp
if count == 0:
cur_output_rectified = fluid.layers.unsqueeze(
cur_output_rectified_temp, axes=0)
else:
cur_output_rectified_ = fluid.layers.unsqueeze(
cur_output_rectified_temp, axes=0)
cur_output_rectified = fluid.layers.concat(
[cur_output_rectified, cur_output_rectified_], axis=0)
count += 1
'''
STEP 3.5: for training phase, we collect the variables to be penalized.
'''
if self.training == True:
losses += [cur_output - cur_input_1]
losses += [cur_output_rectified - cur_input_1]
offsets += [cur_offset_output]
'''
STEP 4: return the results
'''
if self.training == True:
# if in the training phase, we output the losses to be minimized.
# return losses, loss_occlusion
return losses, offsets
else:
cur_outputs = [cur_output, cur_output_rectified]
return cur_outputs, cur_offset_output
def forward_flownets(self, model, input, time_offsets=None):
if time_offsets == None:
time_offsets = [0.5]
elif type(time_offsets) == float:
time_offsets = [time_offsets]
elif type(time_offsets) == list:
pass
# the flow network predicts motion in a single direction only, not bidirectionally
temp = model(input)
# scale the single-direction flow by each time offset to get the flow for every intermediate frame
temps = [
self.div_flow * temp * time_offset for time_offset in time_offsets
]
# upsample the flow with bilinear interpolation (nearest-neighbor would not be better)
temps = [fluid.layers.resize_bilinear(temp, scale=4) for temp in temps]
return temps
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import time
import glob
import numpy as np
from imageio import imread, imsave
from tqdm import tqdm
import cv2
import paddle.fluid as fluid
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import video2frames, frames2video
from util import *
from my_args import parser
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
def infer_engine(model_dir,
run_mode='fluid',
batch_size=1,
use_gpu=False,
min_subgraph_size=3):
if not use_gpu and not run_mode == 'fluid':
raise ValueError(
"Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
.format(run_mode, use_gpu))
precision_map = {
'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
}
config = fluid.core.AnalysisConfig(os.path.join(model_dir, 'model'),
os.path.join(model_dir, 'params'))
if use_gpu:
# initial GPU memory(M), device ID
config.enable_use_gpu(100, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
else:
config.disable_gpu()
if run_mode in precision_map.keys():
config.enable_tensorrt_engine(workspace_size=1 << 10,
max_batch_size=batch_size,
min_subgraph_size=min_subgraph_size,
precision_mode=precision_map[run_mode],
use_static=False,
use_calib_mode=False)
# disable print log when predict
config.disable_glog_info()
# enable shared memory
config.enable_memory_optim()
# disable feed, fetch OP, needed by zero_copy_run
config.switch_use_feed_fetch_ops(False)
predictor = fluid.core.create_paddle_predictor(config)
return predictor
def executor(model_dir, use_gpu=False):
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
program, feed_names, fetch_targets = fluid.io.load_inference_model(
dirname=model_dir,
executor=exe,
model_filename='model',
params_filename='params')
return exe, program, fetch_targets
class VideoFrameInterp(object):
def __init__(self,
time_step,
model_path,
video_path,
use_gpu=True,
key_frame_thread=0.,
output_path='output',
remove_duplicates=True):
self.video_path = video_path
self.output_path = os.path.join(output_path, 'DAIN')
if model_path is None:
model_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)
self.model_path = model_path
self.time_step = time_step
self.key_frame_thread = key_frame_thread
# store the flag so run() can honor --remove_duplicates
self.remove_duplicates = remove_duplicates
self.exe, self.program, self.fetch_targets = executor(model_path,
use_gpu=use_gpu)
def run(self):
frame_path_input = os.path.join(self.output_path, 'frames-input')
frame_path_interpolated = os.path.join(self.output_path,
'frames-interpolated')
frame_path_combined = os.path.join(self.output_path, 'frames-combined')
video_path_output = os.path.join(self.output_path, 'videos-output')
if not os.path.exists(self.output_path):
os.makedirs(self.output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
timestep = self.time_step
num_frames = int(1.0 / timestep) - 1
if self.video_path.endswith('.mp4'):
videos = [self.video_path]
else:
videos = sorted(glob.glob(os.path.join(self.video_path, '*.mp4')))
for cnt, vid in enumerate(videos):
print("Interpolating video:", vid)
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS)
print("Old fps (frame rate): ", fps)
times_interp = int(1.0 / timestep)
r2 = str(int(fps) * times_interp)
print("New fps (frame rate): ", r2)
out_path = video2frames(vid, frame_path_input)
vidname = vid.split('/')[-1].split('.')[0]
tot_timer = AverageMeter()
proc_timer = AverageMeter()
end = time.time()
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
if self.remove_duplicates:
frames = remove_duplicates(out_path)
img = imread(frames[0])
int_width = img.shape[1]
int_height = img.shape[0]
channel = img.shape[2]
if not channel == 3:
continue
if int_width != ((int_width >> 7) << 7):
int_width_pad = (
((int_width >> 7) + 1) << 7) # more than necessary
padding_left = int((int_width_pad - int_width) / 2)
padding_right = int_width_pad - int_width - padding_left
else:
int_width_pad = int_width
padding_left = 32
padding_right = 32
if int_height != ((int_height >> 7) << 7):
int_height_pad = (
((int_height >> 7) + 1) << 7) # more than necessary
padding_top = int((int_height_pad - int_height) / 2)
padding_bottom = int_height_pad - int_height - padding_top
else:
int_height_pad = int_height
padding_top = 32
padding_bottom = 32
frame_num = len(frames)
print('processing {} frames, from video: {}'.format(frame_num, vid))
if not os.path.exists(os.path.join(frame_path_interpolated,
vidname)):
os.makedirs(os.path.join(frame_path_interpolated, vidname))
if not os.path.exists(os.path.join(frame_path_combined, vidname)):
os.makedirs(os.path.join(frame_path_combined, vidname))
for i in tqdm(range(frame_num - 1)):
first = frames[i]
second = frames[i + 1]
img_first = imread(first)
img_second = imread(second)
'''--------------Frame change test------------------------'''
img_first_gray = np.dot(img_first[..., :3],
[0.299, 0.587, 0.114])
img_second_gray = np.dot(img_second[..., :3],
[0.299, 0.587, 0.114])
img_first_gray = img_first_gray.flatten(order='C')
img_second_gray = img_second_gray.flatten(order='C')
corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1]
key_frame = False
if corr < self.key_frame_thread:
key_frame = True
'''-------------------------------------------------------'''
X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
proc_end = time.time()
o = self.exe.run(self.program,
fetch_list=self.fetch_targets,
feed={"image": X})
y_ = o[0]
proc_timer.update(time.time() - proc_end)
tot_timer.update(time.time() - end)
end = time.time()
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>6d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
num_frames = int(1.0 / timestep) - 1
input_dir = os.path.join(frame_path_input, vidname)
interpolated_dir = os.path.join(frame_path_interpolated, vidname)
combined_dir = os.path.join(frame_path_combined, vidname)
combine_frames(input_dir, interpolated_dir, combined_dir,
num_frames)
frame_pattern_combined = os.path.join(frame_path_combined, vidname,
'%08d.png')
video_pattern_output = os.path.join(video_path_output,
vidname + '.mp4')
if os.path.exists(video_pattern_output):
os.remove(video_pattern_output)
frames2video(frame_pattern_combined, video_pattern_output, r2)
return frame_pattern_combined, video_pattern_output
if __name__ == '__main__':
args = parser.parse_args()
predictor = VideoFrameInterp(args.time_step,
args.saved_model,
args.video_path,
output_path=args.output_path,
remove_duplicates=args.remove_duplicates)
predictor.run()
Custom op compilation:
2. Run `sh make.sh` to build the correlation_lib.so shared library.
3. Add the shared library path to LD_LIBRARY_PATH:
```
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python3.7 -c 'import paddle; print(paddle.sysconfig.get_lib())'`
```
4. Add the Python path of the correlation op:
```
export PYTHONPATH=$PYTHONPATH:`pwd`
```
5. Run the unit test with `python test_correlation.py` to verify that the op loads successfully.
PS: If the paddle whl package was downloaded from the official website, gcc 4.8 is required, i.e. change `g++` in make.sh to `g++-4.8`.
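Once the library is built and both paths are exported, the op can also be loaded by hand from Python. This is a minimal sketch, assuming `fluid.load_op_library` is available in the installed Paddle version and that `correlation_lib.so` sits in the current directory; the `correlation.py` wrapper shipped with this repo may already do the loading for you.
```
import paddle.fluid as fluid

# Register the custom `correlation` operator from the compiled shared library
# (a hypothetical manual load; adjust the path to wherever make.sh put the .so).
fluid.load_op_library('./correlation_lib.so')
```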
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
inline std::vector<int64_t> CorrelationOutputSize(int batch, int input_height, int input_width, int stride1, int stride2, int kernel_size, int pad_size, int max_displacement) {
std::vector<int64_t> output_shape({batch});
int kernel_radius = (kernel_size - 1) / 2;
int border_radius = kernel_radius + max_displacement;
int padded_input_height = input_height + 2 * pad_size;
int padded_input_width = input_width + 2 * pad_size;
int output_channel = ((max_displacement/stride2) * 2 + 1) * ((max_displacement/stride2) * 2 + 1);
output_shape.push_back(output_channel);
int output_height = std::ceil(static_cast<float>(padded_input_height - 2 * border_radius) / static_cast<float>(stride1));
int output_width = std::ceil(static_cast<float>(padded_input_width - 2 * border_radius) / static_cast<float>(stride1));
output_shape.push_back(output_height);
output_shape.push_back(output_width);
return output_shape;
}
class CorrelationOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override{
AddInput("Input1", "input1");
AddInput("Input2", "input2");
AddOutput("Output", "output");
AddAttr<int>("pad_size", "pad size for input1 and input2");
AddAttr<int>("kernel_size", "kernel size of input1 and input2");
AddAttr<int>("max_displacement", "max displacement of input1 and input2");
AddAttr<int>("stride1", "Input1 stride");
AddAttr<int>("stride2", "Input2 stride");
AddAttr<int>("corr_type_multiply", "correlation coefficient").SetDefault(1);
AddComment(R"DOC(Correlation of two feature map. Only support NCHW data format.)DOC");
}
};
class CorrelationOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(input1) cannot be null");
PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(input2) cannot be null");
int stride1 = ctx->Attrs().Get<int>("stride1");
int stride2 = ctx->Attrs().Get<int>("stride2");
int max_displacement = ctx->Attrs().Get<int>("max_displacement");
int pad_size = ctx->Attrs().Get<int>("pad_size");
int kernel_size = ctx->Attrs().Get<int>("kernel_size");
auto in_dims = ctx->GetInputDim("Input1");
auto in2_dims = ctx->GetInputDim("Input2");
PADDLE_ENFORCE_EQ(in_dims.size() == 4, true, "input1 must be 4-dims");
PADDLE_ENFORCE_EQ(in2_dims.size() == 4, true, "input2 must be 4-dims");
std::vector<int64_t> output_shape = CorrelationOutputSize(in_dims[0], in_dims[2], in_dims[3], stride1, stride2, kernel_size, pad_size, max_displacement);
ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override{
auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input1");
PADDLE_ENFORCE_EQ(input_data_type, ctx.Input<Tensor>("Input2")->type(), "Input1 and Input2 should have the same type");
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
};
template <typename T>
class CorrelationOpGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("correlation_grad");
op->SetInput("Input1", this->Input("Input1"));
op->SetInput("Input2", this->Input("Input2"));
op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));
op->SetOutput(framework::GradVarName("Input1"), this->InputGrad("Input1"));
op->SetOutput(framework::GradVarName("Input2"), this->InputGrad("Input2"));
op->SetAttrMap(this->Attrs());
}
};
class CorrelationOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(Input1) should not be null");
PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(Input2) should not be null");
PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Output")), true, "Input(Output@GRAD) should not be null");
auto in1_dims = ctx->GetInputDim("Input1");
auto in2_dims = ctx->GetInputDim("Input2");
ctx->SetOutputDim(framework::GradVarName("Input1"), in1_dims);
ctx->SetOutputDim(framework::GradVarName("Input2"), in1_dims);
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override{
const auto* var = ctx.InputVar(framework::GradVarName("Output"));
if (var == nullptr) {
PADDLE_THROW("cannot find Output@GRAD");
}
return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(ctx, "Input1"), ctx.GetPlace());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(correlation, ops::CorrelationOp, ops::CorrelationOpMaker,
ops::CorrelationOpGradMaker<paddle::framework::OpDesc>,
ops::CorrelationOpGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(correlation_grad, ops::CorrelationOpGrad);
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#define THREADS_PER_BLOCK 32
#define FULL_MASK 0xffffffff
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T>
__forceinline__ __device__ T warpReduceSum(T val) {
for (int offset = 16; offset > 0; offset /= 2) {
val += __shfl_down_sync(FULL_MASK, val, offset);
}
return val;
}
template <typename T>
__forceinline__ __device__ T blockReduceSum(T val) {
static __shared__ T shared[32];
int lane = threadIdx.x % warpSize;
int wid = threadIdx.x / warpSize;
val = warpReduceSum(val);
if (lane == 0)
shared[wid] = val;
__syncthreads();
val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0;
if (wid == 0)
val = warpReduceSum(val);
return val;
}
template <typename T>
__global__ void set_zero(T *x, int num) {
for(int i = blockIdx.x * blockDim.x + threadIdx.x; i < num; i += blockDim.x * gridDim.x)
x[i] = static_cast<T>(0);
}
template <typename T>
__global__ void channel_first(const T *input, T *rinput, const int channel, const int height, const int width, const int pad_size) {
int n = blockIdx.x;
int h = blockIdx.y;
int w = blockIdx.z;
int ch_off = threadIdx.x;
T value;
int dimchw = channel * height * width;
int dimhw = height * width;
int p_dimw = (width + 2 * pad_size);
int p_dimh = (height + 2 * pad_size);
int p_dimchw = channel * p_dimw * p_dimh;
int p_dimcw = channel * p_dimw;
for (int c = ch_off; c < channel; c += THREADS_PER_BLOCK) {
value = input[n * dimchw + c * dimhw + h * width + w];
rinput[n * p_dimchw + (h + pad_size) * p_dimcw + (w + pad_size) * channel + c] = value;
}
}
template <typename T>
__global__ void correlation_forward(T *output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int input_channel, const int input_height, const int input_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int n = blockIdx.x;
int h1 = blockIdx.y * stride1 + max_displacement;
int w1 = blockIdx.z * stride1 + max_displacement;
int c = threadIdx.x;
int p_dimchw = p_input_height * p_input_width * input_channel;
int p_dimcw = p_input_width * input_channel;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int nelems = kernel_size * kernel_size * p_dimc;
for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) {
for(int ti = -displacement_rad; ti <= displacement_rad; ++ti) {
int w2 = w1 + ti * stride2;
int h2 = h1 + tj * stride2;
T acc0 = 0;
for(int j = -kernel_rad; j <= kernel_rad; ++j) {
for(int i = -kernel_rad; i <= kernel_rad; ++i) {
for(int ch = c; ch < p_dimc; ch += blockDim.x) {
int index1 = n * p_dimchw + (h1 + j) * p_dimcw + (w1 + i) * p_dimc + ch;
int index2 = n * p_dimchw + (h2 + j) * p_dimcw + (w2 + i) * p_dimc + ch;
acc0 += static_cast<T>(rinput1[index1] * rinput2[index2]);
}
}
}
if (blockDim.x == warpSize) {
__syncwarp();
acc0 = warpReduceSum(acc0);
} else {
__syncthreads();
acc0 = blockReduceSum(acc0);
}
if (threadIdx.x == 0) {
int tc = (tj + displacement_rad) * displacement_size + (ti + displacement_rad);
const int t_index = n * t_dimchw + tc * t_dimhw + blockIdx.y * t_dimw + blockIdx.z;
output[t_index] = static_cast<T>(acc0 / nelems);
}
}
}
}
//class CorrelationKernel<platform::CUDADeviceContext, T>
template <typename T>
class CorrelationKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must be CUDAPlace");
auto *input1 = ctx.Input<Tensor>("Input1");
auto *input2 = ctx.Input<Tensor>("Input2");
int pad_size = ctx.Attr<int>("pad_size");
int kernel_size = ctx.Attr<int>("kernel_size");
int stride1 = ctx.Attr<int>("stride1");
int stride2 = ctx.Attr<int>("stride2");
int max_displacement = ctx.Attr<int>("max_displacement");
int corr_type_multiply = ctx.Attr<int>("corr_type_multiply");
auto *output = ctx.Output<Tensor>("Output");
output->mutable_data<T>(ctx.GetPlace());
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
// based on input1, which is NCHW
auto in_dims = input1->dims();
int N = in_dims[0];
int C = in_dims[1];
int H = in_dims[2];
int W = in_dims[3];
int padded_input_height = H + 2 * pad_size;
int padded_input_width = W + 2 * pad_size;
Tensor rinput1 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput1.mutable_data<T>(ctx.GetPlace());
Tensor rinput2 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput2.mutable_data<T>(ctx.GetPlace());
set_zero<<<(rinput1.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput1.data<T>(), rinput1.numel());
set_zero<<<(rinput2.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput2.data<T>(), rinput2.numel());
set_zero<<<(output->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(output->data<T>(), output->numel());
auto out_dims = output->dims();
int OC = out_dims[1];
int OH = out_dims[2];
int OW = out_dims[3];
dim3 blocks_grid(N, H, W);
dim3 threads_block(THREADS_PER_BLOCK);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input1->data<T>(), rinput1.data<T>(), C, H, W, pad_size);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input2->data<T>(), rinput2.data<T>(), C, H, W, pad_size);
dim3 threadsPerBlock(THREADS_PER_BLOCK);
dim3 totalBlocksCorr(N, OH, OW);
correlation_forward<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(output->data<T>(), OC, OH, OW, rinput1.data<T>(),
C, H, W, rinput2.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
};
template <typename T>
__global__ void correlation_backward_input1(int item, T *grad_input1, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
int n = item;
int h = blockIdx.x * stride1 + pad_size;
int w = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int xmin = (w - kernel_rad - max_displacement) / stride1;
int ymin = (h - kernel_rad - max_displacement) / stride1;
int xmax = (w + kernel_rad - max_displacement) / stride1;
int ymax = (h + kernel_rad - max_displacement) / stride1;
if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) {
return;
}
if (xmin > xmax || ymin > ymax) {
return;
}
xmin = max(0, xmin);
xmax = min(output_width - 1, xmax);
ymin = max(0, ymin);
ymax = min(output_height - 1, ymax);
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int p_dimchw = input_channel * p_input_height * p_input_width;
int p_dimcw = input_channel * p_input_width;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int o_dimchw = input_channel * input_height * input_width;
int o_dimhw = input_height * input_width;
int o_dimw = input_width;
int nelems = kernel_size * kernel_size * input_channel;
__shared__ T prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) {
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
int index2 = n * p_dimchw + (h + j2) * p_dimcw + (w + i2) * p_dimc + c;
T val2 = rinput2[index2];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i;
prod_sum[tch_off] += grad_output[t_index] * val2;
}
}
}
__syncthreads();
if (tch_off == 0) {
T reduce_sum = 0;
for (int index = 0; index < THREADS_PER_BLOCK; index++) {
reduce_sum += prod_sum[index];
}
const int index1 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size);
grad_input1[index1] = static_cast<T>(reduce_sum / nelems);
}
}
template <typename T>
__global__ void correlation_backward_input2(int item, T *grad_input2, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2){
int n = item;
int h = blockIdx.x * stride1 + pad_size;
int w = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int p_dimchw = input_channel * p_input_height * p_input_width;
int p_dimcw = input_channel * p_input_width;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int o_dimchw = input_channel * input_height * input_width;
int o_dimhw = input_height * input_width;
int o_dimw = input_width;
int nelems = kernel_size * kernel_size * input_channel;
__shared__ T prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) {
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
int xmin = (w - kernel_rad - max_displacement - i2) / stride1;
int ymin = (h - kernel_rad - max_displacement - j2) / stride1;
int xmax = (w + kernel_rad - max_displacement - i2) / stride1;
int ymax = (h + kernel_rad - max_displacement - j2) / stride1;
if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) {
continue;
}
if (xmin > xmax || ymin > ymax) {
continue;
}
xmin = max(0, xmin);
xmax = min(output_width - 1, xmax);
ymin = max(0, ymin);
ymax = min(output_height - 1, ymax);
int index1 = n * p_dimchw + (h - j2) * p_dimcw + (w - i2) * p_dimc + c;
T val1 = rinput1[index1];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i;
prod_sum[tch_off] += grad_output[t_index] * val1;
}
}
}
__syncthreads();
if (tch_off == 0) {
T reduce_sum = 0;
for (int index = 0; index < THREADS_PER_BLOCK; index++) {
reduce_sum += prod_sum[index];
}
const int index2 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size);
grad_input2[index2] = static_cast<T>(reduce_sum / nelems);
}
}
template <typename T>
class CorrelationGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must use CUDAPlace.");
const auto *input1 = ctx.Input<Tensor>("Input1");
const auto *input2 = ctx.Input<Tensor>("Input2");
const auto *grad_output = ctx.Input<Tensor>(framework::GradVarName("Output"));
const int pad_size = ctx.Attr<int>("pad_size");
const int kernel_size = ctx.Attr<int>("kernel_size");
const int stride1 = ctx.Attr<int>("stride1");
const int stride2 = ctx.Attr<int>("stride2");
const int max_displacement = ctx.Attr<int>("max_displacement");
const int corr_type_multiply = ctx.Attr<int>("corr_type_multiply");
auto *grad_input1 = ctx.Output<Tensor>(framework::GradVarName("Input1"));
grad_input1->mutable_data<T>(ctx.GetPlace());
auto *grad_input2 = ctx.Output<Tensor>(framework::GradVarName("Input2"));
grad_input2->mutable_data<T>(ctx.GetPlace());
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
auto in_dims = input1->dims();
int N = in_dims[0];
int C = in_dims[1];
int H = in_dims[2];
int W = in_dims[3];
int padded_input_height = H + 2 * pad_size;
int padded_input_width = W + 2 * pad_size;
Tensor rinput1 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput1.mutable_data<T>(ctx.GetPlace());
Tensor rinput2 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput2.mutable_data<T>(ctx.GetPlace());
set_zero<<<(rinput1.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput1.data<T>(), rinput1.numel());
set_zero<<<(rinput2.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput2.data<T>(), rinput2.numel());
set_zero<<<(grad_input1->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(grad_input1->data<T>(), grad_input1->numel());
set_zero<<<(grad_input2->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(grad_input2->data<T>(), grad_input2->numel());
auto grad_out_dims = grad_output->dims();
int GOC = grad_out_dims[1];
int GOH = grad_out_dims[2];
int GOW = grad_out_dims[3];
dim3 blocks_grid(N, H, W);
dim3 threads_block(THREADS_PER_BLOCK);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input1->data<T>(), rinput1.data<T>(), C, H, W, pad_size);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input2->data<T>(), rinput2.data<T>(), C, H, W, pad_size);
dim3 threadsPerBlock(THREADS_PER_BLOCK);
dim3 totalBlocksCorr(H, W, C);
for (int n = 0; n < N; n++) {
correlation_backward_input1<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(n, grad_input1->data<T>(), C, H, W, grad_output->data<T>(), GOC, GOH, GOW, rinput2.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
for (int n = 0; n < N; n++) {
correlation_backward_input2<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(n, grad_input2->data<T>(), C, H, W, grad_output->data<T>(), GOC, GOH, GOW, rinput1.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
correlation, ops::CorrelationKernel<float>,
ops::CorrelationKernel<double>);
REGISTER_OP_CUDA_KERNEL(
correlation_grad, ops::CorrelationGradKernel<float>,
ops::CorrelationGradKernel<double>);
# source /ssd1/vis/liufanglong/.bashrc
#export PATH=/home/work/cuda-9.0/bin:$PATH
#export PATH=/home/work/cuda-9.0/bin:$PATH
#export LD_LIBRARY_PATH="/home/work/cuda-9.0/lib64:$LD_LIBRARY_PATH"
#export LD_LIBRARY_PATH=/home/vis/chao/local/cudnn_v7.6/cuda/lib64:$LD_LIBRARY_PATH
#export CPLUS_INCLUDE_PATH=/home/vis/chao/local/cudnn_v7.6/cuda/include:/ssd1/vis/liufanglong/local/fluid_1.1.0_for_slurm/nccl_2.3.5/include:$CPLUS_INCLUDE_PATH
#export LD_LIBRARY_PATH=/ssd1/vis/liufanglong/local/fluid_1.1.0_for_slurm/nccl_2.3.5/lib:$LD_LIBRARY_PATH
include_dir=$( python -c 'import paddle; print(paddle.sysconfig.get_include())' )
lib_dir=$( python -c 'import paddle; print(paddle.sysconfig.get_lib())' )
echo $include_dir
echo $lib_dir
OPS='correlation_op'
for op in ${OPS}
do
nvcc ${op}.cu -c -o ${op}.cu.o -ccbin cc -DPADDLE_WITH_CUDA -DEIGEN_USE_GPU -DPADDLE_USE_DSO -DPADDLE_WITH_MKLDNN -Xcompiler -fPIC -std=c++11 -Xcompiler -fPIC -w --expt-relaxed-constexpr -O0 -g -DNVCC \
-I ${include_dir}/third_party/ \
-I ${include_dir}
done
# g++-4.8 correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
# g++ ${OPS}.cu.o ${OPS}.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
g++ correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
-I ${include_dir}/third_party/ \
-I ${include_dir} \
-L ${lib_dir} \
-L /usr/local/cuda/lib64/ -lpaddle_framework -lcudart
# rm *.cu.o
import unittest
from correlation import correlation
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
def corr(x_1,
x_2,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1,
corr_multiply=1):
K = kernel_size
# rinput1 = np.pad(x_1, tuple([pad_size for _ in range(4)]), mode='constant').transpose(1, 2).transpose(2, 3)
# rinput2 = np.pad(x_2, tuple([pad_size for _ in range(4)]), mode='constant').transpose(1, 2).transpose(2, 3)
rinput1 = np.pad(x_1, ((0, 0), (0, 0), (pad_size, pad_size),
(pad_size, pad_size)),
mode='constant')
rinput2 = np.pad(x_2, ((0, 0), (0, 0), (pad_size, pad_size),
(pad_size, pad_size)),
mode='constant')
rinput1 = np.transpose(rinput1, (0, 2, 3, 1))
rinput2 = np.transpose(rinput2, (0, 2, 3, 1))
B = int(rinput1.shape[0])
H = int(x_1.shape[2])
W = int(x_2.shape[3])
d = max_displacement
D = 2 * d + 1
output = np.zeros((B, D * D, H, W), dtype=np.float32)
for b in range(B):
for i in range(H):
for j in range(W):
for k in range(-d, d + 1):
for l in range(-d, d + 1):
x1_index = i + pad_size
y1_index = j + pad_size
x2_index = x1_index + k
y2_index = y1_index + l
output[b, l + d + D * (k + d), i,
j] = np.mean(rinput1[b, x1_index:x1_index + K,
y1_index:y1_index + K] *
rinput2[b, x2_index:x2_index + K,
y2_index:y2_index + K])
return output
class TestCorrelationOp(unittest.TestCase):
def test_check_output(self):
#x_shape = (1, 196, 3, 3)
np.random.seed(13)
np.set_printoptions(threshold=np.inf)
x_shape = (2, 10, 3, 3)
x_type = 'float32'
x1 = fluid.layers.data(name='x1',
shape=x_shape,
dtype=x_type,
append_batch_size=False)
x2 = fluid.layers.data(name='x2',
shape=x_shape,
dtype=x_type,
append_batch_size=False)
x1_np = np.random.randn(2, 3, 4, 5).astype(x_type)
x2_np = np.random.randn(2, 3, 4, 5).astype(x_type)
out_np = corr(x1_np,
x2_np,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
out = correlation(x1,
x2,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
res = exe.run(feed={'x1': x1_np, 'x2': x2_np}, fetch_list=[out.name])
self.assertTrue(np.allclose(res[0], out_np))
class Net(fluid.dygraph.Layer):
def __init__(self, name_scope):
super(Net, self).__init__(name_scope)
def forward(self, x1, x2):
y = correlation(x1,
x2,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
return y
class TestCorrelationOpDyGraph(unittest.TestCase):
def test_check_output(self):
np.random.seed(13)
np.set_printoptions(threshold=np.inf)
x_shape = (2, 10, 3, 3)
x_type = 'float32'
place = fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
x1_np = np.random.randn(2, 3, 4, 5).astype(x_type)
x2_np = np.random.randn(2, 3, 4, 5).astype(x_type)
out_np = corr(x1_np,
x2_np,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
x1 = to_variable(x1_np)
x2 = to_variable(x2_np)
corr_pd = Net('corr_pd')
y = corr_pd(x1, x2)
out = y.numpy()
self.assertTrue(np.allclose(out, out_np))
if __name__ == '__main__':
unittest.main()
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D
__all__ = ['MultipleBasicBlock', 'MultipleBasicBlock_4']
def conv3x3(in_planes, out_planes, dilation=1, stride=1, param_attr=None):
return Conv2D(in_planes,
out_planes,
filter_size=3,
stride=stride,
padding=int(dilation * (3 - 1) / 2),
dilation=dilation,
bias_attr=False,
param_attr=param_attr)
class BasicBlock(fluid.dygraph.Layer):
expansion = 1
def __init__(self, inplanes, planes, dilation=1, stride=1, downsample=None):
super(BasicBlock, self).__init__()
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=1.0, seed=0))
self.conv1 = conv3x3(inplanes, planes, dilation, stride, param_attr)
self.conv2 = conv3x3(planes, planes, param_attr=param_attr)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
# out = self.bn1(out)
out = fluid.layers.relu(out)
out = self.conv2(out)
# out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = fluid.layers.relu(out)
return out
class MultipleBasicBlock(fluid.dygraph.Layer):
def __init__(self,
input_feature,
block,
num_blocks,
intermediate_feature=64,
dense=True):
super(MultipleBasicBlock, self).__init__()
self.dense = dense
self.num_block = num_blocks
self.intermediate_feature = intermediate_feature
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=1.0, seed=0))
self.block1 = Conv2D(input_feature,
intermediate_feature,
filter_size=7,
stride=1,
padding=3,
bias_attr=True,
param_attr=param_attr)
dim = intermediate_feature
self.block2 = block(dim, dim, dilation=1) if num_blocks >= 2 else None
self.block3 = block(dim, dim, dilation=1) if num_blocks >= 3 else None
self.block4 = block(dim, dim, dilation=1) if num_blocks >= 4 else None
self.block5 = Conv2D(dim, 3, 3, 1, 1)
def forward(self, x):
x = fluid.layers.relu(self.block1(x))
x = self.block2(x) if self.num_block >= 2 else x
x = self.block3(x) if self.num_block >= 3 else x
x = self.block4(x) if self.num_block >= 4 else x
x = self.block5(x)
return x
def MultipleBasicBlock_4(input_feature, intermediate_feature=64):
model = MultipleBasicBlock(input_feature, BasicBlock, 4,
intermediate_feature)
return model
cd pwcnet/correlation_op
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python -c 'import paddle; print(paddle.sysconfig.get_lib())'`
export PYTHONPATH=$PYTHONPATH:`pwd`
cd ../../
VID_PATH=/paddle/work/github/DAIN/data/CBA.mp4
OUT_PATH=output
MODEL_PATH=DAIN_paddle_weight
CUDA_VISIBLE_DEVICES=2 python predict.py \
--time_step 0.125 \
--video_path=$VID_PATH \
--output_path=$OUT_PATH \
--saved_model=$MODEL_PATH
import os, sys
import glob
import shutil
import cv2
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def combine_frames(input, interpolated, combined, num_frames):
frames1 = sorted(glob.glob(os.path.join(input, '*.png')))
frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png')))
num1 = len(frames1)
num2 = len(frames2)
# assert (num1 - 1) * num_frames == num2
for i in range(num1):
src = frames1[i]
imgname = int(src.split('/')[-1].split('.')[-2])
assert i == imgname
dst = os.path.join(combined, '{:08d}.png'.format(i * (num_frames + 1)))
shutil.copy2(src, dst)
if i < num1 - 1:
try:
for k in range(num_frames):
src = frames2[i * num_frames + k]
dst = os.path.join(
combined,
'{:08d}.png'.format(i * (num_frames + 1) + k + 1))
shutil.copy2(src, dst)
except Exception as e:
print(e)
print(len(frames2), num_frames, i, k, i * num_frames + k)
def remove_duplicates(paths):
def dhash(image, hash_size=8):
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
resized = cv2.resize(gray, (hash_size + 1, hash_size))
diff = resized[:, 1:] > resized[:, :-1]
return sum([2**i for (i, v) in enumerate(diff.flatten()) if v])
hashes = {}
image_paths = sorted(glob.glob(os.path.join(paths, '*.png')))
for image_path in image_paths:
image = cv2.imread(image_path)
h = dhash(image)
p = hashes.get(h, [])
p.append(image_path)
hashes[h] = p
for (h, hashed_paths) in hashes.items():
if len(hashed_paths) > 1:
for p in hashed_paths[1:]:
os.remove(p)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
for fid, frame in enumerate(frames):
new_name = '{:08d}'.format(fid) + '.png'
new_name = os.path.join(paths, new_name)
os.rename(frame, new_name)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
return frames
import paddle
from skimage import color
import numpy as np
from PIL import Image
def convertLAB2RGB( lab ):
lab[:, :, 0:1] = lab[:, :, 0:1] * 100 # [0, 1] -> [0, 100]
lab[:, :, 1:3] = np.clip(lab[:, :, 1:3] * 255 - 128, -100, 100) # [0, 1] -> [-128, 128]
rgb = color.lab2rgb( lab.astype(np.float64) )
return rgb
def convertRGB2LABTensor( rgb ):
lab = color.rgb2lab( np.asarray( rgb ) ) # RGB -> LAB L[0, 100] a[-127, 128] b[-128, 127]
ab = np.clip(lab[:, :, 1:3] + 128, 0, 255) # AB --> [0, 255]
ab = paddle.to_tensor(ab.astype('float32')) / 255.
L = lab[:, :, 0] * 2.55 # L --> [0, 255]
L = Image.fromarray( np.uint8( L ) )
L = paddle.to_tensor(np.array(L).astype('float32')[..., np.newaxis] / 255.0)
return L, ab
def addMergin(img, target_w, target_h, background_color=(0,0,0)):
width, height = img.size
if width==target_w and height==target_h:
return img
scale = max(target_w,target_h)/max(width, height)
width = int(width*scale/16.)*16
height = int(height*scale/16.)*16
img = img.resize((width, height), Image.BICUBIC)
xp = (target_w-width)//2
yp = (target_h-height)//2
result = Image.new(img.mode, (target_w, target_h), background_color)
result.paste(img, (xp, yp))
return result
import cv2
import numpy as np
def read_img(path, size=None, is_gt=False):
"""read image by cv2
return: Numpy float32, HWC, BGR, [0,1]"""
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
img = img.astype(np.float32) / 255.
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
if img.shape[2] > 3:
img = img[:, :, :3]
return img
def get_test_neighbor_frames(crt_i, N, max_n, padding='new_info'):
"""Generate an index list for reading N frames from a sequence of images
Args:
crt_i (int): current center index
max_n (int): total number of images in the sequence (counted from 1)
N (int): reading N frames
padding (str): padding mode, one of replicate | reflection | new_info | circle
Example: crt_i = 0, N = 5
replicate: [0, 0, 0, 1, 2]
reflection: [2, 1, 0, 1, 2]
new_info: [4, 3, 0, 1, 2]
circle: [3, 4, 0, 1, 2]
Returns:
return_l (list [int]): a list of indexes
"""
max_n = max_n - 1
n_pad = N // 2
return_l = []
for i in range(crt_i - n_pad, crt_i + n_pad + 1):
if i < 0:
if padding == 'replicate':
add_idx = 0
elif padding == 'reflection':
add_idx = -i
elif padding == 'new_info':
add_idx = (crt_i + n_pad) + (-i)
elif padding == 'circle':
add_idx = N + i
else:
raise ValueError('Wrong padding mode')
elif i > max_n:
if padding == 'replicate':
add_idx = max_n
elif padding == 'reflection':
add_idx = max_n * 2 - i
elif padding == 'new_info':
add_idx = (crt_i - n_pad) - (i - max_n)
elif padding == 'circle':
add_idx = i - N
else:
raise ValueError('Wrong padding mode')
else:
add_idx = i
return_l.append(add_idx)
# name_b = '{:08d}'.format(crt_i)
return return_l
class EDVRDataset:
def __init__(self, frame_paths):
self.frames = frame_paths
def __getitem__(self, index):
indexs = get_test_neighbor_frames(index, 5, len(self.frames))
frame_list = []
for i in indexs:
img = read_img(self.frames[i])
frame_list.append(img)
img_LQs = np.stack(frame_list, axis=0)
# BGR to RGB, HWC to CHW, numpy to tensor
img_LQs = img_LQs[:, :, :, [2, 1, 0]]
img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
return img_LQs, self.frames[index]
def __len__(self):
return len(self.frames)
# examples of running programs:
# bash ./run.sh inference EDVR ./configs/edvr_L.yaml
# bash ./run.sh predict EDVR ./configs/edvr_L.yaml
# configs should be ./configs/xxx.yaml
mode=$1
name=$2
configs=$3
save_inference_dir="./data/inference_model"
use_gpu=True
fix_random_seed=False
log_interval=1
valid_interval=1
weights="./weights/paddle_state_dict_L.npz"
export CUDA_VISIBLE_DEVICES=6 #0,1,5,6 fast, 2,3,4,7 slow
# export FLAGS_fast_eager_deletion_mode=1
# export FLAGS_eager_delete_tensor_gb=0.0
# export FLAGS_fraction_of_gpu_memory_to_use=0.98
if [ "$mode"x == "predict"x ]; then
echo $mode $name $configs $weights
if [ "$weights"x != ""x ]; then
python predict.py --model_name=$name \
--config=$configs \
--log_interval=$log_interval \
--video_path='' \
--use_gpu=$use_gpu
else
python predict.py --model_name=$name \
--config=$configs \
--log_interval=$log_interval \
--use_gpu=$use_gpu \
--video_path=''
fi
fi
dataset_params:
root_dir: data/vox-png
frame_shape: [256, 256, 3]
id_sampling: True
pairs_list: data/vox256.csv
augmentation_params:
flip_param:
horizontal_flip: True
time_flip: True
jitter_param:
brightness: 0.1
contrast: 0.1
saturation: 0.1
hue: 0.1
model_params:
common_params:
num_kp: 10
num_channels: 3
estimate_jacobian: True
kp_detector_params:
temperature: 0.1
block_expansion: 32
max_features: 1024
scale_factor: 0.25
num_blocks: 5
generator_params:
block_expansion: 64
max_features: 512
num_down_blocks: 2
num_bottleneck_blocks: 6
estimate_occlusion_map: True
dense_motion_params:
block_expansion: 64
max_features: 1024
num_blocks: 5
scale_factor: 0.25
discriminator_params:
scales: [1]
block_expansion: 32
max_features: 512
num_blocks: 4
sn: True
train_params:
num_epochs: 100
num_repeats: 75
epoch_milestones: [60, 90]
lr_generator: 2.0e-4
lr_discriminator: 2.0e-4
lr_kp_detector: 2.0e-4
batch_size: 40
scales: [1, 0.5, 0.25, 0.125]
checkpoint_freq: 50
transform_params:
sigma_affine: 0.05
sigma_tps: 0.005
points_tps: 5
loss_weights:
generator_gan: 0
discriminator_gan: 1
feature_matching: [10, 10, 10, 10]
perceptual: [10, 10, 10, 10, 10]
equivariance_value: 10
equivariance_jacobian: 10
reconstruction_params:
num_videos: 1000
format: '.mp4'
animate_params:
num_pairs: 50
format: '.mp4'
normalization_params:
adapt_movement_scale: False
use_relative_movement: True
use_relative_jacobian: True
visualizer_params:
kp_size: 5
draw_border: True
colormap: 'gist_rainbow'
# Model description
# Currently supported models: DAIN (frame interpolation), DeOldify (colorization), DeepRemaster (denoising and colorization), EDVR (video super-resolution based on consecutive frames), RealSR (single-image super-resolution)
# Parameter description
# input           path of the input video
# output          directory where the output video is saved
# proccess_order  models to apply and the order in which they run
python tools/video-enhance.py \
--input input.mp4 --output output --proccess_order DeOldify RealSR
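# For example (a hypothetical variant, assuming the DAIN weights are also installed),
# frame interpolation can be chained before colorization in a single pass:
python tools/video-enhance.py \
--input input.mp4 --output output --proccess_order DAIN DeOldify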
import matplotlib
matplotlib.use('Agg')
import os
import sys
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import argparse
import yaml
import pickle
from argparse import ArgumentParser
from tqdm import tqdm
import imageio
import numpy as np
from skimage.transform import resize
from skimage import img_as_ubyte
import paddle
from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator
from ppgan.modules.keypoint_detector import KPDetector
from ppgan.utils.animate import normalize_kp
from scipy.spatial import ConvexHull
paddle.disable_static()
if sys.version_info[0] < 3:
raise Exception(
"You must use Python 3 or higher. Recommended version is Python 3.7")
def load_checkpoints(config_path, checkpoint_path, cpu=False):
with open(config_path) as f:
config = yaml.load(f)
generator = OcclusionAwareGenerator(
**config['model_params']['generator_params'],
**config['model_params']['common_params'])
kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
**config['model_params']['common_params'])
checkpoint = pickle.load(open(checkpoint_path, 'rb'))
generator.set_state_dict(checkpoint['generator'])
kp_detector.set_state_dict(checkpoint['kp_detector'])
generator.eval()
kp_detector.eval()
return generator, kp_detector
def make_animation(source_image,
driving_video,
generator,
kp_detector,
relative=True,
adapt_movement_scale=True,
cpu=False):
with paddle.no_grad():
predictions = []
source = paddle.to_tensor(source_image[np.newaxis].astype(
np.float32)).transpose([0, 3, 1, 2])
# if not cpu:
# source = source.cuda()
driving = paddle.to_tensor(
np.array(driving_video)[np.newaxis].astype(np.float32)).transpose(
[0, 4, 1, 2, 3])
kp_source = kp_detector(source)
kp_driving_initial = kp_detector(driving[:, :, 0])
for frame_idx in tqdm(range(driving.shape[2])):
driving_frame = driving[:, :, frame_idx]
kp_driving = kp_detector(driving_frame)
kp_norm = normalize_kp(kp_source=kp_source,
kp_driving=kp_driving,
kp_driving_initial=kp_driving_initial,
use_relative_movement=relative,
use_relative_jacobian=relative,
adapt_movement_scale=adapt_movement_scale)
out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
predictions.append(
np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0])
return predictions
def find_best_frame(source, driving, cpu=False):
import face_alignment
def normalize_kp(kp):
kp = kp - kp.mean(axis=0, keepdims=True)
area = ConvexHull(kp[:, :2]).volume
area = np.sqrt(area)
kp[:, :2] = kp[:, :2] / area
return kp
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
flip_input=True,
device='cpu' if cpu else 'cuda')
kp_source = fa.get_landmarks(255 * source)[0]
kp_source = normalize_kp(kp_source)
norm = float('inf')
frame_num = 0
for i, image in tqdm(enumerate(driving)):
kp_driving = fa.get_landmarks(255 * image)[0]
kp_driving = normalize_kp(kp_driving)
new_norm = (np.abs(kp_source - kp_driving)**2).sum()
if new_norm < norm:
norm = new_norm
frame_num = i
return frame_num
from ppgan.apps.first_order_predictor import FirstOrderPredictor
parser = argparse.ArgumentParser()
parser.add_argument("--config", default=None, help="path to config")
parser.add_argument("--weight_path",
default=None,
help="path to checkpoint to restore")
parser.add_argument("--source_image", type=str, help="path to source image")
parser.add_argument("--driving_video", type=str, help="path to driving video")
parser.add_argument("--output", default='output', help="path to output")
parser.add_argument("--relative",
dest="relative",
action="store_true",
help="use relative or absolute keypoint coordinates")
parser.add_argument(
"--adapt_scale",
dest="adapt_scale",
action="store_true",
help="adapt movement scale based on convex hull of keypoints")
parser.add_argument(
"--find_best_frame",
dest="find_best_frame",
action="store_true",
help=
"Generate from the frame that is the most alligned with source. (Only for faces, requires face_aligment lib)"
)
parser.add_argument("--best_frame",
dest="best_frame",
type=int,
default=None,
help="Set frame to start from.")
parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")
parser.set_defaults(relative=False)
parser.set_defaults(adapt_scale=False)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--config", required=True, help="path to config")
parser.add_argument("--checkpoint",
default='vox-cpk.pth.tar',
help="path to checkpoint to restore")
parser.add_argument("--source_image",
default='sup-mat/source.png',
help="path to source image")
parser.add_argument("--driving_video",
default='sup-mat/source.png',
help="path to driving video")
parser.add_argument("--result_video",
default='result.mp4',
help="path to output")
parser.add_argument("--relative",
dest="relative",
action="store_true",
help="use relative or absolute keypoint coordinates")
parser.add_argument(
"--adapt_scale",
dest="adapt_scale",
action="store_true",
help="adapt movement scale based on convex hull of keypoints")
parser.add_argument(
"--find_best_frame",
dest="find_best_frame",
action="store_true",
help=
"Generate from the frame that is the most alligned with source. (Only for faces, requires face_aligment lib)"
)
parser.add_argument("--best_frame",
dest="best_frame",
type=int,
default=None,
help="Set frame to start from.")
parser.add_argument("--cpu",
dest="cpu",
action="store_true",
help="cpu mode.")
parser.set_defaults(relative=False)
parser.set_defaults(adapt_scale=False)
opt = parser.parse_args()
source_image = imageio.imread(opt.source_image)
reader = imageio.get_reader(opt.driving_video)
fps = reader.get_meta_data()['fps']
driving_video = []
try:
for im in reader:
driving_video.append(im)
except RuntimeError:
pass
reader.close()
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [
resize(frame, (256, 256))[..., :3] for frame in driving_video
]
generator, kp_detector = load_checkpoints(config_path=opt.config,
checkpoint_path=opt.checkpoint,
cpu=opt.cpu)
if opt.find_best_frame or opt.best_frame is not None:
i = opt.best_frame if opt.best_frame is not None else find_best_frame(
source_image, driving_video, cpu=opt.cpu)
print("Best frame: " + str(i))
driving_forward = driving_video[i:]
driving_backward = driving_video[:(i + 1)][::-1]
predictions_forward = make_animation(
source_image,
driving_forward,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
predictions_backward = make_animation(
source_image,
driving_backward,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
predictions = predictions_backward[::-1] + predictions_forward[1:]
else:
predictions = make_animation(source_image,
driving_video,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
imageio.mimsave(opt.result_video,
[img_as_ubyte(frame) for frame in predictions],
fps=fps)
args = parser.parse_args()
if args.cpu:
paddle.set_device('cpu')
predictor = FirstOrderPredictor(output=args.output,
weight_path=args.weight_path,
config=args.config,
relative=args.relative,
adapt_scale=args.adapt_scale,
find_best_frame=args.find_best_frame,
best_frame=args.best_frame)
predictor.run(args.source_image, args.driving_video)
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import argparse
from pathlib import Path
from PIL import Image
from fire import Fire
import cv2
import numpy as np
import paddle
import paddle.vision.transforms as T
import ppgan.faceutils as futils
from ppgan.utils.options import parse_args
from ppgan.utils.config import get_config
from ppgan.utils.setup import setup
from ppgan.utils.filesystem import load
from ppgan.engine.trainer import Trainer
from ppgan.models.builder import build_model
from ppgan.utils.preprocess import *
def toImage(net_output):
img = net_output.squeeze(0).transpose(
(1, 2, 0)).numpy() # [1,c,h,w]->[h,w,c]
img = (img * 255.0).clip(0, 255)
img = np.uint8(img)
img = Image.fromarray(img, mode='RGB')
return img
def mask2image(mask: np.ndarray, format="HWC"):
H, W = mask.shape
canvas = np.zeros((H, W, 3), dtype=np.uint8)
for i in range(int(mask.max())):
color = np.random.rand(1, 1, 3) * 255
canvas += (mask == i)[:, :, None] * color.astype(np.uint8)
return canvas
class PreProcess:
def __init__(self, config, need_parser=True):
self.img_size = 256
self.transform = transform = T.Compose([
T.Resize(size=256),
T.Permute(to_rgb=False),
])
self.norm = T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])
if need_parser:
self.face_parser = futils.mask.FaceParser()
self.up_ratio = 0.6 / 0.85
self.down_ratio = 0.2 / 0.85
self.width_ratio = 0.2 / 0.85
def __call__(self, image):
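# Preprocess one face image for makeup transfer: detect and crop the face
# with dlib, parse a 512x512 face-segmentation mask, compute landmarks scaled
# to img_size, and build the landmark position map P and augmented masks that
# the makeup generator consumes.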
face = futils.dlib.detect(image)
if not face:
return
face_on_image = face[0]
image, face, crop_face = futils.dlib.crop(image, face_on_image,
self.up_ratio,
self.down_ratio,
self.width_ratio)
np_image = np.array(image)
mask = self.face_parser.parse(
np.float32(cv2.resize(np_image, (512, 512))))
mask = cv2.resize(mask.numpy(), (self.img_size, self.img_size),
interpolation=cv2.INTER_NEAREST)
mask = mask.astype(np.uint8)
mask_color = mask2image(mask)
cv2.imwrite('mask_temp.png', mask_color)
mask_tensor = paddle.to_tensor(mask)
lms = futils.dlib.landmarks(image, face) * self.img_size / image.width
lms = lms.round()
P_np = generate_P_from_lmks(lms, self.img_size, self.img_size,
self.img_size)
mask_aug = generate_mask_aug(mask, lms)
image = self.transform(np_image)
return [
self.norm(image),
np.float32(mask_aug),
np.float32(P_np),
np.float32(mask)
], face_on_image, crop_face
class PostProcess:
def __init__(self, config):
self.denoise = True
self.img_size = 256
def __call__(self, source: Image, result: Image):
# TODO: Refactor -> name, resize
source = np.array(source)
result = np.array(result)
height, width = source.shape[:2]
small_source = cv2.resize(source, (self.img_size, self.img_size))
laplacian_diff = source.astype(np.float64) - cv2.resize(
small_source, (width, height)).astype(np.float64)
result = (cv2.resize(result,
(width, height)) + laplacian_diff).round().clip(
0, 255).astype(np.uint8)
if self.denoise:
result = cv2.fastNlMeansDenoisingColored(result)
result = Image.fromarray(result).convert('RGB')
return result
class Inference:
def __init__(self, config, model_path=''):
self.model = build_model(config)
self.preprocess = PreProcess(config)
self.model_path = model_path
def transfer(self, source, reference, with_face=False):
source_input, face, crop_face = self.preprocess(source)
reference_input, face, crop_face = self.preprocess(reference)
consis_mask = np.float32(
calculate_consis_mask(source_input[1], reference_input[1]))
consis_mask = paddle.to_tensor(np.expand_dims(consis_mask, 0))
if not (source_input and reference_input):
if with_face:
return None, None
return
for i in range(len(source_input) - 1):
source_input[i] = paddle.to_tensor(
np.expand_dims(source_input[i], 0))
for i in range(len(reference_input) - 1):
reference_input[i] = paddle.to_tensor(
np.expand_dims(reference_input[i], 0))
input_data = {
'image_A': source_input[0],
'image_B': reference_input[0],
'mask_A_aug': source_input[1],
'mask_B_aug': reference_input[1],
'P_A': source_input[2],
'P_B': reference_input[2],
'consis_mask': consis_mask
}
state_dicts = load(self.model_path)
net = getattr(self.model, 'netG')
net.set_dict(state_dicts['netG'])
result, _ = self.model.test(input_data)
print('result shape: ', result.shape)
min_, max_ = result.min(), result.max()
result += -min_
result = paddle.divide(result, max_ - min_ + 1e-5)
img = toImage(result)
if with_face:
return img, crop_face
img.save('before.png')
return img
def main(args, cfg, save_path='transferred_image.png'):
setup(args, cfg)
inference = Inference(cfg, args.model_path)
postprocess = PostProcess(cfg)
source = Image.open(args.source_path).convert("RGB")
reference_paths = list(Path(args.reference_dir).glob("*"))
np.random.shuffle(reference_paths)
for reference_path in reference_paths:
if not reference_path.is_file():
print(reference_path, "is not a valid file.")
continue
reference = Image.open(reference_path).convert("RGB")
# Transfer the psgan from reference to source.
image, face = inference.transfer(source, reference, with_face=True)
image.save('before.png')
source_crop = source.crop(
(face.left(), face.top(), face.right(), face.bottom()))
image = postprocess(source_crop, image)
image.save(save_path)
if __name__ == '__main__':
args = parse_args()
cfg = get_config(args.config_file)
main(args, cfg)
import sys
sys.path.append('.')
import argparse
import paddle
from DAIN.predict import VideoFrameInterp
from DeepRemaster.predict import DeepReasterPredictor
from DeOldify.predict import DeOldifyPredictor
from RealSR.predict import RealSRPredictor
from EDVR.predict import EDVRPredictor
parser = argparse.ArgumentParser(description='Fix video')
parser.add_argument('--input', type=str, default=None, help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--DAIN_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--DeepRemaster_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--DeOldify_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--RealSR_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--EDVR_weight',
type=str,
default=None,
help='Path to model weight')
# DAIN args
parser.add_argument('--time_step',
type=float,
default=0.5,
help='choose the time steps')
# DeepRemaster args
parser.add_argument('--reference_dir',
type=str,
default=None,
help='Path to the reference image directory')
parser.add_argument('--colorization',
action='store_true',
default=False,
help='Remaster with colorization')
parser.add_argument('--mindim',
type=int,
default=360,
help='Length of minimum image edges')
# DeOldify args
parser.add_argument('--render_factor',
type=int,
default=32,
help='model inputsize=render_factor*16')
# proccess_order supports the following model names: [DAIN, DeepRemaster, DeOldify, RealSR, EDVR]
parser.add_argument('--proccess_order',
type=str,
default='none',
nargs='+',
help='Process order')
if __name__ == "__main__":
args = parser.parse_args()
orders = args.proccess_order
temp_video_path = None
for order in orders:
print('Model {} process start...'.format(order))
if temp_video_path is None:
temp_video_path = args.input
if order == 'DAIN':
predictor = VideoFrameInterp(args.time_step,
args.DAIN_weight,
temp_video_path,
output_path=args.output)
frames_path, temp_video_path = predictor.run()
elif order == 'DeepRemaster':
paddle.disable_static()
predictor = DeepReasterPredictor(
temp_video_path,
args.output,
weight_path=args.DeepRemaster_weight,
colorization=args.colorization,
reference_dir=args.reference_dir,
mindim=args.mindim)
frames_path, temp_video_path = predictor.run()
paddle.enable_static()
elif order == 'DeOldify':
paddle.disable_static()
predictor = DeOldifyPredictor(temp_video_path,
args.output,
weight_path=args.DeOldify_weight)
frames_path, temp_video_path = predictor.run()
paddle.enable_static()
elif order == 'RealSR':
paddle.disable_static()
predictor = RealSRPredictor(temp_video_path,
args.output,
weight_path=args.RealSR_weight)
frames_path, temp_video_path = predictor.run()
paddle.enable_static()
elif order == 'EDVR':
predictor = EDVRPredictor(temp_video_path,
args.output,
weight_path=args.EDVR_weight)
frames_path, temp_video_path = predictor.run()
print('Model {} output frames path:'.format(order), frames_path)
print('Model {} output video path:'.format(order), temp_video_path)
print('Model {} process done!'.format(order))
......@@ -36,16 +36,18 @@ dataset:
output_nc: 3
serial_batches: False
pool_size: 50
transform:
load_size: 286
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [286, 286]
interpolation: 2 #cv2.INTER_CUBIC
- name: RandomCrop
output_size: [256, 256]
- name: RandomHorizontalFlip
prob: 0.5
- name: Permute
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
test:
name: SingleDataset
dataroot: data/cityscapes/testB
......@@ -55,17 +57,14 @@ dataset:
output_nc: 3
serial_batches: False
pool_size: 50
transform:
load_size: 256
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [256, 256]
interpolation: 2 #cv2.INTER_CUBIC
- name: Permute
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
optimizer:
name: Adam
......
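# The `transforms` entries above are a declarative form of an ordinary
# composed preprocessing pipeline. A rough equivalent in plain
# paddle.vision.transforms is sketched below (a sketch only: exact class
# names and the interpolation argument vary across Paddle versions, and
# `Transpose` stands in for the step the config calls `Permute`).
import paddle.vision.transforms as T

# Training pipeline: resize to 286, random-crop to 256, random flip,
# HWC -> CHW, then scale to roughly [-1, 1] via mean/std of 127.5.
train_transform = T.Compose([
    T.Resize(size=(286, 286)),
    T.RandomCrop(size=(256, 256)),
    T.RandomHorizontalFlip(prob=0.5),
    T.Transpose(),  # HWC -> CHW
    T.Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5]),
])

# Test pipeline: deterministic resize, permute and normalize only.
test_transform = T.Compose([
    T.Resize(size=(256, 256)),
    T.Transpose(),
    T.Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5]),
])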
......@@ -35,16 +35,18 @@ dataset:
output_nc: 3
serial_batches: False
pool_size: 50
transform:
load_size: 286
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [286, 286]
interpolation: 2 #cv2.INTER_CUBIC
- name: RandomCrop
output_size: [256, 256]
- name: RandomHorizontalFlip
prob: 0.5
- name: Permute
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
test:
name: SingleDataset
dataroot: data/horse2zebra/testA
......@@ -55,15 +57,14 @@ dataset:
serial_batches: False
pool_size: 50
transform:
load_size: 256
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [256, 256]
interpolation: 2 #cv2.INTER_CUBIC
- name: Permute
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
optimizer:
name: Adam
......
epochs: 100
isTrain: True
output_dir: tmp
checkpoints_dir: checkpoints
lambda_A: 10.0
lambda_B: 10.0
lambda_identity: 0.5
model:
name: MakeupModel
generator:
name: GeneratorPSGANAttention
conv_dim: 64
repeat_num: 6
discriminator:
name: NLayerDiscriminator
ndf: 64
n_layers: 3
input_nc: 3
norm_type: spectral
gan_mode: lsgan
dataset:
train:
name: MakeupDataset
trans_size: 256
dataroot: MT-Dataset
cls_list: [non-makeup, makeup]
phase: train
pool_size: 16
test:
name: MakeupDataset
trans_size: 256
dataroot: MT-Dataset
cls_list: [non-makeup, makeup]
phase: test
pool_size: 16
optimizer:
name: Adam
beta1: 0.5
lr_scheduler:
name: linear
learning_rate: 0.0002
start_epoch: 100
decay_epochs: 100
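# With a `linear` scheduler these settings typically mean: hold the learning
# rate at 0.0002 for the first 100 epochs, then decay it linearly towards
# zero over the following 100 epochs.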
log_config:
interval: 10
visiual_interval: 500
snapshot_config:
interval: 1
......@@ -33,16 +33,23 @@ dataset:
output_nc: 3
serial_batches: False
pool_size: 0
transform:
load_size: 286
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [286, 286]
interpolation: 2 #cv2.INTER_CUBIC
keys: [image, image]
- name: PairedRandomCrop
output_size: [256, 256]
keys: [image, image]
- name: PairedRandomHorizontalFlip
prob: 0.5
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
test:
name: PairedDataset
dataroot: data/cityscapes/
......@@ -53,16 +60,18 @@ dataset:
output_nc: 3
serial_batches: True
pool_size: 50
transform:
load_size: 256
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [256, 256]
interpolation: 2 #cv2.INTER_CUBIC
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
optimizer:
name: Adam
......
......@@ -32,16 +32,23 @@ dataset:
output_nc: 3
serial_batches: False
pool_size: 0
transform:
load_size: 286
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [286, 286]
interpolation: 2 #cv2.INTER_CUBIC
keys: [image, image]
- name: PairedRandomCrop
output_size: [256, 256]
keys: [image, image]
- name: PairedRandomHorizontalFlip
prob: 0.5
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
test:
name: PairedDataset
dataroot: data/cityscapes/
......@@ -52,16 +59,17 @@ dataset:
output_nc: 3
serial_batches: True
pool_size: 50
transform:
load_size: 256
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [256, 256]
interpolation: 2 #cv2.INTER_CUBIC
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
optimizer:
name: Adam
......
......@@ -32,16 +32,23 @@ dataset:
output_nc: 3
serial_batches: False
pool_size: 0
transform:
load_size: 286
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [286, 286]
interpolation: 2 #cv2.INTER_CUBIC
keys: [image, image]
- name: PairedRandomCrop
output_size: [256, 256]
keys: [image, image]
- name: PairedRandomHorizontalFlip
prob: 0.5
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
test:
name: PairedDataset
dataroot: data/facades/
......@@ -52,16 +59,17 @@ dataset:
output_nc: 3
serial_batches: True
pool_size: 50
transform:
load_size: 256
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [256, 256]
interpolation: 2 #cv2.INTER_CUBIC
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
optimizer:
name: Adam
......
from .dain_predictor import DAINPredictor
from .deepremaster_predictor import DeepRemasterPredictor
from .deoldify_predictor import DeOldifyPredictor
from .realsr_predictor import RealSRPredictor
from .edvr_predictor import EDVRPredictor
from .first_order_predictor import FirstOrderPredictor
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import paddle
class BasePredictor(object):
def __init__(self):
pass
def build_inference_model(self):
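# In dynamic-graph mode the subclass is expected to build its own dygraph
# model; in static-graph mode this loads an exported inference model
# (program, feed names and fetch targets) from self.weight_path, picking the
# model/param files by name.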
if paddle.in_dynamic_mode():
# todo self.model = build_model(self.cfg)
pass
else:
place = paddle.fluid.framework._current_expected_place()
self.exe = paddle.fluid.Executor(place)
file_names = os.listdir(self.weight_path)
for file_name in file_names:
if file_name.find('model') > -1:
model_file = file_name
elif file_name.find('param') > -1:
param_file = file_name
self.program, self.feed_names, self.fetch_targets = paddle.static.load_inference_model(
dirname=self.weight_path,
executor=self.exe,
model_filename=model_file,
params_filename=param_file)
print(self.feed_names)
def base_forward(self, inputs):
if paddle.in_dynamic_mode():
out = self.model(inputs)
else:
feed_dict = {}
if isinstance(inputs, dict):
feed_dict = inputs
elif isinstance(inputs, (list, tuple)):
for i, feed_name in enumerate(self.feed_names):
feed_dict[feed_name] = inputs[i]
else:
feed_dict[self.feed_names[0]] = inputs
out = self.exe.run(self.program,
fetch_list=self.fetch_targets,
feed=feed_dict)
return out
def is_video(self, input):
try:
cv2.VideoCapture(input)
return True
except:
return False
def run(self):
raise NotImplementedError
import os, sys
import math
import random
import time
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import glob
import shutil
import numpy as np
from tqdm import tqdm
from imageio import imread, imsave
import cv2
import paddle
import paddle.fluid as fluid
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import video2frames, frames2video
from .base_predictor import BasePredictor
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
class DAINPredictor(BasePredictor):
def __init__(self,
output_path='output',
weight_path=None,
time_step=None,
use_gpu=True,
key_frame_thread=0.,
remove_duplicates=False):
self.output_path = os.path.join(output_path, 'DAIN')
if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)
self.weight_path = weight_path
self.time_step = time_step
self.key_frame_thread = key_frame_thread
self.remove_duplicates = remove_duplicates
self.build_inference_model()
def run(self, video_path):
frame_path_input = os.path.join(self.output_path, 'frames-input')
frame_path_interpolated = os.path.join(self.output_path,
'frames-interpolated')
frame_path_combined = os.path.join(self.output_path, 'frames-combined')
video_path_output = os.path.join(self.output_path, 'videos-output')
if not os.path.exists(self.output_path):
os.makedirs(self.output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
timestep = self.time_step
num_frames = int(1.0 / timestep) - 1
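# e.g. time_step=0.5 -> 1 interpolated frame per input pair (2x frame rate),
# time_step=0.25 -> 3 interpolated frames per pair (4x frame rate).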
import networks
from util import *
from my_args import args
if __name__ == '__main__':
DO_MiddleBurryOther = True
video_path = args.video_path
output_path = args.output_path
frame_path_input = os.path.join(output_path, 'frames-input')
frame_path_interpolated = os.path.join(output_path, 'frames-interpolated')
frame_path_combined = os.path.join(output_path, 'frames-combined')
video_path_input = os.path.join(output_path, 'videos-input')
video_path_output = os.path.join(output_path, 'videos-output')
if not os.path.exists(output_path):
os.makedirs(output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_input):
os.makedirs(video_path_input)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
args.KEY_FRAME_THREAD = 0.
saved_model = args.saved_model
timestep = args.time_step
num_frames = int(1.0 / timestep) - 1
image = fluid.data(name='image',
shape=[2, 1, args.channels, -1, -1],
dtype='float32')
DAIN = networks.__dict__["DAIN_slowmotion"](channel=args.channels,
filter_size=args.filter_size,
timestep=args.time_step,
training=False)
out = DAIN(image)
out = out[0][1]
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
fetch_list = [out.name]
inference_program = fluid.default_main_program().clone(for_test=True)
inference_program = fluid.io.load_persistables(exe, saved_model,
inference_program)
if not DO_MiddleBurryOther:
sys.exit()
if video_path.endswith('.mp4'):
videos = [video_path]
else:
videos = sorted(glob.glob(os.path.join(video_path, '*.mp4')))
for cnt, vid in enumerate(videos):
print("Interpolating video:", vid)
cap = cv2.VideoCapture(vid)
cap = cv2.VideoCapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
print("Old fps (frame rate): ", fps)
timestep = args.time_step
times_interp = int(1.0 / timestep)
r2 = str(int(fps) * times_interp)
print("New fps (frame rate): ", r2)
# set start and end of video
#ss = 0
#t = 10
#ss = time.strftime('%H:%M:%S', time.gmtime(ss))
#t = time.strftime('%H:%M:%S', time.gmtime(t))
#print(r, ss, t)
r = None
ss = None
t = None
out_path = video2frames(video_path, frame_path_input)
out_path = dump_frames_ffmpeg(vid, frame_path_input, r, ss, t)
vidname = vid.split('/')[-1].split('.')[0]
tot_timer = AverageMeter()
proc_timer = AverageMeter()
end = time.time()
vidname = video_path.split('/')[-1].split('.')[0]
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
orig_frames = len(frames)
need_frames = orig_frames * times_interp
if self.remove_duplicates:
frames = self.remove_duplicate_frames(out_path)
left_frames = len(frames)
timestep = left_frames / need_frames
num_frames = int(1.0 / timestep) - 1
img = imread(frames[0])
......@@ -110,7 +99,7 @@ if __name__ == '__main__':
int_height = img.shape[0]
channel = img.shape[2]
if not channel == 3:
continue
return
if int_width != ((int_width >> 7) << 7):
int_width_pad = (((int_width >> 7) + 1) << 7) # more than necessary
......@@ -132,16 +121,13 @@ if __name__ == '__main__':
padding_bottom = 32
frame_num = len(frames)
print(os.path.join(frame_path_input, vidname, '*.png'))
print('processing {} frames, from video: {}'.format(frame_num, vid))
if not os.path.exists(os.path.join(frame_path_interpolated, vidname)):
os.makedirs(os.path.join(frame_path_interpolated, vidname))
if not os.path.exists(os.path.join(frame_path_combined, vidname)):
os.makedirs(os.path.join(frame_path_combined, vidname))
for i in range(frame_num - 1):
print(frames[i])
for i in tqdm(range(frame_num - 1)):
first = frames[i]
second = frames[i + 1]
......@@ -155,79 +141,116 @@ if __name__ == '__main__':
img_second_gray = img_second_gray.flatten(order='C')
corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1]
key_frame = False
if corr < args.KEY_FRAME_THREAD:
if corr < self.key_frame_thread:
key_frame = True
'''-------------------------------------------------------'''
X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
if key_frame:
y_ = [
np.transpose(255.0 * X0.clip(0, 1.0), (1, 2, 0))
for i in range(num_frames)
]
else:
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
print("size before padding ", X0.shape)
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
print("size after padding ", X0.shape)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
proc_end = time.time()
o = exe.run(inference_program,
fetch_list=fetch_list,
feed={"image": X})
y_ = o[0]
proc_timer.update(time.time() - proc_end)
tot_timer.update(time.time() - end)
end = time.time()
print("*******current image process time \t " +
str(time.time() - proc_end) + "s ******")
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>4d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
timestep = args.time_step
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
o = self.base_forward(X)
y_ = o[0]
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [kk * timestep for kk in range(1, 1 + num_frames, 1)]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(frame_path_interpolated, vidname,
"{:0>6d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
num_frames = int(1.0 / timestep) - 1
input_dir = os.path.join(frame_path_input, vidname)
interpolated_dir = os.path.join(frame_path_interpolated, vidname)
combined_dir = os.path.join(frame_path_combined, vidname)
combine_frames(input_dir, interpolated_dir, combined_dir, num_frames)
self.combine_frames(input_dir, interpolated_dir, combined_dir,
num_frames)
frame_pattern_combined = os.path.join(frame_path_combined, vidname,
'%08d.png')
video_pattern_output = os.path.join(video_path_output, vidname + '.mp4')
if os.path.exists(video_pattern_output):
os.remove(video_pattern_output)
frames_to_video_ffmpeg(frame_pattern_combined, video_pattern_output, r2)
frames2video(frame_pattern_combined, video_pattern_output, r2)
return frame_pattern_combined, video_pattern_output
def combine_frames(self, input, interpolated, combined, num_frames):
frames1 = sorted(glob.glob(os.path.join(input, '*.png')))
frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png')))
num1 = len(frames1)
num2 = len(frames2)
for i in range(num1):
src = frames1[i]
imgname = int(src.split('/')[-1].split('.')[-2])
assert i == imgname
dst = os.path.join(combined,
'{:08d}.png'.format(i * (num_frames + 1)))
shutil.copy2(src, dst)
if i < num1 - 1:
try:
for k in range(num_frames):
src = frames2[i * num_frames + k]
dst = os.path.join(
combined,
'{:08d}.png'.format(i * (num_frames + 1) + k + 1))
shutil.copy2(src, dst)
except Exception as e:
print(e)
def remove_duplicate_frames(self, paths):
def dhash(image, hash_size=8):
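# Difference hash: shrink to a (hash_size+1) x hash_size grayscale image,
# compare each pixel with its horizontal neighbour, and pack the resulting
# booleans into an integer; frames sharing a hash are treated as duplicates.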
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
resized = cv2.resize(gray, (hash_size + 1, hash_size))
diff = resized[:, 1:] > resized[:, :-1]
return sum([2**i for (i, v) in enumerate(diff.flatten()) if v])
hashes = {}
image_paths = sorted(glob.glob(os.path.join(paths, '*.png')))
for image_path in image_paths:
image = cv2.imread(image_path)
h = dhash(image)
p = hashes.get(h, [])
p.append(image_path)
hashes[h] = p
for (h, hashed_paths) in hashes.items():
if len(hashed_paths) > 1:
for p in hashed_paths[1:]:
os.remove(p)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
for fid, frame in enumerate(frames):
new_name = '{:08d}'.format(fid) + '.png'
new_name = os.path.join(paths, new_name)
os.rename(frame, new_name)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
return frames
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import paddle
import paddle.nn as nn
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
from PIL import Image
import subprocess
import numpy as np
from tqdm import tqdm
import argparse
import subprocess
import utils
from PIL import Image
from skimage import color
import paddle
from ppgan.models.generators.remaster import NetworkR, NetworkC
from paddle.utils.download import get_path_from_url
from .base_predictor import BasePredictor
DEEPREMASTER_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams'
parser = argparse.ArgumentParser(description='Remastering')
parser.add_argument('--input', type=str, default=None, help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--reference_dir',
type=str,
default=None,
help='Path to the reference image directory')
parser.add_argument('--colorization',
action='store_true',
default=False,
help='Remaster with colorization')
parser.add_argument('--mindim',
type=int,
default=360,
help='Length of minimum image edges')
class DeepReasterPredictor:
def convertLAB2RGB(lab):
lab[:, :, 0:1] = lab[:, :, 0:1] * 100 # [0, 1] -> [0, 100]
lab[:, :, 1:3] = np.clip(lab[:, :, 1:3] * 255 - 128, -100,
100) # [0, 1] -> [-128, 128]
rgb = color.lab2rgb(lab.astype(np.float64))
return rgb
def convertRGB2LABTensor(rgb):
lab = color.rgb2lab(
np.asarray(rgb)) # RGB -> LAB L[0, 100] a[-127, 128] b[-128, 127]
ab = np.clip(lab[:, :, 1:3] + 128, 0, 255) # AB --> [0, 255]
ab = paddle.to_tensor(ab.astype('float32')) / 255.
L = lab[:, :, 0] * 2.55 # L --> [0, 255]
L = Image.fromarray(np.uint8(L))
L = paddle.to_tensor(np.array(L).astype('float32')[..., np.newaxis] / 255.0)
return L, ab
def addMergin(img, target_w, target_h, background_color=(0, 0, 0)):
width, height = img.size
if width == target_w and height == target_h:
return img
scale = max(target_w, target_h) / max(width, height)
width = int(width * scale / 16.) * 16
height = int(height * scale / 16.) * 16
img = img.resize((width, height), Image.BICUBIC)
xp = (target_w - width) // 2
yp = (target_h - height) // 2
result = Image.new(img.mode, (target_w, target_h), background_color)
result.paste(img, (xp, yp))
return result
class DeepRemasterPredictor(BasePredictor):
def __init__(self,
input,
output,
output='output',
weight_path=None,
colorization=False,
reference_dir=None,
mindim=360):
self.input = input
self.output = os.path.join(output, 'DeepRemaster')
self.colorization = colorization
self.reference_dir = reference_dir
self.mindim = mindim
if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(DEEPREMASTER_WEIGHT_URL, cur_path)
state_dict, _ = paddle.load(weight_path)
self.weight_path = weight_path
state_dict = paddle.load(weight_path)
self.modelR = NetworkR()
self.modelR.load_dict(state_dict['modelR'])
......@@ -63,7 +92,7 @@ class DeepReasterPredictor:
self.modelC.load_dict(state_dict['modelC'])
self.modelC.eval()
def run(self):
def run(self, video_path):
outputdir = self.output
outputdir_in = os.path.join(outputdir, 'input/')
os.makedirs(outputdir_in, exist_ok=True)
......@@ -94,9 +123,7 @@ class DeepReasterPredictor:
refimgs = []
for i, v in enumerate(refs):
refimg = utils.addMergin(v,
target_w=target_w,
target_h=target_h)
refimg = addMergin(v, target_w=target_w, target_h=target_h)
refimg = np.array(refimg).astype('float32').transpose(
2, 0, 1) / 255.0
refimgs.append(refimg)
......@@ -105,7 +132,7 @@ class DeepReasterPredictor:
refimgs = paddle.unsqueeze(refimgs, 0)
# Load video
cap = cv2.VideoCapture(self.input)
cap = cv2.VideoCapture(video_path)
nframes = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
v_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
v_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
......@@ -156,7 +183,7 @@ class DeepReasterPredictor:
elif nchannels == 3:
cv2.imwrite(outputdir_in + '%07d.png' % index, frame)
frame = frame[:, :, ::-1] ## BGR -> RGB
frame_l, frame_ab = utils.convertRGB2LABTensor(frame)
frame_l, frame_ab = convertRGB2LABTensor(frame)
frame_l = frame_l.transpose([2, 0, 1])
frame_ab = frame_ab.transpose([2, 0, 1])
frame_l = frame_l.reshape([
......@@ -193,7 +220,7 @@ class DeepReasterPredictor:
(out_l, out_ab),
axis=0).detach().numpy().transpose((1, 2, 0))
out = Image.fromarray(
np.uint8(utils.convertLAB2RGB(out) * 255))
np.uint8(convertLAB2RGB(out) * 255))
out.save(outputdir_out + '%07d.png' % (index))
else:
raise ValueError('channels of image must be 3!')
......@@ -214,7 +241,7 @@ class DeepReasterPredictor:
output = paddle.concat(
(out_l, out_c), axis=0).numpy().transpose((1, 2, 0))
output = Image.fromarray(
np.uint8(utils.convertLAB2RGB(output) * 255))
np.uint8(convertLAB2RGB(output) * 255))
output.save(outputdir_out + '%07d.png' % index)
it = it + 1
......@@ -222,7 +249,7 @@ class DeepReasterPredictor:
# Save result videos
outfile = os.path.join(outputdir,
self.input.split('/')[-1].split('.')[0])
video_path.split('/')[-1].split('.')[0])
cmd = 'ffmpeg -y -r %d -i %s%%07d.png -vcodec libx264 -pix_fmt yuv420p -r %d %s_in.mp4' % (
fps, outputdir_in, fps, outfile)
subprocess.call(cmd, shell=True)
......@@ -236,14 +263,3 @@ class DeepReasterPredictor:
cap.release()
pbar.close()
return outputdir_out, '%s_out.mp4' % outfile
if __name__ == "__main__":
args = parser.parse_args()
paddle.disable_static()
predictor = DeepReasterPredictor(args.input,
args.output,
colorization=args.colorization,
reference_dir=args.reference_dir,
mindim=args.mindim)
predictor.run()
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import glob
import argparse
import numpy as np
import paddle
import pickle
from PIL import Image
from tqdm import tqdm
from paddle import fluid
import paddle
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import frames2video, video2frames
from ppgan.models.generators.deoldify import build_model
parser = argparse.ArgumentParser(description='DeOldify')
parser.add_argument('--input', type=str, default='none', help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--render_factor',
type=int,
default=32,
help='model inputsize=render_factor*16')
parser.add_argument('--weight_path',
type=str,
default=None,
help='Path to the reference image directory')
from .base_predictor import BasePredictor
DEOLDIFY_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams'
class DeOldifyPredictor():
def __init__(self,
input,
output,
batch_size=1,
weight_path=None,
render_factor=32):
self.input = input
class DeOldifyPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None, render_factor=32):
# self.input = input
self.output = os.path.join(output, 'DeOldify')
self.render_factor = render_factor
self.model = build_model()
if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(DEOLDIFY_WEIGHT_URL, cur_path)
state_dict, _ = paddle.load(weight_path)
state_dict = paddle.load(weight_path)
self.model.load_dict(state_dict)
self.model.eval()
......@@ -85,8 +77,14 @@ class DeOldifyPredictor():
final = Image.fromarray(final)
return final
def run_single(self, img_path):
ori_img = Image.open(img_path).convert('LA').convert('RGB')
def run_image(self, img):
if isinstance(img, str):
ori_img = Image.open(img).convert('LA').convert('RGB')
elif isinstance(img, np.ndarray):
ori_img = Image.fromarray(img).convert('LA').convert('RGB')
elif isinstance(img, Image.Image):
ori_img = img
img = self.norm(ori_img, self.render_factor)
x = paddle.to_tensor(img[np.newaxis, ...])
out = self.model(x)
......@@ -97,9 +95,8 @@ class DeOldifyPredictor():
pred_img = self.post_process(pred_img, ori_img)
return pred_img
def run(self):
vid = self.input
base_name = os.path.basename(vid).split('.')[0]
def run_video(self, video):
base_name = os.path.basename(video).split('.')[0]
output_path = os.path.join(self.output, base_name)
pred_frame_path = os.path.join(output_path, 'frames_pred')
......@@ -109,15 +106,15 @@ class DeOldifyPredictor():
if not os.path.exists(pred_frame_path):
os.makedirs(pred_frame_path)
cap = cv2.VideoCapture(vid)
cap = cv2.VideoCapture(video)
fps = cap.get(cv2.CAP_PROP_FPS)
out_path = video2frames(vid, output_path)
out_path = video2frames(video, output_path)
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
for frame in tqdm(frames):
pred_img = self.run_single(frame)
pred_img = self.run_image(frame)
frame_name = os.path.basename(frame)
pred_img.save(os.path.join(pred_frame_path, frame_name))
......@@ -130,15 +127,14 @@ class DeOldifyPredictor():
return frame_pattern_combined, vid_out_path
def run(self, input):
if self.is_video(input):
return self.run_video(input)
else:
pred_img = self.run_image(input)
if __name__ == '__main__':
paddle.disable_static()
args = parser.parse_args()
predictor = DeOldifyPredictor(args.input,
args.output,
weight_path=args.weight_path,
render_factor=args.render_factor)
frames_path, temp_video_path = predictor.run()
if self.output:
base_name = os.path.basename(input)
pred_img.save(os.path.join(self.output, base_name + '.png'))
print('output video path:', temp_video_path)
return pred_img
......@@ -13,44 +13,18 @@
#limitations under the License.
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import cv2
import time
import argparse
import ast
import glob
import numpy as np
import paddle.fluid as fluid
import cv2
from tqdm import tqdm
from data import EDVRDataset
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import frames2video, video2frames
EDVR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar'
from .base_predictor import BasePredictor
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--input',
type=str,
default=None,
help='input video path')
parser.add_argument('--output',
type=str,
default='output',
help='output path')
parser.add_argument('--weight_path',
type=str,
default=None,
help='weight path')
args = parser.parse_args()
return args
EDVR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar'
def get_img(pred):
......@@ -72,29 +46,107 @@ def save_img(img, framename):
cv2.imwrite(framename, img)
class EDVRPredictor:
def __init__(self, input, output, weight_path=None):
def read_img(path, size=None, is_gt=False):
"""read image by cv2
return: Numpy float32, HWC, BGR, [0,1]"""
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
img = img.astype(np.float32) / 255.
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
if img.shape[2] > 3:
img = img[:, :, :3]
return img
def get_test_neighbor_frames(crt_i, N, max_n, padding='new_info'):
"""Generate an index list for reading N frames from a sequence of images
Args:
crt_i (int): current center index
max_n (int): number of images in the sequence (counted from 1)
N (int): reading N frames
padding (str): padding mode, one of replicate | reflection | new_info | circle
Example: crt_i = 0, N = 5
replicate: [0, 0, 0, 1, 2]
reflection: [2, 1, 0, 1, 2]
new_info: [4, 3, 0, 1, 2]
circle: [3, 4, 0, 1, 2]
Returns:
return_l (list [int]): a list of indexes
"""
max_n = max_n - 1
n_pad = N // 2
return_l = []
for i in range(crt_i - n_pad, crt_i + n_pad + 1):
if i < 0:
if padding == 'replicate':
add_idx = 0
elif padding == 'reflection':
add_idx = -i
elif padding == 'new_info':
add_idx = (crt_i + n_pad) + (-i)
elif padding == 'circle':
add_idx = N + i
else:
raise ValueError('Wrong padding mode')
elif i > max_n:
if padding == 'replicate':
add_idx = max_n
elif padding == 'reflection':
add_idx = max_n * 2 - i
elif padding == 'new_info':
add_idx = (crt_i - n_pad) - (i - max_n)
elif padding == 'circle':
add_idx = i - N
else:
raise ValueError('Wrong padding mode')
else:
add_idx = i
return_l.append(add_idx)
return return_l
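# Quick check of the docstring examples (N=5, 100-frame sequence):
#   get_test_neighbor_frames(0, 5, 100, padding='new_info')   -> [4, 3, 0, 1, 2]
#   get_test_neighbor_frames(99, 5, 100, padding='replicate') -> [97, 98, 99, 99, 99]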
class EDVRDataset:
def __init__(self, frame_paths):
self.frames = frame_paths
def __getitem__(self, index):
indexs = get_test_neighbor_frames(index, 5, len(self.frames))
frame_list = []
for i in indexs:
img = read_img(self.frames[i])
frame_list.append(img)
img_LQs = np.stack(frame_list, axis=0)
# BGR to RGB, HWC to CHW, numpy to tensor
img_LQs = img_LQs[:, :, :, [2, 1, 0]]
img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
return img_LQs, self.frames[index]
def __len__(self):
return len(self.frames)
class EDVRPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None):
self.input = input
self.output = os.path.join(output, 'EDVR')
place = fluid.CUDAPlace(
0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
self.exe = fluid.Executor(place)
if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(EDVR_WEIGHT_URL, cur_path)
model_filename = 'EDVR_model.pdmodel'
params_filename = 'EDVR_params.pdparams'
self.weight_path = weight_path
out = fluid.io.load_inference_model(dirname=weight_path,
model_filename=model_filename,
params_filename=params_filename,
executor=self.exe)
self.infer_prog, self.feed_list, self.fetch_list = out
self.build_inference_model()
def run(self):
vid = self.input
def run(self, video_path):
vid = video_path
base_name = os.path.basename(vid).split('.')[0]
output_path = os.path.join(self.output, base_name)
pred_frame_path = os.path.join(output_path, 'frames_pred')
......@@ -119,11 +171,9 @@ class EDVRPredictor:
for infer_iter, data in enumerate(tqdm(dataset)):
data_feed_in = [data[0]]
infer_outs = self.exe.run(
self.infer_prog,
fetch_list=self.fetch_list,
feed={self.feed_list[0]: np.array(data_feed_in)})
infer_result_list = [item for item in infer_outs]
outs = self.base_forward(np.array(data_feed_in))
infer_result_list = [item for item in outs]
frame_path = data[1]
......@@ -144,9 +194,3 @@ class EDVRPredictor:
frames2video(frame_pattern_combined, vid_out_path, str(int(fps)))
return frame_pattern_combined, vid_out_path
if __name__ == "__main__":
args = parse_args()
predictor = EDVRPredictor(args.input, args.output, args.weight_path)
predictor.run()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import yaml
import pickle
import imageio
import numpy as np
from tqdm import tqdm
from skimage import img_as_ubyte
from skimage.transform import resize
from scipy.spatial import ConvexHull
import paddle
from paddle.utils.download import get_path_from_url
from ppgan.utils.animate import normalize_kp
from ppgan.modules.keypoint_detector import KPDetector
from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator
from .base_predictor import BasePredictor
class FirstOrderPredictor(BasePredictor):
def __init__(self,
output='output',
weight_path=None,
config=None,
relative=False,
adapt_scale=False,
find_best_frame=False,
best_frame=None):
if config is not None and isinstance(config, str):
with open(config) as f:
    self.cfg = yaml.load(f, Loader=yaml.SafeLoader)
elif isinstance(config, dict):
self.cfg = config
elif config is None:
self.cfg = {
'model_params': {
'common_params': {
'num_kp': 10,
'num_channels': 3,
'estimate_jacobian': True
},
'kp_detector_params': {
'temperature': 0.1,
'block_expansion': 32,
'max_features': 1024,
'scale_factor': 0.25,
'num_blocks': 5
},
'generator_params': {
'block_expansion': 64,
'max_features': 512,
'num_down_blocks': 2,
'num_bottleneck_blocks': 6,
'estimate_occlusion_map': True,
'dense_motion_params': {
'block_expansion': 64,
'max_features': 1024,
'num_blocks': 5,
'scale_factor': 0.25
}
}
}
}
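# The defaults above are only used when no config is supplied; they appear to
# describe the architecture expected by the vox-cpk weights downloaded below
# when weight_path is None.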
if weight_path is None:
vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-cpk.pdparams'
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(vox_cpk_weight_url, cur_path)
self.weight_path = weight_path
self.output = output
self.relative = relative
self.adapt_scale = adapt_scale
self.find_best_frame = find_best_frame
self.best_frame = best_frame
self.generator, self.kp_detector = self.load_checkpoints(
self.cfg, self.weight_path)
def run(self, source_image, driving_video):
source_image = imageio.imread(source_image)
reader = imageio.get_reader(driving_video)
fps = reader.get_meta_data()['fps']
driving_video = []
try:
for im in reader:
driving_video.append(im)
except RuntimeError:
pass
reader.close()
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [
resize(frame, (256, 256))[..., :3] for frame in driving_video
]
if self.find_best_frame or self.best_frame is not None:
i = self.best_frame if self.best_frame is not None else self.find_best_frame_func(
source_image, driving_video)
print("Best frame: " + str(i))
driving_forward = driving_video[i:]
driving_backward = driving_video[:(i + 1)][::-1]
predictions_forward = self.make_animation(
source_image,
driving_forward,
self.generator,
self.kp_detector,
relative=self.relative,
adapt_movement_scale=self.adapt_scale)
predictions_backward = self.make_animation(
source_image,
driving_backward,
self.generator,
self.kp_detector,
relative=self.relative,
adapt_movement_scale=self.adapt_scale)
predictions = predictions_backward[::-1] + predictions_forward[1:]
else:
predictions = self.make_animation(
source_image,
driving_video,
self.generator,
self.kp_detector,
relative=self.relative,
adapt_movement_scale=self.adapt_scale)
imageio.mimsave(os.path.join(self.output, 'result.mp4'),
[img_as_ubyte(frame) for frame in predictions],
fps=fps)
def load_checkpoints(self, config, checkpoint_path):
generator = OcclusionAwareGenerator(
**config['model_params']['generator_params'],
**config['model_params']['common_params'])
kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
**config['model_params']['common_params'])
checkpoint = paddle.load(self.weight_path)
generator.set_state_dict(checkpoint['generator'])
kp_detector.set_state_dict(checkpoint['kp_detector'])
generator.eval()
kp_detector.eval()
return generator, kp_detector
def make_animation(self,
source_image,
driving_video,
generator,
kp_detector,
relative=True,
adapt_movement_scale=True):
with paddle.no_grad():
predictions = []
source = paddle.to_tensor(source_image[np.newaxis].astype(
np.float32)).transpose([0, 3, 1, 2])
driving = paddle.to_tensor(
np.array(driving_video)[np.newaxis].astype(
np.float32)).transpose([0, 4, 1, 2, 3])
kp_source = kp_detector(source)
kp_driving_initial = kp_detector(driving[:, :, 0])
for frame_idx in tqdm(range(driving.shape[2])):
driving_frame = driving[:, :, frame_idx]
kp_driving = kp_detector(driving_frame)
kp_norm = normalize_kp(
kp_source=kp_source,
kp_driving=kp_driving,
kp_driving_initial=kp_driving_initial,
use_relative_movement=relative,
use_relative_jacobian=relative,
adapt_movement_scale=adapt_movement_scale)
out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
predictions.append(
np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0])
return predictions
def find_best_frame_func(self, source, driving):
import face_alignment
def normalize_kp(kp):
kp = kp - kp.mean(axis=0, keepdims=True)
area = ConvexHull(kp[:, :2]).volume
area = np.sqrt(area)
kp[:, :2] = kp[:, :2] / area
return kp
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
flip_input=True)
kp_source = fa.get_landmarks(255 * source)[0]
kp_source = normalize_kp(kp_source)
norm = float('inf')
frame_num = 0
for i, image in tqdm(enumerate(driving)):
kp_driving = fa.get_landmarks(255 * image)[0]
kp_driving = normalize_kp(kp_driving)
new_norm = (np.abs(kp_source - kp_driving)**2).sum()
if new_norm < norm:
norm = new_norm
frame_num = i
return frame_num
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import glob
import argparse
import numpy as np
import paddle
import pickle
from PIL import Image
from tqdm import tqdm
import paddle
from ppgan.models.generators import RRDBNet
from ppgan.utils.video import frames2video, video2frames
from paddle.utils.download import get_path_from_url
parser = argparse.ArgumentParser(description='RealSR')
parser.add_argument('--input', type=str, default='none', help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--weight_path',
type=str,
default=None,
help='Path to the reference image directory')
from .base_predictor import BasePredictor
REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DF2K_JPEG.pdparams'
class RealSRPredictor(BasePredictor):
    def __init__(self, output='output', weight_path=None):
        self.output = os.path.join(output, 'RealSR')
        self.model = RRDBNet(3, 3, 64, 23)
        if weight_path is None:
            cur_path = os.path.abspath(os.path.dirname(__file__))
            weight_path = get_path_from_url(REALSR_WEIGHT_URL, cur_path)

        state_dict = paddle.load(weight_path)
        self.model.load_dict(state_dict)
        self.model.eval()

    # ... (lines collapsed in the source diff)

        img = img.transpose((1, 2, 0))
        return (img * 255).clip(0, 255).astype('uint8')

    def run_image(self, img):
        if isinstance(img, str):
            ori_img = Image.open(img).convert('RGB')
        elif isinstance(img, np.ndarray):
            ori_img = Image.fromarray(img).convert('RGB')
        elif isinstance(img, Image.Image):
            ori_img = img

        img = self.norm(ori_img)
        x = paddle.to_tensor(img[np.newaxis, ...])
        out = self.model(x)

        # ... (lines collapsed in the source diff)

        pred_img = Image.fromarray(pred_img)
        return pred_img

    def run_video(self, video):
        base_name = os.path.basename(video).split('.')[0]
        output_path = os.path.join(self.output, base_name)
        pred_frame_path = os.path.join(output_path, 'frames_pred')

        # ... (lines collapsed in the source diff)

        if not os.path.exists(pred_frame_path):
            os.makedirs(pred_frame_path)

        cap = cv2.VideoCapture(video)
        fps = cap.get(cv2.CAP_PROP_FPS)

        out_path = video2frames(video, output_path)

        frames = sorted(glob.glob(os.path.join(out_path, '*.png')))

        for frame in tqdm(frames):
            pred_img = self.run_image(frame)
            frame_name = os.path.basename(frame)
            pred_img.save(os.path.join(pred_frame_path, frame_name))

        # ... (lines collapsed in the source diff)

        return frame_pattern_combined, vid_out_path

    def run(self, input):
        if self.is_video(input):
            return self.run_video(input)
        else:
            pred_img = self.run_image(input)

            if self.output:
                base_name = os.path.basename(input)
                pred_img.save(os.path.join(self.output, base_name + '.png'))

            return pred_img
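# Usage sketch (illustrative, not part of the original file): the predictor can
# be pointed at either an image path or a video path; 'input.png' below is a
# placeholder, and with weight_path=None the DF2K weights are downloaded from
# REALSR_WEIGHT_URL on first use.
if __name__ == '__main__':
    paddle.disable_static()
    predictor = RealSRPredictor(output='output', weight_path=None)
    result = predictor.run('input.png')  # dispatches to run_image / run_video
    print('upscaled image size:', result.size)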
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .unpaired_dataset import UnpairedDataset
from .single_dataset import SingleDataset
from .paired_dataset import PairedDataset
from .sr_image_dataset import SRImageDataset
from .makeup_dataset import MakeupDataset
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import os.path
from .base_dataset import BaseDataset, get_transform
from .transforms.makeup_transforms import get_makeup_transform
import paddle.vision.transforms as T
from PIL import Image
import random
import numpy as np
from ..utils.preprocess import *
from .builder import DATASETS
@DATASETS.register()
class MakeupDataset(BaseDataset):
def __init__(self, cfg):
"""Initialize this dataset class.
Parameters:
opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
"""
BaseDataset.__init__(self, cfg)
self.image_path = cfg.dataroot
self.mode = cfg.phase
self.transform = get_makeup_transform(cfg)
self.norm = T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])
self.transform_mask = get_makeup_transform(cfg, pic="mask")
self.trans_size = cfg.trans_size
self.cls_list = cfg.cls_list
self.cls_A = self.cls_list[0]
self.cls_B = self.cls_list[1]
for cls in self.cls_list:
setattr(
self, cls + "_list_path",
os.path.join(self.image_path, self.mode + '_' + cls + ".txt"))
setattr(self, cls + "_lines",
open(getattr(self, cls + "_list_path"), 'r').readlines())
setattr(self, "num_of_" + cls + "_data",
len(getattr(self, cls + "_lines")))
print('Start preprocessing dataset..!')
self.preprocess()
print('Finished preprocessing dataset..!')
    def preprocess(self):
        """Parse the split files into per-class lists of image, mask and landmark paths."""
for cls in self.cls_list:
setattr(self, cls + "_filenames", [])
setattr(self, cls + "_mask_filenames", [])
setattr(self, cls + "_lmks_filenames", [])
lines = getattr(self, cls + "_lines")
random.shuffle(lines)
for i, line in enumerate(lines):
splits = line.split()
getattr(self, cls + "_filenames").append(splits[0])
getattr(self, cls + "_mask_filenames").append(splits[1])
getattr(self, cls + "_lmks_filenames").append(splits[2])
def __getitem__(self, index):
"""Return MANet and MDNet needed params.
Parameters:
index (int) -- a random integer for data indexing
Returns a dictionary that contains needed params.
"""
try:
index_A = random.randint(
0, getattr(self, "num_of_" + self.cls_A + "_data"))
index_B = random.randint(
0, getattr(self, "num_of_" + self.cls_B + "_data"))
if self.mode == 'test':
num_b = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
index_A = int(index / num_b)
index_B = int(index % num_b)
image_A = Image.open(
os.path.join(self.image_path,
getattr(self, self.cls_A +
"_filenames")[index_A])).convert("RGB")
image_B = Image.open(
os.path.join(self.image_path,
getattr(self, self.cls_B +
"_filenames")[index_B])).convert("RGB")
mask_A = np.array(
Image.open(
os.path.join(
self.image_path,
getattr(self,
self.cls_A + "_mask_filenames")[index_A])))
mask_B = np.array(
Image.open(
os.path.join(
self.image_path,
getattr(self, self.cls_B +
"_mask_filenames")[index_B])).convert('L'))
image_A = np.array(image_A)
image_B = np.array(image_B)
image_A = self.transform(image_A)
image_B = self.transform(image_B)
mask_A = cv2.resize(mask_A, (256, 256),
interpolation=cv2.INTER_NEAREST)
mask_B = cv2.resize(mask_B, (256, 256),
interpolation=cv2.INTER_NEAREST)
lmks_A = np.loadtxt(
os.path.join(
self.image_path,
getattr(self, self.cls_A + "_lmks_filenames")[index_A]))
lmks_B = np.loadtxt(
os.path.join(
self.image_path,
getattr(self, self.cls_B + "_lmks_filenames")[index_B]))
lmks_A = lmks_A / image_A.shape[:2] * self.trans_size
lmks_B = lmks_B / image_B.shape[:2] * self.trans_size
P_A = generate_P_from_lmks(lmks_A, self.trans_size,
image_A.shape[0], image_A.shape[1])
P_B = generate_P_from_lmks(lmks_B, self.trans_size,
image_B.shape[0], image_B.shape[1])
mask_A_aug = generate_mask_aug(mask_A, lmks_A)
mask_B_aug = generate_mask_aug(mask_B, lmks_B)
consis_mask = calculate_consis_mask(mask_A_aug, mask_B_aug)
consis_mask_idt_A = calculate_consis_mask(mask_A_aug, mask_A_aug)
            consis_mask_idt_B = calculate_consis_mask(mask_B_aug, mask_B_aug)
except Exception as e:
print(e)
return self.__getitem__(index + 1)
return {
'image_A': self.norm(image_A),
'image_B': self.norm(image_B),
'mask_A': np.float32(mask_A),
'mask_B': np.float32(mask_B),
'consis_mask': np.float32(consis_mask),
'P_A': np.float32(P_A),
'P_B': np.float32(P_B),
'consis_mask_idt_A': np.float32(consis_mask_idt_A),
'consis_mask_idt_B': np.float32(consis_mask_idt_B),
'mask_A_aug': np.float32(mask_A_aug),
'mask_B_aug': np.float32(mask_B_aug)
}
    def __len__(self):
        """Return the total number of images in the dataset.

        The two classes can contain different numbers of images: training
        iterates over the larger of the two counts, while testing enumerates
        every source/reference pair.
        """
        num_A = getattr(self, 'num_of_' + self.cls_list[0] + '_data')
        num_B = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
        if self.mode == "test":
            return num_A * num_B
        return max(num_A, num_B)
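# Data-layout sketch (illustrative, not from the original file): preprocess()
# above expects a <phase>_<class>.txt split file with one sample per line, each
# line holding three whitespace-separated relative paths: image, segmentation
# mask and facial-landmark file. The concrete paths below are placeholders.
if __name__ == '__main__':
    example_line = 'images/makeup/0001.png segs/makeup/0001.png lmks/makeup/0001.txt'
    image_rel, mask_rel, lmks_rel = example_line.split()
    print('image:', image_rel)
    print('mask :', mask_rel)
    print('lmks :', lmks_rel)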
import os.path

from .base_dataset import BaseDataset, get_params, get_transform
from .image_folder import make_dataset
from .builder import DATASETS
from .transforms.builder import build_transforms


@DATASETS.register()
class PairedDataset(BaseDataset):
    """A dataset class for paired image dataset.
    """
    def __init__(self, cfg):
        """Initialize this dataset class.

        Parameters:
            cfg (dict) -- stores all the experiment flags
        """
        BaseDataset.__init__(self, cfg)
        self.dir_AB = os.path.join(cfg.dataroot,
                                   cfg.phase)  # get the image directory
        self.AB_paths = sorted(make_dataset(
            self.dir_AB, cfg.max_dataset_size))  # get image paths
        self.input_nc = self.cfg.output_nc if self.cfg.direction == 'BtoA' else self.cfg.input_nc
        self.output_nc = self.cfg.input_nc if self.cfg.direction == 'BtoA' else self.cfg.output_nc
        self.transforms = build_transforms(cfg.transforms)

    def __getitem__(self, index):
        """Return a data point and its metadata information."""
        # ... (loading of the combined AB image collapsed in the source diff)
        A = AB[:h, :w2, :]
        B = AB[:h, w2:, :]

        # apply the same transform to both A and B
        A, B = self.transforms((A, B))

        return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}

    def __len__(self):
        """Return the total number of images in the dataset."""
        return len(self.AB_paths)
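# Layout sketch (illustrative, not from the original file): each file under
# dataroot/<phase> stores A and B side by side in a single image; __getitem__
# above splits it down the middle before applying the paired transforms. The
# dummy array below just demonstrates that split.
if __name__ == '__main__':
    import numpy as np

    AB = np.zeros((256, 512, 3), dtype=np.uint8)  # placeholder combined image
    h, w = AB.shape[:2]
    w2 = w // 2
    A, B = AB[:h, :w2, :], AB[:h, w2:, :]
    print('A:', A.shape, 'B:', B.shape)  # (256, 256, 3) each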
from .transforms import RandomCrop, Resize, RandomHorizontalFlip, PairedRandomCrop, PairedRandomHorizontalFlip, Normalize, Permute
from .dlib_utils import detect, crop, landmarks, crop_from_array
import paddle


# ... (calculate_gain and earlier lines are collapsed in the source diff)


@paddle.no_grad()
def constant_(x, value):
    # Fill the tensor/parameter `x` in place with a constant value.
    # paddle.full is the Paddle 2.x replacement for the old fluid-style fill_constant.
    temp_value = paddle.full(x.shape, value, x.dtype)
    x.set_value(temp_value)
    return x
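# Usage sketch (illustrative, not from the original file): constant_ can be used
# to fill a layer parameter in place, e.g. zero-initialising a bias; the Conv2D
# layer below is only an example.
if __name__ == '__main__':
    import paddle.nn as nn

    conv = nn.Conv2D(3, 16, kernel_size=3)
    constant_(conv.bias, 0.0)
    print(float(conv.bias.sum()))  # -> 0.0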