Commit e4932b70 authored by LielinJiang

Merge branch 'master' of https://github.com/PaddlePaddle/PaddleGAN into readme

import os
import datetime
import argparse
import numpy
import networks
modelnames = networks.__all__
# import datasets
datasetNames = ('Vimeo_90K_interp', )  # datasets.__all__
parser = argparse.ArgumentParser(description='DAIN')
parser.add_argument('--debug', action='store_true', help='Enable debug mode')
parser.add_argument('--netName',
type=str,
default='DAIN',
choices=modelnames,
help='model architecture: ' + ' | '.join(modelnames) +
' (default: DAIN)')
parser.add_argument('--datasetName',
default='Vimeo_90K_interp',
choices=datasetNames,
nargs='+',
help='dataset type : ' + ' | '.join(datasetNames) +
' (default: Vimeo_90K_interp)')
parser.add_argument('--video_path',
default='',
help='the path of selected videos')
parser.add_argument('--output_path', default='', help='the output root path')
parser.add_argument('--seed',
type=int,
default=1,
help='random seed (default: 1)')
parser.add_argument('--batch_size',
'-b',
type=int,
default=1,
help='batch size (default:1)')
parser.add_argument('--channels',
'-c',
type=int,
default=3,
choices=[1, 3],
help='channels of images (default:3)')
parser.add_argument('--filter_size',
'-f',
type=int,
default=4,
help='the size of filters used (default: 4)',
choices=[2, 4, 6, 5, 51])
parser.add_argument('--time_step',
type=float,
default=0.5,
help='choose the time steps')
parser.add_argument(
'--alpha',
type=float,
nargs='+',
default=[0.0, 1.0],
help=
'the ratio of loss for interpolated and rectified result (default: [0.0, 1.0])'
)
parser.add_argument('--frame_rate',
type=int,
default=None,
help='frame rate of the input video')
parser.add_argument('--patience',
type=int,
default=5,
help='the patience of reduce-on-plateau')
parser.add_argument('--factor',
type=float,
default=0.2,
help='the factor of reduce-on-plateau')
parser.add_argument('--saved_model',
type=str,
default='',
help='path to the model weights')
parser.add_argument('--no-date',
action='store_true',
help='don\'t append date timestamp to folder')
parser.add_argument('--use_cuda',
default=True,
type=bool,
help='use cuda or not')
parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not')
parser.add_argument('--remove_duplicates',
default=True,
type=bool,
help='remove duplicate frames or not')
from .dain import DAIN
from .dain_slowmotion import DAIN_slowmotion
__all__ = ('DAIN', 'DAIN_slowmotion')
import paddle.fluid as fluid
import resblock
import pwcnet
class DAIN(fluid.dygraph.Layer):
def __init__(self, channel=3, filter_size=4, timestep=0.5, training=True):
# base class initialization
super(DAIN, self).__init__()
self.filter_size = filter_size
self.training = training
self.timestep = timestep
assert (timestep == 0.5)
self.numFrames = int(1.0 / timestep) - 1
ctx_ch = 3 * 64 + 3
# channels of the rectify input: cur_output(3) + ref0(3) + ref2(3) + two 2-channel flows,
# matching the concat built in forward (same value as DAIN_slowmotion)
inplanes = 13
self.rectifyNet = resblock.__dict__['MultipleBasicBlock_4'](inplanes,
64)
self.flownets = pwcnet.__dict__['pwc_dc_net']()
self.div_flow = 20.0
def forward(self, input):
"""
Parameters
----------
input: shape (num_frames, batch, 3, height, width); num_frames is 3 during training and 2 at inference
-----------
"""
losses = []
offsets = []
'''
STEP 1: squeeze the input
'''
if self.training == True:
assert input.shape[0] == 3
input_0 = input[0]
input_1 = input[1]
input_2 = input[2]
else:
# print(input.shape[0])
assert input.shape[0] == 2
input_0 = input[0]
input_2 = input[1]
#prepare the input data of current scale
cur_input_0 = input_0
if self.training == True:
cur_input_1 = input_1
cur_input_2 = input_2
'''
STEP 3.2: concatenating the inputs.
'''
cur_offset_input = fluid.layers.concat([cur_input_0, cur_input_2],
axis=1)
'''
STEP 3.3: perform the estimation
'''
time_offsets = [
kk * self.timestep for kk in range(1, 1 + self.numFrames, 1)
]
cur_offset_outputs = [
self.forward_flownets(self.flownets,
cur_offset_input,
time_offsets=time_offsets),
self.forward_flownets(self.flownets,
fluid.layers.concat(
[cur_input_2, cur_input_0], axis=1),
time_offsets=time_offsets[::-1])
]
cur_offset_output = [cur_offset_outputs[0][0], cur_offset_outputs[1][0]]
# Warp image use warp-op in PWC-Net
ref0 = self.flownets.warp_nomask(cur_input_0, cur_offset_output[0])
ref2 = self.flownets.warp_nomask(cur_input_2, cur_offset_output[1])
cur_output = (ref0 + ref2) / 2.0
rectify_input = fluid.layers.concat([
cur_output, ref0, ref2, cur_offset_output[0], cur_offset_output[1]
],
axis=1)
cur_output_rectified = self.rectifyNet(rectify_input) + cur_output
'''
STEP 3.5: for training phase, we collect the variables to be penalized.
'''
if self.training == True:
losses += [cur_output - cur_input_1]
losses += [cur_output_rectified - cur_input_1]
offsets += [cur_offset_output]
'''
STEP 4: return the results
'''
if self.training == True:
# if in the training phase, we output the losses to be minimized.
# return losses, loss_occlusion
return losses, offsets
else:
cur_outputs = [cur_output, cur_output_rectified]
return cur_outputs, cur_offset_output
def forward_flownets(self, model, input, time_offsets=None):
if time_offsets is None:
time_offsets = [0.5]
elif isinstance(time_offsets, float):
time_offsets = [time_offsets]
elif isinstance(time_offsets, list):
pass
# this is a single-direction motion result, not a bidirectional one
temp = model(input)
# turn the single-direction flow into both directions by scaling with each time offset
temps = [
self.div_flow * temp * time_offset for time_offset in time_offsets
]
# upsample the flow with bilinear interpolation; nearest-neighbor is unlikely to be better here
temps = [fluid.layers.resize_bilinear(temp, scale=4) for temp in temps]
return temps
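A minimal, hypothetical sketch of running the `DAIN` class above for inference in dygraph mode. It assumes a CUDA device, a Paddle build that provides the correlation layer used by `pwcnet.py`, and that the file is importable as `dain.py`; real use would load pretrained weights with `model.load_dict(...)` first.
```
import numpy as np
import paddle.fluid as fluid

from dain import DAIN  # assumption: this file is on the import path as dain.py

with fluid.dygraph.guard(fluid.CUDAPlace(0)):
    model = DAIN(channel=3, filter_size=4, timestep=0.5, training=False)
    # two RGB frames stacked on axis 0: (num_frames=2, batch=1, 3, H, W);
    # H and W are multiples of 128, as predict.py pads them
    frames = fluid.dygraph.to_variable(
        np.random.rand(2, 1, 3, 256, 384).astype('float32'))
    (coarse, rectified), flows = model(frames)  # rectified is the interpolated frame
```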
import paddle.fluid as fluid
import resblock
import time
import pwcnet
class DAIN_slowmotion(fluid.dygraph.Layer):
def __init__(self, channel=3, filter_size=4, timestep=0.5, training=True):
# base class initialization
super(DAIN_slowmotion, self).__init__()
self.filter_size = filter_size
self.training = training
self.timestep = timestep
self.num_frames = int(1.0 / timestep) - 1
ctx_ch = 3 * 64 + 3
# inplanes = 3 + 3 + 3 + 2*1 + 2*2 + 2
inplanes = 13
self.flownets = pwcnet.__dict__['pwc_dc_net']()
self.rectifyNet = resblock.__dict__['MultipleBasicBlock_4'](inplanes,
64)
self.div_flow = 20.0
def forward(self, input):
"""
Parameters
----------
input: shape (num_frames, batch, 3, height, width); num_frames is 3 during training and 2 at inference
-----------
"""
losses = []
offsets = []
'''
STEP 1: squeeze the input
'''
if self.training == True:
assert input.shape[0] == 3
input_0 = input[0]
input_1 = input[1]
input_2 = input[2]
else:
assert input.shape[0] == 2
input_0 = input[0]
input_2 = input[1]
#prepare the input data of current scale
cur_input_0 = input_0
if self.training == True:
cur_input_1 = input_1
cur_input_2 = input_2
'''
STEP 3.2: concatenating the inputs.
'''
cur_offset_input = fluid.layers.concat([cur_input_0, cur_input_2],
axis=1)
'''
STEP 3.3: perform the estimation
'''
time_offsets = [
kk * self.timestep for kk in range(1, 1 + self.num_frames, 1)
]
cur_offset_outputs = [
self.forward_flownets(self.flownets,
cur_offset_input,
time_offsets=time_offsets),
self.forward_flownets(self.flownets,
fluid.layers.concat(
[cur_input_2, cur_input_0], axis=1),
time_offsets=time_offsets[::-1])
]
'''
STEP 3.4: perform the frame interpolation process
'''
count = 0
for temp_0, temp_1, timeoffset in zip(cur_offset_outputs[0],
cur_offset_outputs[1],
time_offsets):
cur_offset_output = [temp_0, temp_1]
ref0 = self.flownets.warp_nomask(cur_input_0, cur_offset_output[0])
ref2 = self.flownets.warp_nomask(cur_input_2, cur_offset_output[1])
cur_output_temp = (ref0 + ref2) / 2.0
if count == 0:
cur_output = fluid.layers.unsqueeze(cur_output_temp, axes=0)
else:
cur_output_ = fluid.layers.unsqueeze(cur_output_temp, axes=0)
cur_output = fluid.layers.concat([cur_output, cur_output_],
axis=0)
rectify_input = fluid.layers.concat([
cur_output_temp, ref0, ref2, cur_offset_output[0],
cur_offset_output[1]
],
axis=1)
cur_output_rectified_temp = self.rectifyNet(
rectify_input) + cur_output_temp
if count == 0:
cur_output_rectified = fluid.layers.unsqueeze(
cur_output_rectified_temp, axes=0)
else:
cur_output_rectified_ = fluid.layers.unsqueeze(
cur_output_rectified_temp, axes=0)
cur_output_rectified = fluid.layers.concat(
[cur_output_rectified, cur_output_rectified_], axis=0)
count += 1
'''
STEP 3.5: for training phase, we collect the variables to be penalized.
'''
if self.training == True:
losses += [cur_output - cur_input_1]
losses += [cur_output_rectified - cur_input_1]
offsets += [cur_offset_output]
'''
STEP 4: return the results
'''
if self.training == True:
# if in the training phase, we output the losses to be minimized.
# return losses, loss_occlusion
return losses, offsets
else:
cur_outputs = [cur_output, cur_output_rectified]
return cur_outputs, cur_offset_output
def forward_flownets(self, model, input, time_offsets=None):
if time_offsets is None:
time_offsets = [0.5]
elif isinstance(time_offsets, float):
time_offsets = [time_offsets]
elif isinstance(time_offsets, list):
pass
# this is a single-direction motion result, not a bidirectional one
temp = model(input)
# turn the single-direction flow into both directions by scaling with each time offset
temps = [
self.div_flow * temp * time_offset for time_offset in time_offsets
]
# upsample the flow with bilinear interpolation; nearest-neighbor is unlikely to be better here
temps = [fluid.layers.resize_bilinear(temp, scale=4) for temp in temps]
return temps
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import time
import glob
import numpy as np
from imageio import imread, imsave
from tqdm import tqdm
import cv2
import paddle.fluid as fluid
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import video2frames, frames2video
from util import *
from my_args import parser
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
def infer_engine(model_dir,
run_mode='fluid',
batch_size=1,
use_gpu=False,
min_subgraph_size=3):
if not use_gpu and not run_mode == 'fluid':
raise ValueError(
"Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
.format(run_mode, use_gpu))
precision_map = {
'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
}
config = fluid.core.AnalysisConfig(os.path.join(model_dir, 'model'),
os.path.join(model_dir, 'params'))
if use_gpu:
# initial GPU memory(M), device ID
config.enable_use_gpu(100, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
else:
config.disable_gpu()
if run_mode in precision_map.keys():
config.enable_tensorrt_engine(workspace_size=1 << 10,
max_batch_size=batch_size,
min_subgraph_size=min_subgraph_size,
precision_mode=precision_map[run_mode],
use_static=False,
use_calib_mode=False)
# disable print log when predict
config.disable_glog_info()
# enable shared memory
config.enable_memory_optim()
# disable feed, fetch OP, needed by zero_copy_run
config.switch_use_feed_fetch_ops(False)
predictor = fluid.core.create_paddle_predictor(config)
return predictor
def executor(model_dir, use_gpu=False):
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
program, feed_names, fetch_targets = fluid.io.load_inference_model(
dirname=model_dir,
executor=exe,
model_filename='model',
params_filename='params')
return exe, program, fetch_targets
class VideoFrameInterp(object):
def __init__(self,
time_step,
model_path,
video_path,
use_gpu=True,
key_frame_thread=0.,
output_path='output',
remove_duplicates=True):
self.video_path = video_path
self.output_path = os.path.join(output_path, 'DAIN')
if not model_path:
model_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)
self.model_path = model_path
self.time_step = time_step
self.key_frame_thread = key_frame_thread
self.remove_duplicates = remove_duplicates
self.exe, self.program, self.fetch_targets = executor(model_path,
use_gpu=use_gpu)
def run(self):
frame_path_input = os.path.join(self.output_path, 'frames-input')
frame_path_interpolated = os.path.join(self.output_path,
'frames-interpolated')
frame_path_combined = os.path.join(self.output_path, 'frames-combined')
video_path_output = os.path.join(self.output_path, 'videos-output')
if not os.path.exists(self.output_path):
os.makedirs(self.output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
timestep = self.time_step
num_frames = int(1.0 / timestep) - 1
if self.video_path.endswith('.mp4'):
videos = [self.video_path]
else:
videos = sorted(glob.glob(os.path.join(self.video_path, '*.mp4')))
for cnt, vid in enumerate(videos):
print("Interpolating video:", vid)
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS)
print("Old fps (frame rate): ", fps)
times_interp = int(1.0 / timestep)
r2 = str(int(fps) * times_interp)
print("New fps (frame rate): ", r2)
out_path = video2frames(vid, frame_path_input)
vidname = vid.split('/')[-1].split('.')[0]
tot_timer = AverageMeter()
proc_timer = AverageMeter()
end = time.time()
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
if self.remove_duplicates:
frames = remove_duplicates(out_path)
img = imread(frames[0])
int_width = img.shape[1]
int_height = img.shape[0]
channel = img.shape[2]
if not channel == 3:
continue
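# pad width/height: if a dimension is not a multiple of 128, pad it (centered) up to the
# next multiple; otherwise add a fixed 32-pixel border on each side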
if int_width != ((int_width >> 7) << 7):
int_width_pad = (
((int_width >> 7) + 1) << 7) # more than necessary
padding_left = int((int_width_pad - int_width) / 2)
padding_right = int_width_pad - int_width - padding_left
else:
int_width_pad = int_width
padding_left = 32
padding_right = 32
if int_height != ((int_height >> 7) << 7):
int_height_pad = (
((int_height >> 7) + 1) << 7) # more than necessary
padding_top = int((int_height_pad - int_height) / 2)
padding_bottom = int_height_pad - int_height - padding_top
else:
int_height_pad = int_height
padding_top = 32
padding_bottom = 32
frame_num = len(frames)
print('processing {} frames, from video: {}'.format(frame_num, vid))
if not os.path.exists(os.path.join(frame_path_interpolated,
vidname)):
os.makedirs(os.path.join(frame_path_interpolated, vidname))
if not os.path.exists(os.path.join(frame_path_combined, vidname)):
os.makedirs(os.path.join(frame_path_combined, vidname))
for i in tqdm(range(frame_num - 1)):
first = frames[i]
second = frames[i + 1]
img_first = imread(first)
img_second = imread(second)
'''--------------Frame change test------------------------'''
img_first_gray = np.dot(img_first[..., :3],
[0.299, 0.587, 0.114])
img_second_gray = np.dot(img_second[..., :3],
[0.299, 0.587, 0.114])
img_first_gray = img_first_gray.flatten(order='C')
img_second_gray = img_second_gray.flatten(order='C')
corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1]
key_frame = False
if corr < self.key_frame_thread:
key_frame = True
'''-------------------------------------------------------'''
X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
proc_end = time.time()
o = self.exe.run(self.program,
fetch_list=self.fetch_targets,
feed={"image": X})
y_ = o[0]
proc_timer.update(time.time() - proc_end)
tot_timer.update(time.time() - end)
end = time.time()
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>6d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
num_frames = int(1.0 / timestep) - 1
input_dir = os.path.join(frame_path_input, vidname)
interpolated_dir = os.path.join(frame_path_interpolated, vidname)
combined_dir = os.path.join(frame_path_combined, vidname)
combine_frames(input_dir, interpolated_dir, combined_dir,
num_frames)
frame_pattern_combined = os.path.join(frame_path_combined, vidname,
'%08d.png')
video_pattern_output = os.path.join(video_path_output,
vidname + '.mp4')
if os.path.exists(video_pattern_output):
os.remove(video_pattern_output)
frames2video(frame_pattern_combined, video_pattern_output, r2)
return frame_pattern_combined, video_pattern_output
if __name__ == '__main__':
args = parser.parse_args()
predictor = VideoFrameInterp(args.time_step,
                             args.saved_model,
                             args.video_path,
                             output_path=args.output_path,
                             remove_duplicates=args.remove_duplicates)
predictor.run()
Building the custom OP:
2. Run `sh make.sh` to compile the `correlation_lib.so` shared library
3. Add the shared library path to LD_LIBRARY_PATH:
```
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python3.7 -c 'import paddle; print(paddle.sysconfig.get_lib())'`
```
4. Add the Python path of the correlation op:
```
export PYTHONPATH=$PYTHONPATH:`pwd`
```
5. Run the unit test with `python test_correlation.py` to verify that the op loads successfully.
PS: If the paddle wheel was downloaded from the official website, gcc 4.8 is required, i.e. change `g++` to `g++-4.8` in make.sh.
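Once the library is built and on the search paths, the op is used through a small Python wrapper module (the `correlation` module imported by `test_correlation.py` below). The real wrapper ships with the repo; the following is only a minimal, hypothetical sketch of what such a wrapper could look like, assuming `fluid.load_op_library` is available in this Paddle version:
```
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper

# assumption: correlation_lib.so is discoverable via LD_LIBRARY_PATH / the current directory
fluid.load_op_library('correlation_lib.so')


def correlation(x1, x2, pad_size, kernel_size, max_displacement,
                stride1, stride2, corr_type_multiply=1):
    # input/output/attribute names mirror CorrelationOpMaker below
    helper = LayerHelper('correlation', **locals())
    out = helper.create_variable_for_type_inference(dtype=x1.dtype)
    helper.append_op(type='correlation',
                     inputs={'Input1': x1, 'Input2': x2},
                     outputs={'Output': out},
                     attrs={
                         'pad_size': pad_size,
                         'kernel_size': kernel_size,
                         'max_displacement': max_displacement,
                         'stride1': stride1,
                         'stride2': stride2,
                         'corr_type_multiply': corr_type_multiply
                     })
    return out
```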
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
inline std::vector<int64_t> CorrelationOutputSize(int batch, int input_height, int input_width, int stride1, int stride2, int kernel_size, int pad_size, int max_displacement) {
std::vector<int64_t> output_shape({batch});
int kernel_radius = (kernel_size - 1) / 2;
int border_radius = kernel_radius + max_displacement;
int padded_input_height = input_height + 2 * pad_size;
int padded_input_width = input_width + 2 * pad_size;
int output_channel = ((max_displacement/stride2) * 2 + 1) * ((max_displacement/stride2) * 2 + 1);
output_shape.push_back(output_channel);
int output_height = std::ceil(static_cast<float>(padded_input_height - 2 * border_radius) / static_cast<float>(stride1));
int output_width = std::ceil(static_cast<float>(padded_input_width - 2 * border_radius) / static_cast<float>(stride1));
output_shape.push_back(output_height);
output_shape.push_back(output_width);
return output_shape;
}
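// Worked example with the parameters used elsewhere in this repo
// (pad_size = 4, kernel_size = 1, max_displacement = 4, stride1 = stride2 = 1):
//   kernel_radius = 0, border_radius = 4, padded size = input size + 8,
//   output_channel = (4 * 2 + 1) * (4 * 2 + 1) = 81,
//   output_height = ceil((input_height + 8 - 8) / 1) = input_height (same for width),
// so the output shape is {batch, 81, input_height, input_width}.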
class CorrelationOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override{
AddInput("Input1", "input1");
AddInput("Input2", "input2");
AddOutput("Output", "output");
AddAttr<int>("pad_size", "pad size for input1 and input2");
AddAttr<int>("kernel_size", "kernel size of input1 and input2");
AddAttr<int>("max_displacement", "max displacement of input1 and input2");
AddAttr<int>("stride1", "Input1 stride");
AddAttr<int>("stride2", "Input2 stride");
AddAttr<int>("corr_type_multiply", "correlation coefficient").SetDefault(1);
AddComment(R"DOC(Correlation of two feature map. Only support NCHW data format.)DOC");
}
};
class CorrelationOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(input1) cannot be null");
PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(input2) cannot be null");
int stride1 = ctx->Attrs().Get<int>("stride1");
int stride2 = ctx->Attrs().Get<int>("stride2");
int max_displacement = ctx->Attrs().Get<int>("max_displacement");
int pad_size = ctx->Attrs().Get<int>("pad_size");
int kernel_size = ctx->Attrs().Get<int>("kernel_size");
auto in_dims = ctx->GetInputDim("Input1");
auto in2_dims = ctx->GetInputDim("Input2");
PADDLE_ENFORCE_EQ(in_dims.size() == 4, true, "input1 must be 4-dims");
PADDLE_ENFORCE_EQ(in2_dims.size() == 4, true, "input2 must be 4-dims");
std::vector<int64_t> output_shape = CorrelationOutputSize(in_dims[0], in_dims[2], in_dims[3], stride1, stride2, kernel_size, pad_size, max_displacement);
ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override{
auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input1");
PADDLE_ENFORCE_EQ(input_data_type, ctx.Input<Tensor>("Input2")->type(), "Input1 and Input2 should have the same type");
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
};
template <typename T>
class CorrelationOpGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("correlation_grad");
op->SetInput("Input1", this->Input("Input1"));
op->SetInput("Input2", this->Input("Input2"));
op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));
op->SetOutput(framework::GradVarName("Input1"), this->InputGrad("Input1"));
op->SetOutput(framework::GradVarName("Input2"), this->InputGrad("Input2"));
op->SetAttrMap(this->Attrs());
}
};
class CorrelationOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(Input1) should not be null");
PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(Input2) should not be null");
PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Output")), true, "Input(Output@GRAD) should not be null");
auto in1_dims = ctx->GetInputDim("Input1");
auto in2_dims = ctx->GetInputDim("Input2");
ctx->SetOutputDim(framework::GradVarName("Input1"), in1_dims);
ctx->SetOutputDim(framework::GradVarName("Input2"), in1_dims);
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override{
const auto* var = ctx.InputVar(framework::GradVarName("Output"));
if (var == nullptr) {
PADDLE_THROW("cannot find Output@GRAD");
}
return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(ctx, "Input1"), ctx.GetPlace());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(correlation, ops::CorrelationOp, ops::CorrelationOpMaker,
ops::CorrelationOpGradMaker<paddle::framework::OpDesc>,
ops::CorrelationOpGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(correlation_grad, ops::CorrelationOpGrad);
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#define THREADS_PER_BLOCK 32
#define FULL_MASK 0xffffffff
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T>
__forceinline__ __device__ T warpReduceSum(T val) {
for (int offset = 16; offset > 0; offset /= 2) {
val += __shfl_down_sync(FULL_MASK, val, offset);
}
return val;
}
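// blockReduceSum: each warp reduces its own values first; lane 0 of every warp writes its
// partial sum to shared memory, then the first warp reduces those partials.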
template <typename T>
__forceinline__ __device__ T blockReduceSum(T val) {
static __shared__ T shared[32];
int lane = threadIdx.x % warpSize;
int wid = threadIdx.x / warpSize;
val = warpReduceSum(val);
if (lane == 0)
shared[wid] = val;
__syncthreads();
val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0;
if (wid == 0)
val = warpReduceSum(val);
return val;
}
template <typename T>
__global__ void set_zero(T *x, int num) {
for(int i = blockIdx.x * blockDim.x + threadIdx.x; i < num; i += blockDim.x * gridDim.x)
x[i] = static_cast<T>(0);
}
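// channel_first: copy the NCHW input into a zero-padded NHWC buffer (rinput) so the
// correlation kernels can read all channels of a pixel contiguously.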
template <typename T>
__global__ void channel_first(const T *input, T *rinput, const int channel, const int height, const int width, const int pad_size) {
int n = blockIdx.x;
int h = blockIdx.y;
int w = blockIdx.z;
int ch_off = threadIdx.x;
T value;
int dimchw = channel * height * width;
int dimhw = height * width;
int p_dimw = (width + 2 * pad_size);
int p_dimh = (height + 2 * pad_size);
int p_dimchw = channel * p_dimw * p_dimh;
int p_dimcw = channel * p_dimw;
for (int c = ch_off; c < channel; c += THREADS_PER_BLOCK) {
value = input[n * dimchw + c * dimhw + h * width + w];
rinput[n * p_dimchw + (h + pad_size) * p_dimcw + (w + pad_size) * channel + c] = value;
}
}
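// correlation_forward: one block per output location (n, y, x). For every displacement
// (ti, tj) within max_displacement, threads split the channels, accumulate the elementwise
// product of the kernel_size x kernel_size patch of rinput1 around (h1, w1) with the
// displaced patch of rinput2, reduce across the block, and thread 0 writes the mean to the
// matching output channel.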
template <typename T>
__global__ void correlation_forward(T *output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int input_channel, const int input_height, const int input_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int n = blockIdx.x;
int h1 = blockIdx.y * stride1 + max_displacement;
int w1 = blockIdx.z * stride1 + max_displacement;
int c = threadIdx.x;
int p_dimchw = p_input_height * p_input_width * input_channel;
int p_dimcw = p_input_width * input_channel;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int nelems = kernel_size * kernel_size * p_dimc;
for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) {
for(int ti = -displacement_rad; ti <= displacement_rad; ++ti) {
int w2 = w1 + ti * stride2;
int h2 = h1 + tj * stride2;
T acc0 = 0;
for(int j = -kernel_rad; j <= kernel_rad; ++j) {
for(int i = -kernel_rad; i <= kernel_rad; ++i) {
for(int ch = c; ch < p_dimc; ch += blockDim.x) {
int index1 = n * p_dimchw + (h1 + j) * p_dimcw + (w1 + i) * p_dimc + ch;
int index2 = n * p_dimchw + (h2 + j) * p_dimcw + (w2 + i) * p_dimc + ch;
acc0 += static_cast<T>(rinput1[index1] * rinput2[index2]);
}
}
}
if (blockDim.x == warpSize) {
__syncwarp();
acc0 = warpReduceSum(acc0);
} else {
__syncthreads();
acc0 = blockReduceSum(acc0);
}
if (threadIdx.x == 0) {
int tc = (tj + displacement_rad) * displacement_size + (ti + displacement_rad);
const int t_index = n * t_dimchw + tc * t_dimhw + blockIdx.y * t_dimw + blockIdx.z;
output[t_index] = static_cast<T>(acc0 / nelems);
}
}
}
}
//class CorrelationKernel<platform::CUDADeviceContext, T>
template <typename T>
class CorrelationKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must be CUDAPlace");
auto *input1 = ctx.Input<Tensor>("Input1");
auto *input2 = ctx.Input<Tensor>("Input2");
int pad_size = ctx.Attr<int>("pad_size");
int kernel_size = ctx.Attr<int>("kernel_size");
int stride1 = ctx.Attr<int>("stride1");
int stride2 = ctx.Attr<int>("stride2");
int max_displacement = ctx.Attr<int>("max_displacement");
int corr_type_multiply = ctx.Attr<int>("corr_type_multiply");
auto *output = ctx.Output<Tensor>("Output");
output->mutable_data<T>(ctx.GetPlace());
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
// base on input1, NCHW
auto in_dims = input1->dims();
int N = in_dims[0];
int C = in_dims[1];
int H = in_dims[2];
int W = in_dims[3];
int padded_input_height = H + 2 * pad_size;
int padded_input_width = W + 2 * pad_size;
Tensor rinput1 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput1.mutable_data<T>(ctx.GetPlace());
Tensor rinput2 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput2.mutable_data<T>(ctx.GetPlace());
set_zero<<<(rinput1.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput1.data<T>(), rinput1.numel());
set_zero<<<(rinput2.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput2.data<T>(), rinput2.numel());
set_zero<<<(output->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(output->data<T>(), output->numel());
auto out_dims = output->dims();
int OC = out_dims[1];
int OH = out_dims[2];
int OW = out_dims[3];
dim3 blocks_grid(N, H, W);
dim3 threads_block(THREADS_PER_BLOCK);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input1->data<T>(), rinput1.data<T>(), C, H, W, pad_size);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input2->data<T>(), rinput2.data<T>(), C, H, W, pad_size);
dim3 threadsPerBlock(THREADS_PER_BLOCK);
dim3 totalBlocksCorr(N, OH, OW);
correlation_forward<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(output->data<T>(), OC, OH, OW, rinput1.data<T>(),
C, H, W, rinput2.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
};
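// correlation_backward_input1: gradient w.r.t. Input1 for one sample (n). One block per
// padded input location (h, w) and channel c; threads split the output channels, accumulate
// grad_output * rinput2 over all output positions whose correlation window covered (h, w),
// and thread 0 writes the normalized sum into grad_input1.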
template <typename T>
__global__ void correlation_backward_input1(int item, T *grad_input1, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
int n = item;
int h = blockIdx.x * stride1 + pad_size;
int w = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int xmin = (w - kernel_rad - max_displacement) / stride1;
int ymin = (h - kernel_rad - max_displacement) / stride1;
int xmax = (w + kernel_rad - max_displacement) / stride1;
int ymax = (h + kernel_rad - max_displacement) / stride1;
if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) {
return;
}
if (xmin > xmax || ymin > ymax) {
return;
}
xmin = max(0, xmin);
xmax = min(output_width - 1, xmax);
ymin = max(0, ymin);
ymax = min(output_height - 1, ymax);
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int p_dimchw = input_channel * p_input_height * p_input_width;
int p_dimcw = input_channel * p_input_width;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int o_dimchw = input_channel * input_height * input_width;
int o_dimhw = input_height * input_width;
int o_dimw = input_width;
int nelems = kernel_size * kernel_size * input_channel;
__shared__ T prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) {
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
int index2 = n * p_dimchw + (h + j2) * p_dimcw + (w + i2) * p_dimc + c;
T val2 = rinput2[index2];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i;
prod_sum[tch_off] += grad_output[t_index] * val2;
}
}
}
__syncthreads();
if (tch_off == 0) {
T reduce_sum = 0;
for (int index = 0; index < THREADS_PER_BLOCK; index++) {
reduce_sum += prod_sum[index];
}
const int index1 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size);
grad_input1[index1] = static_cast<T>(reduce_sum / nelems);
}
}
template <typename T>
__global__ void correlation_backward_input2(int item, T *grad_input2, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2){
int n = item;
int h = blockIdx.x * stride1 + pad_size;
int w = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int p_dimchw = input_channel * p_input_height * p_input_width;
int p_dimcw = input_channel * p_input_width;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int o_dimchw = input_channel * input_height * input_width;
int o_dimhw = input_height * input_width;
int o_dimw = input_width;
int nelems = kernel_size * kernel_size * input_channel;
__shared__ T prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) {
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
int xmin = (w - kernel_rad - max_displacement - i2) / stride1;
int ymin = (h - kernel_rad - max_displacement - j2) / stride1;
int xmax = (w + kernel_rad - max_displacement - i2) / stride1;
int ymax = (h + kernel_rad - max_displacement - j2) / stride1;
if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) {
continue;
}
if (xmin > xmax || ymin > ymax) {
continue;
}
xmin = max(0, xmin);
xmax = min(output_width - 1, xmax);
ymin = max(0, ymin);
ymax = min(output_height - 1, ymax);
int index1 = n * p_dimchw + (h - j2) * p_dimcw + (w - i2) * p_dimc + c;
T val1 = rinput1[index1];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i;
prod_sum[tch_off] += grad_output[t_index] * val1;
}
}
}
__syncthreads();
if (tch_off == 0) {
T reduce_sum = 0;
for (int index = 0; index < THREADS_PER_BLOCK; index++) {
reduce_sum += prod_sum[index];
}
const int index2 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size);
grad_input2[index2] = static_cast<T>(reduce_sum / nelems);
}
}
template <typename T>
class CorrelationGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must use CUDAPlace.");
const auto *input1 = ctx.Input<Tensor>("Input1");
const auto *input2 = ctx.Input<Tensor>("Input2");
const auto *grad_output = ctx.Input<Tensor>(framework::GradVarName("Output"));
const int pad_size = ctx.Attr<int>("pad_size");
const int kernel_size = ctx.Attr<int>("kernel_size");
const int stride1 = ctx.Attr<int>("stride1");
const int stride2 = ctx.Attr<int>("stride2");
const int max_displacement = ctx.Attr<int>("max_displacement");
const int corr_type_multiply = ctx.Attr<int>("corr_type_multiply");
auto *grad_input1 = ctx.Output<Tensor>(framework::GradVarName("Input1"));
grad_input1->mutable_data<T>(ctx.GetPlace());
auto *grad_input2 = ctx.Output<Tensor>(framework::GradVarName("Input2"));
grad_input2->mutable_data<T>(ctx.GetPlace());
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
auto in_dims = input1->dims();
int N = in_dims[0];
int C = in_dims[1];
int H = in_dims[2];
int W = in_dims[3];
int padded_input_height = H + 2 * pad_size;
int padded_input_width = W + 2 * pad_size;
Tensor rinput1 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput1.mutable_data<T>(ctx.GetPlace());
Tensor rinput2 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput2.mutable_data<T>(ctx.GetPlace());
set_zero<<<(rinput1.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput1.data<T>(), rinput1.numel());
set_zero<<<(rinput2.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput2.data<T>(), rinput2.numel());
set_zero<<<(grad_input1->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(grad_input1->data<T>(), grad_input1->numel());
set_zero<<<(grad_input2->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(grad_input2->data<T>(), grad_input2->numel());
auto grad_out_dims = grad_output->dims();
int GOC = grad_out_dims[1];
int GOH = grad_out_dims[2];
int GOW = grad_out_dims[3];
dim3 blocks_grid(N, H, W);
dim3 threads_block(THREADS_PER_BLOCK);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input1->data<T>(), rinput1.data<T>(), C, H, W, pad_size);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input2->data<T>(), rinput2.data<T>(), C, H, W, pad_size);
dim3 threadsPerBlock(THREADS_PER_BLOCK);
dim3 totalBlocksCorr(H, W, C);
for (int n = 0; n < N; n++) {
correlation_backward_input1<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(n, grad_input1->data<T>(), C, H, W, grad_output->data<T>(), GOC, GOH, GOW, rinput2.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
for (int n = 0; n < N; n++) {
correlation_backward_input2<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(n, grad_input2->data<T>(), C, H, W, grad_output->data<T>(), GOC, GOH, GOW, rinput1.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
correlation, ops::CorrelationKernel<float>,
ops::CorrelationKernel<double>);
REGISTER_OP_CUDA_KERNEL(
correlation_grad, ops::CorrelationGradKernel<float>,
ops::CorrelationGradKernel<double>);
# source /ssd1/vis/liufanglong/.bashrc
#export PATH=/home/work/cuda-9.0/bin:$PATH
#export LD_LIBRARY_PATH="/home/work/cuda-9.0/lib64:$LD_LIBRARY_PATH"
#export LD_LIBRARY_PATH=/home/vis/chao/local/cudnn_v7.6/cuda/lib64:$LD_LIBRARY_PATH
#export CPLUS_INCLUDE_PATH=/home/vis/chao/local/cudnn_v7.6/cuda/include:/ssd1/vis/liufanglong/local/fluid_1.1.0_for_slurm/nccl_2.3.5/include:$CPLUS_INCLUDE_PATH
#export LD_LIBRARY_PATH=/ssd1/vis/liufanglong/local/fluid_1.1.0_for_slurm/nccl_2.3.5/lib:$LD_LIBRARY_PATH
include_dir=$( python -c 'import paddle; print(paddle.sysconfig.get_include())' )
lib_dir=$( python -c 'import paddle; print(paddle.sysconfig.get_lib())' )
echo $include_dir
echo $lib_dir
OPS='correlation_op'
for op in ${OPS}
do
nvcc ${op}.cu -c -o ${op}.cu.o -ccbin cc -DPADDLE_WITH_CUDA -DEIGEN_USE_GPU -DPADDLE_USE_DSO -DPADDLE_WITH_MKLDNN -Xcompiler -fPIC -std=c++11 -Xcompiler -fPIC -w --expt-relaxed-constexpr -O0 -g -DNVCC \
-I ${include_dir}/third_party/ \
-I ${include_dir}
done
# g++-4.8 correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
# g++ ${OPS}.cu.o ${OPS}.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
g++ correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
-I ${include_dir}/third_party/ \
-I ${include_dir} \
-L ${lib_dir} \
-L /usr/local/cuda/lib64/ -lpaddle_framework -lcudart
# rm *.cu.o
import unittest
from correlation import correlation
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
def corr(x_1,
x_2,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1,
corr_multiply=1):
K = kernel_size
# rinput1 = np.pad(x_1, tuple([pad_size for _ in range(4)]), mode='constant').transpose(1, 2).transpose(2, 3)
# rinput2 = np.pad(x_2, tuple([pad_size for _ in range(4)]), mode='constant').transpose(1, 2).transpose(2, 3)
rinput1 = np.pad(x_1, ((0, 0), (0, 0), (pad_size, pad_size),
(pad_size, pad_size)),
mode='constant')
rinput2 = np.pad(x_2, ((0, 0), (0, 0), (pad_size, pad_size),
(pad_size, pad_size)),
mode='constant')
rinput1 = np.transpose(rinput1, (0, 2, 3, 1))
rinput2 = np.transpose(rinput2, (0, 2, 3, 1))
B = int(rinput1.shape[0])
H = int(x_1.shape[2])
W = int(x_2.shape[3])
d = max_displacement
D = 2 * d + 1
output = np.zeros((B, D * D, H, W), dtype=np.float32)
for b in range(B):
for i in range(H):
for j in range(W):
for k in range(-d, d + 1):
for l in range(-d, d + 1):
x1_index = i + pad_size
y1_index = j + pad_size
x2_index = x1_index + k
y2_index = y1_index + l
output[b, l + d + D * (k + d), i,
j] = np.mean(rinput1[b, x1_index:x1_index + K,
y1_index:y1_index + K] *
rinput2[b, x2_index:x2_index + K,
y2_index:y2_index + K])
return output
class TestCorrelationOp(unittest.TestCase):
def test_check_output(self):
#x_shape = (1, 196, 3, 3)
np.random.seed(13)
np.set_printoptions(threshold=np.inf)
x_shape = (2, 10, 3, 3)
x_type = 'float32'
x1 = fluid.layers.data(name='x1',
shape=x_shape,
dtype=x_type,
append_batch_size=False)
x2 = fluid.layers.data(name='x2',
shape=x_shape,
dtype=x_type,
append_batch_size=False)
x1_np = np.random.randn(2, 3, 4, 5).astype(x_type)
x2_np = np.random.randn(2, 3, 4, 5).astype(x_type)
out_np = corr(x1_np,
x2_np,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
out = correlation(x1,
x2,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
res = exe.run(feed={'x1': x1_np, 'x2': x2_np}, fetch_list=[out.name])
self.assertTrue(np.allclose(res[0], out_np))
class Net(fluid.dygraph.Layer):
def __init__(self, name_scope):
super(Net, self).__init__(name_scope)
def forward(self, x1, x2):
y = correlation(x1,
x2,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
return y
class TestCorrelationOpDyGraph(unittest.TestCase):
def test_check_output(self):
np.random.seed(13)
np.set_printoptions(threshold=np.inf)
x_shape = (2, 10, 3, 3)
x_type = 'float32'
place = fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
x1_np = np.random.randn(2, 3, 4, 5).astype(x_type)
x2_np = np.random.randn(2, 3, 4, 5).astype(x_type)
out_np = corr(x1_np,
x2_np,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
x1 = to_variable(x1_np)
x2 = to_variable(x2_np)
corr_pd = Net('corr_pd')
y = corr_pd(x1, x2)
out = y.numpy()
self.assertTrue(np.allclose(out, out_np))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D, Conv2DTranspose
from paddle.fluid.contrib import correlation
__all__ = ['pwc_dc_net']
class PWCDCNet(fluid.dygraph.Layer):
def __init__(self, md=4):
super(PWCDCNet, self).__init__()
self.md = md
self.param_attr = fluid.ParamAttr(
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0004),
initializer=fluid.initializer.MSRAInitializer(uniform=True,
fan_in=None,
seed=0))
self.conv1a = Conv2D(3, 16, 3, 2, 1, param_attr=self.param_attr)
self.conv1aa = Conv2D(16, 16, 3, 1, 1, param_attr=self.param_attr)
self.conv1b = Conv2D(16, 16, 3, 1, 1, param_attr=self.param_attr)
self.conv2a = Conv2D(16, 32, 3, 2, 1, param_attr=self.param_attr)
self.conv2aa = Conv2D(32, 32, 3, 1, 1, param_attr=self.param_attr)
self.conv2b = Conv2D(32, 32, 3, 1, 1, param_attr=self.param_attr)
self.conv3a = Conv2D(32, 64, 3, 2, 1, param_attr=self.param_attr)
self.conv3aa = Conv2D(64, 64, 3, 1, 1, param_attr=self.param_attr)
self.conv3b = Conv2D(64, 64, 3, 1, 1, param_attr=self.param_attr)
self.conv4a = Conv2D(64, 96, 3, 2, 1, param_attr=self.param_attr)
self.conv4aa = Conv2D(96, 96, 3, 1, 1, param_attr=self.param_attr)
self.conv4b = Conv2D(96, 96, 3, 1, 1, param_attr=self.param_attr)
self.conv5a = Conv2D(96, 128, 3, 2, 1, param_attr=self.param_attr)
self.conv5aa = Conv2D(128, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv5b = Conv2D(128, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv6aa = Conv2D(128, 196, 3, 2, 1, param_attr=self.param_attr)
self.conv6a = Conv2D(196, 196, 3, 1, 1, param_attr=self.param_attr)
self.conv6b = Conv2D(196, 196, 3, 1, 1, param_attr=self.param_attr)
nd = (2 * self.md + 1)**2
dd = np.cumsum([128, 128, 96, 64, 32], dtype=np.int32).astype(np.int)
dd = [int(d) for d in dd]
od = nd
self.conv6_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv6_1 = Conv2D(od + dd[0],
128,
3,
1,
1,
param_attr=self.param_attr)
self.conv6_2 = Conv2D(od + dd[1],
96,
3,
1,
1,
param_attr=self.param_attr)
self.conv6_3 = Conv2D(od + dd[2],
64,
3,
1,
1,
param_attr=self.param_attr)
self.conv6_4 = Conv2D(od + dd[3],
32,
3,
1,
1,
param_attr=self.param_attr)
self.predict_flow6 = Conv2D(od + dd[4],
2,
3,
1,
1,
param_attr=self.param_attr)
self.deconv6 = Conv2DTranspose(2,
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
self.upfeat6 = Conv2DTranspose(od + dd[4],
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
od = nd + 128 + 4
self.conv5_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv5_1 = Conv2D(od + dd[0],
128,
3,
1,
1,
param_attr=self.param_attr)
self.conv5_2 = Conv2D(od + dd[1],
96,
3,
1,
1,
param_attr=self.param_attr)
self.conv5_3 = Conv2D(od + dd[2],
64,
3,
1,
1,
param_attr=self.param_attr)
self.conv5_4 = Conv2D(od + dd[3],
32,
3,
1,
1,
param_attr=self.param_attr)
self.predict_flow5 = Conv2D(od + dd[4],
2,
3,
1,
1,
param_attr=self.param_attr)
self.deconv5 = Conv2DTranspose(2,
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
self.upfeat5 = Conv2DTranspose(od + dd[4],
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
od = nd + 96 + 4
self.conv4_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv4_1 = Conv2D(od + dd[0],
128,
3,
1,
1,
param_attr=self.param_attr)
self.conv4_2 = Conv2D(od + dd[1],
96,
3,
1,
1,
param_attr=self.param_attr)
self.conv4_3 = Conv2D(od + dd[2],
64,
3,
1,
1,
param_attr=self.param_attr)
self.conv4_4 = Conv2D(od + dd[3],
32,
3,
1,
1,
param_attr=self.param_attr)
self.predict_flow4 = Conv2D(od + dd[4],
2,
3,
1,
1,
param_attr=self.param_attr)
self.deconv4 = Conv2DTranspose(2,
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
self.upfeat4 = Conv2DTranspose(od + dd[4],
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
od = nd + 64 + 4
self.conv3_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv3_1 = Conv2D(od + dd[0],
128,
3,
1,
1,
param_attr=self.param_attr)
self.conv3_2 = Conv2D(od + dd[1],
96,
3,
1,
1,
param_attr=self.param_attr)
self.conv3_3 = Conv2D(od + dd[2],
64,
3,
1,
1,
param_attr=self.param_attr)
self.conv3_4 = Conv2D(od + dd[3],
32,
3,
1,
1,
param_attr=self.param_attr)
self.predict_flow3 = Conv2D(od + dd[4],
2,
3,
1,
1,
param_attr=self.param_attr)
self.deconv3 = Conv2DTranspose(2,
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
self.upfeat3 = Conv2DTranspose(od + dd[4],
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
od = nd + 32 + 4
self.conv2_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv2_1 = Conv2D(od + dd[0],
128,
3,
1,
1,
param_attr=self.param_attr)
self.conv2_2 = Conv2D(od + dd[1],
96,
3,
1,
1,
param_attr=self.param_attr)
self.conv2_3 = Conv2D(od + dd[2],
64,
3,
1,
1,
param_attr=self.param_attr)
self.conv2_4 = Conv2D(od + dd[3],
32,
3,
1,
1,
param_attr=self.param_attr)
self.predict_flow2 = Conv2D(od + dd[4],
2,
3,
1,
1,
param_attr=self.param_attr)
# self.deconv2 = Conv2DTranspose(2, 2, 4, stride=2, padding=1, param_attr=self.param_attr)
self.dc_conv1 = Conv2D(od + dd[4],
128,
3,
1,
1,
dilation=1,
param_attr=self.param_attr)
self.dc_conv2 = Conv2D(128,
128,
3,
1,
2,
dilation=2,
param_attr=self.param_attr)
self.dc_conv3 = Conv2D(128,
128,
3,
1,
4,
dilation=4,
param_attr=self.param_attr)
self.dc_conv4 = Conv2D(128,
96,
3,
1,
8,
dilation=8,
param_attr=self.param_attr)
self.dc_conv5 = Conv2D(96,
64,
3,
1,
16,
dilation=16,
param_attr=self.param_attr)
self.dc_conv6 = Conv2D(64,
32,
3,
1,
1,
dilation=1,
param_attr=self.param_attr)
self.dc_conv7 = Conv2D(32, 2, 3, 1, 1, param_attr=self.param_attr)
def warp(self, x, flo):
"""
warp an image/tensor (im2) back to im1, according to the optical flow
x: [B, C, H, W] (im2)
flo: [B, 2, H, W] flow
"""
x_shape = fluid.layers.shape(x)
B, H, W = x_shape[0], x_shape[2], x_shape[3]
bb = fluid.layers.range(0, B, 1, 'float32')
xx = fluid.layers.range(0, W, 1, 'float32')
yy = fluid.layers.range(0, H, 1, 'float32')
_, yy, xx = paddle.tensor.meshgrid(bb, yy, xx)
yy = fluid.layers.unsqueeze(yy, [1])
xx = fluid.layers.unsqueeze(xx, [1])
grid = fluid.layers.concat(input=[xx, yy], axis=1)
flo = flo
vgrid = fluid.layers.elementwise_add(grid, flo)
vgrid_0 = 2.0 * fluid.layers.slice(
vgrid, axes=[1], starts=[0], ends=[1]) / (W - 1.) - 1.0
vgrid_1 = 2.0 * fluid.layers.slice(
vgrid, axes=[1], starts=[1], ends=[2]) / (H - 1.) - 1.0
vgrid = fluid.layers.concat(input=[vgrid_0, vgrid_1], axis=1)
vgrid = fluid.layers.transpose(vgrid, [0, 2, 3, 1])
output = fluid.layers.grid_sampler(name='grid_sample', x=x, grid=vgrid)
mask = fluid.layers.zeros_like(x)
mask = mask + 1.0
mask = fluid.layers.grid_sampler(name='grid_sample', x=mask, grid=vgrid)
mask_temp1 = fluid.layers.cast(mask < 0.9990, 'float32')
mask = mask * (1 - mask_temp1)
mask = fluid.layers.cast(mask > 0, 'float32')
outwarp = fluid.layers.elementwise_mul(output, mask)
return outwarp
def warp_nomask(self, x, flo):
"""
warp an image/tensor (im2) back to im1, according to the optical flow
x: [B, C, H, W] (im2)
flo: [B, 2, H, W] flow
"""
B, C, H, W = x.shape
# mesh grid
# xx = fluid.layers.range(0, W, 1, 'float32')
# xx = fluid.layers.reshape(xx, shape=[1, -1])
# xx = fluid.layers.expand(x=xx, expand_times=[H, 1])
# xx = fluid.layers.reshape(xx, shape=[1, 1, H, W])
# xx = fluid.layers.expand(x=xx, expand_times=[B, 1, 1, 1])
#
# yy = fluid.layers.range(0, H, 1, 'float32')
# yy = fluid.layers.reshape(yy, shape=[-1, 1])
# yy = fluid.layers.expand(x=yy, expand_times=[1, W])
# yy = fluid.layers.reshape(x=yy, shape=[1, 1, H, W])
# yy = fluid.layers.expand(x=yy, expand_times=[B, 1, 1, 1])
x_shape = fluid.layers.shape(x)
B, H, W = x_shape[0], x_shape[2], x_shape[3]
bb = fluid.layers.range(0, B, 1, 'float32')
xx = fluid.layers.range(0, W, 1, 'float32')
# xx = fluid.layers.reshape(xx, shape=[1, -1])
yy = fluid.layers.range(0, H, 1, 'float32')
# yy = fluid.layers.reshape(yy, shape=[1, -1])
_, yy, xx = paddle.tensor.meshgrid(bb, yy, xx)
yy = fluid.layers.unsqueeze(yy, [1])
xx = fluid.layers.unsqueeze(xx, [1])
grid = fluid.layers.concat(input=[xx, yy], axis=1)
flo = flo
vgrid = fluid.layers.elementwise_add(grid, flo)
#vgrid_0 = 2.0 * fluid.layers.slice(vgrid, axes=[1], starts=[0], ends=[1]) / max(W - 1, 1) - 1.0
#vgrid_1 = 2.0 * fluid.layers.slice(vgrid, axes=[1], starts=[1], ends=[2]) / max(H - 1, 1) - 1.0
vgrid_0 = 2.0 * fluid.layers.slice(
vgrid, axes=[1], starts=[0], ends=[1]) / (W - 1.) - 1.0
vgrid_1 = 2.0 * fluid.layers.slice(
vgrid, axes=[1], starts=[1], ends=[2]) / (H - 1.) - 1.0
vgrid = fluid.layers.concat(input=[vgrid_0, vgrid_1], axis=1)
vgrid = fluid.layers.transpose(vgrid, [0, 2, 3, 1])
output = fluid.layers.grid_sampler(name='grid_sample', x=x, grid=vgrid)
return output
def corr(self, x_1, x_2):
out = correlation(x_1,
x_2,
pad_size=self.md,
kernel_size=1,
max_displacement=self.md,
stride1=1,
stride2=1,
corr_type_multiply=1)
return out
def forward(self, x, output_more=False):
im1 = fluid.layers.slice(x, axes=[1], starts=[0], ends=[3])
im2 = fluid.layers.slice(x, axes=[1], starts=[3], ends=[6])
# print("\n\n********************PWC Net details *************** \n\n")
c11 = fluid.layers.leaky_relu(self.conv1a(im1), 0.1)
c11 = fluid.layers.leaky_relu(self.conv1aa(c11), 0.1)
c11 = fluid.layers.leaky_relu(self.conv1b(c11), 0.1)
c21 = fluid.layers.leaky_relu(self.conv1a(im2), 0.1)
c21 = fluid.layers.leaky_relu(self.conv1aa(c21), 0.1)
c21 = fluid.layers.leaky_relu(self.conv1b(c21), 0.1)
c12 = fluid.layers.leaky_relu(self.conv2a(c11), 0.1)
c12 = fluid.layers.leaky_relu(self.conv2aa(c12), 0.1)
c12 = fluid.layers.leaky_relu(self.conv2b(c12), 0.1)
c22 = fluid.layers.leaky_relu(self.conv2a(c21), 0.1)
c22 = fluid.layers.leaky_relu(self.conv2aa(c22), 0.1)
c22 = fluid.layers.leaky_relu(self.conv2b(c22), 0.1)
c13 = fluid.layers.leaky_relu(self.conv3a(c12), 0.1)
c13 = fluid.layers.leaky_relu(self.conv3aa(c13), 0.1)
c13 = fluid.layers.leaky_relu(self.conv3b(c13), 0.1)
c23 = fluid.layers.leaky_relu(self.conv3a(c22), 0.1)
c23 = fluid.layers.leaky_relu(self.conv3aa(c23), 0.1)
c23 = fluid.layers.leaky_relu(self.conv3b(c23), 0.1)
c14 = fluid.layers.leaky_relu(self.conv4a(c13), 0.1)
c14 = fluid.layers.leaky_relu(self.conv4aa(c14), 0.1)
c14 = fluid.layers.leaky_relu(self.conv4b(c14), 0.1)
c24 = fluid.layers.leaky_relu(self.conv4a(c23), 0.1)
c24 = fluid.layers.leaky_relu(self.conv4aa(c24), 0.1)
c24 = fluid.layers.leaky_relu(self.conv4b(c24), 0.1)
c15 = fluid.layers.leaky_relu(self.conv5a(c14), 0.1)
c15 = fluid.layers.leaky_relu(self.conv5aa(c15), 0.1)
c15 = fluid.layers.leaky_relu(self.conv5b(c15), 0.1)
c25 = fluid.layers.leaky_relu(self.conv5a(c24), 0.1)
c25 = fluid.layers.leaky_relu(self.conv5aa(c25), 0.1)
c25 = fluid.layers.leaky_relu(self.conv5b(c25), 0.1)
c16 = fluid.layers.leaky_relu(self.conv6aa(c15), 0.1)
c16 = fluid.layers.leaky_relu(self.conv6a(c16), 0.1)
c16 = fluid.layers.leaky_relu(self.conv6b(c16), 0.1)
c26 = fluid.layers.leaky_relu(self.conv6aa(c25), 0.1)
c26 = fluid.layers.leaky_relu(self.conv6a(c26), 0.1)
c26 = fluid.layers.leaky_relu(self.conv6b(c26), 0.1)
corr6 = self.corr(c16, c26)
corr6 = fluid.layers.leaky_relu(corr6, alpha=0.1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv6_0(corr6), 0.1), corr6],
axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv6_1(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv6_2(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv6_3(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv6_4(x), 0.1), x], axis=1)
flow6 = self.predict_flow6(x)
up_flow6 = self.deconv6(flow6)
up_feat6 = self.upfeat6(x)
warp5 = self.warp(c25, up_flow6 * 0.625)
corr5 = self.corr(c15, warp5)
corr5 = fluid.layers.leaky_relu(corr5, alpha=0.1)
x = fluid.layers.concat(input=[corr5, c15, up_flow6, up_feat6], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv5_0(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv5_1(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv5_2(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv5_3(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv5_4(x), 0.1), x], axis=1)
flow5 = self.predict_flow5(x)
up_flow5 = self.deconv5(flow5)
up_feat5 = self.upfeat5(x)
warp4 = self.warp(c24, up_flow5 * 1.25)
corr4 = self.corr(c14, warp4)
corr4 = fluid.layers.leaky_relu(corr4, alpha=0.1)
x = fluid.layers.concat(input=[corr4, c14, up_flow5, up_feat5], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv4_0(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv4_1(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv4_2(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv4_3(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv4_4(x), 0.1), x], axis=1)
flow4 = self.predict_flow4(x)
up_flow4 = self.deconv4(flow4)
up_feat4 = self.upfeat4(x)
warp3 = self.warp(c23, up_flow4 * 2.5)
corr3 = self.corr(c13, warp3)
corr3 = fluid.layers.leaky_relu(corr3, alpha=0.1)
x = fluid.layers.concat(input=[corr3, c13, up_flow4, up_feat4], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv3_0(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv3_1(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv3_2(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv3_3(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv3_4(x), 0.1), x], axis=1)
flow3 = self.predict_flow3(x)
up_flow3 = self.deconv3(flow3)
up_feat3 = self.upfeat3(x)
warp2 = self.warp(c22, up_flow3 * 5.0)
corr2 = self.corr(c12, warp2)
corr2 = fluid.layers.leaky_relu(corr2, alpha=0.1)
x = fluid.layers.concat(input=[corr2, c12, up_flow3, up_feat3], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv2_0(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv2_1(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv2_2(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv2_3(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv2_4(x), 0.1), x], axis=1)
flow2 = self.predict_flow2(x)
x = fluid.layers.leaky_relu(
self.dc_conv4(
fluid.layers.leaky_relu(
self.dc_conv3(
fluid.layers.leaky_relu(
self.dc_conv2(
fluid.layers.leaky_relu(self.dc_conv1(x), 0.1)),
0.1)), 0.1)), 0.1)
flow2 += self.dc_conv7(
fluid.layers.leaky_relu(
self.dc_conv6(fluid.layers.leaky_relu(self.dc_conv5(x), 0.1)),
0.1))
if not output_more:
return flow2
else:
return [flow2, flow3, flow4, flow5, flow6]
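# Note on the warp scaling constants above: the factors 0.625, 1.25, 2.5 and
# 5.0 appear to follow the usual PWC-Net convention of div_flow / stride
# (20/32, 20/16, 20/8, 20/4), rescaling the normalized flow prediction into
# pixel units at the resolution of the pyramid level whose features are warped.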
def pwc_dc_net(path=None):
model = PWCDCNet()
if path is not None:
import pickle
data = pickle.load(open(path, 'rb'))
weight_list = []
for k, v in data.items():
weight_list.append(v)
param_dict = {}
for i, param in enumerate(model.parameters()):
param_dict[param.name] = weight_list[i]
model.load_dict(param_dict)
return model
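# A small, hypothetical sanity check (not part of the repo): pwc_dc_net() above
# assumes the pickled weight dict iterates in the same order as
# model.parameters(), so comparing shapes before loading can catch a
# mismatched checkpoint early.
def check_weight_order(model, path):
    import pickle
    import numpy as np
    data = pickle.load(open(path, 'rb'))
    values = list(data.values())
    assert len(values) == len(model.parameters()), 'parameter count mismatch'
    for param, value in zip(model.parameters(), values):
        if tuple(param.shape) != tuple(np.asarray(value).shape):
            print('shape mismatch:', param.name, tuple(param.shape),
                  tuple(np.asarray(value).shape))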
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D
__all__ = ['MultipleBasicBlock', 'MultipleBasicBlock_4']
def conv3x3(in_planes, out_planes, dilation=1, stride=1, param_attr=None):
return Conv2D(in_planes,
out_planes,
filter_size=3,
stride=stride,
padding=int(dilation * (3 - 1) / 2),
dilation=dilation,
bias_attr=False,
param_attr=param_attr)
class BasicBlock(fluid.dygraph.Layer):
expansion = 1
def __init__(self, inplanes, planes, dilation=1, stride=1, downsample=None):
super(BasicBlock, self).__init__()
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=1.0, seed=0))
self.conv1 = conv3x3(inplanes, planes, dilation, stride, param_attr)
self.conv2 = conv3x3(planes, planes, param_attr=param_attr)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
# out = self.bn1(out)
out = fluid.layers.relu(out)
out = self.conv2(out)
# out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = fluid.layers.relu(out)
return out
class MultipleBasicBlock(fluid.dygraph.Layer):
def __init__(self,
input_feature,
block,
num_blocks,
intermediate_feature=64,
dense=True):
super(MultipleBasicBlock, self).__init__()
self.dense = dense
self.num_block = num_blocks
self.intermediate_feature = intermediate_feature
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=1.0, seed=0))
self.block1 = Conv2D(input_feature,
intermediate_feature,
filter_size=7,
stride=1,
padding=3,
bias_attr=True,
param_attr=param_attr)
dim = intermediate_feature
self.block2 = block(dim, dim, dilation=1) if num_blocks >= 2 else None
self.block3 = block(dim, dim, dilation=1) if num_blocks >= 3 else None
self.block4 = block(dim, dim, dilation=1) if num_blocks >= 4 else None
self.block5 = Conv2D(dim, 3, 3, 1, 1)
def forward(self, x):
x = fluid.layers.relu(self.block1(x))
x = self.block2(x) if self.num_block >= 2 else x
x = self.block3(x) if self.num_block >= 3 else x
x = self.block4(x) if self.num_block >= 4 else x
x = self.block5(x)
return x
def MultipleBasicBlock_4(input_feature, intermediate_feature=64):
model = MultipleBasicBlock(input_feature, BasicBlock, 4,
intermediate_feature)
return model
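# Shape sanity check, as a sketch (assumes dygraph mode and a toy input): the
# rectification head maps `input_feature` channels to a 3-channel image at the
# same spatial resolution.
if __name__ == '__main__':
    import numpy as np
    with fluid.dygraph.guard():
        block = MultipleBasicBlock_4(input_feature=16, intermediate_feature=64)
        x = fluid.dygraph.to_variable(np.zeros((1, 16, 32, 32), dtype='float32'))
        print(block(x).shape)  # expected: [1, 3, 32, 32]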
cd pwcnet/correlation_op
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python -c 'import paddle; print(paddle.sysconfig.get_lib())'`
export PYTHONPATH=$PYTHONPATH:`pwd`
cd ../../
VID_PATH=/paddle/work/github/DAIN/data/CBA.mp4
OUT_PATH=output
MODEL_PATH=DAIN_paddle_weight
CUDA_VISIBLE_DEVICES=2 python predict.py \
--time_step 0.125 \
--video_path=$VID_PATH \
--output_path=$OUT_PATH \
--saved_model=$MODEL_PATH
import os, sys
import glob
import shutil
import cv2
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
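# Tiny usage sketch: AverageMeter keeps a running mean, e.g. of per-frame
# processing time.
#   timer = AverageMeter()
#   timer.update(0.05)        # one frame took 0.05 s
#   timer.update(0.07, n=2)   # two frames took 0.07 s each
#   print(timer.avg)          # (0.05 + 0.07 * 2) / 3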
def combine_frames(input, interpolated, combined, num_frames):
frames1 = sorted(glob.glob(os.path.join(input, '*.png')))
frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png')))
num1 = len(frames1)
num2 = len(frames2)
# assert (num1 - 1) * num_frames == num2
for i in range(num1):
src = frames1[i]
imgname = int(src.split('/')[-1].split('.')[-2])
assert i == imgname
dst = os.path.join(combined, '{:08d}.png'.format(i * (num_frames + 1)))
shutil.copy2(src, dst)
if i < num1 - 1:
try:
for k in range(num_frames):
src = frames2[i * num_frames + k]
dst = os.path.join(
combined,
'{:08d}.png'.format(i * (num_frames + 1) + k + 1))
shutil.copy2(src, dst)
except Exception as e:
print(e)
print(len(frames2), num_frames, i, k, i * num_frames + k)
def remove_duplicates(paths):
def dhash(image, hash_size=8):
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
resized = cv2.resize(gray, (hash_size + 1, hash_size))
diff = resized[:, 1:] > resized[:, :-1]
return sum([2**i for (i, v) in enumerate(diff.flatten()) if v])
hashes = {}
image_paths = sorted(glob.glob(os.path.join(paths, '*.png')))
for image_path in image_paths:
image = cv2.imread(image_path)
h = dhash(image)
p = hashes.get(h, [])
p.append(image_path)
hashes[h] = p
for (h, hashed_paths) in hashes.items():
if len(hashed_paths) > 1:
for p in hashed_paths[1:]:
os.remove(p)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
for fid, frame in enumerate(frames):
new_name = '{:08d}'.format(fid) + '.png'
new_name = os.path.join(paths, new_name)
os.rename(frame, new_name)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
return frames
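# Illustration of the dHash used above (a sketch relying only on cv2/numpy):
# visually identical frames map to the same integer, so duplicates land in the
# same bucket and only the first file in each bucket is kept.
if __name__ == '__main__':
    import numpy as np

    def _dhash(image, hash_size=8):
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(gray, (hash_size + 1, hash_size))
        diff = resized[:, 1:] > resized[:, :-1]
        return sum(2**i for (i, v) in enumerate(diff.flatten()) if v)

    frame = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
    assert _dhash(frame) == _dhash(frame.copy())  # exact copies hash equally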
import paddle
from skimage import color
import numpy as np
from PIL import Image
def convertLAB2RGB( lab ):
lab[:, :, 0:1] = lab[:, :, 0:1] * 100 # [0, 1] -> [0, 100]
lab[:, :, 1:3] = np.clip(lab[:, :, 1:3] * 255 - 128, -100, 100) # [0, 1] -> [-128, 128]
rgb = color.lab2rgb( lab.astype(np.float64) )
return rgb
def convertRGB2LABTensor( rgb ):
lab = color.rgb2lab( np.asarray( rgb ) ) # RGB -> LAB L[0, 100] a[-127, 128] b[-128, 127]
ab = np.clip(lab[:, :, 1:3] + 128, 0, 255) # AB --> [0, 255]
ab = paddle.to_tensor(ab.astype('float32')) / 255.
L = lab[:, :, 0] * 2.55 # L --> [0, 255]
L = Image.fromarray( np.uint8( L ) )
L = paddle.to_tensor(np.array(L).astype('float32')[..., np.newaxis] / 255.0)
return L, ab
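# Round-trip sketch (assumes the imports above): split an RGB image into the
# L / ab tensors the colorization model consumes, then rebuild RGB with
# convertLAB2RGB to confirm the shapes and value ranges line up.
if __name__ == '__main__':
    _rgb = Image.fromarray(np.full((16, 16, 3), 128, dtype=np.uint8))
    _L, _ab = convertRGB2LABTensor(_rgb)  # L: (16,16,1), ab: (16,16,2), both in [0,1]
    _lab = np.concatenate([_L.numpy(), _ab.numpy()], axis=-1)
    print(convertLAB2RGB(_lab).shape)  # (16, 16, 3), float RGB in [0, 1]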
def addMergin(img, target_w, target_h, background_color=(0,0,0)):
width, height = img.size
if width==target_w and height==target_h:
return img
scale = max(target_w,target_h)/max(width, height)
width = int(width*scale/16.)*16
height = int(height*scale/16.)*16
img = img.resize((width, height), Image.BICUBIC)
xp = (target_w-width)//2
yp = (target_h-height)//2
result = Image.new(img.mode, (target_w, target_h), background_color)
result.paste(img, (xp, yp))
return result
import cv2
import numpy as np
def read_img(path, size=None, is_gt=False):
"""read image by cv2
return: Numpy float32, HWC, BGR, [0,1]"""
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
img = img.astype(np.float32) / 255.
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
if img.shape[2] > 3:
img = img[:, :, :3]
return img
def get_test_neighbor_frames(crt_i, N, max_n, padding='new_info'):
    """Generate an index list for reading N frames from a sequence of images
    Args:
        crt_i (int): current center index
        N (int): number of frames to read
        max_n (int): total number of frames in the sequence (counted from 1)
        padding (str): padding mode, one of replicate | reflection | new_info | circle
        Example: crt_i = 0, N = 5
        replicate: [0, 0, 0, 1, 2]
        reflection: [2, 1, 0, 1, 2]
        new_info: [4, 3, 0, 1, 2]
        circle: [3, 4, 0, 1, 2]
    Returns:
        return_l (list [int]): a list of indexes
    """
max_n = max_n - 1
n_pad = N // 2
return_l = []
for i in range(crt_i - n_pad, crt_i + n_pad + 1):
if i < 0:
if padding == 'replicate':
add_idx = 0
elif padding == 'reflection':
add_idx = -i
elif padding == 'new_info':
add_idx = (crt_i + n_pad) + (-i)
elif padding == 'circle':
add_idx = N + i
else:
raise ValueError('Wrong padding mode')
elif i > max_n:
if padding == 'replicate':
add_idx = max_n
elif padding == 'reflection':
add_idx = max_n * 2 - i
elif padding == 'new_info':
add_idx = (crt_i - n_pad) - (i - max_n)
elif padding == 'circle':
add_idx = i - N
else:
raise ValueError('Wrong padding mode')
else:
add_idx = i
return_l.append(add_idx)
# name_b = '{:08d}'.format(crt_i)
return return_l
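# Quick check of the padding behaviour documented above, following the
# docstring example (crt_i = 0, N = 5, a 5-frame sequence):
if __name__ == '__main__':
    assert get_test_neighbor_frames(0, 5, 5, padding='replicate') == [0, 0, 0, 1, 2]
    assert get_test_neighbor_frames(0, 5, 5, padding='reflection') == [2, 1, 0, 1, 2]
    assert get_test_neighbor_frames(0, 5, 5, padding='new_info') == [4, 3, 0, 1, 2]
    assert get_test_neighbor_frames(0, 5, 5, padding='circle') == [3, 4, 0, 1, 2]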
class EDVRDataset:
def __init__(self, frame_paths):
self.frames = frame_paths
def __getitem__(self, index):
indexs = get_test_neighbor_frames(index, 5, len(self.frames))
frame_list = []
for i in indexs:
img = read_img(self.frames[i])
frame_list.append(img)
img_LQs = np.stack(frame_list, axis=0)
# BGR to RGB, HWC to CHW, numpy to tensor
img_LQs = img_LQs[:, :, :, [2, 1, 0]]
img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
return img_LQs, self.frames[index]
def __len__(self):
return len(self.frames)
# examples of running programs:
# bash ./run.sh inference EDVR ./configs/edvr_L.yaml
# bash ./run.sh predict EDVR ./configs/edvr_L.yaml
# configs should be ./configs/xxx.yaml
mode=$1
name=$2
configs=$3
save_inference_dir="./data/inference_model"
use_gpu=True
fix_random_seed=False
log_interval=1
valid_interval=1
weights="./weights/paddle_state_dict_L.npz"
export CUDA_VISIBLE_DEVICES=6 #0,1,5,6 fast, 2,3,4,7 slow
# export FLAGS_fast_eager_deletion_mode=1
# export FLAGS_eager_delete_tensor_gb=0.0
# export FLAGS_fraction_of_gpu_memory_to_use=0.98
if [ "$mode"x == "predict"x ]; then
echo $mode $name $configs $weights
if [ "$weights"x != ""x ]; then
python predict.py --model_name=$name \
--config=$configs \
--log_interval=$log_interval \
--video_path='' \
--use_gpu=$use_gpu
else
python predict.py --model_name=$name \
--config=$configs \
--log_interval=$log_interval \
--use_gpu=$use_gpu \
--video_path=''
fi
fi
dataset_params:
root_dir: data/vox-png
frame_shape: [256, 256, 3]
id_sampling: True
pairs_list: data/vox256.csv
augmentation_params:
flip_param:
horizontal_flip: True
time_flip: True
jitter_param:
brightness: 0.1
contrast: 0.1
saturation: 0.1
hue: 0.1
model_params:
common_params:
num_kp: 10
num_channels: 3
estimate_jacobian: True
kp_detector_params:
temperature: 0.1
block_expansion: 32
max_features: 1024
scale_factor: 0.25
num_blocks: 5
generator_params:
block_expansion: 64
max_features: 512
num_down_blocks: 2
num_bottleneck_blocks: 6
estimate_occlusion_map: True
dense_motion_params:
block_expansion: 64
max_features: 1024
num_blocks: 5
scale_factor: 0.25
discriminator_params:
scales: [1]
block_expansion: 32
max_features: 512
num_blocks: 4
sn: True
train_params:
num_epochs: 100
num_repeats: 75
epoch_milestones: [60, 90]
lr_generator: 2.0e-4
lr_discriminator: 2.0e-4
lr_kp_detector: 2.0e-4
batch_size: 40
scales: [1, 0.5, 0.25, 0.125]
checkpoint_freq: 50
transform_params:
sigma_affine: 0.05
sigma_tps: 0.005
points_tps: 5
loss_weights:
generator_gan: 0
discriminator_gan: 1
feature_matching: [10, 10, 10, 10]
perceptual: [10, 10, 10, 10, 10]
equivariance_value: 10
equivariance_jacobian: 10
reconstruction_params:
num_videos: 1000
format: '.mp4'
animate_params:
num_pairs: 50
format: '.mp4'
normalization_params:
adapt_movement_scale: False
use_relative_movement: True
use_relative_jacobian: True
visualizer_params:
kp_size: 5
draw_border: True
colormap: 'gist_rainbow'
# Model notes
# Currently includes DAIN (frame interpolation), DeOldify (colorization), DeepRemaster (denoising + colorization), EDVR (multi-frame video super-resolution) and RealSR (single-image super-resolution)
# Argument notes
# input           path to the input video
# output          directory where the output video is saved
# proccess_order  models to apply, and their order
python tools/video-enhance.py \
--input input.mp4 --output output --proccess_order DeOldify RealSR
import matplotlib # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
matplotlib.use('Agg') #
import os #Licensed under the Apache License, Version 2.0 (the "License");
import sys #you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import argparse
import yaml
import pickle
from argparse import ArgumentParser
from tqdm import tqdm
import imageio
import numpy as np
from skimage.transform import resize
from skimage import img_as_ubyte
import paddle
from ppgan.apps.first_order_predictor import FirstOrderPredictor
from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator
from ppgan.modules.keypoint_detector import KPDetector parser = argparse.ArgumentParser()
from ppgan.utils.animate import normalize_kp parser.add_argument("--config", default=None, help="path to config")
from scipy.spatial import ConvexHull parser.add_argument("--weight_path",
default=None,
paddle.disable_static() help="path to checkpoint to restore")
parser.add_argument("--source_image", type=str, help="path to source image")
if sys.version_info[0] < 3: parser.add_argument("--driving_video", type=str, help="path to driving video")
raise Exception( parser.add_argument("--output", default='output', help="path to output")
"You must use Python 3 or higher. Recommended version is Python 3.7") parser.add_argument("--relative",
dest="relative",
action="store_true",
def load_checkpoints(config_path, checkpoint_path, cpu=False): help="use relative or absolute keypoint coordinates")
parser.add_argument(
with open(config_path) as f: "--adapt_scale",
config = yaml.load(f) dest="adapt_scale",
action="store_true",
generator = OcclusionAwareGenerator( help="adapt movement scale based on convex hull of keypoints")
**config['model_params']['generator_params'],
**config['model_params']['common_params']) parser.add_argument(
"--find_best_frame",
kp_detector = KPDetector(**config['model_params']['kp_detector_params'], dest="find_best_frame",
**config['model_params']['common_params']) action="store_true",
help=
checkpoint = pickle.load(open(checkpoint_path, 'rb')) "Generate from the frame that is the most alligned with source. (Only for faces, requires face_aligment lib)"
generator.set_state_dict(checkpoint['generator']) )
kp_detector.set_state_dict(checkpoint['kp_detector']) parser.add_argument("--best_frame",
dest="best_frame",
generator.eval() type=int,
kp_detector.eval() default=None,
help="Set frame to start from.")
return generator, kp_detector parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")
parser.set_defaults(relative=False)
def make_animation(source_image, parser.set_defaults(adapt_scale=False)
driving_video,
generator,
kp_detector,
relative=True,
adapt_movement_scale=True,
cpu=False):
with paddle.no_grad():
predictions = []
source = paddle.to_tensor(source_image[np.newaxis].astype(
np.float32)).transpose([0, 3, 1, 2])
# if not cpu:
# source = source.cuda()
driving = paddle.to_tensor(
np.array(driving_video)[np.newaxis].astype(np.float32)).transpose(
[0, 4, 1, 2, 3])
kp_source = kp_detector(source)
kp_driving_initial = kp_detector(driving[:, :, 0])
for frame_idx in tqdm(range(driving.shape[2])):
driving_frame = driving[:, :, frame_idx]
kp_driving = kp_detector(driving_frame)
kp_norm = normalize_kp(kp_source=kp_source,
kp_driving=kp_driving,
kp_driving_initial=kp_driving_initial,
use_relative_movement=relative,
use_relative_jacobian=relative,
adapt_movement_scale=adapt_movement_scale)
out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
predictions.append(
np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0])
return predictions
def find_best_frame(source, driving, cpu=False):
import face_alignment
def normalize_kp(kp):
kp = kp - kp.mean(axis=0, keepdims=True)
area = ConvexHull(kp[:, :2]).volume
area = np.sqrt(area)
kp[:, :2] = kp[:, :2] / area
return kp
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
flip_input=True,
device='cpu' if cpu else 'cuda')
kp_source = fa.get_landmarks(255 * source)[0]
kp_source = normalize_kp(kp_source)
norm = float('inf')
frame_num = 0
for i, image in tqdm(enumerate(driving)):
kp_driving = fa.get_landmarks(255 * image)[0]
kp_driving = normalize_kp(kp_driving)
new_norm = (np.abs(kp_source - kp_driving)**2).sum()
if new_norm < norm:
norm = new_norm
frame_num = i
return frame_num
if __name__ == "__main__": if __name__ == "__main__":
parser = ArgumentParser() args = parser.parse_args()
parser.add_argument("--config", required=True, help="path to config")
parser.add_argument("--checkpoint", if args.cpu:
default='vox-cpk.pth.tar', paddle.set_device('cpu')
help="path to checkpoint to restore")
predictor = FirstOrderPredictor(output=args.output,
parser.add_argument("--source_image", weight_path=args.weight_path,
default='sup-mat/source.png', config=args.config,
help="path to source image") relative=args.relative,
parser.add_argument("--driving_video", adapt_scale=args.adapt_scale,
default='sup-mat/source.png', find_best_frame=args.find_best_frame,
help="path to driving video") best_frame=args.best_frame)
parser.add_argument("--result_video", predictor.run(args.source_image, args.driving_video)
default='result.mp4',
help="path to output")
parser.add_argument("--relative",
dest="relative",
action="store_true",
help="use relative or absolute keypoint coordinates")
parser.add_argument(
"--adapt_scale",
dest="adapt_scale",
action="store_true",
help="adapt movement scale based on convex hull of keypoints")
parser.add_argument(
"--find_best_frame",
dest="find_best_frame",
action="store_true",
help=
"Generate from the frame that is the most alligned with source. (Only for faces, requires face_aligment lib)"
)
parser.add_argument("--best_frame",
dest="best_frame",
type=int,
default=None,
help="Set frame to start from.")
parser.add_argument("--cpu",
dest="cpu",
action="store_true",
help="cpu mode.")
parser.set_defaults(relative=False)
parser.set_defaults(adapt_scale=False)
opt = parser.parse_args()
source_image = imageio.imread(opt.source_image)
reader = imageio.get_reader(opt.driving_video)
fps = reader.get_meta_data()['fps']
driving_video = []
try:
for im in reader:
driving_video.append(im)
except RuntimeError:
pass
reader.close()
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [
resize(frame, (256, 256))[..., :3] for frame in driving_video
]
generator, kp_detector = load_checkpoints(config_path=opt.config,
checkpoint_path=opt.checkpoint,
cpu=opt.cpu)
if opt.find_best_frame or opt.best_frame is not None:
i = opt.best_frame if opt.best_frame is not None else find_best_frame(
source_image, driving_video, cpu=opt.cpu)
print("Best frame: " + str(i))
driving_forward = driving_video[i:]
driving_backward = driving_video[:(i + 1)][::-1]
predictions_forward = make_animation(
source_image,
driving_forward,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
predictions_backward = make_animation(
source_image,
driving_backward,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
predictions = predictions_backward[::-1] + predictions_forward[1:]
else:
predictions = make_animation(source_image,
driving_video,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
imageio.mimsave(opt.result_video,
[img_as_ubyte(frame) for frame in predictions],
fps=fps)
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import argparse
from pathlib import Path
from PIL import Image
from fire import Fire
import numpy as np
import paddle
import paddle.vision.transforms as T
import ppgan.faceutils as futils
from ppgan.utils.options import parse_args
from ppgan.utils.config import get_config
from ppgan.utils.setup import setup
from ppgan.utils.filesystem import load
from ppgan.engine.trainer import Trainer
from ppgan.models.builder import build_model
from ppgan.utils.preprocess import *
def toImage(net_output):
img = net_output.squeeze(0).transpose(
(1, 2, 0)).numpy() # [1,c,h,w]->[h,w,c]
img = (img * 255.0).clip(0, 255)
img = np.uint8(img)
img = Image.fromarray(img, mode='RGB')
return img
def mask2image(mask: np.ndarray, format="HWC"):
H, W = mask.shape
canvas = np.zeros((H, W, 3), dtype=np.uint8)
for i in range(int(mask.max())):
color = np.random.rand(1, 1, 3) * 255
canvas += (mask == i)[:, :, None] * color.astype(np.uint8)
return canvas
class PreProcess:
def __init__(self, config, need_parser=True):
self.img_size = 256
self.transform = transform = T.Compose([
T.Resize(size=256),
T.Permute(to_rgb=False),
])
self.norm = T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])
if need_parser:
self.face_parser = futils.mask.FaceParser()
self.up_ratio = 0.6 / 0.85
self.down_ratio = 0.2 / 0.85
self.width_ratio = 0.2 / 0.85
def __call__(self, image):
face = futils.dlib.detect(image)
if not face:
return
face_on_image = face[0]
image, face, crop_face = futils.dlib.crop(image, face_on_image,
self.up_ratio,
self.down_ratio,
self.width_ratio)
np_image = np.array(image)
mask = self.face_parser.parse(
np.float32(cv2.resize(np_image, (512, 512))))
mask = cv2.resize(mask.numpy(), (self.img_size, self.img_size),
interpolation=cv2.INTER_NEAREST)
mask = mask.astype(np.uint8)
mask_color = mask2image(mask)
cv2.imwrite('mask_temp.png', mask_color)
mask_tensor = paddle.to_tensor(mask)
lms = futils.dlib.landmarks(image, face) * self.img_size / image.width
lms = lms.round()
P_np = generate_P_from_lmks(lms, self.img_size, self.img_size,
self.img_size)
mask_aug = generate_mask_aug(mask, lms)
image = self.transform(np_image)
return [
self.norm(image),
np.float32(mask_aug),
np.float32(P_np),
np.float32(mask)
], face_on_image, crop_face
class PostProcess:
def __init__(self, config):
self.denoise = True
self.img_size = 256
def __call__(self, source: Image, result: Image):
        # TODO: refactor -> name, resize
source = np.array(source)
result = np.array(result)
height, width = source.shape[:2]
small_source = cv2.resize(source, (self.img_size, self.img_size))
        laplacian_diff = source.astype(np.float64) - cv2.resize(
            small_source, (width, height)).astype(np.float64)
result = (cv2.resize(result,
(width, height)) + laplacian_diff).round().clip(
0, 255).astype(np.uint8)
if self.denoise:
result = cv2.fastNlMeansDenoisingColored(result)
result = Image.fromarray(result).convert('RGB')
return result
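# The PostProcess step above re-injects high-frequency detail: it upsamples the
# 256x256 network output back to the source resolution and adds the residual
# between the original photo and its blurred (down/up-sampled) copy. A compact
# numpy-only restatement of that idea (illustrative, not the class itself):
def _reinject_detail(source_bgr, result_256, size=256):
    h, w = source_bgr.shape[:2]
    small = cv2.resize(source_bgr, (size, size))
    residual = source_bgr.astype(np.float64) - cv2.resize(small, (w, h)).astype(np.float64)
    out = cv2.resize(result_256, (w, h)).astype(np.float64) + residual
    return np.clip(np.round(out), 0, 255).astype(np.uint8)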
class Inference:
def __init__(self, config, model_path=''):
self.model = build_model(config)
self.preprocess = PreProcess(config)
self.model_path = model_path
def transfer(self, source, reference, with_face=False):
source_input, face, crop_face = self.preprocess(source)
reference_input, face, crop_face = self.preprocess(reference)
consis_mask = np.float32(
calculate_consis_mask(source_input[1], reference_input[1]))
consis_mask = paddle.to_tensor(np.expand_dims(consis_mask, 0))
if not (source_input and reference_input):
if with_face:
return None, None
return
for i in range(len(source_input) - 1):
source_input[i] = paddle.to_tensor(
np.expand_dims(source_input[i], 0))
for i in range(len(reference_input) - 1):
reference_input[i] = paddle.to_tensor(
np.expand_dims(reference_input[i], 0))
input_data = {
'image_A': source_input[0],
'image_B': reference_input[0],
'mask_A_aug': source_input[1],
'mask_B_aug': reference_input[1],
'P_A': source_input[2],
'P_B': reference_input[2],
'consis_mask': consis_mask
}
state_dicts = load(self.model_path)
net = getattr(self.model, 'netG')
net.set_dict(state_dicts['netG'])
result, _ = self.model.test(input_data)
print('result shape: ', result.shape)
min_, max_ = result.min(), result.max()
result += -min_
result = paddle.divide(result, max_ - min_ + 1e-5)
img = toImage(result)
if with_face:
return img, crop_face
img.save('before.png')
return img
def main(args, cfg, save_path='transferred_image.png'):
setup(args, cfg)
inference = Inference(cfg, args.model_path)
postprocess = PostProcess(cfg)
source = Image.open(args.source_path).convert("RGB")
reference_paths = list(Path(args.reference_dir).glob("*"))
np.random.shuffle(reference_paths)
for reference_path in reference_paths:
if not reference_path.is_file():
print(reference_path, "is not a valid file.")
continue
reference = Image.open(reference_path).convert("RGB")
        # Transfer makeup from the reference image to the source image (PSGAN).
image, face = inference.transfer(source, reference, with_face=True)
image.save('before.png')
source_crop = source.crop(
(face.left(), face.top(), face.right(), face.bottom()))
image = postprocess(source_crop, image)
image.save(save_path)
if __name__ == '__main__':
args = parse_args()
cfg = get_config(args.config_file)
main(args, cfg)
import sys
sys.path.append('.')
import argparse
import paddle
from DAIN.predict import VideoFrameInterp
from DeepRemaster.predict import DeepReasterPredictor
from DeOldify.predict import DeOldifyPredictor
from RealSR.predict import RealSRPredictor
from EDVR.predict import EDVRPredictor
parser = argparse.ArgumentParser(description='Fix video')
parser.add_argument('--input', type=str, default=None, help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--DAIN_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--DeepRemaster_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--DeOldify_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--RealSR_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--EDVR_weight',
type=str,
default=None,
help='Path to model weight')
# DAIN args
parser.add_argument('--time_step',
type=float,
default=0.5,
help='choose the time steps')
# DeepRemaster args
parser.add_argument('--reference_dir',
type=str,
default=None,
help='Path to the reference image directory')
parser.add_argument('--colorization',
action='store_true',
default=False,
help='Remaster with colorization')
parser.add_argument('--mindim',
type=int,
default=360,
help='Length of minimum image edges')
# DeOldify args
parser.add_argument('--render_factor',
type=int,
default=32,
help='model inputsize=render_factor*16')
# supported model names for --proccess_order: [DAIN, DeepRemaster, DeOldify, RealSR, EDVR]
parser.add_argument('--proccess_order',
type=str,
default='none',
nargs='+',
help='Process order')
if __name__ == "__main__":
args = parser.parse_args()
orders = args.proccess_order
temp_video_path = None
for order in orders:
        print('Model {} process start...'.format(order))
if temp_video_path is None:
temp_video_path = args.input
if order == 'DAIN':
predictor = VideoFrameInterp(args.time_step,
args.DAIN_weight,
temp_video_path,
output_path=args.output)
frames_path, temp_video_path = predictor.run()
elif order == 'DeepRemaster':
paddle.disable_static()
predictor = DeepReasterPredictor(
temp_video_path,
args.output,
weight_path=args.DeepRemaster_weight,
colorization=args.colorization,
reference_dir=args.reference_dir,
mindim=args.mindim)
frames_path, temp_video_path = predictor.run()
paddle.enable_static()
elif order == 'DeOldify':
paddle.disable_static()
predictor = DeOldifyPredictor(temp_video_path,
args.output,
weight_path=args.DeOldify_weight)
frames_path, temp_video_path = predictor.run()
paddle.enable_static()
elif order == 'RealSR':
paddle.disable_static()
predictor = RealSRPredictor(temp_video_path,
args.output,
weight_path=args.RealSR_weight)
frames_path, temp_video_path = predictor.run()
paddle.enable_static()
elif order == 'EDVR':
predictor = EDVRPredictor(temp_video_path,
args.output,
weight_path=args.EDVR_weight)
frames_path, temp_video_path = predictor.run()
print('Model {} output frames path:'.format(order), frames_path)
print('Model {} output video path:'.format(order), temp_video_path)
        print('Model {} process done!'.format(order))
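# Example invocation (paths are placeholders): interpolate frames first, then
# colorize, then upscale -- each stage consumes the video produced by the
# previous stage via temp_video_path.
#   python tools/video-enhance.py --input old_film.mp4 --output output \
#       --proccess_order DAIN DeOldify RealSR --time_step 0.25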
...@@ -36,16 +36,18 @@ dataset: ...@@ -36,16 +36,18 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: False serial_batches: False
pool_size: 50 pool_size: 50
transform: transforms:
load_size: 286 - name: Resize
crop_size: 256 size: [286, 286]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: False - name: RandomCrop
normalize: output_size: [256, 256]
mean: - name: RandomHorizontalFlip
(127.5, 127.5, 127.5) prob: 0.5
std: - name: Permute
(127.5, 127.5, 127.5) - name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
test: test:
name: SingleDataset name: SingleDataset
dataroot: data/cityscapes/testB dataroot: data/cityscapes/testB
...@@ -55,17 +57,14 @@ dataset: ...@@ -55,17 +57,14 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: False serial_batches: False
pool_size: 50 pool_size: 50
transform: transforms:
load_size: 256 - name: Resize
crop_size: 256 size: [256, 256]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: True - name: Permute
normalize: - name: Normalize
mean: mean: [127.5, 127.5, 127.5]
(127.5, 127.5, 127.5) std: [127.5, 127.5, 127.5]
std:
(127.5, 127.5, 127.5)
optimizer: optimizer:
name: Adam name: Adam
......
...@@ -35,16 +35,18 @@ dataset: ...@@ -35,16 +35,18 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: False serial_batches: False
pool_size: 50 pool_size: 50
transform: transforms:
load_size: 286 - name: Resize
crop_size: 256 size: [286, 286]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: False - name: RandomCrop
normalize: output_size: [256, 256]
mean: - name: RandomHorizontalFlip
(127.5, 127.5, 127.5) prob: 0.5
std: - name: Permute
(127.5, 127.5, 127.5) - name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
test: test:
name: SingleDataset name: SingleDataset
dataroot: data/horse2zebra/testA dataroot: data/horse2zebra/testA
...@@ -55,15 +57,14 @@ dataset: ...@@ -55,15 +57,14 @@ dataset:
serial_batches: False serial_batches: False
pool_size: 50 pool_size: 50
transform: transform:
load_size: 256 transform:
crop_size: 256 - name: Resize
preprocess: resize_and_crop size: [256, 256]
no_flip: True interpolation: 2 #cv2.INTER_CUBIC
normalize: - name: Permute
mean: - name: Normalize
(127.5, 127.5, 127.5) mean: [127.5, 127.5, 127.5]
std: std: [127.5, 127.5, 127.5]
(127.5, 127.5, 127.5)
optimizer: optimizer:
name: Adam name: Adam
......
epochs: 100
isTrain: True
output_dir: tmp
checkpoints_dir: checkpoints
lambda_A: 10.0
lambda_B: 10.0
lambda_identity: 0.5
model:
name: MakeupModel
generator:
name: GeneratorPSGANAttention
conv_dim: 64
repeat_num: 6
discriminator:
name: NLayerDiscriminator
ndf: 64
n_layers: 3
input_nc: 3
norm_type: spectral
gan_mode: lsgan
dataset:
train:
name: MakeupDataset
trans_size: 256
dataroot: MT-Dataset
cls_list: [non-makeup, makeup]
phase: train
pool_size: 16
test:
name: MakeupDataset
trans_size: 256
dataroot: MT-Dataset
cls_list: [non-makeup, makeup]
phase: test
pool_size: 16
optimizer:
name: Adam
beta1: 0.5
lr_scheduler:
name: linear
learning_rate: 0.0002
start_epoch: 100
decay_epochs: 100
log_config:
interval: 10
visiual_interval: 500
snapshot_config:
interval: 1
...@@ -33,16 +33,23 @@ dataset: ...@@ -33,16 +33,23 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: False serial_batches: False
pool_size: 0 pool_size: 0
transform: transforms:
load_size: 286 - name: Resize
crop_size: 256 size: [286, 286]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: False keys: [image, image]
normalize: - name: PairedRandomCrop
mean: output_size: [256, 256]
(127.5, 127.5, 127.5) keys: [image, image]
std: - name: PairedRandomHorizontalFlip
(127.5, 127.5, 127.5) prob: 0.5
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
test: test:
name: PairedDataset name: PairedDataset
dataroot: data/cityscapes/ dataroot: data/cityscapes/
...@@ -53,16 +60,18 @@ dataset: ...@@ -53,16 +60,18 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: True serial_batches: True
pool_size: 50 pool_size: 50
transform: transforms:
load_size: 256 - name: Resize
crop_size: 256 size: [256, 256]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: True keys: [image, image]
normalize: - name: Permute
mean: keys: [image, image]
(127.5, 127.5, 127.5) - name: Normalize
std: mean: [127.5, 127.5, 127.5]
(127.5, 127.5, 127.5) std: [127.5, 127.5, 127.5]
keys: [image, image]
optimizer: optimizer:
name: Adam name: Adam
......
...@@ -32,16 +32,23 @@ dataset: ...@@ -32,16 +32,23 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: False serial_batches: False
pool_size: 0 pool_size: 0
transform: transforms:
load_size: 286 - name: Resize
crop_size: 256 size: [286, 286]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: False keys: [image, image]
normalize: - name: PairedRandomCrop
mean: output_size: [256, 256]
(127.5, 127.5, 127.5) keys: [image, image]
std: - name: PairedRandomHorizontalFlip
(127.5, 127.5, 127.5) prob: 0.5
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
test: test:
name: PairedDataset name: PairedDataset
dataroot: data/cityscapes/ dataroot: data/cityscapes/
...@@ -52,16 +59,17 @@ dataset: ...@@ -52,16 +59,17 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: True serial_batches: True
pool_size: 50 pool_size: 50
transform: transforms:
load_size: 256 - name: Resize
crop_size: 256 size: [256, 256]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: True keys: [image, image]
normalize: - name: Permute
mean: keys: [image, image]
(127.5, 127.5, 127.5) - name: Normalize
std: mean: [127.5, 127.5, 127.5]
(127.5, 127.5, 127.5) std: [127.5, 127.5, 127.5]
keys: [image, image]
optimizer: optimizer:
name: Adam name: Adam
......
...@@ -32,16 +32,23 @@ dataset: ...@@ -32,16 +32,23 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: False serial_batches: False
pool_size: 0 pool_size: 0
transform: transforms:
load_size: 286 - name: Resize
crop_size: 256 size: [286, 286]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: False keys: [image, image]
normalize: - name: PairedRandomCrop
mean: output_size: [256, 256]
(127.5, 127.5, 127.5) keys: [image, image]
std: - name: PairedRandomHorizontalFlip
(127.5, 127.5, 127.5) prob: 0.5
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
test: test:
name: PairedDataset name: PairedDataset
dataroot: data/facades/ dataroot: data/facades/
...@@ -52,16 +59,17 @@ dataset: ...@@ -52,16 +59,17 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: True serial_batches: True
pool_size: 50 pool_size: 50
transform: transforms:
load_size: 256 - name: Resize
crop_size: 256 size: [256, 256]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: True keys: [image, image]
normalize: - name: Permute
mean: keys: [image, image]
(127.5, 127.5, 127.5) - name: Normalize
std: mean: [127.5, 127.5, 127.5]
(127.5, 127.5, 127.5) std: [127.5, 127.5, 127.5]
keys: [image, image]
optimizer: optimizer:
name: Adam name: Adam
......
from .dain_predictor import DAINPredictor
from .deepremaster_predictor import DeepRemasterPredictor
from .deoldify_predictor import DeOldifyPredictor
from .realsr_predictor import RealSRPredictor
from .edvr_predictor import EDVRPredictor
from .first_order_predictor import FirstOrderPredictor
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import paddle
class BasePredictor(object):
def __init__(self):
pass
def build_inference_model(self):
if paddle.in_dynamic_mode():
# todo self.model = build_model(self.cfg)
pass
else:
place = paddle.fluid.framework._current_expected_place()
self.exe = paddle.fluid.Executor(place)
file_names = os.listdir(self.weight_path)
for file_name in file_names:
if file_name.find('model') > -1:
model_file = file_name
elif file_name.find('param') > -1:
param_file = file_name
self.program, self.feed_names, self.fetch_targets = paddle.static.load_inference_model(
dirname=self.weight_path,
executor=self.exe,
model_filename=model_file,
params_filename=param_file)
print(self.feed_names)
def base_forward(self, inputs):
if paddle.in_dynamic_mode():
out = self.model(inputs)
else:
feed_dict = {}
if isinstance(inputs, dict):
feed_dict = inputs
elif isinstance(inputs, (list, tuple)):
for i, feed_name in enumerate(self.feed_names):
feed_dict[feed_name] = inputs[i]
else:
feed_dict[self.feed_names[0]] = inputs
out = self.exe.run(self.program,
fetch_list=self.fetch_targets,
feed=feed_dict)
return out
def is_video(self, input):
try:
cv2.VideoCapture(input)
return True
except:
return False
def run(self):
raise NotImplementedError
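# Hypothetical subclass sketch: the concrete predictors below follow this
# pattern -- set self.weight_path, call build_inference_model(), then hand
# numpy inputs to base_forward() inside run().
class _ExamplePredictor(BasePredictor):
    def __init__(self, weight_path):
        super(_ExamplePredictor, self).__init__()
        self.weight_path = weight_path
        self.build_inference_model()

    def run(self, inputs):
        return self.base_forward(inputs)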
import os, sys # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
import math #
import random #Licensed under the Apache License, Version 2.0 (the "License");
import time #you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import glob import glob
import shutil import shutil
import numpy as np import numpy as np
from tqdm import tqdm
from imageio import imread, imsave from imageio import imread, imsave
import cv2
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import video2frames, frames2video
from .base_predictor import BasePredictor
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
class DAINPredictor(BasePredictor):
def __init__(self,
output_path='output',
weight_path=None,
time_step=None,
use_gpu=True,
key_frame_thread=0.,
remove_duplicates=False):
self.output_path = os.path.join(output_path, 'DAIN')
if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)
self.weight_path = weight_path
self.time_step = time_step
self.key_frame_thread = key_frame_thread
self.remove_duplicates = remove_duplicates
self.build_inference_model()
def run(self, video_path):
frame_path_input = os.path.join(self.output_path, 'frames-input')
frame_path_interpolated = os.path.join(self.output_path,
'frames-interpolated')
frame_path_combined = os.path.join(self.output_path, 'frames-combined')
video_path_output = os.path.join(self.output_path, 'videos-output')
if not os.path.exists(self.output_path):
os.makedirs(self.output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
timestep = self.time_step
num_frames = int(1.0 / timestep) - 1
import networks cap = cv2.VideoCapture(video_path)
from util import *
from my_args import args
if __name__ == '__main__':
DO_MiddleBurryOther = True
video_path = args.video_path
output_path = args.output_path
frame_path_input = os.path.join(output_path, 'frames-input')
frame_path_interpolated = os.path.join(output_path, 'frames-interpolated')
frame_path_combined = os.path.join(output_path, 'frames-combined')
video_path_input = os.path.join(output_path, 'videos-input')
video_path_output = os.path.join(output_path, 'videos-output')
if not os.path.exists(output_path):
os.makedirs(output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_input):
os.makedirs(video_path_input)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
args.KEY_FRAME_THREAD = 0.
saved_model = args.saved_model
timestep = args.time_step
num_frames = int(1.0 / timestep) - 1
image = fluid.data(name='image',
shape=[2, 1, args.channels, -1, -1],
dtype='float32')
DAIN = networks.__dict__["DAIN_slowmotion"](channel=args.channels,
filter_size=args.filter_size,
timestep=args.time_step,
training=False)
out = DAIN(image)
out = out[0][1]
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
fetch_list = [out.name]
inference_program = fluid.default_main_program().clone(for_test=True)
inference_program = fluid.io.load_persistables(exe, saved_model,
inference_program)
if not DO_MiddleBurryOther:
sys.exit()
if video_path.endswith('.mp4'):
videos = [video_path]
else:
videos = sorted(glob.glob(os.path.join(video_path, '*.mp4')))
for cnt, vid in enumerate(videos):
print("Interpolating video:", vid)
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS) fps = cap.get(cv2.CAP_PROP_FPS)
print("Old fps (frame rate): ", fps) print("Old fps (frame rate): ", fps)
timestep = args.time_step
times_interp = int(1.0 / timestep) times_interp = int(1.0 / timestep)
r2 = str(int(fps) * times_interp) r2 = str(int(fps) * times_interp)
print("New fps (frame rate): ", r2) print("New fps (frame rate): ", r2)
# set start and end of video out_path = video2frames(video_path, frame_path_input)
#ss = 0
#t = 10
#ss = time.strftime('%H:%M:%S', time.gmtime(ss))
#t = time.strftime('%H:%M:%S', time.gmtime(t))
#print(r, ss, t)
r = None
ss = None
t = None
out_path = dump_frames_ffmpeg(vid, frame_path_input, r, ss, t) vidname = video_path.split('/')[-1].split('.')[0]
vidname = vid.split('/')[-1].split('.')[0]
tot_timer = AverageMeter()
proc_timer = AverageMeter()
end = time.time()
frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
orig_frames = len(frames)
need_frames = orig_frames * times_interp
if self.remove_duplicates:
frames = self.remove_duplicate_frames(out_path)
left_frames = len(frames)
timestep = left_frames / need_frames
num_frames = int(1.0 / timestep) - 1
img = imread(frames[0]) img = imread(frames[0])
...@@ -110,7 +99,7 @@ if __name__ == '__main__': ...@@ -110,7 +99,7 @@ if __name__ == '__main__':
int_height = img.shape[0] int_height = img.shape[0]
channel = img.shape[2] channel = img.shape[2]
if not channel == 3: if not channel == 3:
continue return
if int_width != ((int_width >> 7) << 7): if int_width != ((int_width >> 7) << 7):
int_width_pad = (((int_width >> 7) + 1) << 7) # more than necessary int_width_pad = (((int_width >> 7) + 1) << 7) # more than necessary
...@@ -132,16 +121,13 @@ if __name__ == '__main__': ...@@ -132,16 +121,13 @@ if __name__ == '__main__':
padding_bottom = 32 padding_bottom = 32
frame_num = len(frames) frame_num = len(frames)
print(os.path.join(frame_path_input, vidname, '*.png'))
print('processing {} frames, from video: {}'.format(frame_num, vid))
if not os.path.exists(os.path.join(frame_path_interpolated, vidname)): if not os.path.exists(os.path.join(frame_path_interpolated, vidname)):
os.makedirs(os.path.join(frame_path_interpolated, vidname)) os.makedirs(os.path.join(frame_path_interpolated, vidname))
if not os.path.exists(os.path.join(frame_path_combined, vidname)): if not os.path.exists(os.path.join(frame_path_combined, vidname)):
os.makedirs(os.path.join(frame_path_combined, vidname)) os.makedirs(os.path.join(frame_path_combined, vidname))
for i in range(frame_num - 1): for i in tqdm(range(frame_num - 1)):
print(frames[i])
first = frames[i] first = frames[i]
second = frames[i + 1] second = frames[i + 1]
...@@ -155,79 +141,116 @@ if __name__ == '__main__': ...@@ -155,79 +141,116 @@ if __name__ == '__main__':
img_second_gray = img_second_gray.flatten(order='C') img_second_gray = img_second_gray.flatten(order='C')
corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1] corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1]
key_frame = False key_frame = False
if corr < args.KEY_FRAME_THREAD: if corr < self.key_frame_thread:
key_frame = True key_frame = True
'''-------------------------------------------------------''' '''-------------------------------------------------------'''
X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255 X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255 X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
if key_frame: assert (X0.shape[1] == X1.shape[1])
y_ = [ assert (X0.shape[2] == X1.shape[2])
np.transpose(255.0 * X0.clip(0, 1.0), (1, 2, 0))
for i in range(num_frames) X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
] (padding_left, padding_right)), mode='edge')
else: X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
assert (X0.shape[1] == X1.shape[1]) (padding_left, padding_right)), mode='edge')
assert (X0.shape[2] == X1.shape[2])
X0 = np.expand_dims(X0, axis=0)
print("size before padding ", X0.shape) X1 = np.expand_dims(X1, axis=0)
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge') X0 = np.expand_dims(X0, axis=0)
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \ X1 = np.expand_dims(X1, axis=0)
(padding_left, padding_right)), mode='edge')
print("size after padding ", X0.shape) X = np.concatenate((X0, X1), axis=0)
X0 = np.expand_dims(X0, axis=0) o = self.base_forward(X)
X1 = np.expand_dims(X1, axis=0)
y_ = o[0]
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0) y_ = [
np.transpose(
X = np.concatenate((X0, X1), axis=0) 255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
proc_end = time.time() padding_left:padding_left + int_width],
o = exe.run(inference_program, (1, 2, 0)) for item in y_
fetch_list=fetch_list, ]
feed={"image": X}) time_offsets = [kk * timestep for kk in range(1, 1 + num_frames, 1)]
y_ = o[0]
count = 1
proc_timer.update(time.time() - proc_end) for item, time_offset in zip(y_, time_offsets):
tot_timer.update(time.time() - end) out_dir = os.path.join(frame_path_interpolated, vidname,
end = time.time() "{:0>6d}_{:0>4d}.png".format(i, count))
print("*******current image process time \t " + count = count + 1
str(time.time() - proc_end) + "s ******") imsave(out_dir, np.round(item).astype(np.uint8))
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>4d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
timestep = args.time_step
num_frames = int(1.0 / timestep) - 1 num_frames = int(1.0 / timestep) - 1
input_dir = os.path.join(frame_path_input, vidname) input_dir = os.path.join(frame_path_input, vidname)
interpolated_dir = os.path.join(frame_path_interpolated, vidname) interpolated_dir = os.path.join(frame_path_interpolated, vidname)
combined_dir = os.path.join(frame_path_combined, vidname) combined_dir = os.path.join(frame_path_combined, vidname)
combine_frames(input_dir, interpolated_dir, combined_dir, num_frames) self.combine_frames(input_dir, interpolated_dir, combined_dir,
num_frames)
frame_pattern_combined = os.path.join(frame_path_combined, vidname, frame_pattern_combined = os.path.join(frame_path_combined, vidname,
'%08d.png') '%08d.png')
video_pattern_output = os.path.join(video_path_output, vidname + '.mp4') video_pattern_output = os.path.join(video_path_output, vidname + '.mp4')
if os.path.exists(video_pattern_output): if os.path.exists(video_pattern_output):
os.remove(video_pattern_output) os.remove(video_pattern_output)
frames_to_video_ffmpeg(frame_pattern_combined, video_pattern_output, r2) frames2video(frame_pattern_combined, video_pattern_output, r2)
return frame_pattern_combined, video_pattern_output
def combine_frames(self, input, interpolated, combined, num_frames):
frames1 = sorted(glob.glob(os.path.join(input, '*.png')))
frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png')))
num1 = len(frames1)
num2 = len(frames2)
for i in range(num1):
src = frames1[i]
imgname = int(src.split('/')[-1].split('.')[-2])
assert i == imgname
dst = os.path.join(combined,
'{:08d}.png'.format(i * (num_frames + 1)))
shutil.copy2(src, dst)
if i < num1 - 1:
try:
for k in range(num_frames):
src = frames2[i * num_frames + k]
dst = os.path.join(
combined,
'{:08d}.png'.format(i * (num_frames + 1) + k + 1))
shutil.copy2(src, dst)
except Exception as e:
print(e)
def remove_duplicate_frames(self, paths):
def dhash(image, hash_size=8):
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
resized = cv2.resize(gray, (hash_size + 1, hash_size))
diff = resized[:, 1:] > resized[:, :-1]
return sum([2**i for (i, v) in enumerate(diff.flatten()) if v])
hashes = {}
image_paths = sorted(glob.glob(os.path.join(paths, '*.png')))
for image_path in image_paths:
image = cv2.imread(image_path)
h = dhash(image)
p = hashes.get(h, [])
p.append(image_path)
hashes[h] = p
for (h, hashed_paths) in hashes.items():
if len(hashed_paths) > 1:
for p in hashed_paths[1:]:
os.remove(p)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
for fid, frame in enumerate(frames):
new_name = '{:08d}'.format(fid) + '.png'
new_name = os.path.join(paths, new_name)
os.rename(frame, new_name)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
return frames
import os # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
import sys #
#Licensed under the Apache License, Version 2.0 (the "License");
cur_path = os.path.abspath(os.path.dirname(__file__)) #you may not use this file except in compliance with the License.
sys.path.append(cur_path) #You may obtain a copy of the License at
#
import paddle # http://www.apache.org/licenses/LICENSE-2.0
import paddle.nn as nn #
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2 import cv2
from PIL import Image import subprocess
import numpy as np import numpy as np
from tqdm import tqdm from tqdm import tqdm
import argparse from PIL import Image
import subprocess from skimage import color
import utils
import paddle
from ppgan.models.generators.remaster import NetworkR, NetworkC from ppgan.models.generators.remaster import NetworkR, NetworkC
from paddle.utils.download import get_path_from_url from paddle.utils.download import get_path_from_url
from .base_predictor import BasePredictor
DEEPREMASTER_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams' DEEPREMASTER_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams'
parser = argparse.ArgumentParser(description='Remastering')
parser.add_argument('--input', type=str, default=None, help='Input video') def convertLAB2RGB(lab):
parser.add_argument('--output', type=str, default='output', help='output dir') lab[:, :, 0:1] = lab[:, :, 0:1] * 100 # [0, 1] -> [0, 100]
parser.add_argument('--reference_dir', lab[:, :, 1:3] = np.clip(lab[:, :, 1:3] * 255 - 128, -100,
type=str, 100) # [0, 1] -> [-128, 128]
default=None, rgb = color.lab2rgb(lab.astype(np.float64))
help='Path to the reference image directory') return rgb
parser.add_argument('--colorization',
action='store_true',
default=False, def convertRGB2LABTensor(rgb):
help='Remaster without colorization') lab = color.rgb2lab(
parser.add_argument('--mindim', np.asarray(rgb)) # RGB -> LAB L[0, 100] a[-127, 128] b[-128, 127]
type=int, ab = np.clip(lab[:, :, 1:3] + 128, 0, 255) # AB --> [0, 255]
default='360', ab = paddle.to_tensor(ab.astype('float32')) / 255.
help='Length of minimum image edges') L = lab[:, :, 0] * 2.55 # L --> [0, 255]
L = Image.fromarray(np.uint8(L))
class DeepReasterPredictor: L = paddle.to_tensor(np.array(L).astype('float32')[..., np.newaxis] / 255.0)
return L, ab
def addMergin(img, target_w, target_h, background_color=(0, 0, 0)):
width, height = img.size
if width == target_w and height == target_h:
return img
scale = max(target_w, target_h) / max(width, height)
width = int(width * scale / 16.) * 16
height = int(height * scale / 16.) * 16
img = img.resize((width, height), Image.BICUBIC)
xp = (target_w - width) // 2
yp = (target_h - height) // 2
result = Image.new(img.mode, (target_w, target_h), background_color)
result.paste(img, (xp, yp))
return result
class DeepRemasterPredictor(BasePredictor):
def __init__(self, def __init__(self,
input, output='output',
output,
weight_path=None, weight_path=None,
colorization=False, colorization=False,
reference_dir=None, reference_dir=None,
mindim=360): mindim=360):
self.input = input
self.output = os.path.join(output, 'DeepRemaster') self.output = os.path.join(output, 'DeepRemaster')
self.colorization = colorization self.colorization = colorization
self.reference_dir = reference_dir self.reference_dir = reference_dir
self.mindim = mindim self.mindim = mindim
if weight_path is None: if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(DEEPREMASTER_WEIGHT_URL, cur_path) weight_path = get_path_from_url(DEEPREMASTER_WEIGHT_URL, cur_path)
state_dict, _ = paddle.load(weight_path) self.weight_path = weight_path
state_dict = paddle.load(weight_path)
self.modelR = NetworkR() self.modelR = NetworkR()
self.modelR.load_dict(state_dict['modelR']) self.modelR.load_dict(state_dict['modelR'])
...@@ -63,7 +92,7 @@ class DeepReasterPredictor: ...@@ -63,7 +92,7 @@ class DeepReasterPredictor:
self.modelC.load_dict(state_dict['modelC']) self.modelC.load_dict(state_dict['modelC'])
self.modelC.eval() self.modelC.eval()
    def run(self, video_path):
        outputdir = self.output
        outputdir_in = os.path.join(outputdir, 'input/')
        os.makedirs(outputdir_in, exist_ok=True)
...@@ -94,9 +123,7 @@ class DeepRemasterPredictor:
            refimgs = []
            for i, v in enumerate(refs):
                refimg = addMergin(v, target_w=target_w, target_h=target_h)
                refimg = np.array(refimg).astype('float32').transpose(
                    2, 0, 1) / 255.0
                refimgs.append(refimg)
...@@ -105,7 +132,7 @@ class DeepRemasterPredictor:
            refimgs = paddle.unsqueeze(refimgs, 0)

        # Load video
        cap = cv2.VideoCapture(video_path)
        nframes = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        v_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        v_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
...@@ -156,7 +183,7 @@ class DeepRemasterPredictor:
                elif nchannels == 3:
                    cv2.imwrite(outputdir_in + '%07d.png' % index, frame)
                    frame = frame[:, :, ::-1]  ## BGR -> RGB
                    frame_l, frame_ab = convertRGB2LABTensor(frame)
                    frame_l = frame_l.transpose([2, 0, 1])
                    frame_ab = frame_ab.transpose([2, 0, 1])
                    frame_l = frame_l.reshape([
...@@ -193,7 +220,7 @@ class DeepRemasterPredictor:
                        (out_l, out_ab),
                        axis=0).detach().numpy().transpose((1, 2, 0))
                    out = Image.fromarray(
                        np.uint8(convertLAB2RGB(out) * 255))
                    out.save(outputdir_out + '%07d.png' % (index))
                else:
                    raise ValueError('channels of image must be 3!')
...@@ -214,7 +241,7 @@ class DeepRemasterPredictor:
                    output = paddle.concat(
                        (out_l, out_c), axis=0).numpy().transpose((1, 2, 0))
                    output = Image.fromarray(
                        np.uint8(convertLAB2RGB(output) * 255))
                    output.save(outputdir_out + '%07d.png' % index)
                    it = it + 1
...@@ -222,7 +249,7 @@ class DeepRemasterPredictor:
        # Save result videos
        outfile = os.path.join(outputdir,
                               video_path.split('/')[-1].split('.')[0])
        cmd = 'ffmpeg -y -r %d -i %s%%07d.png -vcodec libx264 -pix_fmt yuv420p -r %d %s_in.mp4' % (
            fps, outputdir_in, fps, outfile)
        subprocess.call(cmd, shell=True)
...@@ -236,14 +263,3 @@ class DeepRemasterPredictor:
        cap.release()
        pbar.close()

        return outputdir_out, '%s_out.mp4' % outfile
if __name__ == "__main__":
args = parser.parse_args()
paddle.disable_static()
predictor = DeepReasterPredictor(args.input,
args.output,
colorization=args.colorization,
reference_dir=args.reference_dir,
mindim=args.mindim)
predictor.run()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os

import cv2
import glob
import numpy as np
from PIL import Image
from tqdm import tqdm

import paddle
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import frames2video, video2frames
from ppgan.models.generators.deoldify import build_model

from .base_predictor import BasePredictor
DEOLDIFY_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams'


class DeOldifyPredictor(BasePredictor):
    def __init__(self, output='output', weight_path=None, render_factor=32):
        # self.input = input
        self.output = os.path.join(output, 'DeOldify')
        self.render_factor = render_factor
        self.model = build_model()
        if weight_path is None:
            cur_path = os.path.abspath(os.path.dirname(__file__))
            weight_path = get_path_from_url(DEOLDIFY_WEIGHT_URL, cur_path)

        state_dict = paddle.load(weight_path)
        self.model.load_dict(state_dict)
        self.model.eval()
...@@ -85,8 +77,14 @@ class DeOldifyPredictor():
        final = Image.fromarray(final)

        return final

    def run_image(self, img):
        if isinstance(img, str):
            ori_img = Image.open(img).convert('LA').convert('RGB')
        elif isinstance(img, np.ndarray):
            ori_img = Image.fromarray(img).convert('LA').convert('RGB')
        elif isinstance(img, Image.Image):
            ori_img = img

        img = self.norm(ori_img, self.render_factor)
        x = paddle.to_tensor(img[np.newaxis, ...])
        out = self.model(x)
...@@ -97,9 +95,8 @@ class DeOldifyPredictor():
        pred_img = self.post_process(pred_img, ori_img)
        return pred_img
    def run_video(self, video):
        base_name = os.path.basename(video).split('.')[0]
        output_path = os.path.join(self.output, base_name)
        pred_frame_path = os.path.join(output_path, 'frames_pred')
...@@ -109,15 +106,15 @@ class DeOldifyPredictor():
        if not os.path.exists(pred_frame_path):
            os.makedirs(pred_frame_path)

        cap = cv2.VideoCapture(video)
        fps = cap.get(cv2.CAP_PROP_FPS)

        out_path = video2frames(video, output_path)

        frames = sorted(glob.glob(os.path.join(out_path, '*.png')))

        for frame in tqdm(frames):
            pred_img = self.run_image(frame)
            frame_name = os.path.basename(frame)
            pred_img.save(os.path.join(pred_frame_path, frame_name))
...@@ -130,15 +127,14 @@ class DeOldifyPredictor():
        return frame_pattern_combined, vid_out_path
    def run(self, input):
        if self.is_video(input):
            return self.run_video(input)
        else:
            pred_img = self.run_image(input)

            if self.output:
                base_name = os.path.basename(input)
                pred_img.save(os.path.join(self.output, base_name + '.png'))

            return pred_img
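# Illustrative usage sketch (not part of the original diff); the input path is
# a placeholder. run() dispatches to run_video() or run_image() depending on
# whether the input is a video.
if __name__ == '__main__':
    predictor = DeOldifyPredictor(output='output', render_factor=32)
    result = predictor.run('old_photo.png')
    print(result)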
...@@ -13,44 +13,18 @@
#limitations under the License.
import os
import cv2

import time
import glob
import numpy as np
from tqdm import tqdm

from paddle.utils.download import get_path_from_url
from ppgan.utils.video import frames2video, video2frames

from .base_predictor import BasePredictor

EDVR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar'
def get_img(pred):
...@@ -72,29 +46,107 @@ def save_img(img, framename):
    cv2.imwrite(framename, img)


def read_img(path, size=None, is_gt=False):
    """read image by cv2
    return: Numpy float32, HWC, BGR, [0,1]"""
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
img = img.astype(np.float32) / 255.
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
if img.shape[2] > 3:
img = img[:, :, :3]
return img
def get_test_neighbor_frames(crt_i, N, max_n, padding='new_info'):
"""Generate an index list for reading N frames from a sequence of images
Args:
crt_i (int): current center index
max_n (int): max number of the sequence of images (calculated from 1)
N (int): reading N frames
padding (str): padding mode, one of replicate | reflection | new_info | circle
Example: crt_i = 0, N = 5
replicate: [0, 0, 0, 1, 2]
reflection: [2, 1, 0, 1, 2]
new_info: [4, 3, 0, 1, 2]
circle: [3, 4, 0, 1, 2]
Returns:
return_l (list [int]): a list of indexes
"""
max_n = max_n - 1
n_pad = N // 2
return_l = []
for i in range(crt_i - n_pad, crt_i + n_pad + 1):
if i < 0:
if padding == 'replicate':
add_idx = 0
elif padding == 'reflection':
add_idx = -i
elif padding == 'new_info':
add_idx = (crt_i + n_pad) + (-i)
elif padding == 'circle':
add_idx = N + i
else:
raise ValueError('Wrong padding mode')
elif i > max_n:
if padding == 'replicate':
add_idx = max_n
elif padding == 'reflection':
add_idx = max_n * 2 - i
elif padding == 'new_info':
add_idx = (crt_i - n_pad) - (i - max_n)
elif padding == 'circle':
add_idx = i - N
else:
raise ValueError('Wrong padding mode')
else:
add_idx = i
return_l.append(add_idx)
return return_l
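# Quick sanity check of the padding behaviour documented in the docstring above
# (illustrative, not part of the original file); with a 10-frame clip and
# center index 0, each padding mode fills the missing left neighbours differently.
if __name__ == '__main__':
    assert get_test_neighbor_frames(0, 5, 10, padding='replicate') == [0, 0, 0, 1, 2]
    assert get_test_neighbor_frames(0, 5, 10, padding='reflection') == [2, 1, 0, 1, 2]
    assert get_test_neighbor_frames(0, 5, 10, padding='new_info') == [4, 3, 0, 1, 2]
    assert get_test_neighbor_frames(0, 5, 10, padding='circle') == [3, 4, 0, 1, 2]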
class EDVRDataset:
def __init__(self, frame_paths):
self.frames = frame_paths
def __getitem__(self, index):
indexs = get_test_neighbor_frames(index, 5, len(self.frames))
frame_list = []
for i in indexs:
img = read_img(self.frames[i])
frame_list.append(img)
img_LQs = np.stack(frame_list, axis=0)
# BGR to RGB, HWC to CHW, numpy to tensor
img_LQs = img_LQs[:, :, :, [2, 1, 0]]
img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
return img_LQs, self.frames[index]
def __len__(self):
return len(self.frames)
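# Illustrative sketch (not in the original file): EDVRDataset wraps a sorted
# list of frame paths and yields (5-frame stack, center-frame path) pairs; the
# glob pattern below is hypothetical.
if __name__ == '__main__':
    import glob
    frame_paths = sorted(glob.glob('frames/*.png'))
    dataset = EDVRDataset(frame_paths)
    if len(dataset) >= 5:
        lqs, center_path = dataset[0]
        print(lqs.shape, center_path)  # e.g. (5, 3, H, W)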
class EDVRPredictor(BasePredictor):
    def __init__(self, output='output', weight_path=None):
        self.input = input
        self.output = os.path.join(output, 'EDVR')

        if weight_path is None:
            cur_path = os.path.abspath(os.path.dirname(__file__))
            weight_path = get_path_from_url(EDVR_WEIGHT_URL, cur_path)

        self.weight_path = weight_path

        self.build_inference_model()

    def run(self, video_path):
        vid = video_path
        base_name = os.path.basename(vid).split('.')[0]
        output_path = os.path.join(self.output, base_name)
        pred_frame_path = os.path.join(output_path, 'frames_pred')
...@@ -119,11 +171,9 @@ class EDVRPredictor:
        for infer_iter, data in enumerate(tqdm(dataset)):
            data_feed_in = [data[0]]

            outs = self.base_forward(np.array(data_feed_in))

            infer_result_list = [item for item in outs]

            frame_path = data[1]
...@@ -144,9 +194,3 @@ class EDVRPredictor:
        frames2video(frame_pattern_combined, vid_out_path, str(int(fps)))

        return frame_pattern_combined, vid_out_path
if __name__ == "__main__":
args = parse_args()
predictor = EDVRPredictor(args.input, args.output, args.weight_path)
predictor.run()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import yaml
import pickle
import imageio
import numpy as np
from tqdm import tqdm
from skimage import img_as_ubyte
from skimage.transform import resize
from scipy.spatial import ConvexHull
import paddle
from paddle.utils.download import get_path_from_url
from ppgan.utils.animate import normalize_kp
from ppgan.modules.keypoint_detector import KPDetector
from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator
from .base_predictor import BasePredictor
class FirstOrderPredictor(BasePredictor):
def __init__(self,
output='output',
weight_path=None,
config=None,
relative=False,
adapt_scale=False,
find_best_frame=False,
best_frame=None):
if config is not None and isinstance(config, str):
self.cfg = yaml.load(config)
elif isinstance(config, dict):
self.cfg = config
elif config is None:
self.cfg = {
'model_params': {
'common_params': {
'num_kp': 10,
'num_channels': 3,
'estimate_jacobian': True
},
'kp_detector_params': {
'temperature': 0.1,
'block_expansion': 32,
'max_features': 1024,
'scale_factor': 0.25,
'num_blocks': 5
},
'generator_params': {
'block_expansion': 64,
'max_features': 512,
'num_down_blocks': 2,
'num_bottleneck_blocks': 6,
'estimate_occlusion_map': True,
'dense_motion_params': {
'block_expansion': 64,
'max_features': 1024,
'num_blocks': 5,
'scale_factor': 0.25
}
}
}
}
if weight_path is None:
vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-cpk.pdparams'
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(vox_cpk_weight_url, cur_path)
self.weight_path = weight_path
self.output = output
self.relative = relative
self.adapt_scale = adapt_scale
self.find_best_frame = find_best_frame
self.best_frame = best_frame
self.generator, self.kp_detector = self.load_checkpoints(
self.cfg, self.weight_path)
def run(self, source_image, driving_video):
source_image = imageio.imread(source_image)
reader = imageio.get_reader(driving_video)
fps = reader.get_meta_data()['fps']
driving_video = []
try:
for im in reader:
driving_video.append(im)
except RuntimeError:
pass
reader.close()
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [
resize(frame, (256, 256))[..., :3] for frame in driving_video
]
if self.find_best_frame or self.best_frame is not None:
i = self.best_frame if self.best_frame is not None else self.find_best_frame_func(
source_image, driving_video)
print("Best frame: " + str(i))
driving_forward = driving_video[i:]
driving_backward = driving_video[:(i + 1)][::-1]
predictions_forward = self.make_animation(
source_image,
driving_forward,
self.generator,
self.kp_detector,
relative=self.relative,
adapt_movement_scale=self.adapt_scale)
predictions_backward = self.make_animation(
source_image,
driving_backward,
self.generator,
self.kp_detector,
relative=self.relative,
adapt_movement_scale=self.adapt_scale)
predictions = predictions_backward[::-1] + predictions_forward[1:]
else:
predictions = self.make_animation(
source_image,
driving_video,
self.generator,
self.kp_detector,
relative=self.relative,
adapt_movement_scale=self.adapt_scale)
imageio.mimsave(os.path.join(self.output, 'result.mp4'),
[img_as_ubyte(frame) for frame in predictions],
fps=fps)
def load_checkpoints(self, config, checkpoint_path):
generator = OcclusionAwareGenerator(
**config['model_params']['generator_params'],
**config['model_params']['common_params'])
kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
**config['model_params']['common_params'])
checkpoint = paddle.load(self.weight_path)
generator.set_state_dict(checkpoint['generator'])
kp_detector.set_state_dict(checkpoint['kp_detector'])
generator.eval()
kp_detector.eval()
return generator, kp_detector
def make_animation(self,
source_image,
driving_video,
generator,
kp_detector,
relative=True,
adapt_movement_scale=True):
with paddle.no_grad():
predictions = []
source = paddle.to_tensor(source_image[np.newaxis].astype(
np.float32)).transpose([0, 3, 1, 2])
driving = paddle.to_tensor(
np.array(driving_video)[np.newaxis].astype(
np.float32)).transpose([0, 4, 1, 2, 3])
kp_source = kp_detector(source)
kp_driving_initial = kp_detector(driving[:, :, 0])
for frame_idx in tqdm(range(driving.shape[2])):
driving_frame = driving[:, :, frame_idx]
kp_driving = kp_detector(driving_frame)
kp_norm = normalize_kp(
kp_source=kp_source,
kp_driving=kp_driving,
kp_driving_initial=kp_driving_initial,
use_relative_movement=relative,
use_relative_jacobian=relative,
adapt_movement_scale=adapt_movement_scale)
out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
predictions.append(
np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0])
return predictions
def find_best_frame_func(self, source, driving):
import face_alignment
def normalize_kp(kp):
kp = kp - kp.mean(axis=0, keepdims=True)
area = ConvexHull(kp[:, :2]).volume
area = np.sqrt(area)
kp[:, :2] = kp[:, :2] / area
return kp
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
flip_input=True)
kp_source = fa.get_landmarks(255 * source)[0]
kp_source = normalize_kp(kp_source)
norm = float('inf')
frame_num = 0
for i, image in tqdm(enumerate(driving)):
kp_driving = fa.get_landmarks(255 * image)[0]
kp_driving = normalize_kp(kp_driving)
new_norm = (np.abs(kp_source - kp_driving)**2).sum()
if new_norm < norm:
norm = new_norm
frame_num = i
return frame_num
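# Illustrative usage sketch (not part of the original file); the image and
# video paths are placeholders. The result is written to <output>/result.mp4.
if __name__ == '__main__':
    os.makedirs('output', exist_ok=True)
    predictor = FirstOrderPredictor(output='output',
                                    relative=True,
                                    adapt_scale=True)
    predictor.run('source_face.png', 'driving_video.mp4')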
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os

import cv2
import glob
import numpy as np
from PIL import Image
from tqdm import tqdm

import paddle

from ppgan.models.generators import RRDBNet
from ppgan.utils.video import frames2video, video2frames
from paddle.utils.download import get_path_from_url

from .base_predictor import BasePredictor
REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DF2K_JPEG.pdparams'


class RealSRPredictor(BasePredictor):
    def __init__(self, output='output', weight_path=None):
        self.input = input
        self.output = os.path.join(output, 'RealSR')
        self.model = RRDBNet(3, 3, 64, 23)
        if weight_path is None:
            cur_path = os.path.abspath(os.path.dirname(__file__))
            weight_path = get_path_from_url(REALSR_WEIGHT_URL, cur_path)

        state_dict = paddle.load(weight_path)
        self.model.load_dict(state_dict)
        self.model.eval()
...@@ -49,8 +49,14 @@ class RealSRPredictor():
        img = img.transpose((1, 2, 0))
        return (img * 255).clip(0, 255).astype('uint8')

    def run_image(self, img):
        if isinstance(img, str):
            ori_img = Image.open(img).convert('RGB')
        elif isinstance(img, np.ndarray):
            ori_img = Image.fromarray(img).convert('RGB')
        elif isinstance(img, Image.Image):
            ori_img = img

        img = self.norm(ori_img)
        x = paddle.to_tensor(img[np.newaxis, ...])
        out = self.model(x)
...@@ -59,9 +65,8 @@ class RealSRPredictor():
        pred_img = Image.fromarray(pred_img)
        return pred_img
    def run_video(self, video):
        base_name = os.path.basename(video).split('.')[0]
        output_path = os.path.join(self.output, base_name)
        pred_frame_path = os.path.join(output_path, 'frames_pred')
...@@ -71,15 +76,15 @@ class RealSRPredictor():
        if not os.path.exists(pred_frame_path):
            os.makedirs(pred_frame_path)

        cap = cv2.VideoCapture(video)
        fps = cap.get(cv2.CAP_PROP_FPS)

        out_path = video2frames(video, output_path)

        frames = sorted(glob.glob(os.path.join(out_path, '*.png')))

        for frame in tqdm(frames):
            pred_img = self.run_image(frame)
            frame_name = os.path.basename(frame)
            pred_img.save(os.path.join(pred_frame_path, frame_name))
...@@ -92,14 +97,14 @@ class RealSRPredictor():
        return frame_pattern_combined, vid_out_path
    def run(self, input):
        if self.is_video(input):
            return self.run_video(input)
        else:
            pred_img = self.run_image(input)

            if self.output:
                base_name = os.path.basename(input)
                pred_img.save(os.path.join(self.output, base_name + '.png'))

            return pred_img
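# Illustrative usage sketch (not part of the original diff); the input path is
# a placeholder. As with DeOldify, run() handles either a single image or a
# video.
if __name__ == '__main__':
    predictor = RealSRPredictor(output='output')
    result = predictor.run('low_res.png')
    print(result)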
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .unpaired_dataset import UnpairedDataset
from .single_dataset import SingleDataset
from .paired_dataset import PairedDataset
from .sr_image_dataset import SRImageDataset
from .makeup_dataset import MakeupDataset
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import os.path
from .base_dataset import BaseDataset, get_transform
from .transforms.makeup_transforms import get_makeup_transform
import paddle.vision.transforms as T
from PIL import Image
import random
import numpy as np
from ..utils.preprocess import *
from .builder import DATASETS
@DATASETS.register()
class MakeupDataset(BaseDataset):
def __init__(self, cfg):
"""Initialize this dataset class.
Parameters:
cfg (dict) -- stores all the experiment flags
"""
BaseDataset.__init__(self, cfg)
self.image_path = cfg.dataroot
self.mode = cfg.phase
self.transform = get_makeup_transform(cfg)
self.norm = T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])
self.transform_mask = get_makeup_transform(cfg, pic="mask")
self.trans_size = cfg.trans_size
self.cls_list = cfg.cls_list
self.cls_A = self.cls_list[0]
self.cls_B = self.cls_list[1]
for cls in self.cls_list:
setattr(
self, cls + "_list_path",
os.path.join(self.image_path, self.mode + '_' + cls + ".txt"))
setattr(self, cls + "_lines",
open(getattr(self, cls + "_list_path"), 'r').readlines())
setattr(self, "num_of_" + cls + "_data",
len(getattr(self, cls + "_lines")))
print('Start preprocessing dataset..!')
self.preprocess()
print('Finished preprocessing dataset..!')
def preprocess(self):
"""preprocess image"""
for cls in self.cls_list:
setattr(self, cls + "_filenames", [])
setattr(self, cls + "_mask_filenames", [])
setattr(self, cls + "_lmks_filenames", [])
lines = getattr(self, cls + "_lines")
random.shuffle(lines)
for i, line in enumerate(lines):
splits = line.split()
getattr(self, cls + "_filenames").append(splits[0])
getattr(self, cls + "_mask_filenames").append(splits[1])
getattr(self, cls + "_lmks_filenames").append(splits[2])
def __getitem__(self, index):
"""Return MANet and MDNet needed params.
Parameters:
index (int) -- a random integer for data indexing
Returns a dictionary that contains needed params.
"""
try:
index_A = random.randint(
0, getattr(self, "num_of_" + self.cls_A + "_data"))
index_B = random.randint(
0, getattr(self, "num_of_" + self.cls_B + "_data"))
if self.mode == 'test':
num_b = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
index_A = int(index / num_b)
index_B = int(index % num_b)
image_A = Image.open(
os.path.join(self.image_path,
getattr(self, self.cls_A +
"_filenames")[index_A])).convert("RGB")
image_B = Image.open(
os.path.join(self.image_path,
getattr(self, self.cls_B +
"_filenames")[index_B])).convert("RGB")
mask_A = np.array(
Image.open(
os.path.join(
self.image_path,
getattr(self,
self.cls_A + "_mask_filenames")[index_A])))
mask_B = np.array(
Image.open(
os.path.join(
self.image_path,
getattr(self, self.cls_B +
"_mask_filenames")[index_B])).convert('L'))
image_A = np.array(image_A)
image_B = np.array(image_B)
image_A = self.transform(image_A)
image_B = self.transform(image_B)
mask_A = cv2.resize(mask_A, (256, 256),
interpolation=cv2.INTER_NEAREST)
mask_B = cv2.resize(mask_B, (256, 256),
interpolation=cv2.INTER_NEAREST)
lmks_A = np.loadtxt(
os.path.join(
self.image_path,
getattr(self, self.cls_A + "_lmks_filenames")[index_A]))
lmks_B = np.loadtxt(
os.path.join(
self.image_path,
getattr(self, self.cls_B + "_lmks_filenames")[index_B]))
lmks_A = lmks_A / image_A.shape[:2] * self.trans_size
lmks_B = lmks_B / image_B.shape[:2] * self.trans_size
P_A = generate_P_from_lmks(lmks_A, self.trans_size,
image_A.shape[0], image_A.shape[1])
P_B = generate_P_from_lmks(lmks_B, self.trans_size,
image_B.shape[0], image_B.shape[1])
mask_A_aug = generate_mask_aug(mask_A, lmks_A)
mask_B_aug = generate_mask_aug(mask_B, lmks_B)
consis_mask = calculate_consis_mask(mask_A_aug, mask_B_aug)
consis_mask_idt_A = calculate_consis_mask(mask_A_aug, mask_A_aug)
consis_mask_idt_B = calculate_consis_mask(mask_A_aug, mask_B_aug)
except Exception as e:
print(e)
return self.__getitem__(index + 1)
return {
'image_A': self.norm(image_A),
'image_B': self.norm(image_B),
'mask_A': np.float32(mask_A),
'mask_B': np.float32(mask_B),
'consis_mask': np.float32(consis_mask),
'P_A': np.float32(P_A),
'P_B': np.float32(P_B),
'consis_mask_idt_A': np.float32(consis_mask_idt_A),
'consis_mask_idt_B': np.float32(consis_mask_idt_B),
'mask_A_aug': np.float32(mask_A_aug),
'mask_B_aug': np.float32(mask_B_aug)
}
def __len__(self):
"""Return the total number of images in the dataset.
As we have two datasets with potentially different numbers of images,
we take the maximum of the two in train mode.
"""
if self.mode == 'train':
num_A = getattr(self, 'num_of_' + self.cls_list[0] + '_data')
num_B = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
return max(num_A, num_B)
elif self.mode == "test":
num_A = getattr(self, 'num_of_' + self.cls_list[0] + '_data')
num_B = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
return num_A * num_B
return max(self.A_size, self.B_size)
...@@ -5,13 +5,13 @@ from .base_dataset import BaseDataset, get_params, get_transform
from .image_folder import make_dataset

from .builder import DATASETS
from .transforms.builder import build_transforms


@DATASETS.register()
class PairedDataset(BaseDataset):
    """A dataset class for paired image dataset.
    """
    def __init__(self, cfg):
        """Initialize this dataset class.
...@@ -19,11 +19,14 @@ class PairedDataset(BaseDataset):
            cfg (dict) -- stores all the experiment flags
        """
        BaseDataset.__init__(self, cfg)
        self.dir_AB = os.path.join(cfg.dataroot,
                                   cfg.phase)  # get the image directory
        self.AB_paths = sorted(make_dataset(
            self.dir_AB, cfg.max_dataset_size))  # get image paths

        self.input_nc = self.cfg.output_nc if self.cfg.direction == 'BtoA' else self.cfg.input_nc
        self.output_nc = self.cfg.input_nc if self.cfg.direction == 'BtoA' else self.cfg.output_nc

        self.transforms = build_transforms(cfg.transforms)

    def __getitem__(self, index):
        """Return a data point and its metadata information.
...@@ -49,27 +52,11 @@ class PairedDataset(BaseDataset):
        A = AB[:h, :w2, :]
        B = AB[:h, w2:, :]

        # apply the same transform to both A and B
        A, B = self.transforms((A, B))

        return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}
    def __len__(self):
        """Return the total number of images in the dataset."""
        return len(self.AB_paths)


from .transforms import RandomCrop, Resize, RandomHorizontalFlip, PairedRandomCrop, PairedRandomHorizontalFlip, Normalize, Permute
import copy
import traceback
import paddle
from ...utils.registry import Registry
TRANSFORMS = Registry("TRANSFORMS")
class Compose(object):
"""
Composes several transforms together use for composing list of transforms
together for a dataset transform.
Args:
transforms (list): List of transforms to compose.
Returns:
A compose object which is callable, __call__ for this Compose
object will call each given :attr:`transforms` sequencely.
"""
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, data):
for f in self.transforms:
try:
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
print("fail to perform transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
return data
def build_transforms(cfg):
transforms = []
for trans_cfg in cfg:
temp_trans_cfg = copy.deepcopy(trans_cfg)
name = temp_trans_cfg.pop('name')
transforms.append(TRANSFORMS.get(name)(**temp_trans_cfg))
transforms = Compose(transforms)
return transforms
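# Illustrative sketch (not from the original file): build_transforms consumes a
# list of dicts, popping 'name' to look up the registered transform and passing
# the remaining keys as constructor kwargs. It assumes the transform classes
# defined in the transforms module below have already been registered.
if __name__ == '__main__':
    import numpy as np
    pipeline = build_transforms([
        {'name': 'Resize', 'size': 286},
        {'name': 'RandomCrop', 'output_size': 256},
        {'name': 'Permute'},
        {'name': 'Normalize', 'mean': 127.5, 'std': 127.5},
    ])
    fake_img = np.random.randint(0, 255, (300, 300, 3)).astype('float32')
    print(pipeline(fake_img).shape)  # (3, 256, 256)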
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...@@ -11,36 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.vision.transforms as T
import cv2


def get_makeup_transform(cfg, pic="image"):
    if pic == "image":
        transform = T.Compose([
            T.Resize(size=cfg.trans_size),
            T.Permute(to_rgb=False),
        ])
    else:
        transform = T.Resize(size=cfg.trans_size,
                             interpolation=cv2.INTER_NEAREST)

    return transform
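# Illustrative sketch (not from the original file): the config object only
# needs a trans_size attribute here, so a SimpleNamespace stands in for the
# real experiment config.
if __name__ == '__main__':
    from types import SimpleNamespace
    cfg = SimpleNamespace(trans_size=256)
    image_tf = get_makeup_transform(cfg)             # resize + HWC -> CHW
    mask_tf = get_makeup_transform(cfg, pic="mask")  # nearest-neighbour resize only
    print(image_tf, mask_tf)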
import sys
import random
import numbers
import collections

import numpy as np

from paddle.utils import try_import
import paddle.vision.transforms.functional as F

from .builder import TRANSFORMS

if sys.version_info < (3, 3):
    Sequence = collections.Sequence
    Iterable = collections.Iterable
else:
    Sequence = collections.abc.Sequence
    Iterable = collections.abc.Iterable
class Transform():
def _set_attributes(self, args):
"""
Set attributes from the input list of parameters.
Args:
args (list): list of parameters.
"""
if args:
for k, v in args.items():
if k != "self" and not k.startswith("_"):
setattr(self, k, v)
def apply_image(self, input):
raise NotImplementedError
def __call__(self, inputs):
if isinstance(inputs, tuple):
inputs = list(inputs)
if self.keys is not None:
for i, key in enumerate(self.keys):
if isinstance(inputs, dict):
inputs[key] = getattr(self, 'apply_' + key)(inputs[key])
elif isinstance(inputs, (list, tuple)):
inputs[i] = getattr(self, 'apply_' + key)(inputs[i])
else:
inputs = self.apply_image(inputs)
if isinstance(inputs, list):
inputs = tuple(inputs)
return inputs
@TRANSFORMS.register()
class Resize(Transform):
"""Resize the input Image to the given size.
Args:
size (int|list|tuple): Desired output size. If size is a sequence like
(h, w), output size will be matched to this. If size is an int,
smaller edge of the image will be matched to this number.
i.e, if height > width, then image will be rescaled to
(size * height / width, size)
interpolation (int, optional): Interpolation mode of resize. Default: 1.
0 : cv2.INTER_NEAREST
1 : cv2.INTER_LINEAR
2 : cv2.INTER_CUBIC
3 : cv2.INTER_AREA
4 : cv2.INTER_LANCZOS4
5 : cv2.INTER_LINEAR_EXACT
7 : cv2.INTER_MAX
8 : cv2.WARP_FILL_OUTLIERS
16: cv2.WARP_INVERSE_MAP
"""
def __init__(self, size, interpolation=1, keys=None):
super().__init__()
assert isinstance(size, int) or (isinstance(size, Iterable)
and len(size) == 2)
self._set_attributes(locals())
if isinstance(self.size, Iterable):
self.size = tuple(size)
def apply_image(self, img):
return F.resize(img, self.size, self.interpolation)
@TRANSFORMS.register()
class RandomCrop(Transform):
def __init__(self, output_size, keys=None):
super().__init__()
self._set_attributes(locals())
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
...@@ -19,12 +105,162 @@ class RandomCrop(object):
        j = random.randint(0, w - tw)
        return i, j, th, tw

    def apply_image(self, img):
        i, j, h, w = self._get_params(img)
        cropped_img = img[i:i + h, j:j + w]
        return cropped_img
@TRANSFORMS.register()
class PairedRandomCrop(RandomCrop):
def __init__(self, output_size, keys=None):
super().__init__(output_size, keys)
if isinstance(output_size, int):
self.output_size = (output_size, output_size)
else:
self.output_size = output_size
def apply_image(self, img, crop_params=None):
if crop_params is not None:
i, j, h, w = crop_params
else:
i, j, h, w = self._get_params(img)
cropped_img = img[i:i + h, j:j + w]
return cropped_img
def __call__(self, inputs):
if isinstance(inputs, tuple):
inputs = list(inputs)
if self.keys is not None:
if isinstance(inputs, dict):
crop_params = self._get_params(inputs[self.keys[0]])
elif isinstance(inputs, (list, tuple)):
crop_params = self._get_params(inputs[0])
for i, key in enumerate(self.keys):
if isinstance(inputs, dict):
inputs[key] = getattr(self, 'apply_' + key)(inputs[key],
crop_params)
elif isinstance(inputs, (list, tuple)):
inputs[i] = getattr(self, 'apply_' + key)(inputs[i],
crop_params)
else:
crop_params = self._get_params(inputs)
inputs = self.apply_image(inputs, crop_params)
if isinstance(inputs, list):
inputs = tuple(inputs)
return inputs
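# Illustrative sketch (not from the original file): with keys set, the same
# random crop window is applied to every element of the input pair.
if __name__ == '__main__':
    import numpy as np
    lq = np.arange(32 * 32 * 3).reshape(32, 32, 3)
    gt = lq.copy()
    paired_crop = PairedRandomCrop(16, keys=['image', 'image'])
    lq_c, gt_c = paired_crop((lq, gt))
    assert (lq_c == gt_c).all() and lq_c.shape == (16, 16, 3)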
@TRANSFORMS.register()
class RandomHorizontalFlip(Transform):
"""Horizontally flip the input data randomly with a given probability.
Args:
prob (float): Probability of the input data being flipped. Default: 0.5
"""
def __init__(self, prob=0.5, keys=None):
super().__init__()
self._set_attributes(locals())
def apply_image(self, img):
if np.random.random() < self.prob:
return F.flip(img, code=1)
return img
@TRANSFORMS.register()
class PairedRandomHorizontalFlip(RandomHorizontalFlip):
def __init__(self, prob=0.5, keys=None):
super().__init__()
self._set_attributes(locals())
def apply_image(self, img, flip):
if flip:
return F.flip(img, code=1)
return img
def __call__(self, inputs):
if isinstance(inputs, tuple):
inputs = list(inputs)
flip = np.random.random() < self.prob
if self.keys is not None:
for i, key in enumerate(self.keys):
if isinstance(inputs, dict):
inputs[key] = getattr(self, 'apply_' + key)(inputs[key],
flip)
elif isinstance(inputs, (list, tuple)):
inputs[i] = getattr(self, 'apply_' + key)(inputs[i], flip)
else:
inputs = self.apply_image(inputs, flip)
if isinstance(inputs, list):
inputs = tuple(inputs)
return inputs
@TRANSFORMS.register()
class Normalize(Transform):
"""Normalize the input data with mean and standard deviation.
Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels,
this transform will normalize each channel of the input data.
``output[channel] = (input[channel] - mean[channel]) / std[channel]``
Args:
mean (int|float|list): Sequence of means for each channel.
std (int|float|list): Sequence of standard deviations for each channel.
"""
def __init__(self, mean=0.0, std=1.0, keys=None):
super().__init__()
self._set_attributes(locals())
if isinstance(mean, numbers.Number):
mean = [mean, mean, mean]
if isinstance(std, numbers.Number):
std = [std, std, std]
self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1)
self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1)
def apply_image(self, img):
return (img - self.mean) / self.std
@TRANSFORMS.register()
class Permute(Transform):
"""Change input data to a target mode.
For example, most transforms use HWC mode image,
while the Neural Network might use CHW mode input tensor.
Input image should be HWC mode and an instance of numpy.ndarray.
Args:
mode (str): Output mode of input. Default: "CHW".
to_rgb (bool): Convert 'bgr' image to 'rgb'. Default: True.
"""
def __init__(self, mode="CHW", to_rgb=True, keys=None):
super().__init__()
self._set_attributes(locals())
assert mode in [
"CHW"
], "Only support 'CHW' mode, but received mode: {}".format(mode)
self.mode = mode
self.to_rgb = to_rgb
def apply_image(self, img):
if self.to_rgb:
img = img[..., ::-1]
if self.mode == "CHW":
return img.transpose((2, 0, 1))
return img
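# Small numeric check of the Normalize/Permute behaviour described in the
# docstrings above (illustrative, not part of the original file).
if __name__ == '__main__':
    import numpy as np
    bgr = np.full((4, 4, 3), 255.0, dtype='float32')  # HWC, BGR
    chw = Permute()(bgr)                              # RGB, CHW
    normed = Normalize(mean=127.5, std=127.5)(chw)    # (255 - 127.5) / 127.5
    assert chw.shape == (3, 4, 4) and np.allclose(normed, 1.0)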
class Crop():
    def __init__(self, pos, size):
        self.pos = pos
...@@ -35,6 +271,6 @@ class Crop():
        x, y = self.pos
        th = tw = self.size
        if (ow > tw or oh > th):
            return img[y:y + th, x:x + tw]
        return img
...@@ -5,13 +5,13 @@ from .base_dataset import BaseDataset, get_transform
from .image_folder import make_dataset

from .builder import DATASETS
from .transforms.builder import build_transforms


@DATASETS.register()
class UnpairedDataset(BaseDataset):
    """
    """
    def __init__(self, cfg):
        """Initialize this dataset class.
...@@ -19,18 +19,25 @@ class UnpairedDataset(BaseDataset):
            cfg (dict) -- stores all the experiment flags
        """
        BaseDataset.__init__(self, cfg)
        self.dir_A = os.path.join(cfg.dataroot, cfg.phase +
                                  'A')  # create a path '/path/to/data/trainA'
        self.dir_B = os.path.join(cfg.dataroot, cfg.phase +
                                  'B')  # create a path '/path/to/data/trainB'

        self.A_paths = sorted(make_dataset(
            self.dir_A,
            cfg.max_dataset_size))  # load images from '/path/to/data/trainA'
        self.B_paths = sorted(make_dataset(
            self.dir_B,
            cfg.max_dataset_size))  # load images from '/path/to/data/trainB'
        self.A_size = len(self.A_paths)  # get the size of dataset A
        self.B_size = len(self.B_paths)  # get the size of dataset B
        btoA = self.cfg.direction == 'BtoA'
        input_nc = self.cfg.output_nc if btoA else self.cfg.input_nc  # get the number of channels of input image
        output_nc = self.cfg.input_nc if btoA else self.cfg.output_nc  # get the number of channels of output image

        self.transform_A = build_transforms(self.cfg.transforms)
        self.transform_B = build_transforms(self.cfg.transforms)

        self.reset_paths()
...@@ -49,10 +56,11 @@ class UnpairedDataset(BaseDataset):
            A_paths (str) -- image paths
            B_paths (str) -- image paths
        """
        A_path = self.A_paths[
            index % self.A_size]  # make sure index is within the range
        if self.cfg.serial_batches:  # make sure index is within the range
            index_B = index % self.B_size
        else:  # randomize the index for domain B to avoid fixed pairs.
            index_B = random.randint(0, self.B_size - 1)
        B_path = self.B_paths[index_B]
...
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import dlibutils as dlib
from . import mask
from . import image
from .dlib_utils import detect, crop, landmarks, crop_from_array
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os.path as osp
import numpy as np
from PIL import Image
import dlib
import cv2
from ..image import resize_by_max
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(
osp.split(osp.realpath(__file__))[0] + '/lms.dat')
def detect(image: Image):
image = np.asarray(image)
h, w = image.shape[:2]
image = resize_by_max(image, 361)
actual_h, actual_w = image.shape[:2]
faces_on_small = detector(image, 1)
faces = dlib.rectangles()
for face in faces_on_small:
faces.append(
dlib.rectangle(int(face.left() / actual_w * w + 0.5),
int(face.top() / actual_h * h + 0.5),
int(face.right() / actual_w * w + 0.5),
int(face.bottom() / actual_h * h + 0.5)))
return faces
def crop(image: Image, face, up_ratio, down_ratio, width_ratio):
width, height = image.size
face_height = face.height()
face_width = face.width()
delta_up = up_ratio * face_height
delta_down = down_ratio * face_height
delta_width = width_ratio * width
img_left = int(max(0, face.left() - delta_width))
img_top = int(max(0, face.top() - delta_up))
img_right = int(min(width, face.right() + delta_width))
img_bottom = int(min(height, face.bottom() + delta_down))
image = image.crop((img_left, img_top, img_right, img_bottom))
face = dlib.rectangle(face.left() - img_left,
face.top() - img_top,
face.right() - img_left,
face.bottom() - img_top)
face_expand = dlib.rectangle(img_left, img_top, img_right, img_bottom)
center = face_expand.center()
width, height = image.size
crop_left = img_left
crop_top = img_top
crop_right = img_right
crop_bottom = img_bottom
if width > height:
left = int(center.x - height / 2)
right = int(center.x + height / 2)
if left < 0:
left, right = 0, height
elif right > width:
left, right = width - height, width
image = image.crop((left, 0, right, height))
face = dlib.rectangle(face.left() - left, face.top(),
face.right() - left, face.bottom())
crop_left += left
crop_right = crop_left + height
elif width < height:
top = int(center.y - width / 2)
bottom = int(center.y + width / 2)
if top < 0:
top, bottom = 0, width
elif bottom > height:
top, bottom = height - width, height
image = image.crop((0, top, width, bottom))
face = dlib.rectangle(face.left(),
face.top() - top, face.right(),
face.bottom() - top)
crop_top += top
crop_bottom = crop_top + width
crop_face = dlib.rectangle(crop_left, crop_top, crop_right, crop_bottom)
return image, face, crop_face
def crop_by_image_size(image: Image, face):
center = face.center()
width, height = image.size
if width > height:
left = int(center.x - height / 2)
right = int(center.x + height / 2)
if left < 0:
left, right = 0, height
elif right > width:
left, right = width - height, width
image = image.crop((left, 0, right, height))
face = dlib.rectangle(face.left() - left, face.top(),
face.right() - left, face.bottom())
elif width < height:
top = int(center.y - width / 2)
bottom = int(center.y + width / 2)
if top < 0:
top, bottom = 0, width
elif bottom > height:
top, bottom = height - width, height
image = image.crop((0, top, width, bottom))
face = dlib.rectangle(face.left(),
face.top() - top, face.right(),
face.bottom() - top)
return image, face
def landmarks(image: Image, face):
shape = predictor(np.asarray(image), face).parts()
return np.array([[p.y, p.x] for p in shape])
def crop_from_array(image: np.array, face):
ratio = 0.20 / 0.85 # delta_size / face_size
height, width = image.shape[:2]
face_height = face.height()
face_width = face.width()
delta_height = ratio * face_height
delta_width = ratio * width
img_left = int(max(0, face.left() - delta_width))
img_top = int(max(0, face.top() - delta_height))
img_right = int(min(width, face.right() + delta_width))
img_bottom = int(min(height, face.bottom() + delta_height))
image = image[img_top:img_bottom, img_left:img_right]
face = dlib.rectangle(face.left() - img_left,
face.top() - img_top,
face.right() - img_left,
face.bottom() - img_top)
center = face.center()
height, width = image.shape[:2]
if width > height:
left = int(center.x - height / 2)
right = int(center.x + height / 2)
if left < 0:
left, right = 0, height
elif right > width:
left, right = width - height, width
image = image[0:height, left:right]
face = dlib.rectangle(face.left() - left, face.top(),
face.right() - left, face.bottom())
elif width < height:
top = int(center.y - width / 2)
bottom = int(center.y + width / 2)
if top < 0:
top, bottom = 0, width
elif bottom > height:
top, bottom = height - width, height
image = image[top:bottom, 0:width]
face = dlib.rectangle(face.left(),
face.top() - top, face.right(),
face.bottom() - top)
return image, face
import numpy as np
import cv2
from io import BytesIO
def resize_by_max(image, max_side=512, force=False):
h, w = image.shape[:2]
if max(h, w) < max_side and not force:
return image
ratio = max(h, w) / max_side
w = int(w / ratio + 0.5)
h = int(h / ratio + 0.5)
return cv2.resize(image, (w, h))
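# Small illustrative check (not from the original file): resize_by_max only
# shrinks an image unless force=True.
if __name__ == '__main__':
    big = np.zeros((1024, 768, 3), dtype=np.uint8)
    small = np.zeros((100, 80, 3), dtype=np.uint8)
    print(resize_by_max(big, 512).shape)    # longest side becomes 512
    print(resize_by_max(small, 512).shape)  # unchanged: already <= 512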
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .face_parser import FaceParser
import os.path as osp
import numpy as np
import cv2
from PIL import Image
import paddle
import paddle.vision.transforms as T
import pickle
from .model import BiSeNet
class FaceParser:
def __init__(self, device="cpu"):
self.mapper = {
0: 0,
1: 1,
2: 2,
3: 3,
4: 4,
5: 5,
6: 0,
7: 11,
8: 12,
9: 0,
10: 6,
11: 8,
12: 7,
13: 9,
14: 13,
15: 0,
16: 0,
17: 10,
18: 0
}
#self.dict = paddle.to_tensor(mapper)
self.save_pth = osp.split(
osp.realpath(__file__))[0] + '/resnet.pdparams'
self.net = BiSeNet(n_classes=19)
self.transforms = T.Compose([
T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
def parse(self, image):
assert image.shape[:2] == (512, 512)
image = image / 255.0
image = image.transpose((2, 0, 1))
image = self.transforms(image)
state_dict, _ = paddle.load(self.save_pth)
self.net.set_dict(state_dict)
self.net.eval()
with paddle.no_grad():
image = paddle.to_tensor(image)
image = image.unsqueeze(0)
out = self.net(image)[0]
parsing = out.squeeze(0).argmax(0) #argmax(0).astype('float32')
#parsing = paddle.nn.functional.embedding(x=self.dict, weight=parsing)
parse_np = parsing.numpy()
h, w = parse_np.shape
result = np.zeros((h, w))
for i in range(h):
for j in range(w):
result[i][j] = self.mapper[parse_np[i][j]]
result = paddle.to_tensor(result).astype('float32')
return result
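# A hedged sketch (not part of the original file) of how the per-pixel mapper loop in
# FaceParser.parse could be vectorized with a NumPy lookup table; `mapper` is assumed
# to cover all label ids 0..18 as defined in FaceParser.__init__.
def _remap_labels(parse_np, mapper):
    lut = np.zeros(max(mapper) + 1, dtype=np.float32)
    for src, dst in mapper.items():
        lut[src] = dst
    # fancy indexing gives the same result as the nested loops in a single step
    return lut[parse_np]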
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle.utils.download import get_weights_path_from_url
import numpy as np
from .resnet import resnet18
class ConvBNReLU(paddle.nn.Layer):
def __init__(self,
in_chan,
out_chan,
ks=3,
stride=1,
padding=1,
*args,
**kwargs):
super(ConvBNReLU, self).__init__()
self.conv = nn.Conv2d(in_chan,
out_chan,
kernel_size=ks,
stride=stride,
padding=padding,
bias_attr=False)
self.bn = nn.BatchNorm2d(out_chan)
self.relu = nn.ReLU()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class BiSeNetOutput(paddle.nn.Layer):
def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
super(BiSeNetOutput, self).__init__()
self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
self.conv_out = nn.Conv2d(mid_chan,
n_classes,
kernel_size=1,
bias_attr=False)
def forward(self, x):
x = self.conv(x)
x = self.conv_out(x)
return x
class AttentionRefinementModule(paddle.nn.Layer):
def __init__(self, in_chan, out_chan, *args, **kwargs):
super(AttentionRefinementModule, self).__init__()
self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
self.conv_atten = nn.Conv2d(out_chan,
out_chan,
kernel_size=1,
bias_attr=False)
self.bn_atten = nn.BatchNorm(out_chan)
self.sigmoid_atten = nn.Sigmoid()
def forward(self, x):
feat = self.conv(x)
atten = F.avg_pool2d(feat, feat.shape[2:])
atten = self.conv_atten(atten)
atten = self.bn_atten(atten)
atten = self.sigmoid_atten(atten)
out = feat * atten
return out
class ContextPath(paddle.nn.Layer):
def __init__(self, *args, **kwargs):
super(ContextPath, self).__init__()
self.resnet = resnet18()
self.arm16 = AttentionRefinementModule(256, 128)
self.arm32 = AttentionRefinementModule(512, 128)
self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)
def forward(self, x):
H0, W0 = x.shape[2:]
feat8, feat16, feat32 = self.resnet(x)
H8, W8 = feat8.shape[2:]
H16, W16 = feat16.shape[2:]
H32, W32 = feat32.shape[2:]
avg = F.avg_pool2d(feat32, feat32.shape[2:])
avg = self.conv_avg(avg)
avg_up = F.interpolate(avg, size=(H32, W32), mode='nearest')
feat32_arm = self.arm32(feat32)
feat32_sum = feat32_arm + avg_up
feat32_up = F.interpolate(feat32_sum, size=(H16, W16), mode='nearest')
feat32_up = self.conv_head32(feat32_up)
feat16_arm = self.arm16(feat16)
feat16_sum = feat16_arm + feat32_up
feat16_up = F.interpolate(feat16_sum, size=(H8, W8), mode='nearest')
feat16_up = self.conv_head16(feat16_up)
return feat8, feat16_up, feat32_up # x8, x8, x16
class SpatialPath(paddle.nn.Layer):
def __init__(self, *args, **kwargs):
super(SpatialPath, self).__init__()
self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
def forward(self, x):
feat = self.conv1(x)
feat = self.conv2(feat)
feat = self.conv3(feat)
feat = self.conv_out(feat)
return feat
class FeatureFusionModule(paddle.nn.Layer):
def __init__(self, in_chan, out_chan, *args, **kwargs):
super(FeatureFusionModule, self).__init__()
self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
self.conv1 = nn.Conv2d(out_chan,
out_chan // 4,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
self.conv2 = nn.Conv2d(out_chan // 4,
out_chan,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
self.relu = nn.ReLU()
self.sigmoid = nn.Sigmoid()
def forward(self, fsp, fcp):
fcat = paddle.concat([fsp, fcp], axis=1)
feat = self.convblk(fcat)
atten = F.avg_pool2d(feat, feat.shape[2:])
atten = self.conv1(atten)
atten = self.relu(atten)
atten = self.conv2(atten)
atten = self.sigmoid(atten)
feat_atten = feat * atten
feat_out = feat_atten + feat
return feat_out
class BiSeNet(paddle.nn.Layer):
def __init__(self, n_classes, *args, **kwargs):
super(BiSeNet, self).__init__()
self.cp = ContextPath()
self.ffm = FeatureFusionModule(256, 256)
self.conv_out = BiSeNetOutput(256, 256, n_classes)
self.conv_out16 = BiSeNetOutput(128, 64, n_classes)
self.conv_out32 = BiSeNetOutput(128, 64, n_classes)
def forward(self, x):
H, W = x.shape[2:]
feat_res8, feat_cp8, feat_cp16 = self.cp(
x) # here return res3b1 feature
feat_sp = feat_res8 # use res3b1 feature to replace spatial path feature
feat_fuse = self.ffm(feat_sp, feat_cp8)
feat_out = self.conv_out(feat_fuse)
feat_out16 = self.conv_out16(feat_cp8)
feat_out32 = self.conv_out32(feat_cp16)
feat_out = F.interpolate(feat_out, size=(H, W))
feat_out16 = F.interpolate(feat_out16, size=(H, W))
feat_out32 = F.interpolate(feat_out32, size=(H, W))
return feat_out, feat_out16, feat_out32
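# Shape sketch (illustrative only, the helper name is hypothetical): BiSeNet returns
# three prediction maps, all interpolated back to the input resolution; FaceParser
# above only consumes the first one.
def _demo_bisenet_shapes():
    net = BiSeNet(n_classes=19)
    net.eval()
    x = paddle.randn([1, 3, 512, 512])
    out, out16, out32 = net(x)
    # each of out / out16 / out32 has shape [1, 19, 512, 512]
    return out.shape, out16.shape, out32.shape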
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle.utils.download import get_weights_path_from_url
import numpy as np
import math
model_urls = {
'resnet18': ('https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams',
'0ba53eea9bc970962d0ef96f7b94057e'),
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias_attr=False)
class BasicBlock(paddle.nn.Layer):
def __init__(self, in_chan, out_chan, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(in_chan, out_chan, stride)
self.bn1 = nn.BatchNorm(out_chan)
self.conv2 = conv3x3(out_chan, out_chan)
self.bn2 = nn.BatchNorm(out_chan)
self.relu = nn.ReLU()
self.downsample = None
if in_chan != out_chan or stride != 1:
self.downsample = nn.Sequential(
nn.Conv2d(in_chan,
out_chan,
kernel_size=1,
stride=stride,
bias_attr=False),
nn.BatchNorm(out_chan),
)
def forward(self, x):
residual = self.conv1(x)
residual = self.relu(self.bn1(residual))
residual = self.conv2(residual)
residual = self.bn2(residual)
shortcut = x
if self.downsample is not None:
shortcut = self.downsample(x)
out = shortcut + residual
out = self.relu(out)
return out
def create_layer_basic(in_chan, out_chan, bnum, stride=1):
layers = [BasicBlock(in_chan, out_chan, stride=stride)]
for i in range(bnum - 1):
layers.append(BasicBlock(out_chan, out_chan, stride=1))
return nn.Sequential(*layers)
class Resnet18(paddle.nn.Layer):
def __init__(self):
super(Resnet18, self).__init__()
self.conv1 = nn.Conv2d(3,
64,
kernel_size=7,
stride=2,
padding=3,
bias_attr=False)
self.bn1 = nn.BatchNorm(64)
self.relu = nn.ReLU()
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)
def forward(self, x):
x = self.conv1(x)
x = self.relu(self.bn1(x))
x = self.maxpool(x)
x = self.layer1(x)
feat8 = self.layer2(x) # 1/8
feat16 = self.layer3(feat8) # 1/16
feat32 = self.layer4(feat16) # 1/32
return feat8, feat16, feat32
def resnet18(pretrained=False, **kwargs):
model = Resnet18()
arch = 'resnet18'
if pretrained:
weight_path = './resnet.pdparams'
param, _ = paddle.load(weight_path)
model.set_dict(param)
return model
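# Feature-pyramid sketch (illustrative, not part of the original file): for a 512x512
# input, Resnet18 yields feature maps at 1/8, 1/16 and 1/32 resolution with 128, 256
# and 512 channels respectively, which is what ContextPath above relies on.
def _demo_resnet18_features():
    net = resnet18()
    feat8, feat16, feat32 = net(paddle.randn([1, 3, 512, 512]))
    # [1, 128, 64, 64], [1, 256, 32, 32], [1, 512, 16, 16]
    return feat8.shape, feat16.shape, feat32.shape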
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .base_model import BaseModel
from .cycle_gan_model import CycleGANModel
from .pix2pix_model import Pix2PixModel
from .srgan_model import SRGANModel
from .sr_model import SRModel
from .makeup_model import MakeupModel
from .vgg import vgg16
@@ -26,7 +26,7 @@ class BaseModel(ABC):
     When creating your custom class, you need to implement your own initialization.
     In this function, you should first call <BaseModel.__init__(self, opt)>
     Then, you need to define four lists:
-        -- self.loss_names (str list): specify the training losses that you want to plot and save.
+        -- self.losses (str list): specify the training losses that you want to plot and save.
         -- self.model_names (str list): define networks used in our training.
         -- self.visual_names (str list): specify the images that you want to display and save.
         -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example.
@@ -37,7 +37,7 @@
                                      opt.output_dir,
                                      opt.model.name)  # save all the checkpoints to save_dir
-        self.loss_names = []
+        self.losses = OrderedDict()
         self.model_names = []
         self.visual_names = []
         self.optimizers = []
@@ -115,13 +115,7 @@
     def get_current_losses(self):
         """Return training losses / errors. train.py will print out these errors on console, and save them to a file"""
-        errors_ret = OrderedDict()
-        for name in self.loss_names:
-            if isinstance(name, str):
-                errors_ret[name] = float(
-                    getattr(self, 'loss_' + name)
-                )  # float(...) works for both scalar tensor and float number
-        return errors_ret
+        return self.losses

     def set_requires_grad(self, nets, requires_grad=False):
         """Set requires_grad=False for all the networks to avoid unnecessary computations
......
@@ -2,18 +2,9 @@ import paddle
 from ..utils.registry import Registry

 MODELS = Registry("MODEL")


 def build_model(cfg):
-    # dataset = MODELS.get(cfg.MODEL.name)(cfg.MODEL)
-    # place = paddle.CUDAPlace(0)
-    # dataloader = paddle.io.DataLoader(dataset,
-    #                                   batch_size=1, #opt.batch_size,
-    #                                   places=place,
-    #                                   shuffle=True, #not opt.serial_batches,
-    #                                   num_workers=0)#int(opt.num_threads))
     model = MODELS.get(cfg.model.name)(cfg)
     return model
-    # pass
@@ -31,10 +31,6 @@ class CycleGANModel(BaseModel):
             opt (config)-- stores all the experiment flags; needs to be a subclass of Dict
         """
         BaseModel.__init__(self, opt)
-        # specify the training losses you want to print out. The training/test scripts will call <BaseModel.get_current_losses>
-        self.loss_names = [
-            'D_A', 'G_A', 'cycle_A', 'idt_A', 'D_B', 'G_B', 'cycle_B', 'idt_B'
-        ]
         # specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals>
         visual_names_A = ['real_A', 'fake_B', 'rec_A']
         visual_names_B = ['real_B', 'fake_A', 'rec_B']
@@ -165,11 +161,13 @@
         """Calculate GAN loss for discriminator D_A"""
         fake_B = self.fake_B_pool.query(self.fake_B)
         self.loss_D_A = self.backward_D_basic(self.netD_A, self.real_B, fake_B)
+        self.losses['D_A_loss'] = self.loss_D_A

     def backward_D_B(self):
         """Calculate GAN loss for discriminator D_B"""
         fake_A = self.fake_A_pool.query(self.fake_A)
         self.loss_D_B = self.backward_D_basic(self.netD_B, self.real_A, fake_A)
+        self.losses['D_B_loss'] = self.loss_D_B

     def backward_G(self):
         """Calculate the loss for generators G_A and G_B"""
@@ -200,6 +198,13 @@
         # Backward cycle loss || G_A(G_B(B)) - B||
         self.loss_cycle_B = self.criterionCycle(self.rec_B,
                                                 self.real_B) * lambda_B
+        self.losses['G_idt_A_loss'] = self.loss_idt_A
+        self.losses['G_idt_B_loss'] = self.loss_idt_B
+        self.losses['G_A_adv_loss'] = self.loss_G_A
+        self.losses['G_B_adv_loss'] = self.loss_G_B
+        self.losses['G_A_cycle_loss'] = self.loss_cycle_A
+        self.losses['G_B_cycle_loss'] = self.loss_cycle_B
+
         # combined loss and calculate gradients
         self.loss_G = self.loss_G_A + self.loss_G_B + self.loss_cycle_A + self.loss_cycle_B + self.loss_idt_A + self.loss_idt_B
......
-import functools
-import numpy as np
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import paddle
+import functools
+import numpy as np
 import paddle.nn as nn
+import paddle.nn.functional as F
+
+from ...modules.nn import Spectralnorm
 from ...modules.norm import build_norm_layer
 from .builder import DISCRIMINATORS
@@ -14,7 +30,7 @@ class NLayerDiscriminator(nn.Layer):
     def __init__(self, input_nc, ndf=64, n_layers=3, norm_type='instance'):
         """Construct a PatchGAN discriminator
-        Args:
+        Parameters:
             input_nc (int) -- the number of channels in input images
             ndf (int)      -- the number of filters in the last conv layer
             n_layers (int) -- the number of conv layers in the discriminator
@@ -22,49 +38,107 @@ class NLayerDiscriminator(nn.Layer):
         """
         super(NLayerDiscriminator, self).__init__()
         norm_layer = build_norm_layer(norm_type)
-        if type(norm_layer) == functools.partial:
-            use_bias = norm_layer.func == nn.InstanceNorm
+        if type(
+                norm_layer
+        ) == functools.partial:  # no need to use bias as BatchNorm2d has affine parameters
+            use_bias = norm_layer.func == nn.InstanceNorm2d
         else:
-            use_bias = norm_layer == nn.InstanceNorm
+            use_bias = norm_layer == nn.InstanceNorm2d

         kw = 4
         padw = 1
-        sequence = [
-            nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw),
-            nn.LeakyReLU(0.2)
-        ]
+
+        if norm_type == 'spectral':
+            sequence = [
+                Spectralnorm(
+                    nn.Conv2d(input_nc,
+                              ndf,
+                              kernel_size=kw,
+                              stride=2,
+                              padding=padw)),
+                nn.LeakyReLU(0.01)
+            ]
+        else:
+            sequence = [
+                nn.Conv2d(input_nc,
+                          ndf,
+                          kernel_size=kw,
+                          stride=2,
+                          padding=padw,
+                          bias_attr=use_bias),
+                nn.LeakyReLU(0.2)
+            ]
+
         nf_mult = 1
         nf_mult_prev = 1
-        for n in range(1, n_layers):
+        for n in range(1, n_layers):  # gradually increase the number of filters
             nf_mult_prev = nf_mult
             nf_mult = min(2**n, 8)
-            sequence += [
-                nn.Conv2d(ndf * nf_mult_prev,
-                          ndf * nf_mult,
-                          kernel_size=kw,
-                          stride=2,
-                          padding=padw,
-                          bias_attr=use_bias),
-                norm_layer(ndf * nf_mult),
-                nn.LeakyReLU(0.2)
-            ]
+            if norm_type == 'spectral':
+                sequence += [
+                    Spectralnorm(
+                        nn.Conv2d(ndf * nf_mult_prev,
+                                  ndf * nf_mult,
+                                  kernel_size=kw,
+                                  stride=2,
+                                  padding=padw)),
+                    nn.LeakyReLU(0.01)
+                ]
+            else:
+                sequence += [
+                    nn.Conv2d(ndf * nf_mult_prev,
+                              ndf * nf_mult,
+                              kernel_size=kw,
+                              stride=2,
+                              padding=padw,
+                              bias_attr=use_bias),
+                    norm_layer(ndf * nf_mult),
+                    nn.LeakyReLU(0.2)
+                ]

         nf_mult_prev = nf_mult
         nf_mult = min(2**n_layers, 8)
-        sequence += [
-            nn.Conv2d(ndf * nf_mult_prev,
-                      ndf * nf_mult,
-                      kernel_size=kw,
-                      stride=1,
-                      padding=padw,
-                      bias_attr=use_bias),
-            norm_layer(ndf * nf_mult),
-            nn.LeakyReLU(0.2)
-        ]
-        sequence += [
-            nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)
-        ]
+        if norm_type == 'spectral':
+            sequence += [
+                Spectralnorm(
+                    nn.Conv2d(ndf * nf_mult_prev,
+                              ndf * nf_mult,
+                              kernel_size=kw,
+                              stride=1,
+                              padding=padw)),
+                nn.LeakyReLU(0.01)
+            ]
+        else:
+            sequence += [
+                nn.Conv2d(ndf * nf_mult_prev,
+                          ndf * nf_mult,
+                          kernel_size=kw,
+                          stride=1,
+                          padding=padw,
+                          bias_attr=use_bias),
+                norm_layer(ndf * nf_mult),
+                nn.LeakyReLU(0.2)
+            ]
+
+        if norm_type == 'spectral':
+            sequence += [
+                Spectralnorm(
+                    nn.Conv2d(ndf * nf_mult,
+                              1,
+                              kernel_size=kw,
+                              stride=1,
+                              padding=padw,
+                              bias_attr=False))
+            ]  # output 1 channel prediction map
+        else:
+            sequence += [
+                nn.Conv2d(ndf * nf_mult,
+                          1,
+                          kernel_size=kw,
+                          stride=1,
+                          padding=padw,
+                          bias_attr=False)
+            ]  # output 1 channel prediction map
         self.model = nn.Sequential(*sequence)

     def forward(self, input):
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .resnet import ResnetGenerator
from .unet import UnetGenerator
from .rrdb_net import RRDBNet
from .makeup import GeneratorPSGANAttention
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import functools
import numpy as np
from ...modules.norm import build_norm_layer
from .builder import GENERATORS
class PONO(paddle.nn.Layer):
def __init__(self, eps=1e-5):
super(PONO, self).__init__()
self.eps = eps
def forward(self, x):
mean = paddle.mean(x, axis=1, keepdim=True)
var = paddle.mean(paddle.square(x - mean), axis=1, keepdim=True)
tmp = (x - mean) / paddle.sqrt(var + self.eps)
return tmp
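# Quick property check for PONO (illustrative helper, assumes the class above): at
# every spatial position the output is normalized across the channel axis, so the
# per-position channel mean is ~0 and the variance is ~1 (up to the eps term).
def _check_pono():
    x = paddle.randn([2, 8, 4, 4])
    y = PONO()(x)
    mean = paddle.mean(y, axis=1)
    var = paddle.mean(paddle.square(y - paddle.mean(y, axis=1, keepdim=True)), axis=1)
    return mean, var  # mean ~ 0, var ~ 1 everywhere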
class ResidualBlock(paddle.nn.Layer):
"""Residual Block with instance normalization."""
def __init__(self, dim_in, dim_out, mode=None):
super(ResidualBlock, self).__init__()
if mode == 't':
weight_attr = False
bias_attr = False
elif mode == 'p' or (mode is None):
weight_attr = None
bias_attr = None
self.main = nn.Sequential(
nn.Conv2d(dim_in,
dim_out,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False),
nn.InstanceNorm2d(dim_out,
weight_attr=weight_attr,
bias_attr=bias_attr), nn.ReLU(),
nn.Conv2d(dim_out,
dim_out,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False),
nn.InstanceNorm2d(dim_out,
weight_attr=weight_attr,
bias_attr=bias_attr))
def forward(self, x):
"""forward"""
return x + self.main(x)
class StyleResidualBlock(paddle.nn.Layer):
"""Residual Block with instance normalization."""
def __init__(self, dim_in, dim_out):
super(StyleResidualBlock, self).__init__()
self.block1 = nn.Sequential(
nn.Conv2d(dim_in,
dim_out,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False), PONO())
ks = 3
pw = ks // 2
self.beta1 = nn.Conv2d(dim_in, dim_out, kernel_size=ks, padding=pw)
self.gamma1 = nn.Conv2d(dim_in, dim_out, kernel_size=ks, padding=pw)
self.block2 = nn.Sequential(
nn.ReLU(),
nn.Conv2d(dim_out,
dim_out,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False), PONO())
self.beta2 = nn.Conv2d(dim_in, dim_out, kernel_size=ks, padding=pw)
self.gamma2 = nn.Conv2d(dim_in, dim_out, kernel_size=ks, padding=pw)
def forward(self, x, y):
"""forward"""
x_ = self.block1(x)
b = self.beta1(y)
g = self.gamma1(y)
x_ = (g + 1) * x_ + b
x_ = self.block2(x_)
b = self.beta2(y)
g = self.gamma2(y)
x_ = (g + 1) * x_ + b
return x + x_
class MDNet(paddle.nn.Layer):
"""MDNet in PSGAN"""
def __init__(self, conv_dim=64, repeat_num=3):
super(MDNet, self).__init__()
layers = []
layers.append(
nn.Conv2d(3,
conv_dim,
kernel_size=7,
stride=1,
padding=3,
bias_attr=False))
layers.append(
nn.InstanceNorm2d(conv_dim, weight_attr=None, bias_attr=None))
layers.append(nn.ReLU())
# Down-Sampling
curr_dim = conv_dim
for i in range(2):
layers.append(
nn.Conv2d(curr_dim,
curr_dim * 2,
kernel_size=4,
stride=2,
padding=1,
bias_attr=False))
layers.append(
nn.InstanceNorm2d(curr_dim * 2,
weight_attr=None,
bias_attr=None))
layers.append(nn.ReLU())
curr_dim = curr_dim * 2
# Bottleneck
for i in range(repeat_num):
layers.append(ResidualBlock(dim_in=curr_dim, dim_out=curr_dim))
self.main = nn.Sequential(*layers)
def forward(self, x):
"""forward"""
out = self.main(x)
return out
class TNetDown(paddle.nn.Layer):
"""MDNet in PSGAN"""
def __init__(self, conv_dim=64, repeat_num=3):
super(TNetDown, self).__init__()
layers = []
layers.append(
nn.Conv2d(3,
conv_dim,
kernel_size=7,
stride=1,
padding=3,
bias_attr=False))
layers.append(
nn.InstanceNorm2d(conv_dim, weight_attr=False, bias_attr=False))
layers.append(nn.ReLU())
# Down-Sampling
curr_dim = conv_dim
for i in range(2):
layers.append(
nn.Conv2d(curr_dim,
curr_dim * 2,
kernel_size=4,
stride=2,
padding=1,
bias_attr=False))
layers.append(
nn.InstanceNorm2d(curr_dim * 2,
weight_attr=False,
bias_attr=False))
layers.append(nn.ReLU())
curr_dim = curr_dim * 2
# Bottleneck
for i in range(repeat_num):
layers.append(
ResidualBlock(dim_in=curr_dim, dim_out=curr_dim, mode='t'))
self.main = nn.Sequential(*layers)
def forward(self, x):
"""forward"""
out = self.main(x)
return out
class GetMatrix(paddle.fluid.dygraph.Layer):
def __init__(self, dim_in, dim_out):
super(GetMatrix, self).__init__()
self.get_gamma = nn.Conv2d(dim_in,
dim_out,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
self.get_beta = nn.Conv2d(dim_in,
dim_out,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
def forward(self, x):
gamma = self.get_gamma(x)
beta = self.get_beta(x)
return gamma, beta
class MANet(paddle.nn.Layer):
"""MANet in PSGAN"""
def __init__(self, conv_dim=64, repeat_num=3, w=0.01):
super(MANet, self).__init__()
self.encoder = TNetDown(conv_dim=conv_dim, repeat_num=repeat_num)
curr_dim = conv_dim * 4
self.w = w
self.beta = nn.Conv2d(curr_dim, curr_dim, kernel_size=3, padding=1)
self.gamma = nn.Conv2d(curr_dim, curr_dim, kernel_size=3, padding=1)
self.simple_spade = GetMatrix(curr_dim, 1) # get the makeup matrix
self.repeat_num = repeat_num
for i in range(repeat_num):
setattr(self, "bottlenecks_" + str(i),
ResidualBlock(dim_in=curr_dim, dim_out=curr_dim, mode='t'))
# Up-Sampling
self.upsamplers = []
self.up_betas = []
self.up_gammas = []
self.up_acts = []
y_dim = curr_dim
for i in range(2):
layers = []
layers.append(
nn.ConvTranspose2d(curr_dim,
curr_dim // 2,
kernel_size=4,
stride=2,
padding=1,
bias_attr=False))
layers.append(
nn.InstanceNorm2d(curr_dim // 2,
weight_attr=False,
bias_attr=False))
setattr(self, "up_acts_" + str(i), nn.ReLU())
setattr(
self, "up_betas_" + str(i),
nn.ConvTranspose2d(y_dim,
curr_dim // 2,
kernel_size=4,
stride=2,
padding=1))
setattr(
self, "up_gammas_" + str(i),
nn.ConvTranspose2d(y_dim,
curr_dim // 2,
kernel_size=4,
stride=2,
padding=1))
setattr(self, "up_samplers_" + str(i), nn.Sequential(*layers))
curr_dim = curr_dim // 2
self.img_reg = [
nn.Conv2d(curr_dim,
3,
kernel_size=7,
stride=1,
padding=3,
bias_attr=False)
]
self.img_reg = nn.Sequential(*self.img_reg)
def forward(self, x, y, x_p, y_p, consistency_mask, mask_x, mask_y):
"""forward"""
# y -> ref feature
# x -> src img
x = self.encoder(x)
_, c, h, w = x.shape
x_flat = x.reshape([-1, c, h * w])
x_flat = self.w * x_flat
if x_p is not None:
x_flat = paddle.concat([x_flat, x_p], axis=1)
_, c2, h2, w2 = y.shape
y_flat = y.reshape([-1, c2, h2 * w2])
y_flat = self.w * y_flat
if y_p is not None:
y_flat = paddle.concat([y_flat, y_p], axis=1)
a_ = paddle.matmul(x_flat, y_flat, transpose_x=True) * 200.0
# mask softmax
if consistency_mask is not None:
a_ = a_ - 100.0 * (1 - consistency_mask)
a = F.softmax(a_, axis=-1)
gamma, beta = self.simple_spade(y)
beta = beta.reshape([-1, h2 * w2, 1])
beta = paddle.matmul(a, beta)
beta = beta.reshape([-1, 1, h2, w2])
gamma = gamma.reshape([-1, h2 * w2, 1])
gamma = paddle.matmul(a, gamma)
gamma = gamma.reshape([-1, 1, h2, w2])
x = x * (1 + gamma) + beta
for i in range(self.repeat_num):
layer = getattr(self, "bottlenecks_" + str(i))
x = layer(x)
for idx in range(2):
layer = getattr(self, "up_samplers_" + str(idx))
x = layer(x)
layer = getattr(self, "up_acts_" + str(idx))
x = layer(x)
x = self.img_reg(x)
x = paddle.tanh(x)
return x, a
@GENERATORS.register()
class GeneratorPSGANAttention(paddle.nn.Layer):
def __init__(self, conv_dim=64, repeat_num=3):
super(GeneratorPSGANAttention, self).__init__()
self.ma_net = MANet(conv_dim=conv_dim, repeat_num=repeat_num)
self.md_net = MDNet(conv_dim=conv_dim, repeat_num=repeat_num)
def forward(self, x, y, x_p, y_p, consistency_mask, mask_x, mask_y):
"""forward"""
y = self.md_net(y)
out, a = self.ma_net(x, y, x_p, y_p, consistency_mask, mask_x, mask_y)
return out, a
@@ -45,19 +45,18 @@ class GANLoss(nn.Layer):
         Returns:
             A label tensor filled with ground truth label, and with the size of the input
         """
         if target_is_real:
             if not hasattr(self, 'target_real_tensor'):
-                self.target_real_tensor = paddle.fill_constant(
+                self.target_real_tensor = paddle.full(
                     shape=paddle.shape(prediction),
-                    value=self.target_real_label,
+                    fill_value=self.target_real_label,
                     dtype='float32')
             target_tensor = self.target_real_tensor
         else:
             if not hasattr(self, 'target_fake_tensor'):
-                self.target_fake_tensor = paddle.fill_constant(
+                self.target_fake_tensor = paddle.full(
                     shape=paddle.shape(prediction),
-                    value=self.target_fake_label,
+                    fill_value=self.target_fake_label,
                     dtype='float32')
             target_tensor = self.target_fake_tensor
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from .base_model import BaseModel
from .builder import MODELS
from .generators.builder import build_generator
from .discriminators.builder import build_discriminator
from .losses import GANLoss
from ..modules.init import init_weights
from ..solver import build_optimizer
from ..utils.image_pool import ImagePool
from ..utils.preprocess import *
from ..datasets.makeup_dataset import MakeupDataset
import numpy as np
from .vgg import vgg16
@MODELS.register()
class MakeupModel(BaseModel):
"""
This class implements the CycleGAN model, for learning image-to-image translation without paired data.
The model training requires '--dataset_mode unaligned' dataset.
By default, it uses a '--netG resnet_9blocks' ResNet generator,
a '--netD basic' discriminator (PatchGAN introduced by pix2pix),
and a least-square GANs objective ('--gan_mode lsgan').
CycleGAN paper: https://arxiv.org/pdf/1703.10593.pdf
"""
def __init__(self, opt):
"""Initialize the CycleGAN class.
Parameters:
opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions
"""
BaseModel.__init__(self, opt)
# specify the training losses you want to print out. The training/test scripts will call <BaseModel.get_current_losses>
# specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals>
visual_names_A = ['real_A', 'fake_A', 'rec_A']
visual_names_B = ['real_B', 'fake_B', 'rec_B']
if self.isTrain and self.opt.lambda_identity > 0.0:  # if identity loss is used, we also visualize idt_B=G_A(B) and idt_A=G_A(B)
visual_names_A.append('idt_B')
visual_names_B.append('idt_A')
self.visual_names = visual_names_A + visual_names_B # combine visualizations for A and B
self.vgg = vgg16(pretrained=True)
# specify the models you want to save to the disk. The training/test scripts will call <BaseModel.save_networks> and <BaseModel.load_networks>.
if self.isTrain:
self.model_names = ['G', 'D_A', 'D_B']
else: # during test time, only load Gs
self.model_names = ['G']
# define networks (both Generators and discriminators)
# The naming is different from those used in the paper.
# Code (vs. paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X)
self.netG = build_generator(opt.model.generator)
init_weights(self.netG, init_type='xavier', init_gain=1.0)
if self.isTrain: # define discriminators
self.netD_A = build_discriminator(opt.model.discriminator)
self.netD_B = build_discriminator(opt.model.discriminator)
init_weights(self.netD_A, init_type='xavier', init_gain=1.0)
init_weights(self.netD_B, init_type='xavier', init_gain=1.0)
if self.isTrain:
self.fake_A_pool = ImagePool(
opt.dataset.train.pool_size
) # create image buffer to store previously generated images
self.fake_B_pool = ImagePool(
opt.dataset.train.pool_size
) # create image buffer to store previously generated images
# define loss functions
self.criterionGAN = GANLoss(
opt.model.gan_mode) #.to(self.device) # define GAN loss.
self.criterionCycle = paddle.nn.L1Loss()
self.criterionIdt = paddle.nn.L1Loss()
self.criterionL1 = paddle.nn.L1Loss()
self.criterionL2 = paddle.nn.MSELoss()
self.build_lr_scheduler()
self.optimizer_G = build_optimizer(
opt.optimizer,
self.lr_scheduler,
parameter_list=self.netG.parameters())
# self.optimizer_D = paddle.optimizer.Adam(learning_rate=lr_scheduler_d, parameter_list=self.netD_A.parameters() + self.netD_B.parameters(), beta1=opt.beta1)
self.optimizer_DA = build_optimizer(
opt.optimizer,
self.lr_scheduler,
parameter_list=self.netD_A.parameters())
self.optimizer_DB = build_optimizer(
opt.optimizer,
self.lr_scheduler,
parameter_list=self.netD_B.parameters())
self.optimizers.append(self.optimizer_G)
# self.optimizers.append(self.optimizer_D)
self.optimizers.append(self.optimizer_DA)
self.optimizers.append(self.optimizer_DB)
self.optimizer_names.extend(
['optimizer_G', 'optimizer_DA', 'optimizer_DB'])
def set_input(self, input):
"""Unpack input data from the dataloader and perform necessary pre-processing steps.
Parameters:
input (dict): include the data itself and its metadata information.
The option 'direction' can be used to swap domain A and domain B.
"""
self.real_A = paddle.to_tensor(input['image_A'])
self.real_B = paddle.to_tensor(input['image_B'])
self.c_m = paddle.to_tensor(input['consis_mask'])
self.P_A = paddle.to_tensor(input['P_A'])
self.P_B = paddle.to_tensor(input['P_B'])
self.mask_A_aug = paddle.to_tensor(input['mask_A_aug'])
self.mask_B_aug = paddle.to_tensor(input['mask_B_aug'])
self.c_m_t = paddle.transpose(self.c_m, perm=[0, 2, 1])
if self.isTrain:
self.mask_A = paddle.to_tensor(input['mask_A'])
self.mask_B = paddle.to_tensor(input['mask_B'])
self.c_m_idt_a = paddle.to_tensor(input['consis_mask_idt_A'])
self.c_m_idt_b = paddle.to_tensor(input['consis_mask_idt_B'])
#self.hm_gt_A = self.hm_gt_A_lip + self.hm_gt_A_skin + self.hm_gt_A_eye
#self.hm_gt_B = self.hm_gt_B_lip + self.hm_gt_B_skin + self.hm_gt_B_eye
def forward(self):
"""Run forward pass; called by both functions <optimize_parameters> and <test>."""
self.fake_A, amm = self.netG(self.real_A, self.real_B, self.P_A,
self.P_B, self.c_m, self.mask_A_aug,
self.mask_B_aug) # G_A(A)
self.fake_B, _ = self.netG(self.real_B, self.real_A, self.P_B, self.P_A,
self.c_m_t, self.mask_A_aug,
self.mask_B_aug) # G_A(A)
self.rec_A, _ = self.netG(self.fake_A, self.real_A, self.P_A, self.P_A,
self.c_m_idt_a, self.mask_A_aug,
self.mask_B_aug) # G_A(A)
self.rec_B, _ = self.netG(self.fake_B, self.real_B, self.P_B, self.P_B,
self.c_m_idt_b, self.mask_A_aug,
self.mask_B_aug) # G_A(A)
def forward_test(self, input):
'''
not implement now
'''
return self.netG(input['image_A'], input['image_B'], input['P_A'],
input['P_B'], input['consis_mask'],
input['mask_A_aug'], input['mask_B_aug'])
def test(self, input):
"""Forward function used in test time.
This function wraps <forward> function in no_grad() so we don't save intermediate steps for backprop
It also calls <compute_visuals> to produce additional visualization results
"""
with paddle.no_grad():
return self.forward_test(input)
def backward_D_basic(self, netD, real, fake):
"""Calculate GAN loss for the discriminator
Parameters:
netD (network) -- the discriminator D
real (tensor array) -- real images
fake (tensor array) -- images generated by a generator
Return the discriminator loss.
We also call loss_D.backward() to calculate the gradients.
"""
# Real
pred_real = netD(real)
loss_D_real = self.criterionGAN(pred_real, True)
# Fake
pred_fake = netD(fake.detach())
loss_D_fake = self.criterionGAN(pred_fake, False)
# Combined loss and calculate gradients
loss_D = (loss_D_real + loss_D_fake) * 0.5
loss_D.backward()
return loss_D
def backward_D_A(self):
"""Calculate GAN loss for discriminator D_A"""
fake_B = self.fake_B_pool.query(self.fake_B)
self.loss_D_A = self.backward_D_basic(self.netD_A, self.real_B, fake_B)
self.losses['D_A_loss'] = self.loss_D_A
def backward_D_B(self):
"""Calculate GAN loss for discriminator D_B"""
fake_A = self.fake_A_pool.query(self.fake_A)
self.loss_D_B = self.backward_D_basic(self.netD_B, self.real_A, fake_A)
self.losses['D_B_loss'] = self.loss_D_B
def backward_G(self):
"""Calculate the loss for generators G_A and G_B"""
'''
self.loss_names = [
'G_A_vgg',
'G_B_vgg',
'G_bg_consis'
]
# specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals>
visual_names_A = ['real_A', 'fake_B', 'rec_A', 'amm_a']
visual_names_B = ['real_B', 'fake_A', 'rec_B', 'amm_b']
'''
lambda_idt = self.opt.lambda_identity
lambda_A = self.opt.lambda_A
lambda_B = self.opt.lambda_B
lambda_vgg = 5e-3
# Identity loss
if lambda_idt > 0:
self.idt_A, _ = self.netG(self.real_A, self.real_A, self.P_A,
self.P_A, self.c_m_idt_a, self.mask_A_aug,
self.mask_B_aug) # G_A(A)
self.loss_idt_A = self.criterionIdt(
self.idt_A, self.real_A) * lambda_A * lambda_idt
self.idt_B, _ = self.netG(self.real_B, self.real_B, self.P_B,
self.P_B, self.c_m_idt_b, self.mask_A_aug,
self.mask_B_aug) # G_A(A)
self.loss_idt_B = self.criterionIdt(
self.idt_B, self.real_B) * lambda_B * lambda_idt
else:
self.loss_idt_A = 0
self.loss_idt_B = 0
# GAN loss D_A(G_A(A))
self.loss_G_A = self.criterionGAN(self.netD_A(self.fake_A), True)
# GAN loss D_B(G_B(B))
self.loss_G_B = self.criterionGAN(self.netD_B(self.fake_B), True)
# Forward cycle loss || G_B(G_A(A)) - A||
self.loss_cycle_A = self.criterionCycle(self.rec_A,
self.real_A) * lambda_A
# Backward cycle loss || G_A(G_B(B)) - B||
self.loss_cycle_B = self.criterionCycle(self.rec_B,
self.real_B) * lambda_B
self.losses['G_A_adv_loss'] = self.loss_G_A
self.losses['G_B_adv_loss'] = self.loss_G_B
mask_A_lip = self.mask_A_aug[:, 0].unsqueeze(1)
mask_B_lip = self.mask_B_aug[:, 0].unsqueeze(1)
mask_A_lip_np = mask_A_lip.numpy().squeeze()
mask_B_lip_np = mask_B_lip.numpy().squeeze()
mask_A_lip_np, mask_B_lip_np, index_A_lip, index_B_lip = mask_preprocess(
mask_A_lip_np, mask_B_lip_np)
real_A = paddle.nn.clip((self.real_A + 1.0) / 2.0, 0.0, 1.0) * 255.0
real_A_np = real_A.numpy().squeeze()
real_B = paddle.nn.clip((self.real_B + 1.0) / 2.0, 0.0, 1.0) * 255.0
real_B_np = real_B.numpy().squeeze()
fake_A = paddle.nn.clip((self.fake_A + 1.0) / 2.0, 0.0, 1.0) * 255.0
fake_A_np = fake_A.numpy().squeeze()
fake_B = paddle.nn.clip((self.fake_B + 1.0) / 2.0, 0.0, 1.0) * 255.0
fake_B_np = fake_B.numpy().squeeze()
fake_match_lip_A = hisMatch(fake_A_np, real_B_np, mask_A_lip_np,
mask_B_lip_np, index_A_lip)
fake_match_lip_B = hisMatch(fake_B_np, real_A_np, mask_B_lip_np,
mask_A_lip_np, index_B_lip)
fake_match_lip_A = paddle.to_tensor(fake_match_lip_A)
fake_match_lip_A.stop_gradient = True
fake_match_lip_A = fake_match_lip_A.unsqueeze(0)
fake_match_lip_B = paddle.to_tensor(fake_match_lip_B)
fake_match_lip_B.stop_gradient = True
fake_match_lip_B = fake_match_lip_B.unsqueeze(0)
fake_A_lip_masked = fake_A * mask_A_lip
fake_B_lip_masked = fake_B * mask_B_lip
g_A_lip_loss_his = self.criterionL1(fake_A_lip_masked, fake_match_lip_A)
g_B_lip_loss_his = self.criterionL1(fake_B_lip_masked, fake_match_lip_B)
#skin
mask_A_skin = self.mask_A_aug[:, 1].unsqueeze(1)
mask_B_skin = self.mask_B_aug[:, 1].unsqueeze(1)
mask_A_skin_np = mask_A_skin.numpy().squeeze()
mask_B_skin_np = mask_B_skin.numpy().squeeze()
mask_A_skin_np, mask_B_skin_np, index_A_skin, index_B_skin = mask_preprocess(
mask_A_skin_np, mask_B_skin_np)
fake_match_skin_A = hisMatch(fake_A_np, real_B_np, mask_A_skin_np,
mask_B_skin_np, index_A_skin)
fake_match_skin_B = hisMatch(fake_B_np, real_A_np, mask_B_skin_np,
mask_A_skin_np, index_B_skin)
fake_match_skin_A = paddle.to_tensor(fake_match_skin_A)
fake_match_skin_A.stop_gradient = True
fake_match_skin_A = fake_match_skin_A.unsqueeze(0)
fake_match_skin_B = paddle.to_tensor(fake_match_skin_B)
fake_match_skin_B.stop_gradient = True
fake_match_skin_B = fake_match_skin_B.unsqueeze(0)
fake_A_skin_masked = fake_A * mask_A_skin
fake_B_skin_masked = fake_B * mask_B_skin
g_A_skin_loss_his = self.criterionL1(fake_A_skin_masked,
fake_match_skin_A)
g_B_skin_loss_his = self.criterionL1(fake_B_skin_masked,
fake_match_skin_B)
#eye
mask_A_eye = self.mask_A_aug[:, 2].unsqueeze(1)
mask_B_eye = self.mask_B_aug[:, 2].unsqueeze(1)
mask_A_eye_np = mask_A_eye.numpy().squeeze()
mask_B_eye_np = mask_B_eye.numpy().squeeze()
mask_A_eye_np, mask_B_eye_np, index_A_eye, index_B_eye = mask_preprocess(
mask_A_eye_np, mask_B_eye_np)
fake_match_eye_A = hisMatch(fake_A_np, real_B_np, mask_A_eye_np,
mask_B_eye_np, index_A_eye)
fake_match_eye_B = hisMatch(fake_B_np, real_A_np, mask_B_eye_np,
mask_A_eye_np, index_B_eye)
fake_match_eye_A = paddle.to_tensor(fake_match_eye_A)
fake_match_eye_A.stop_gradient = True
fake_match_eye_A = fake_match_eye_A.unsqueeze(0)
fake_match_eye_B = paddle.to_tensor(fake_match_eye_B)
fake_match_eye_B.stop_gradient = True
fake_match_eye_B = fake_match_eye_B.unsqueeze(0)
fake_A_eye_masked = fake_A * mask_A_eye
fake_B_eye_masked = fake_B * mask_B_eye
g_A_eye_loss_his = self.criterionL1(fake_A_eye_masked, fake_match_eye_A)
g_B_eye_loss_his = self.criterionL1(fake_B_eye_masked, fake_match_eye_B)
self.loss_G_A_his = (g_A_eye_loss_his + g_A_lip_loss_his +
g_A_skin_loss_his * 0.1) * 0.01
self.loss_G_B_his = (g_B_eye_loss_his + g_B_lip_loss_his +
g_B_skin_loss_his * 0.1) * 0.01
self.losses['G_A_his_loss'] = self.loss_G_A_his
self.losses['G_B_his_loss'] = self.loss_G_B_his
#vgg loss
vgg_s = self.vgg(self.real_A)
vgg_s.stop_gradient = True
vgg_fake_A = self.vgg(self.fake_A)
self.loss_A_vgg = self.criterionL2(vgg_fake_A,
vgg_s) * lambda_A * lambda_vgg
vgg_r = self.vgg(self.real_B)
vgg_r.stop_gradient = True
vgg_fake_B = self.vgg(self.fake_B)
self.loss_B_vgg = self.criterionL2(vgg_fake_B,
vgg_r) * lambda_B * lambda_vgg
self.loss_rec = (self.loss_cycle_A + self.loss_cycle_B +
self.loss_A_vgg + self.loss_B_vgg) * 0.2
self.loss_idt = (self.loss_idt_A + self.loss_idt_B) * 0.2
self.losses['G_A_vgg_loss'] = self.loss_A_vgg
self.losses['G_B_vgg_loss'] = self.loss_B_vgg
self.losses['G_rec_loss'] = self.loss_rec
self.losses['G_idt_loss'] = self.loss_idt
# bg consistency loss
mask_A_consis = paddle.cast(
(self.mask_A == 0), dtype='float32') + paddle.cast(
(self.mask_A == 10), dtype='float32') + paddle.cast(
(self.mask_A == 8), dtype='float32')
mask_A_consis = paddle.unsqueeze(paddle.clip(mask_A_consis, 0, 1), 1)
self.loss_G_bg_consis = self.criterionL1(
self.real_A * mask_A_consis, self.fake_A * mask_A_consis) * 0.1
# combined loss and calculate gradients
self.loss_G = self.loss_G_A + self.loss_G_B + self.loss_rec + self.loss_idt + self.loss_G_A_his + self.loss_G_B_his + self.loss_G_bg_consis
self.loss_G.backward()
def optimize_parameters(self):
"""Calculate losses, gradients, and update network weights; called in every training iteration"""
# forward
self.forward() # compute fake images and reconstruction images.
# G_A and G_B
self.set_requires_grad(
[self.netD_A, self.netD_B],
False) # Ds require no gradients when optimizing Gs
# self.optimizer_G.clear_gradients() #zero_grad() # set G_A and G_B's gradients to zero
self.backward_G() # calculate gradients for G_A and G_B
self.optimizer_G.minimize(
self.loss_G) #step() # update G_A and G_B's weights
self.optimizer_G.clear_gradients()
# self.optimizer_G.clear_gradients()
# D_A and D_B
# self.set_requires_grad([self.netD_A, self.netD_B], True)
self.set_requires_grad(self.netD_A, True)
# self.optimizer_D.clear_gradients() #zero_grad() # set D_A and D_B's gradients to zero
self.backward_D_A() # calculate gradients for D_A
self.optimizer_DA.minimize(
self.loss_D_A) #step() # update D_A and D_B's weights
self.optimizer_DA.clear_gradients() #zero_g
self.set_requires_grad(self.netD_B, True)
# self.optimizer_DB.clear_gradients() #zero_grad() # set D_A and D_B's gradients to zero
self.backward_D_B() # calculate graidents for D_B
self.optimizer_DB.minimize(
self.loss_D_B) #step() # update D_A and D_B's weights
self.optimizer_DB.clear_gradients(
) #zero_grad() # set D_A and D_B's gradients to zero
@@ -31,7 +31,6 @@ class Pix2PixModel(BaseModel):
         """
         BaseModel.__init__(self, opt)
         # specify the training losses you want to print out. The training/test scripts will call <BaseModel.get_current_losses>
-        self.loss_names = ['G_GAN', 'G_L1', 'D_real', 'D_fake']
         # specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals>
         self.visual_names = ['real_A', 'fake_B', 'real_B']
         # specify the models you want to save to the disk.
@@ -81,8 +80,8 @@
         """
         AtoB = self.opt.dataset.train.direction == 'AtoB'
-        self.real_A = paddle.to_variable(input['A' if AtoB else 'B'])
-        self.real_B = paddle.to_variable(input['B' if AtoB else 'A'])
+        self.real_A = paddle.to_tensor(input['A' if AtoB else 'B'])
+        self.real_B = paddle.to_tensor(input['B' if AtoB else 'A'])
         self.image_paths = input['A_paths' if AtoB else 'B_paths']
@@ -114,6 +113,9 @@
         else:
             self.loss_D.backward()

+        self.losses['D_fake_loss'] = self.loss_D_fake
+        self.losses['D_real_loss'] = self.loss_D_real
+
     def backward_G(self):
         """Calculate GAN and L1 loss for the generator"""
         # First, G(A) should fake the discriminator
@@ -134,6 +136,9 @@
         else:
             self.loss_G.backward()

+        self.losses['G_adv_loss'] = self.loss_G_GAN
+        self.losses['G_L1_loss'] = self.loss_G_L1
+
     def optimize_parameters(self):
         # compute fake images: G(A)
         self.forward()
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
from paddle.utils.download import get_weights_path_from_url
from paddle.vision.models.vgg import make_layers
cfg = [
64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512,
512, 512, 'M'
]
model_urls = {
'vgg16': ('https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams',
'89bbffc0f87d260be9b8cdc169c991c4')
}
class VGG(nn.Layer):
def __init__(self, features):
super(VGG, self).__init__()
self.features = features
def forward(self, x):
x = self.features(x)
return x
def vgg16(pretrained=False):
features = make_layers(cfg)
model = VGG(features)
if pretrained:
weight_path = get_weights_path_from_url(model_urls['vgg16'][0],
model_urls['vgg16'][1])
param = paddle.load(weight_path)
model.load_dict(param)
return model
@@ -80,7 +80,7 @@ def calculate_gain(nonlinearity, param=None):
 @paddle.no_grad()
 def constant_(x, value):
-    temp_value = paddle.fill_constant(x.shape, x.dtype, value)
+    temp_value = paddle.full(x.shape, value, x.dtype)
     x.set_value(temp_value)
     return x
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import math


class _SpectralNorm(nn.SpectralNorm):
......
import paddle
import functools
import paddle.nn as nn

from .nn import Spectralnorm


class Identity(nn.Layer):
@@ -35,6 +36,8 @@ def build_norm_layer(norm_type='instance'):
            bias_attr=paddle.ParamAttr(initializer=nn.initializer.Constant(0.0),
                                       learning_rate=0.0,
                                       trainable=False))
    elif norm_type == 'spectral':
        norm_layer = functools.partial(Spectralnorm)
    elif norm_type == 'none':

        def norm_layer(x):
......
@@ -12,25 +12,8 @@ def build_lr_scheduler(cfg):
                 0, epoch + 1 - cfg.start_epoch) / float(cfg.decay_epochs + 1)
             return lr_l

-        scheduler = paddle.optimizer.lr_scheduler.LambdaLR(
-            cfg.learning_rate, lr_lambda=lambda_rule)
+        scheduler = paddle.optimizer.lr.LambdaDecay(cfg.learning_rate,
+                                                    lr_lambda=lambda_rule)
         return scheduler
     else:
         raise NotImplementedError
-
-
-# paddle.optimizer.lr_scheduler
-class LinearDecay(paddle.optimizer.lr_scheduler._LRScheduler):
-    def __init__(self, learning_rate, step_per_epoch, start_epoch,
-                 decay_epochs):
-        super(LinearDecay, self).__init__()
-        self.learning_rate = learning_rate
-        self.start_epoch = start_epoch
-        self.decay_epochs = decay_epochs
-        self.step_per_epoch = step_per_epoch
-
-    def step(self):
-        cur_epoch = int(self.step_num // self.step_per_epoch)
-        decay_rate = 1.0 - max(
-            0, cur_epoch + 1 - self.start_epoch) / float(self.decay_epochs + 1)
-        return self.create_lr_var(decay_rate * self.learning_rate)
-import os
-from tqdm import tqdm
+import numpy as np
+from scipy.spatial import ConvexHull
 import paddle
-import imageio
-from scipy.spatial import ConvexHull
-import numpy as np


 def normalize_kp(kp_source,
                  kp_driving,
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description='Segmentron')
    parser.add_argument('--config-file',
                        metavar="FILE",
                        help='config file path')
    # cuda setting
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    # checkpoint and log
    parser.add_argument('--resume',
                        type=str,
                        default=None,
                        help='put the path to resuming file if needed')
    parser.add_argument('--load',
                        type=str,
                        default=None,
                        help='put the path to resuming file if needed')
    # for evaluation
    parser.add_argument('--val-interval',
                        type=int,
                        default=1,
                        help='run validation every interval')
    parser.add_argument('--evaluate-only',
                        action='store_true',
                        default=False,
                        help='skip validation during training')
    # config options
    parser.add_argument('opts',
                        help='See config for all options',
                        default=None,
                        nargs=argparse.REMAINDER)
    # for inference
    parser.add_argument("--source_path",
                        default="",
                        metavar="FILE",
                        help="path to source image")
    parser.add_argument("--reference_dir",
                        default="",
                        help="path to reference images")
    parser.add_argument("--model_path", default="", help="model for loading")

    args = parser.parse_args()
    return args
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
def generate_P_from_lmks(lmks, resize, w, h):
    """Build the pixel-to-landmark relative-position matrix P from 68 facial landmarks."""
diff_size = (64, 64)
xs, ys = np.meshgrid(np.linspace(0, resize - 1, resize),
np.linspace(0, resize - 1, resize))
xs = xs[None].repeat(68, axis=0)
ys = ys[None].repeat(68, axis=0)
fix = np.concatenate([ys, xs], axis=0)
lmks = lmks.transpose(1, 0).reshape(-1, 1, 1)
diff = fix - lmks
diff = diff.transpose(1, 2, 0)
diff = cv2.resize(diff, diff_size, interpolation=cv2.INTER_NEAREST)
diff = diff.transpose(2, 0, 1).reshape(136, -1)
norm = np.linalg.norm(diff, axis=0)
P_np = diff / norm
return P_np
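# Illustrative sketch (not in the original file): how generate_P_from_lmks might
# be called, assuming lmks is a (68, 2) array of (y, x) pixel coordinates on a
# resize x resize image; the random landmarks below are placeholders.
def _demo_generate_P():
    lmks = np.random.uniform(16.0, 240.0, size=(68, 2)).astype(np.float32)
    P = generate_P_from_lmks(lmks, resize=256, w=256, h=256)
    print(P.shape)  # (136, 4096): 2 * 68 normalized offsets per 64x64 down-sampled pixel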
def copy_area(tar, src, lms):
    """Copy the landmark-bounded region (with a 16-pixel margin) from src into tar, then zero it in src."""
rect = [
int(min(lms[:, 1])) - 16,
int(min(lms[:, 0])) - 16,
int(max(lms[:, 1])) + 16 + 1,
int(max(lms[:, 0])) + 16 + 1
]
tar[rect[1]:rect[3], rect[0]:rect[2]] = \
src[rect[1]:rect[3], rect[0]:rect[2]]
src[rect[1]:rect[3], rect[0]:rect[2]] = 0
def rebound_box(mask, mask_B, mask_face):
    """Expand both region masks by a 16-pixel margin copied from the face mask."""
index_tmp = mask.nonzero()
x_index = index_tmp[0]
y_index = index_tmp[1]
index_tmp = mask_B.nonzero()
x_B_index = index_tmp[0]
y_B_index = index_tmp[1]
mask_temp = np.copy(mask)
mask_B_temp = np.copy(mask_B)
mask_temp[min(x_index) - 16:max(x_index) + 17, min(y_index) - 16:max(y_index) + 17] =\
mask_face[min(x_index) -
16:max(x_index) +
17, min(y_index) -
16:max(y_index) +
17]
mask_B_temp[min(x_B_index) - 16:max(x_B_index) + 17, min(y_B_index) - 16:max(y_B_index) + 17] =\
mask_face[min(x_B_index) -
16:max(x_B_index) +
17, min(y_B_index) -
16:max(y_B_index) +
17]
return mask_temp, mask_B_temp
def calculate_consis_mask(mask, mask_B):
    """Calculate the consistency mask between two (3, H, W) stacks of region masks."""
    h_a, w_a = mask.shape[1:]
    h_b, w_b = mask_B.shape[1:]
    mask_transpose = np.transpose(mask, (1, 2, 0))
    mask_B_transpose = np.transpose(mask_B, (1, 2, 0))
    mask = cv2.resize(mask_transpose,
                      dsize=(w_a // 4, h_a // 4),
                      interpolation=cv2.INTER_NEAREST)
    mask = np.transpose(mask, (2, 0, 1))
    mask_B = cv2.resize(mask_B_transpose,
                        dsize=(w_b // 4, h_b // 4),
                        interpolation=cv2.INTER_NEAREST)
    mask_B = np.transpose(mask_B, (2, 0, 1))
    h_a, w_a = mask.shape[1:]
    h_b, w_b = mask_B.shape[1:]
mask_lip = mask[0]
mask_skin = mask[1]
mask_eye = mask[2]
mask_B_lip = mask_B[0]
mask_B_skin = mask_B[1]
mask_B_eye = mask_B[2]
maskA_one_hot = np.zeros((h_a * w_a, 3))
maskA_one_hot[:, 0] = mask_skin.flatten()
maskA_one_hot[:, 1] = mask_eye.flatten()
maskA_one_hot[:, 2] = mask_lip.flatten()
maskB_one_hot = np.zeros((h_b * w_b, 3))
maskB_one_hot[:, 0] = mask_B_skin.flatten()
maskB_one_hot[:, 1] = mask_B_eye.flatten()
maskB_one_hot[:, 2] = mask_B_lip.flatten()
con_mask = np.matmul(maskA_one_hot.reshape((h_a * w_a, 3)),
np.transpose(maskB_one_hot.reshape((h_b * w_b, 3))))
con_mask = np.clip(con_mask, 0, 1)
return con_mask
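# Illustrative sketch (not in the original file): expected shapes for
# calculate_consis_mask, using random binary masks of a placeholder size.
def _demo_consis_mask():
    # each input is a (3, H, W) stack of lip / skin / eye masks
    mask_A = np.random.randint(0, 2, size=(3, 64, 64)).astype(np.float32)
    mask_B = np.random.randint(0, 2, size=(3, 64, 64)).astype(np.float32)
    con = calculate_consis_mask(mask_A, mask_B)
    # one row per down-sampled (H//4 * W//4) pixel of A, one column per pixel of B
    print(con.shape)  # (256, 256)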
def cal_hist(image):
    """
    Calculate the cumulative histogram (CDF) for each of the three channels.
    """
    hists = []
    for i in range(0, 3):
        channel = image[i]
        hist, _ = np.histogram(channel, bins=256, range=(0, 255))
        total = hist.sum()
        pdf = [v / total for v in hist]
        for j in range(1, 256):
            pdf[j] = pdf[j - 1] + pdf[j]
        hists.append(pdf)
    return hists
def cal_trans(ref, adj):
    """
    Calculate the transfer function between two cumulative histograms,
    following the histogram-matching algorithm (see the Wikipedia entry "Histogram matching").
    """
table = list(range(0, 256))
for i in list(range(1, 256)):
for j in list(range(1, 256)):
if ref[i] >= adj[j - 1] and ref[i] <= adj[j]:
table[i] = j
break
table[255] = 255
return table
def histogram_matching(dstImg, refImg, index):
    """
    Perform histogram matching: dstImg is transformed so that its histogram matches refImg's.
    index[0], index[1]: indices of the pixels to transform in dstImg
    index[2], index[3]: indices of the pixels used to compute the reference histogram in refImg
    """
dst_align = [dstImg[i, index[0], index[1]] for i in range(0, 3)]
ref_align = [refImg[i, index[2], index[3]] for i in range(0, 3)]
hist_ref = cal_hist(ref_align)
hist_dst = cal_hist(dst_align)
tables = [cal_trans(hist_dst[i], hist_ref[i]) for i in range(0, 3)]
mid = dst_align.copy()
for i in range(0, 3):
for k in range(0, len(index[0])):
dst_align[i][k] = tables[i][int(mid[i][k])]
for i in range(0, 3):
dstImg[i, index[0], index[1]] = dst_align[i]
return dstImg
def hisMatch(input_data, target_data, mask_src, mask_tar, index):
    """Histogram-match the masked region of input_data to the masked region of target_data."""
mask_src = np.float32(np.clip(mask_src, 0, 1))
mask_tar = np.float32(np.clip(mask_tar, 0, 1))
input_masked = np.float32(input_data) * mask_src
target_masked = np.float32(target_data) * mask_tar
input_match = histogram_matching(input_masked, target_masked, index)
return input_match
def mask_preprocess(mask, mask_B):
    """Collect the nonzero-pixel indices of both masks for histogram matching."""
index_tmp = mask.nonzero()
x_index = index_tmp[0]
y_index = index_tmp[1]
index_tmp = mask_B.nonzero()
x_B_index = index_tmp[0]
y_B_index = index_tmp[1]
index = [x_index, y_index, x_B_index, y_B_index]
index_2 = [x_B_index, y_B_index, x_index, y_index]
return [mask, mask_B, index, index_2]
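# Illustrative sketch (not in the original file): wiring mask_preprocess and
# hisMatch together; the images, rectangular "lip" masks and sizes below are
# synthetic placeholders, not values used by the repository.
def _demo_hist_match():
    src_img = np.random.randint(0, 256, size=(3, 256, 256)).astype(np.float32)
    ref_img = np.random.randint(0, 256, size=(3, 256, 256)).astype(np.float32)
    lip_src = np.zeros((256, 256), dtype=np.float32)
    lip_ref = np.zeros((256, 256), dtype=np.float32)
    lip_src[150:180, 90:170] = 1.0   # dummy lip region in the source face
    lip_ref[160:190, 100:180] = 1.0  # dummy lip region in the reference face
    mask_src, mask_ref, index, _ = mask_preprocess(lip_src, lip_ref)
    # the source lip pixels are remapped to follow the reference lip histogram
    matched = hisMatch(src_img, ref_img, mask_src, mask_ref, index)
    print(matched.shape)  # (3, 256, 256)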
def generate_mask_aug(mask, lmks):
    """Build lip / skin / eye region masks from a face-parsing map and 68 facial landmarks."""
lms_eye_left = lmks[42:48]
lms_eye_right = lmks[36:42]
mask_eye_left = np.zeros_like(mask)
mask_eye_right = np.zeros_like(mask)
mask_face = np.float32(mask == 1) + np.float32(mask == 6)
copy_area(mask_eye_left, mask_face, lms_eye_left)
copy_area(mask_eye_right, mask_face, lms_eye_right)
mask_skin = mask_face
mask_lip = np.float32(mask == 7) + np.float32(mask == 9)
mask_eye = mask_eye_left + mask_eye_right
mask_aug = np.concatenate(
(np.expand_dims(mask_lip, 0), np.expand_dims(
mask_skin, 0), np.expand_dims(mask_eye, 0)), 0)
return mask_aug
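# Illustrative sketch (not in the original file): generate_mask_aug with a
# synthetic face-parsing map, using the label convention implied by the code
# above (1/6 = face or skin, 7/9 = lips); the landmark values are placeholders.
def _demo_mask_aug():
    parsing = np.zeros((256, 256), dtype=np.float32)
    parsing[60:220, 60:200] = 1      # dummy face region (label 1)
    parsing[170:190, 100:160] = 7    # dummy lip region (label 7)
    lmks = np.random.uniform(90.0, 180.0, size=(68, 2))  # dummy 68-point landmarks
    mask_aug = generate_mask_aug(parsing, lmks)
    print(mask_aug.shape)  # (3, 256, 256): lip, skin and eye channels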
...@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
...@@ -30,7 +31,7 @@ def main(args, cfg):
    # build trainer
    trainer = Trainer(cfg)
    # continue train or evaluate, checkpoint need contain epoch and optimizer info
    if args.resume:
        trainer.resume(args.resume)
...@@ -50,4 +51,3 @@ if __name__ == '__main__':
    cfg = get_config(args.config_file)
    main(args, cfg)