diff --git a/applications/DAIN/demo.py b/applications/DAIN/demo.py deleted file mode 100644 index 6a4377cf984a70a74155f413fd631a0e05764342..0000000000000000000000000000000000000000 --- a/applications/DAIN/demo.py +++ /dev/null @@ -1,233 +0,0 @@ -import os, sys -import math -import random -import time -import glob -import shutil -import numpy as np -from imageio import imread, imsave -import cv2 - -import paddle.fluid as fluid - -import networks -from util import * -from my_args import args - -if __name__ == '__main__': - - DO_MiddleBurryOther = True - - video_path = args.video_path - output_path = args.output_path - frame_path_input = os.path.join(output_path, 'frames-input') - frame_path_interpolated = os.path.join(output_path, 'frames-interpolated') - frame_path_combined = os.path.join(output_path, 'frames-combined') - video_path_input = os.path.join(output_path, 'videos-input') - video_path_output = os.path.join(output_path, 'videos-output') - - if not os.path.exists(output_path): - os.makedirs(output_path) - if not os.path.exists(frame_path_input): - os.makedirs(frame_path_input) - if not os.path.exists(frame_path_interpolated): - os.makedirs(frame_path_interpolated) - if not os.path.exists(frame_path_combined): - os.makedirs(frame_path_combined) - if not os.path.exists(video_path_input): - os.makedirs(video_path_input) - if not os.path.exists(video_path_output): - os.makedirs(video_path_output) - - args.KEY_FRAME_THREAD = 0. - saved_model = args.saved_model - - timestep = args.time_step - num_frames = int(1.0 / timestep) - 1 - - image = fluid.data(name='image', - shape=[2, 1, args.channels, -1, -1], - dtype='float32') - DAIN = networks.__dict__["DAIN_slowmotion"](channel=args.channels, - filter_size=args.filter_size, - timestep=args.time_step, - training=False) - out = DAIN(image) - out = out[0][1] - - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - fetch_list = [out.name] - - inference_program = fluid.default_main_program().clone(for_test=True) - inference_program = fluid.io.load_persistables(exe, saved_model, - inference_program) - - if not DO_MiddleBurryOther: - sys.exit() - - if video_path.endswith('.mp4'): - videos = [video_path] - else: - videos = sorted(glob.glob(os.path.join(video_path, '*.mp4'))) - for cnt, vid in enumerate(videos): - print("Interpolating video:", vid) - cap = cv2.VideoCapture(vid) - fps = cap.get(cv2.CAP_PROP_FPS) - print("Old fps (frame rate): ", fps) - - timestep = args.time_step - times_interp = int(1.0 / timestep) - r2 = str(int(fps) * times_interp) - - print("New fps (frame rate): ", r2) - - # set start and end of video - #ss = 0 - #t = 10 - #ss = time.strftime('%H:%M:%S', time.gmtime(ss)) - #t = time.strftime('%H:%M:%S', time.gmtime(t)) - #print(r, ss, t) - r = None - ss = None - t = None - - out_path = dump_frames_ffmpeg(vid, frame_path_input, r, ss, t) - - vidname = vid.split('/')[-1].split('.')[0] - - tot_timer = AverageMeter() - proc_timer = AverageMeter() - end = time.time() - - frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) - - img = imread(frames[0]) - - int_width = img.shape[1] - int_height = img.shape[0] - channel = img.shape[2] - if not channel == 3: - continue - - if int_width != ((int_width >> 7) << 7): - int_width_pad = (((int_width >> 7) + 1) << 7) # more than necessary - padding_left = int((int_width_pad - int_width) / 2) - padding_right = int_width_pad - int_width - padding_left - else: - int_width_pad = int_width - padding_left = 32 - padding_right = 32 - - if 
int_height != ((int_height >> 7) << 7): - int_height_pad = ( - ((int_height >> 7) + 1) << 7) # more than necessary - padding_top = int((int_height_pad - int_height) / 2) - padding_bottom = int_height_pad - int_height - padding_top - else: - int_height_pad = int_height - padding_top = 32 - padding_bottom = 32 - - frame_num = len(frames) - print(os.path.join(frame_path_input, vidname, '*.png')) - print('processing {} frames, from video: {}'.format(frame_num, vid)) - - if not os.path.exists(os.path.join(frame_path_interpolated, vidname)): - os.makedirs(os.path.join(frame_path_interpolated, vidname)) - if not os.path.exists(os.path.join(frame_path_combined, vidname)): - os.makedirs(os.path.join(frame_path_combined, vidname)) - - for i in range(frame_num - 1): - print(frames[i]) - first = frames[i] - second = frames[i + 1] - - img_first = imread(first) - img_second = imread(second) - '''--------------Frame change test------------------------''' - img_first_gray = np.dot(img_first[..., :3], [0.299, 0.587, 0.114]) - img_second_gray = np.dot(img_second[..., :3], [0.299, 0.587, 0.114]) - - img_first_gray = img_first_gray.flatten(order='C') - img_second_gray = img_second_gray.flatten(order='C') - corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1] - key_frame = False - if corr < args.KEY_FRAME_THREAD: - key_frame = True - '''-------------------------------------------------------''' - - X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255 - X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255 - - if key_frame: - y_ = [ - np.transpose(255.0 * X0.clip(0, 1.0), (1, 2, 0)) - for i in range(num_frames) - ] - else: - assert (X0.shape[1] == X1.shape[1]) - assert (X0.shape[2] == X1.shape[2]) - - print("size before padding ", X0.shape) - X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \ - (padding_left, padding_right)), mode='edge') - X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \ - (padding_left, padding_right)), mode='edge') - print("size after padding ", X0.shape) - - X0 = np.expand_dims(X0, axis=0) - X1 = np.expand_dims(X1, axis=0) - - X0 = np.expand_dims(X0, axis=0) - X1 = np.expand_dims(X1, axis=0) - - X = np.concatenate((X0, X1), axis=0) - - proc_end = time.time() - o = exe.run(inference_program, - fetch_list=fetch_list, - feed={"image": X}) - y_ = o[0] - - proc_timer.update(time.time() - proc_end) - tot_timer.update(time.time() - end) - end = time.time() - print("*******current image process time \t " + - str(time.time() - proc_end) + "s ******") - - y_ = [ - np.transpose( - 255.0 * item.clip( - 0, 1.0)[0, :, padding_top:padding_top + int_height, - padding_left:padding_left + int_width], - (1, 2, 0)) for item in y_ - ] - time_offsets = [ - kk * timestep for kk in range(1, 1 + num_frames, 1) - ] - - count = 1 - for item, time_offset in zip(y_, time_offsets): - out_dir = os.path.join( - frame_path_interpolated, vidname, - "{:0>4d}_{:0>4d}.png".format(i, count)) - count = count + 1 - imsave(out_dir, np.round(item).astype(np.uint8)) - - timestep = args.time_step - num_frames = int(1.0 / timestep) - 1 - - input_dir = os.path.join(frame_path_input, vidname) - interpolated_dir = os.path.join(frame_path_interpolated, vidname) - combined_dir = os.path.join(frame_path_combined, vidname) - combine_frames(input_dir, interpolated_dir, combined_dir, num_frames) - - frame_pattern_combined = os.path.join(frame_path_combined, vidname, - '%08d.png') - video_pattern_output = os.path.join(video_path_output, vidname + '.mp4') - if os.path.exists(video_pattern_output): - 
os.remove(video_pattern_output)
-        frames_to_video_ffmpeg(frame_pattern_combined, video_pattern_output,
-                               r2)
diff --git a/applications/DAIN/my_args.py b/applications/DAIN/my_args.py
deleted file mode 100644
index 448c3c2215c4a5fcba61bcb79ca242d3a3db1b18..0000000000000000000000000000000000000000
--- a/applications/DAIN/my_args.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import os
-import datetime
-import argparse
-import numpy
-import networks
-
-modelnames = networks.__all__
-# import datasets
-datasetNames = ('Vimeo_90K_interp', )  #datasets.__all__
-
-parser = argparse.ArgumentParser(description='DAIN')
-
-parser.add_argument('--debug', action='store_true', help='Enable debug mode')
-parser.add_argument('--netName',
-                    type=str,
-                    default='DAIN',
-                    choices=modelnames,
-                    help='model architecture: ' + ' | '.join(modelnames) +
-                    ' (default: DAIN)')
-
-parser.add_argument('--datasetName',
-                    default='Vimeo_90K_interp',
-                    choices=datasetNames,
-                    nargs='+',
-                    help='dataset type : ' + ' | '.join(datasetNames) +
-                    ' (default: Vimeo_90K_interp)')
-parser.add_argument('--video_path',
-                    default='',
-                    help='the path of the selected videos')
-parser.add_argument('--output_path', default='', help='the output root path')
-
-parser.add_argument('--seed',
-                    type=int,
-                    default=1,
-                    help='random seed (default: 1)')
-
-parser.add_argument('--batch_size',
-                    '-b',
-                    type=int,
-                    default=1,
-                    help='batch size (default: 1)')
-parser.add_argument('--channels',
-                    '-c',
-                    type=int,
-                    default=3,
-                    choices=[1, 3],
-                    help='channels of images (default: 3)')
-parser.add_argument('--filter_size',
-                    '-f',
-                    type=int,
-                    default=4,
-                    help='the size of filters used (default: 4)',
-                    choices=[2, 4, 6, 5, 51])
-
-parser.add_argument('--time_step',
-                    type=float,
-                    default=0.5,
-                    help='choose the time steps')
-parser.add_argument(
-    '--alpha',
-    type=float,
-    nargs='+',
-    default=[0.0, 1.0],
-    help=
-    'the ratio of loss for the interpolated and rectified results (default: [0.0, 1.0])'
-)
-parser.add_argument('--frame_rate',
-                    type=int,
-                    default=None,
-                    help='frame rate of the input video')
-
-parser.add_argument('--patience',
-                    type=int,
-                    default=5,
-                    help='the patience of reduce-on-plateau')
-parser.add_argument('--factor',
-                    type=float,
-                    default=0.2,
-                    help='the factor of reduce-on-plateau')
-
-parser.add_argument('--saved_model',
-                    type=str,
-                    default='',
-                    help='path to the model weights')
-parser.add_argument('--no-date',
-                    action='store_true',
-                    help='don\'t append date timestamp to folder')
-parser.add_argument('--use_cuda',
-                    default=True,
-                    type=bool,
-                    help='use cuda or not')
-parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not')
-parser.add_argument('--remove_duplicates',
-                    default=True,
-                    type=bool,
-                    help='remove duplicate frames or not')
diff --git a/applications/DAIN/networks/__init__.py b/applications/DAIN/networks/__init__.py
deleted file mode 100644
index 2462f961093ff2581353fcd74440f7c04e900ed3..0000000000000000000000000000000000000000
--- a/applications/DAIN/networks/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .dain import DAIN
-from .dain_slowmotion import DAIN_slowmotion
-__all__ = ('DAIN', 'DAIN_slowmotion')
diff --git a/applications/DAIN/networks/dain.py b/applications/DAIN/networks/dain.py
deleted file mode 100644
index 8d51c9f7c86f687bb034125850ee5bc0c5cd0272..0000000000000000000000000000000000000000
--- a/applications/DAIN/networks/dain.py
+++ /dev/null
@@ -1,124 +0,0 @@
-import paddle.fluid as fluid
-import resblock
-import pwcnet
-
-
-class DAIN(fluid.dygraph.Layer):
-    def __init__(self,
channel=3, filter_size=4, timestep=0.5, training=True): - # base class initialization - super(DAIN, self).__init__() - - self.filter_size = filter_size - self.training = training - self.timestep = timestep - assert (timestep == 0.5) - self.numFrames = int(1.0 / timestep) - 1 - - ctx_ch = 3 * 64 + 3 - inplanes = 3 + 3 + 3 + 2 * 1 + 2 * 2 + 16 * 2 + 2 * ctx_ch - - self.rectifyNet = resblock.__dict__['MultipleBasicBlock_4'](inplanes, - 64) - self.flownets = pwcnet.__dict__['pwc_dc_net']() - self.div_flow = 20.0 - - def forward(self, input): - """ - Parameters - ---------- - input: shape (3, batch, 3, width, height) - ----------- - """ - losses = [] - offsets = [] - ''' - STEP 1: sequeeze the input - ''' - if self.training == True: - - assert input.shape[0] == 3 - input_0 = input[0] - input_1 = input[1] - input_2 = input[2] - else: - # print(input.shape[0]) - assert input.shape[0] == 2 - input_0 = input[0] - input_2 = input[1] - - #prepare the input data of current scale - cur_input_0 = input_0 - if self.training == True: - cur_input_1 = input_1 - cur_input_2 = input_2 - ''' - STEP 3.2: concatenating the inputs. - ''' - cur_offset_input = fluid.layers.concat([cur_input_0, cur_input_2], - axis=1) - ''' - STEP 3.3: perform the estimation - ''' - time_offsets = [ - kk * self.timestep for kk in range(1, 1 + self.numFrames, 1) - ] - - cur_offset_outputs = [ - self.forward_flownets(self.flownets, - cur_offset_input, - time_offsets=time_offsets), - self.forward_flownets(self.flownets, - fluid.layers.concat( - [cur_input_2, cur_input_0], axis=1), - time_offsets=time_offsets[::-1]) - ] - - cur_offset_output = [cur_offset_outputs[0][0], cur_offset_outputs[1][0]] - - # Warp image use warp-op in PWC-Net - ref0 = self.flownets.warp_nomask(cur_input_0, cur_offset_output[0]) - ref2 = self.flownets.warp_nomask(cur_input_2, cur_offset_output[1]) - cur_output = (ref0 + ref2) / 2.0 - - rectify_input = fluid.layers.concat([ - cur_output, ref0, ref2, cur_offset_output[0], cur_offset_output[1] - ], - axis=1) - - cur_output_rectified = self.rectifyNet(rectify_input) + cur_output - ''' - STEP 3.5: for training phase, we collect the variables to be penalized. - ''' - if self.training == True: - losses += [cur_output - cur_input_1] - losses += [cur_output_rectified - cur_input_1] - offsets += [cur_offset_output] - ''' - STEP 4: return the results - ''' - if self.training == True: - # if in the training phase, we output the losses to be minimized. - # return losses, loss_occlusion - return losses, offsets - else: - cur_outputs = [cur_output, cur_output_rectified] - return cur_outputs, cur_offset_output - - def forward_flownets(self, model, input, time_offsets=None): - - if time_offsets == None: - time_offsets = [0.5] - elif type(time_offsets) == float: - time_offsets = [time_offsets] - elif type(time_offsets) == list: - pass - # this is a single direction motion results, but not a bidirectional one - temp = model(input) - - # single direction to bidirection should haven it. 
- temps = [ - self.div_flow * temp * time_offset for time_offset in time_offsets - ] - # nearest interpolation won't be better i think - temps = [fluid.layers.resize_bilinear(temp, scale=4) for temp in temps] - return temps diff --git a/applications/DAIN/networks/dain_slowmotion.py b/applications/DAIN/networks/dain_slowmotion.py deleted file mode 100644 index e21b3ee2ddf5f4126810e01f2c9022f62bbc3fd8..0000000000000000000000000000000000000000 --- a/applications/DAIN/networks/dain_slowmotion.py +++ /dev/null @@ -1,148 +0,0 @@ -import paddle.fluid as fluid -import resblock -import time -import pwcnet - - -class DAIN_slowmotion(fluid.dygraph.Layer): - def __init__(self, channel=3, filter_size=4, timestep=0.5, training=True): - # base class initialization - super(DAIN_slowmotion, self).__init__() - - self.filter_size = filter_size - self.training = training - self.timestep = timestep - self.num_frames = int(1.0 / timestep) - 1 - - ctx_ch = 3 * 64 + 3 - # inplanes = 3 + 3 + 3 + 2*1 + 2*2 + 2 - inplanes = 13 - - self.flownets = pwcnet.__dict__['pwc_dc_net']() - self.rectifyNet = resblock.__dict__['MultipleBasicBlock_4'](inplanes, - 64) - self.div_flow = 20.0 - - def forward(self, input): - """ - Parameters - ---------- - input: shape (3, batch, 3, width, height) - ----------- - """ - losses = [] - offsets = [] - ''' - STEP 1: sequeeze the input - ''' - if self.training == True: - - assert input.shape[0] == 3 - input_0 = input[0] - input_1 = input[1] - input_2 = input[2] - else: - assert input.shape[0] == 2 - input_0 = input[0] - input_2 = input[1] - - #prepare the input data of current scale - cur_input_0 = input_0 - if self.training == True: - cur_input_1 = input_1 - cur_input_2 = input_2 - ''' - STEP 3.2: concatenating the inputs. - ''' - cur_offset_input = fluid.layers.concat([cur_input_0, cur_input_2], - axis=1) - ''' - STEP 3.3: perform the estimation - ''' - time_offsets = [ - kk * self.timestep for kk in range(1, 1 + self.num_frames, 1) - ] - - cur_offset_outputs = [ - self.forward_flownets(self.flownets, - cur_offset_input, - time_offsets=time_offsets), - self.forward_flownets(self.flownets, - fluid.layers.concat( - [cur_input_2, cur_input_0], axis=1), - time_offsets=time_offsets[::-1]) - ] - ''' - STEP 3.4: perform the frame interpolation process - ''' - count = 0 - for temp_0, temp_1, timeoffset in zip(cur_offset_outputs[0], - cur_offset_outputs[1], - time_offsets): - cur_offset_output = [temp_0, temp_1] - - ref0 = self.flownets.warp_nomask(cur_input_0, cur_offset_output[0]) - ref2 = self.flownets.warp_nomask(cur_input_2, cur_offset_output[1]) - cur_output_temp = (ref0 + ref2) / 2.0 - - if count == 0: - cur_output = fluid.layers.unsqueeze(cur_output_temp, axes=0) - else: - cur_output_ = fluid.layers.unsqueeze(cur_output_temp, axes=0) - cur_output = fluid.layers.concat([cur_output, cur_output_], - axis=0) - - rectify_input = fluid.layers.concat([ - cur_output_temp, ref0, ref2, cur_offset_output[0], - cur_offset_output[1] - ], - axis=1) - - cur_output_rectified_temp = self.rectifyNet( - rectify_input) + cur_output_temp - - if count == 0: - cur_output_rectified = fluid.layers.unsqueeze( - cur_output_rectified_temp, axes=0) - else: - cur_output_rectified_ = fluid.layers.unsqueeze( - cur_output_rectified_temp, axes=0) - cur_output_rectified = fluid.layers.concat( - [cur_output_rectified, cur_output_rectified_], axis=0) - - count += 1 - ''' - STEP 3.5: for training phase, we collect the variables to be penalized. 
- ''' - if self.training == True: - losses += [cur_output - cur_input_1] - losses += [cur_output_rectified - cur_input_1] - offsets += [cur_offset_output] - ''' - STEP 4: return the results - ''' - if self.training == True: - # if in the training phase, we output the losses to be minimized. - # return losses, loss_occlusion - return losses, offsets - else: - cur_outputs = [cur_output, cur_output_rectified] - return cur_outputs, cur_offset_output - - def forward_flownets(self, model, input, time_offsets=None): - if time_offsets == None: - time_offsets = [0.5] - elif type(time_offsets) == float: - time_offsets = [time_offsets] - elif type(time_offsets) == list: - pass - # this is a single direction motion results, but not a bidirectional one - temp = model(input) - - # single direction to bidirection should haven it. - temps = [ - self.div_flow * temp * time_offset for time_offset in time_offsets - ] - # nearest interpolation won't be better i think - temps = [fluid.layers.resize_bilinear(temp, scale=4) for temp in temps] - return temps diff --git a/applications/DAIN/predict.py b/applications/DAIN/predict.py deleted file mode 100644 index 39290433b2b0cfbc2bfcce18d800740987c1d131..0000000000000000000000000000000000000000 --- a/applications/DAIN/predict.py +++ /dev/null @@ -1,277 +0,0 @@ -import os -import sys - -cur_path = os.path.abspath(os.path.dirname(__file__)) -sys.path.append(cur_path) - -import time -import glob -import numpy as np -from imageio import imread, imsave -from tqdm import tqdm -import cv2 - -import paddle.fluid as fluid -from paddle.utils.download import get_path_from_url -from ppgan.utils.video import video2frames, frames2video - -from util import * -from my_args import parser - -DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar' - - -def infer_engine(model_dir, - run_mode='fluid', - batch_size=1, - use_gpu=False, - min_subgraph_size=3): - if not use_gpu and not run_mode == 'fluid': - raise ValueError( - "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}" - .format(run_mode, use_gpu)) - precision_map = { - 'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32, - 'trt_fp16': fluid.core.AnalysisConfig.Precision.Half - } - config = fluid.core.AnalysisConfig(os.path.join(model_dir, 'model'), - os.path.join(model_dir, 'params')) - if use_gpu: - # initial GPU memory(M), device ID - config.enable_use_gpu(100, 0) - # optimize graph and fuse op - config.switch_ir_optim(True) - else: - config.disable_gpu() - - if run_mode in precision_map.keys(): - config.enable_tensorrt_engine(workspace_size=1 << 10, - max_batch_size=batch_size, - min_subgraph_size=min_subgraph_size, - precision_mode=precision_map[run_mode], - use_static=False, - use_calib_mode=False) - - # disable print log when predict - config.disable_glog_info() - # enable shared memory - config.enable_memory_optim() - # disable feed, fetch OP, needed by zero_copy_run - config.switch_use_feed_fetch_ops(False) - predictor = fluid.core.create_paddle_predictor(config) - return predictor - - -def executor(model_dir, use_gpu=False): - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - program, feed_names, fetch_targets = fluid.io.load_inference_model( - dirname=model_dir, - executor=exe, - model_filename='model', - params_filename='params') - return exe, program, fetch_targets - - -class VideoFrameInterp(object): - def __init__(self, - time_step, - model_path, - video_path, - use_gpu=True, - key_frame_thread=0., - output_path='output', - 
remove_duplicates=True):
-        self.video_path = video_path
-        self.output_path = os.path.join(output_path, 'DAIN')
-        if model_path is None:
-            model_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)
-
-        self.model_path = model_path
-        self.time_step = time_step
-        self.key_frame_thread = key_frame_thread
-        # Store the flag on self so it does not shadow the
-        # remove_duplicates() helper imported from util.
-        self.remove_duplicates = remove_duplicates
-
-        self.exe, self.program, self.fetch_targets = executor(model_path,
-                                                              use_gpu=use_gpu)
-
-    def run(self):
-        frame_path_input = os.path.join(self.output_path, 'frames-input')
-        frame_path_interpolated = os.path.join(self.output_path,
-                                               'frames-interpolated')
-        frame_path_combined = os.path.join(self.output_path, 'frames-combined')
-        video_path_output = os.path.join(self.output_path, 'videos-output')
-
-        if not os.path.exists(self.output_path):
-            os.makedirs(self.output_path)
-        if not os.path.exists(frame_path_input):
-            os.makedirs(frame_path_input)
-        if not os.path.exists(frame_path_interpolated):
-            os.makedirs(frame_path_interpolated)
-        if not os.path.exists(frame_path_combined):
-            os.makedirs(frame_path_combined)
-        if not os.path.exists(video_path_output):
-            os.makedirs(video_path_output)
-
-        timestep = self.time_step
-        num_frames = int(1.0 / timestep) - 1
-
-        if self.video_path.endswith('.mp4'):
-            videos = [self.video_path]
-        else:
-            videos = sorted(glob.glob(os.path.join(self.video_path, '*.mp4')))
-
-        for cnt, vid in enumerate(videos):
-            print("Interpolating video:", vid)
-            cap = cv2.VideoCapture(vid)
-            fps = cap.get(cv2.CAP_PROP_FPS)
-            print("Old fps (frame rate): ", fps)
-
-            times_interp = int(1.0 / timestep)
-            r2 = str(int(fps) * times_interp)
-            print("New fps (frame rate): ", r2)
-
-            out_path = video2frames(vid, frame_path_input)
-
-            vidname = vid.split('/')[-1].split('.')[0]
-
-            tot_timer = AverageMeter()
-            proc_timer = AverageMeter()
-            end = time.time()
-
-            frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
-            if self.remove_duplicates:
-                frames = remove_duplicates(out_path)
-
-            img = imread(frames[0])
-
-            int_width = img.shape[1]
-            int_height = img.shape[0]
-            channel = img.shape[2]
-            if not channel == 3:
-                continue
-
-            if int_width != ((int_width >> 7) << 7):
-                int_width_pad = (
-                    ((int_width >> 7) + 1) << 7)  # more than necessary
-                padding_left = int((int_width_pad - int_width) / 2)
-                padding_right = int_width_pad - int_width - padding_left
-            else:
-                int_width_pad = int_width
-                padding_left = 32
-                padding_right = 32
-
-            if int_height != ((int_height >> 7) << 7):
-                int_height_pad = (
-                    ((int_height >> 7) + 1) << 7)  # more than necessary
-                padding_top = int((int_height_pad - int_height) / 2)
-                padding_bottom = int_height_pad - int_height - padding_top
-            else:
-                int_height_pad = int_height
-                padding_top = 32
-                padding_bottom = 32
-
-            frame_num = len(frames)
-            print('processing {} frames, from video: {}'.format(
-                frame_num, vid))
-
-            if not os.path.exists(os.path.join(frame_path_interpolated,
-                                               vidname)):
-                os.makedirs(os.path.join(frame_path_interpolated, vidname))
-            if not os.path.exists(os.path.join(frame_path_combined, vidname)):
-                os.makedirs(os.path.join(frame_path_combined, vidname))
-
-            for i in tqdm(range(frame_num - 1)):
-                first = frames[i]
-                second = frames[i + 1]
-
-                img_first = imread(first)
-                img_second = imread(second)
-                '''--------------Frame change test------------------------'''
-                img_first_gray = np.dot(img_first[..., :3],
-                                        [0.299, 0.587, 0.114])
-                img_second_gray = np.dot(img_second[..., :3],
-                                         [0.299, 0.587, 0.114])
-
-                img_first_gray = img_first_gray.flatten(order='C')
-                img_second_gray = img_second_gray.flatten(order='C')
-                corr = np.corrcoef(img_first_gray,
-                                   img_second_gray)[0, 1]
-                key_frame = False
-                if corr < self.key_frame_thread:
-                    key_frame = True
-                '''-------------------------------------------------------'''
-
-                X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
-                X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
-
-                assert (X0.shape[1] == X1.shape[1])
-                assert (X0.shape[2] == X1.shape[2])
-
-                X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
-                    (padding_left, padding_right)), mode='edge')
-                X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
-                    (padding_left, padding_right)), mode='edge')
-
-                X0 = np.expand_dims(X0, axis=0)
-                X1 = np.expand_dims(X1, axis=0)
-
-                X0 = np.expand_dims(X0, axis=0)
-                X1 = np.expand_dims(X1, axis=0)
-
-                X = np.concatenate((X0, X1), axis=0)
-
-                proc_end = time.time()
-                o = self.exe.run(self.program,
-                                 fetch_list=self.fetch_targets,
-                                 feed={"image": X})
-
-                y_ = o[0]
-
-                proc_timer.update(time.time() - proc_end)
-                tot_timer.update(time.time() - end)
-                end = time.time()
-
-                y_ = [
-                    np.transpose(
-                        255.0 * item.clip(
-                            0, 1.0)[0, :, padding_top:padding_top + int_height,
-                                    padding_left:padding_left + int_width],
-                        (1, 2, 0)) for item in y_
-                ]
-                time_offsets = [
-                    kk * timestep for kk in range(1, 1 + num_frames, 1)
-                ]
-
-                count = 1
-                for item, time_offset in zip(y_, time_offsets):
-                    out_dir = os.path.join(
-                        frame_path_interpolated, vidname,
-                        "{:0>6d}_{:0>4d}.png".format(i, count))
-                    count = count + 1
-                    imsave(out_dir, np.round(item).astype(np.uint8))
-
-            num_frames = int(1.0 / timestep) - 1
-
-            input_dir = os.path.join(frame_path_input, vidname)
-            interpolated_dir = os.path.join(frame_path_interpolated, vidname)
-            combined_dir = os.path.join(frame_path_combined, vidname)
-            combine_frames(input_dir, interpolated_dir, combined_dir,
-                           num_frames)
-
-            frame_pattern_combined = os.path.join(frame_path_combined, vidname,
-                                                  '%08d.png')
-            video_pattern_output = os.path.join(video_path_output,
-                                                vidname + '.mp4')
-            if os.path.exists(video_pattern_output):
-                os.remove(video_pattern_output)
-            frames2video(frame_pattern_combined, video_pattern_output, r2)
-
-        return frame_pattern_combined, video_pattern_output
-
-
-if __name__ == '__main__':
-    args = parser.parse_args()
-    # Pass output_path by keyword so it is not consumed by the use_gpu
-    # positional parameter of VideoFrameInterp.__init__.
-    predictor = VideoFrameInterp(args.time_step,
-                                 args.saved_model,
-                                 args.video_path,
-                                 output_path=args.output_path,
-                                 remove_duplicates=args.remove_duplicates)
-    predictor.run()
diff --git a/applications/DAIN/pwcnet/__init__.py b/applications/DAIN/pwcnet/__init__.py
deleted file mode 100644
index bf51e1daaa571b2e005e3268f198ae9b97ac770e..0000000000000000000000000000000000000000
--- a/applications/DAIN/pwcnet/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .pwcnet import *
diff --git a/applications/DAIN/pwcnet/correlation_op/README.md b/applications/DAIN/pwcnet/correlation_op/README.md
deleted file mode 100644
index d413853e0038182a40901d06aae71d8d0fe66956..0000000000000000000000000000000000000000
--- a/applications/DAIN/pwcnet/correlation_op/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-Building the custom correlation op:
-1. Run `sh make.sh` to compile the op into the correlation_lib.so dynamic library.
-2. Add the dynamic library path to LD_LIBRARY_PATH:
-```
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python3.7 -c 'import paddle; print(paddle.sysconfig.get_lib())'`
-```
-3. Add the Python path of the correlation op:
-```
-export PYTHONPATH=$PYTHONPATH:`pwd`
-```
-4. Run `python test_correlation.py` to run the unit test and verify that the op loads successfully.
-
-PS: If the Paddle whl package was downloaded from the official website, gcc 4.8 is required, i.e. change g++ to g++-4.8 in make.sh.
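Once the library builds, a quick dygraph smoke test can confirm the op is usable. This is a minimal sketch mirroring the call pattern in test_correlation.py below; it assumes correlation_lib.so was compiled and the two exports above are in place:

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from correlation import correlation  # loads correlation_lib.so on import

with fluid.dygraph.guard(fluid.CUDAPlace(0)):  # the op only has a CUDA kernel
    x1 = to_variable(np.random.randn(2, 3, 4, 5).astype('float32'))
    x2 = to_variable(np.random.randn(2, 3, 4, 5).astype('float32'))
    y = correlation(x1, x2, pad_size=4, kernel_size=1,
                    max_displacement=4, stride1=1, stride2=1)
    print(y.shape)  # [2, 81, 4, 5]; 81 = (2 * 4 + 1) ** 2 displacement bins
```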
diff --git a/applications/DAIN/pwcnet/correlation_op/correlation.py b/applications/DAIN/pwcnet/correlation_op/correlation.py
deleted file mode 100644
index 6a75d6a0fbd94b2b2957924c69bb26abf005512b..0000000000000000000000000000000000000000
--- a/applications/DAIN/pwcnet/correlation_op/correlation.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import paddle.fluid as fluid
-import os
-file_dir = os.path.dirname(os.path.abspath(__file__))
-fluid.load_op_library(os.path.join(file_dir, 'correlation_lib.so'))
-
-from paddle.fluid.layer_helper import LayerHelper
-
-
-def correlation(input1,
-                input2,
-                pad_size,
-                kernel_size,
-                max_displacement,
-                stride1,
-                stride2,
-                corr_type_multiply=1):
-    helper = LayerHelper("correlation", **locals())
-    output = helper.create_variable_for_type_inference(dtype=input1.dtype)
-    helper.append_op(type="correlation",
-                     inputs={
-                         "Input1": input1,
-                         "Input2": input2
-                     },
-                     attrs={
-                         "pad_size": pad_size,
-                         "kernel_size": kernel_size,
-                         "max_displacement": max_displacement,
-                         "stride1": stride1,
-                         "stride2": stride2,
-                         "corr_type_multiply": corr_type_multiply
-                     },
-                     outputs={"Output": output})
-    return output
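The output shape of this op is worth seeing worked out once, using the settings DAIN/PWC-Net and test_correlation.py use (pad_size=4, kernel_size=1, max_displacement=4, both strides 1). The helper below is illustrative only (the function name is ours); it mirrors the CorrelationOutputSize logic in correlation_op.cc, shown next:

```python
import math

def correlation_output_shape(n, h, w, pad_size, kernel_size,
                             max_displacement, stride1, stride2):
    # Mirrors CorrelationOutputSize in correlation_op.cc.
    kernel_radius = (kernel_size - 1) // 2
    border_radius = kernel_radius + max_displacement
    out_c = (2 * (max_displacement // stride2) + 1) ** 2  # displacement bins
    out_h = math.ceil((h + 2 * pad_size - 2 * border_radius) / stride1)
    out_w = math.ceil((w + 2 * pad_size - 2 * border_radius) / stride1)
    return [n, out_c, out_h, out_w]

print(correlation_output_shape(2, 4, 5, 4, 1, 4, 1, 1))  # [2, 81, 4, 5]
```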
diff --git a/applications/DAIN/pwcnet/correlation_op/correlation_op.cc b/applications/DAIN/pwcnet/correlation_op/correlation_op.cc
deleted file mode 100644
index 0c638cfe470f9f546c4cf41e79a2393758d3c548..0000000000000000000000000000000000000000
--- a/applications/DAIN/pwcnet/correlation_op/correlation_op.cc
+++ /dev/null
@@ -1,137 +0,0 @@
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <cmath>
-#include <string>
-#include <vector>
-#include "paddle/fluid/framework/op_registry.h"
-
-namespace paddle {
-namespace operators {
-
-using Tensor = framework::Tensor;
-
-inline std::vector<int> CorrelationOutputSize(int batch, int input_height, int input_width, int stride1, int stride2, int kernel_size, int pad_size, int max_displacement) {
-
-  std::vector<int> output_shape({batch});
-  int kernel_radius = (kernel_size - 1) / 2;
-  int border_radius = kernel_radius + max_displacement;
-  int padded_input_height = input_height + 2 * pad_size;
-  int padded_input_width = input_width + 2 * pad_size;
-  int output_channel = ((max_displacement / stride2) * 2 + 1) * ((max_displacement / stride2) * 2 + 1);
-  output_shape.push_back(output_channel);
-  int output_height = std::ceil(static_cast<float>(padded_input_height - 2 * border_radius) / static_cast<float>(stride1));
-  int output_width = std::ceil(static_cast<float>(padded_input_width - 2 * border_radius) / static_cast<float>(stride1));
-  output_shape.push_back(output_height);
-  output_shape.push_back(output_width);
-  return output_shape;
-}
-
-class CorrelationOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("Input1", "input1");
-    AddInput("Input2", "input2");
-    AddOutput("Output", "output");
-    AddAttr<int>("pad_size", "pad size for input1 and input2");
-    AddAttr<int>("kernel_size", "kernel size of input1 and input2");
-    AddAttr<int>("max_displacement", "max displacement of input1 and input2");
-    AddAttr<int>("stride1", "Input1 stride");
-    AddAttr<int>("stride2", "Input2 stride");
-    AddAttr<int>("corr_type_multiply", "correlation coefficient").SetDefault(1);
-    AddComment(R"DOC(Correlation of two feature maps. Only supports the NCHW data format.)DOC");
-  }
-};
-
-class CorrelationOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(input1) cannot be null");
-    PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(input2) cannot be null");
-    int stride1 = ctx->Attrs().Get<int>("stride1");
-    int stride2 = ctx->Attrs().Get<int>("stride2");
-    int max_displacement = ctx->Attrs().Get<int>("max_displacement");
-    int pad_size = ctx->Attrs().Get<int>("pad_size");
-    int kernel_size = ctx->Attrs().Get<int>("kernel_size");
-
-    auto in_dims = ctx->GetInputDim("Input1");
-    auto in2_dims = ctx->GetInputDim("Input2");
-    PADDLE_ENFORCE_EQ(in_dims.size() == 4, true, "input1 must be 4-dims");
-    PADDLE_ENFORCE_EQ(in2_dims.size() == 4, true, "input2 must be 4-dims");
-    std::vector<int> output_shape = CorrelationOutputSize(in_dims[0], in_dims[2], in_dims[3], stride1, stride2, kernel_size, pad_size, max_displacement);
-    ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
-  }
-
- protected:
-  framework::OpKernelType GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input1");
-    PADDLE_ENFORCE_EQ(input_data_type, ctx.Input<Tensor>("Input2")->type(), "Input1 and Input2 should have the same type");
-    return framework::OpKernelType(input_data_type, ctx.GetPlace());
-  }
-};
-
-template <typename T>
-class CorrelationOpGradMaker : public framework::SingleGradOpMaker<T> {
- public:
-  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
-
- protected:
-  void Apply(GradOpPtr<T> op) const override {
-    op->SetType("correlation_grad");
-    op->SetInput("Input1", this->Input("Input1"));
-    op->SetInput("Input2", this->Input("Input2"));
-    op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));
-    op->SetOutput(framework::GradVarName("Input1"), this->InputGrad("Input1"));
-    op->SetOutput(framework::GradVarName("Input2"), this->InputGrad("Input2"));
-    op->SetAttrMap(this->Attrs());
-  }
-};
-
-class CorrelationOpGrad : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(Input1) should not be null");
-    PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(Input2) should not be null");
-    PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Output")), true, "Input(Output@GRAD) should not be null");
-
-    auto in1_dims = ctx->GetInputDim("Input1");
-    auto in2_dims = ctx->GetInputDim("Input2");
-    ctx->SetOutputDim(framework::GradVarName("Input1"), in1_dims);
-    ctx->SetOutputDim(framework::GradVarName("Input2"), in2_dims);
-  }
-
- protected:
-  framework::OpKernelType GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    const auto* var = ctx.InputVar(framework::GradVarName("Output"));
-    if (var == nullptr) {
-      PADDLE_THROW("cannot find Output@GRAD");
-    }
-    return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(ctx, "Input1"), ctx.GetPlace());
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OPERATOR(correlation, ops::CorrelationOp, ops::CorrelationOpMaker,
-                  ops::CorrelationOpGradMaker<paddle::framework::OpDesc>,
-                  ops::CorrelationOpGradMaker<paddle::imperative::OpBase>);
-REGISTER_OPERATOR(correlation_grad, ops::CorrelationOpGrad);
diff --git a/applications/DAIN/pwcnet/correlation_op/correlation_op.cu b/applications/DAIN/pwcnet/correlation_op/correlation_op.cu
deleted file mode 100644
index 161844430fe4b9dfeaf80dbe127d802d67a6de76..0000000000000000000000000000000000000000
--- a/applications/DAIN/pwcnet/correlation_op/correlation_op.cu
+++ /dev/null
@@ -1,434 +0,0 @@
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include <algorithm>
-#include "paddle/fluid/framework/op_registry.h"
-
-#define THREADS_PER_BLOCK 32
-#define FULL_MASK 0xffffffff
-
-namespace paddle {
-namespace operators {
-
-using Tensor = framework::Tensor;
-
-template <typename T>
-__forceinline__ __device__ T warpReduceSum(T val) {
-  for (int offset = 16; offset > 0; offset /= 2) {
-    val += __shfl_down_sync(FULL_MASK, val, offset);
-  }
-  return val;
-}
-
-template <typename T>
-__forceinline__ __device__ T blockReduceSum(T val) {
-  static __shared__ T shared[32];
-  int lane = threadIdx.x % warpSize;
-  int wid = threadIdx.x / warpSize;
-
-  val = warpReduceSum(val);
-  if (lane == 0)
-    shared[wid] = val;
-
-  __syncthreads();
-  val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0;
-
-  if (wid == 0)
-    val = warpReduceSum(val);
-
-  return val;
-}
-
-template <typename T>
-__global__ void set_zero(T *x, int num) {
-  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < num; i += blockDim.x * gridDim.x)
-    x[i] = static_cast<T>(0);
-}
-
-template <typename T>
-__global__ void channel_first(const T *input, T *rinput, const int channel, const int height, const int width, const int pad_size) {
-  int n = blockIdx.x;
-  int h = blockIdx.y;
-  int w = blockIdx.z;
-
-  int ch_off = threadIdx.x;
-  T value;
-  int dimchw = channel * height * width;
-  int dimhw = height * width;
-
-  int p_dimw = (width + 2 * pad_size);
-  int p_dimh = (height + 2 * pad_size);
-  int p_dimchw = channel * p_dimw * p_dimh;
-  int p_dimcw = channel * p_dimw;
-
-  for (int c = ch_off; c < channel; c += THREADS_PER_BLOCK) {
-    value = input[n * dimchw + c * dimhw + h * width + w];
-    rinput[n * p_dimchw + (h + pad_size) * p_dimcw + (w + pad_size) * channel + c] = value;
-  }
-}
-
-template <typename T>
-__global__ void correlation_forward(T *output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int input_channel, const int input_height, const int input_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
-
-  int p_input_width = input_width + 2 * pad_size;
-  int p_input_height = input_height + 2 * pad_size;
-
-  int kernel_rad = (kernel_size - 1) / 2;
-  int displacement_rad = max_displacement / stride2;
-
-  int displacement_size = 2 * displacement_rad + 1;
-
-  int n = blockIdx.x;
-  int h1 = blockIdx.y * stride1 + max_displacement;
-  int w1 = blockIdx.z * stride1 + max_displacement;
-  int c = threadIdx.x;
-
-  int p_dimchw = p_input_height * p_input_width * input_channel;
-  int p_dimcw = p_input_width * input_channel;
-  int p_dimc = input_channel;
-
-  int t_dimchw = output_channel * output_height * output_width;
-  int t_dimhw = output_height * output_width;
-  int t_dimw = output_width;
-
-  int nelems = kernel_size * kernel_size * p_dimc;
-
-  for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) {
-    for (int ti = -displacement_rad; ti <= displacement_rad; ++ti) {
-      int w2 = w1 + ti * stride2;
-      int h2 = h1 + tj * stride2;
-
-      T acc0 = 0;
-      for (int j = -kernel_rad; j <= kernel_rad; ++j) {
-        for (int i = -kernel_rad; i <= kernel_rad; ++i) {
-          for (int ch = c; ch < p_dimc; ch += blockDim.x) {
-            int index1 = n * p_dimchw + (h1 + j) * p_dimcw + (w1 + i) * p_dimc + ch;
-            int index2 = n * p_dimchw + (h2 + j) * p_dimcw + (w2 + i) * p_dimc + ch;
-            acc0 += static_cast<T>(rinput1[index1] * rinput2[index2]);
-          }
-        }
-      }
-      if (blockDim.x == warpSize) {
-        __syncwarp();
-        acc0 = warpReduceSum(acc0);
-      } else {
-        __syncthreads();
-        acc0 = blockReduceSum(acc0);
-      }
-
-      if (threadIdx.x == 0) {
-        int tc = (tj + displacement_rad) * displacement_size + (ti + displacement_rad);
-        const int t_index = n * t_dimchw + tc * t_dimhw + blockIdx.y * t_dimw + blockIdx.z;
-        output[t_index] = static_cast<T>(acc0 / nelems);
-      }
-    }
-  }
-}
-
-//class CorrelationKernel
-template <typename T>
-class CorrelationKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must be CUDAPlace");
-
-    auto *input1 = ctx.Input<Tensor>("Input1");
-    auto *input2 = ctx.Input<Tensor>("Input2");
-    int pad_size = ctx.Attr<int>("pad_size");
-    int kernel_size = ctx.Attr<int>("kernel_size");
-    int stride1 = ctx.Attr<int>("stride1");
-    int stride2 = ctx.Attr<int>("stride2");
-    int max_displacement = ctx.Attr<int>("max_displacement");
-    int corr_type_multiply = ctx.Attr<int>("corr_type_multiply");
-
-    auto *output = ctx.Output<Tensor>("Output");
-    output->mutable_data<T>(ctx.GetPlace());
-    auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
-
-    // based on input1, NCHW
-    auto in_dims = input1->dims();
-    int N = in_dims[0];
-    int C = in_dims[1];
-    int H = in_dims[2];
-    int W = in_dims[3];
-
-    int padded_input_height = H + 2 * pad_size;
-    int padded_input_width = W + 2 * pad_size;
-
-    Tensor rinput1 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
-    rinput1.mutable_data<T>(ctx.GetPlace());
-
-    Tensor rinput2 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
-    rinput2.mutable_data<T>(ctx.GetPlace());
-
-    set_zero<<<(rinput1.numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>(rinput1.data<T>(), rinput1.numel());
-    set_zero<<<(rinput2.numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>(rinput2.data<T>(), rinput2.numel());
-    set_zero<<<(output->numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>(output->data<T>(), output->numel());
-
-    auto out_dims = output->dims();
-    int OC = out_dims[1];
-    int OH = out_dims[2];
-    int OW = out_dims[3];
-
-    dim3 blocks_grid(N, H, W);
-    dim3 threads_block(THREADS_PER_BLOCK);
-
-    channel_first<<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input1->data<T>(), rinput1.data<T>(), C, H, W, pad_size);
-    channel_first<<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input2->data<T>(), rinput2.data<T>(), C, H, W, pad_size);
-
-    dim3 threadsPerBlock(THREADS_PER_BLOCK);
-    dim3 totalBlocksCorr(N, OH, OW);
-
-    correlation_forward<<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(output->data<T>(), OC, OH, OW, rinput1.data<T>(), C, H, W, rinput2.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
-  }
-};
-
-template <typename T>
-__global__ void correlation_backward_input1(int item, T *grad_input1, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
-
-  int n = item;
-  int h = blockIdx.x * stride1 + pad_size;
-  int w = blockIdx.y * stride1 + pad_size;
-  int c = blockIdx.z;
-  int tch_off = threadIdx.x;
-
-  int kernel_rad = (kernel_size - 1) / 2;
-  int displacement_rad = max_displacement / stride2;
-  int displacement_size = 2 * displacement_rad + 1;
-
-  int xmin = (w - kernel_rad - max_displacement) / stride1;
-  int ymin = (h - kernel_rad - max_displacement) / stride1;
-
-  int xmax = (w + kernel_rad - max_displacement) / stride1;
-  int ymax = (h + kernel_rad - max_displacement) / stride1;
-
-  if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) {
-    return;
-  }
-
-  if (xmin > xmax || ymin > ymax) {
-    return;
-  }
-
-  xmin = max(0, xmin);
-  xmax = min(output_width - 1, xmax);
-
-  ymin = max(0, ymin);
-  ymax = min(output_height - 1, ymax);
-
-  int p_input_width = input_width + 2 * pad_size;
-  int p_input_height = input_height + 2 * pad_size;
-  int p_dimchw = input_channel * p_input_height * p_input_width;
-  int p_dimcw = input_channel * p_input_width;
-  int p_dimc = input_channel;
-
-  int t_dimchw = output_channel * output_height * output_width;
-  int t_dimhw = output_height * output_width;
-  int t_dimw = output_width;
-
-  int o_dimchw = input_channel * input_height * input_width;
-  int o_dimhw = input_height * input_width;
-  int o_dimw = input_width;
-
-  int nelems = kernel_size * kernel_size * input_channel;
-
-  __shared__ T prod_sum[THREADS_PER_BLOCK];
-  prod_sum[tch_off] = 0;
-
-  for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) {
-    int i2 = (tc % displacement_size - displacement_rad) * stride2;
-    int j2 = (tc / displacement_size - displacement_rad) * stride2;
-
-    int index2 = n * p_dimchw + (h + j2) * p_dimcw + (w + i2) * p_dimc + c;
-
-    T val2 = rinput2[index2];
-    for (int j = ymin; j <= ymax; ++j) {
-      for (int i = xmin; i <= xmax; ++i) {
-        int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i;
-        prod_sum[tch_off] += grad_output[t_index] * val2;
-      }
-    }
-  }
-
-  __syncthreads();
-
-  if (tch_off == 0) {
-    T reduce_sum = 0;
-    for (int index = 0; index < THREADS_PER_BLOCK; index++) {
-      reduce_sum += prod_sum[index];
-    }
-    const int index1 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size);
-    grad_input1[index1] = static_cast<T>(reduce_sum / nelems);
-  }
-}
-
-template <typename T>
-__global__ void correlation_backward_input2(int item, T *grad_input2, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
-
-  int n = item;
-  int h = blockIdx.x * stride1 + pad_size;
-  int w = blockIdx.y * stride1 + pad_size;
-  int c = blockIdx.z;
-
-  int tch_off = threadIdx.x;
-
-  int kernel_rad = (kernel_size - 1) / 2;
-  int displacement_rad = max_displacement / stride2;
-  int displacement_size = 2 * displacement_rad + 1;
-
-  int p_input_width = input_width + 2 * pad_size;
-  int p_input_height = input_height + 2 * pad_size;
-  int p_dimchw = input_channel * p_input_height * p_input_width;
-  int p_dimcw = input_channel * p_input_width;
-  int p_dimc = input_channel;
-
-  int t_dimchw = output_channel * output_height * output_width;
-  int t_dimhw = output_height * output_width;
-  int t_dimw = output_width;
-
-  int o_dimchw = input_channel * input_height * input_width;
-  int o_dimhw = input_height * input_width;
-  int o_dimw = input_width;
-
-  int nelems = kernel_size * kernel_size * input_channel;
-
-  __shared__ T prod_sum[THREADS_PER_BLOCK];
-  prod_sum[tch_off] = 0;
-
-  for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) {
-    int i2 = (tc % displacement_size - displacement_rad) * stride2;
-    int j2 = (tc / displacement_size - displacement_rad) * stride2;
-
-    int xmin = (w - kernel_rad - max_displacement - i2) / stride1;
-    int ymin = (h - kernel_rad - max_displacement - j2) / stride1;
-
-    int xmax = (w + kernel_rad - max_displacement - i2) / stride1;
-    int ymax = (h + kernel_rad - max_displacement - j2) / stride1;
-
-    if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) {
-      continue;
-    }
-
-    if (xmin > xmax || ymin > ymax) {
-      continue;
-    }
-
-    xmin = max(0, xmin);
-    xmax = min(output_width - 1, xmax);
-
-    ymin = max(0, ymin);
-    ymax = min(output_height - 1, ymax);
-
-    int index1 = n * p_dimchw + (h - j2) * p_dimcw + (w - i2) * p_dimc + c;
-    T val1 = rinput1[index1];
-    for (int j = ymin; j <= ymax; ++j) {
-      for (int i = xmin; i <= xmax; ++i) {
-        int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i;
-        prod_sum[tch_off] += grad_output[t_index] * val1;
-      }
-    }
-  }
-
-  __syncthreads();
-
-  if (tch_off == 0) {
-    T reduce_sum = 0;
-    for (int index = 0; index < THREADS_PER_BLOCK; index++) {
-      reduce_sum += prod_sum[index];
-    }
-    const int index2 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size);
-    grad_input2[index2] = static_cast<T>(reduce_sum / nelems);
-  }
-}
-
-template <typename T>
-class CorrelationGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must use CUDAPlace.");
-    const auto *input1 = ctx.Input<Tensor>("Input1");
-    const auto *input2 = ctx.Input<Tensor>("Input2");
-    const auto *grad_output = ctx.Input<Tensor>(framework::GradVarName("Output"));
-    const int pad_size = ctx.Attr<int>("pad_size");
-    const int kernel_size = ctx.Attr<int>("kernel_size");
-    const int stride1 = ctx.Attr<int>("stride1");
-    const int stride2 = ctx.Attr<int>("stride2");
-    const int max_displacement = ctx.Attr<int>("max_displacement");
-    const int corr_type_multiply = ctx.Attr<int>("corr_type_multiply");
-
-    auto *grad_input1 = ctx.Output<Tensor>(framework::GradVarName("Input1"));
-    grad_input1->mutable_data<T>(ctx.GetPlace());
-    auto *grad_input2 = ctx.Output<Tensor>(framework::GradVarName("Input2"));
-    grad_input2->mutable_data<T>(ctx.GetPlace());
-    auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
-
-    auto in_dims = input1->dims();
-    int N = in_dims[0];
-    int C = in_dims[1];
-    int H = in_dims[2];
-    int W = in_dims[3];
-
-    int padded_input_height = H + 2 * pad_size;
-    int padded_input_width = W + 2 * pad_size;
-
-    Tensor rinput1 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
-    rinput1.mutable_data<T>(ctx.GetPlace());
-
-    Tensor rinput2 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
-    rinput2.mutable_data<T>(ctx.GetPlace());
-
-    set_zero<<<(rinput1.numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>(rinput1.data<T>(), rinput1.numel());
-    set_zero<<<(rinput2.numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>(rinput2.data<T>(), rinput2.numel());
-    set_zero<<<(grad_input1->numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>(grad_input1->data<T>(), grad_input1->numel());
-    set_zero<<<(grad_input2->numel() + 512 - 1) / 512, 512, 0, dev_ctx.stream()>>>(grad_input2->data<T>(), grad_input2->numel());
-
-    auto grad_out_dims = grad_output->dims();
-    int GOC = grad_out_dims[1];
-    int GOH = grad_out_dims[2];
-    int GOW = grad_out_dims[3];
-
-    dim3 blocks_grid(N, H, W);
-    dim3 threads_block(THREADS_PER_BLOCK);
-
-    channel_first<<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input1->data<T>(), rinput1.data<T>(), C, H, W, pad_size);
-    channel_first<<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input2->data<T>(), rinput2.data<T>(), C, H, W, pad_size);
-
-    dim3 threadsPerBlock(THREADS_PER_BLOCK);
-    dim3 totalBlocksCorr(H, W, C);
-
-    for (int n = 0; n < N; n++) {
-      correlation_backward_input1<<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(n, grad_input1->data<T>(), C, H, W, grad_output->data<T>(), GOC, GOH, GOW, rinput2.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
-    }
-
-    for (int n = 0; n < N; n++) {
-      correlation_backward_input2<<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(n, grad_input2->data<T>(), C, H, W, grad_output->data<T>(), GOC, GOH, GOW, rinput1.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
-    }
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(correlation, ops::CorrelationKernel<float>,
-                        ops::CorrelationKernel<double>);
-REGISTER_OP_CUDA_KERNEL(correlation_grad, ops::CorrelationGradKernel<float>,
-                        ops::CorrelationGradKernel<double>);
diff --git a/applications/DAIN/pwcnet/correlation_op/make.sh b/applications/DAIN/pwcnet/correlation_op/make.sh
deleted file mode 100644
index 1aad7ebb463d3e581fccdb35be8de08163c00009..0000000000000000000000000000000000000000
--- a/applications/DAIN/pwcnet/correlation_op/make.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-# source /ssd1/vis/liufanglong/.bashrc
-#export PATH=/home/work/cuda-9.0/bin:$PATH
-#export PATH=/home/work/cuda-9.0/bin:$PATH
-#export 
LD_LIBRARY_PATH="/home/work/cuda-9.0/lib64:$LD_LIBRARY_PATH" -#export LD_LIBRARY_PATH=/home/vis/chao/local/cudnn_v7.6/cuda/lib64:$LD_LIBRARY_PATH -#export CPLUS_INCLUDE_PATH=/home/vis/chao/local/cudnn_v7.6/cuda/include:/ssd1/vis/liufanglong/local/fluid_1.1.0_for_slurm/nccl_2.3.5/include:$CPLUS_INCLUDE_PATH -#export LD_LIBRARY_PATH=/ssd1/vis/liufanglong/local/fluid_1.1.0_for_slurm/nccl_2.3.5/lib:$LD_LIBRARY_PATH - -include_dir=$( python -c 'import paddle; print(paddle.sysconfig.get_include())' ) -lib_dir=$( python -c 'import paddle; print(paddle.sysconfig.get_lib())' ) - -echo $include_dir -echo $lib_dir - -OPS='correlation_op' -for op in ${OPS} -do -nvcc ${op}.cu -c -o ${op}.cu.o -ccbin cc -DPADDLE_WITH_CUDA -DEIGEN_USE_GPU -DPADDLE_USE_DSO -DPADDLE_WITH_MKLDNN -Xcompiler -fPIC -std=c++11 -Xcompiler -fPIC -w --expt-relaxed-constexpr -O0 -g -DNVCC \ - -I ${include_dir}/third_party/ \ - -I ${include_dir} -done - -# g++-4.8 correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \ -# g++ ${OPS}.cu.o ${OPS}.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \ -g++ correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \ - -I ${include_dir}/third_party/ \ - -I ${include_dir} \ - -L ${lib_dir} \ - -L /usr/local/cuda/lib64/ -lpaddle_framework -lcudart - -# rm *.cu.o diff --git a/applications/DAIN/pwcnet/correlation_op/test_correlation.py b/applications/DAIN/pwcnet/correlation_op/test_correlation.py deleted file mode 100644 index 6372b14744772a0ddcb15a0b1e6dbd53311f12b5..0000000000000000000000000000000000000000 --- a/applications/DAIN/pwcnet/correlation_op/test_correlation.py +++ /dev/null @@ -1,136 +0,0 @@ -import unittest -from correlation import correlation -import numpy as np -import paddle.fluid as fluid -from paddle.fluid.dygraph.base import to_variable - - -def corr(x_1, - x_2, - pad_size=4, - kernel_size=1, - max_displacement=4, - stride1=1, - stride2=1, - corr_multiply=1): - K = kernel_size - # rinput1 = np.pad(x_1, tuple([pad_size for _ in range(4)]), mode='constant').transpose(1, 2).transpose(2, 3) - # rinput2 = np.pad(x_2, tuple([pad_size for _ in range(4)]), mode='constant').transpose(1, 2).transpose(2, 3) - - rinput1 = np.pad(x_1, ((0, 0), (0, 0), (pad_size, pad_size), - (pad_size, pad_size)), - mode='constant') - rinput2 = np.pad(x_2, ((0, 0), (0, 0), (pad_size, pad_size), - (pad_size, pad_size)), - mode='constant') - rinput1 = np.transpose(rinput1, (0, 2, 3, 1)) - rinput2 = np.transpose(rinput2, (0, 2, 3, 1)) - B = int(rinput1.shape[0]) - H = int(x_1.shape[2]) - W = int(x_2.shape[3]) - d = max_displacement - D = 2 * d + 1 - output = np.zeros((B, D * D, H, W), dtype=np.float32) - - for b in range(B): - for i in range(H): - for j in range(W): - for k in range(-d, d + 1): - for l in range(-d, d + 1): - x1_index = i + pad_size - y1_index = j + pad_size - x2_index = x1_index + k - y2_index = y1_index + l - output[b, l + d + D * (k + d), i, - j] = np.mean(rinput1[b, x1_index:x1_index + K, - y1_index:y1_index + K] * - rinput2[b, x2_index:x2_index + K, - y2_index:y2_index + K]) - - return output - - -class TestCorrelationOp(unittest.TestCase): - def test_check_output(self): - #x_shape = (1, 196, 3, 3) - np.random.seed(13) - np.set_printoptions(threshold=np.inf) - x_shape = (2, 10, 3, 3) - x_type = 'float32' - x1 = fluid.layers.data(name='x1', - shape=x_shape, - dtype=x_type, - append_batch_size=False) - x2 = fluid.layers.data(name='x2', - 
shape=x_shape, - dtype=x_type, - append_batch_size=False) - - x1_np = np.random.randn(2, 3, 4, 5).astype(x_type) - x2_np = np.random.randn(2, 3, 4, 5).astype(x_type) - out_np = corr(x1_np, - x2_np, - pad_size=4, - kernel_size=1, - max_displacement=4, - stride1=1, - stride2=1) - - out = correlation(x1, - x2, - pad_size=4, - kernel_size=1, - max_displacement=4, - stride1=1, - stride2=1) - - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - res = exe.run(feed={'x1': x1_np, 'x2': x2_np}, fetch_list=[out.name]) - - self.assertTrue(np.allclose(res[0], out_np)) - - -class Net(fluid.dygraph.Layer): - def __init__(self, name_scope): - super(Net, self).__init__(name_scope) - - def forward(self, x1, x2): - y = correlation(x1, - x2, - pad_size=4, - kernel_size=1, - max_displacement=4, - stride1=1, - stride2=1) - return y - - -class TestCorrelationOpDyGraph(unittest.TestCase): - def test_check_output(self): - np.random.seed(13) - np.set_printoptions(threshold=np.inf) - x_shape = (2, 10, 3, 3) - x_type = 'float32' - place = fluid.CUDAPlace(0) - with fluid.dygraph.guard(place): - x1_np = np.random.randn(2, 3, 4, 5).astype(x_type) - x2_np = np.random.randn(2, 3, 4, 5).astype(x_type) - out_np = corr(x1_np, - x2_np, - pad_size=4, - kernel_size=1, - max_displacement=4, - stride1=1, - stride2=1) - - x1 = to_variable(x1_np) - x2 = to_variable(x2_np) - corr_pd = Net('corr_pd') - y = corr_pd(x1, x2) - out = y.numpy() - self.assertTrue(np.allclose(out, out_np)) - - -if __name__ == '__main__': - unittest.main() diff --git a/applications/DAIN/pwcnet/pwcnet.py b/applications/DAIN/pwcnet/pwcnet.py deleted file mode 100644 index effdc623b4bf8527bf542ffec5ebb5d07d4a81b4..0000000000000000000000000000000000000000 --- a/applications/DAIN/pwcnet/pwcnet.py +++ /dev/null @@ -1,590 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np - -import paddle -import paddle.fluid as fluid -from paddle.fluid.dygraph import Conv2D, Conv2DTranspose -from paddle.fluid.contrib import correlation - -__all__ = ['pwc_dc_net'] - - -class PWCDCNet(fluid.dygraph.Layer): - def __init__(self, md=4): - super(PWCDCNet, self).__init__() - self.md = md - self.param_attr = fluid.ParamAttr( - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0004), - initializer=fluid.initializer.MSRAInitializer(uniform=True, - fan_in=None, - seed=0)) - self.conv1a = Conv2D(3, 16, 3, 2, 1, param_attr=self.param_attr) - self.conv1aa = Conv2D(16, 16, 3, 1, 1, param_attr=self.param_attr) - self.conv1b = Conv2D(16, 16, 3, 1, 1, param_attr=self.param_attr) - self.conv2a = Conv2D(16, 32, 3, 2, 1, param_attr=self.param_attr) - self.conv2aa = Conv2D(32, 32, 3, 1, 1, param_attr=self.param_attr) - self.conv2b = Conv2D(32, 32, 3, 1, 1, param_attr=self.param_attr) - self.conv3a = Conv2D(32, 64, 3, 2, 1, param_attr=self.param_attr) - self.conv3aa = Conv2D(64, 64, 3, 1, 1, param_attr=self.param_attr) - self.conv3b = Conv2D(64, 64, 3, 1, 1, param_attr=self.param_attr) - self.conv4a = Conv2D(64, 96, 3, 2, 1, param_attr=self.param_attr) - self.conv4aa = Conv2D(96, 96, 3, 1, 1, param_attr=self.param_attr) - self.conv4b = Conv2D(96, 96, 3, 1, 1, param_attr=self.param_attr) - self.conv5a = Conv2D(96, 128, 3, 2, 1, param_attr=self.param_attr) - self.conv5aa = Conv2D(128, 128, 3, 1, 1, param_attr=self.param_attr) - self.conv5b = Conv2D(128, 128, 3, 1, 1, param_attr=self.param_attr) - self.conv6aa = Conv2D(128, 196, 3, 2, 1, param_attr=self.param_attr) - self.conv6a = Conv2D(196, 196, 3, 1, 1, param_attr=self.param_attr) - self.conv6b = Conv2D(196, 196, 3, 1, 1, param_attr=self.param_attr) - - nd = (2 * self.md + 1)**2 - dd = np.cumsum([128, 128, 96, 64, 32], dtype=np.int32).astype(np.int) - dd = [int(d) for d in dd] - od = nd - self.conv6_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr) - self.conv6_1 = Conv2D(od + dd[0], - 128, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv6_2 = Conv2D(od + dd[1], - 96, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv6_3 = Conv2D(od + dd[2], - 64, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv6_4 = Conv2D(od + dd[3], - 32, - 3, - 1, - 1, - param_attr=self.param_attr) - self.predict_flow6 = Conv2D(od + dd[4], - 2, - 3, - 1, - 1, - param_attr=self.param_attr) - self.deconv6 = Conv2DTranspose(2, - 2, - 4, - stride=2, - padding=1, - param_attr=self.param_attr) - self.upfeat6 = Conv2DTranspose(od + dd[4], - 2, - 4, - stride=2, - padding=1, - param_attr=self.param_attr) - - od = nd + 128 + 4 - self.conv5_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr) - self.conv5_1 = Conv2D(od + dd[0], - 128, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv5_2 = Conv2D(od + dd[1], - 96, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv5_3 = Conv2D(od + dd[2], - 64, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv5_4 = Conv2D(od + dd[3], - 32, - 3, - 1, - 1, - param_attr=self.param_attr) - self.predict_flow5 = Conv2D(od + dd[4], - 2, - 3, - 1, - 1, - param_attr=self.param_attr) - self.deconv5 = Conv2DTranspose(2, - 2, - 4, - stride=2, - padding=1, - param_attr=self.param_attr) - self.upfeat5 = Conv2DTranspose(od + dd[4], - 2, - 4, - stride=2, - padding=1, - param_attr=self.param_attr) - - od = nd + 96 + 4 - self.conv4_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr) - self.conv4_1 = Conv2D(od + dd[0], - 128, - 3, - 1, - 1, - param_attr=self.param_attr) 
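# NOTE (worked-out channel bookkeeping for the decoder above, md = 4):
#   nd = (2*4 + 1)**2 = 81 cost-volume channels
#   dd = cumsum([128, 128, 96, 64, 32]) = [128, 256, 352, 416, 448]
# At the coarsest level od = nd = 81; at each finer level od = 81 + the
# skip-feature width + 4 (a 2-channel upsampled flow plus a 2-channel
# upsampled feature map). Every convX_Y concatenates its output back onto
# its input, DenseNet-style, which is why successive blocks take
# od + dd[i] input channels. The warp multipliers used further down
# (0.625, 1.25, 2.5, 5.0) are 20 / 2**level for levels 5..2, the usual
# PWC-Net flow-scaling convention.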
- self.conv4_2 = Conv2D(od + dd[1], - 96, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv4_3 = Conv2D(od + dd[2], - 64, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv4_4 = Conv2D(od + dd[3], - 32, - 3, - 1, - 1, - param_attr=self.param_attr) - self.predict_flow4 = Conv2D(od + dd[4], - 2, - 3, - 1, - 1, - param_attr=self.param_attr) - self.deconv4 = Conv2DTranspose(2, - 2, - 4, - stride=2, - padding=1, - param_attr=self.param_attr) - self.upfeat4 = Conv2DTranspose(od + dd[4], - 2, - 4, - stride=2, - padding=1, - param_attr=self.param_attr) - - od = nd + 64 + 4 - self.conv3_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr) - self.conv3_1 = Conv2D(od + dd[0], - 128, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv3_2 = Conv2D(od + dd[1], - 96, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv3_3 = Conv2D(od + dd[2], - 64, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv3_4 = Conv2D(od + dd[3], - 32, - 3, - 1, - 1, - param_attr=self.param_attr) - self.predict_flow3 = Conv2D(od + dd[4], - 2, - 3, - 1, - 1, - param_attr=self.param_attr) - self.deconv3 = Conv2DTranspose(2, - 2, - 4, - stride=2, - padding=1, - param_attr=self.param_attr) - self.upfeat3 = Conv2DTranspose(od + dd[4], - 2, - 4, - stride=2, - padding=1, - param_attr=self.param_attr) - - od = nd + 32 + 4 - self.conv2_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr) - self.conv2_1 = Conv2D(od + dd[0], - 128, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv2_2 = Conv2D(od + dd[1], - 96, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv2_3 = Conv2D(od + dd[2], - 64, - 3, - 1, - 1, - param_attr=self.param_attr) - self.conv2_4 = Conv2D(od + dd[3], - 32, - 3, - 1, - 1, - param_attr=self.param_attr) - self.predict_flow2 = Conv2D(od + dd[4], - 2, - 3, - 1, - 1, - param_attr=self.param_attr) - # self.deconv2 = Conv2DTranspose(2, 2, 4, stride=2, padding=1, param_attr=self.param_attr) - - self.dc_conv1 = Conv2D(od + dd[4], - 128, - 3, - 1, - 1, - dilation=1, - param_attr=self.param_attr) - self.dc_conv2 = Conv2D(128, - 128, - 3, - 1, - 2, - dilation=2, - param_attr=self.param_attr) - self.dc_conv3 = Conv2D(128, - 128, - 3, - 1, - 4, - dilation=4, - param_attr=self.param_attr) - self.dc_conv4 = Conv2D(128, - 96, - 3, - 1, - 8, - dilation=8, - param_attr=self.param_attr) - self.dc_conv5 = Conv2D(96, - 64, - 3, - 1, - 16, - dilation=16, - param_attr=self.param_attr) - self.dc_conv6 = Conv2D(64, - 32, - 3, - 1, - 1, - dilation=1, - param_attr=self.param_attr) - self.dc_conv7 = Conv2D(32, 2, 3, 1, 1, param_attr=self.param_attr) - - def warp(self, x, flo): - """ - warp an image/tensor (im2) back to im1, according to the optical flow - x: [B, C, H, W] (im2) - flo: [B, 2, H, W] flow - """ - x_shape = fluid.layers.shape(x) - B, H, W = x_shape[0], x_shape[2], x_shape[3] - bb = fluid.layers.range(0, B, 1, 'float32') - xx = fluid.layers.range(0, W, 1, 'float32') - yy = fluid.layers.range(0, H, 1, 'float32') - _, yy, xx = paddle.tensor.meshgrid(bb, yy, xx) - yy = fluid.layers.unsqueeze(yy, [1]) - xx = fluid.layers.unsqueeze(xx, [1]) - grid = fluid.layers.concat(input=[xx, yy], axis=1) - flo = flo - vgrid = fluid.layers.elementwise_add(grid, flo) - - vgrid_0 = 2.0 * fluid.layers.slice( - vgrid, axes=[1], starts=[0], ends=[1]) / (W - 1.) - 1.0 - vgrid_1 = 2.0 * fluid.layers.slice( - vgrid, axes=[1], starts=[1], ends=[2]) / (H - 1.) 
- 1.0 - - vgrid = fluid.layers.concat(input=[vgrid_0, vgrid_1], axis=1) - vgrid = fluid.layers.transpose(vgrid, [0, 2, 3, 1]) - output = fluid.layers.grid_sampler(name='grid_sample', x=x, grid=vgrid) - - mask = fluid.layers.zeros_like(x) - mask = mask + 1.0 - mask = fluid.layers.grid_sampler(name='grid_sample', x=mask, grid=vgrid) - mask_temp1 = fluid.layers.cast(mask < 0.9990, 'float32') - mask = mask * (1 - mask_temp1) - mask = fluid.layers.cast(mask > 0, 'float32') - outwarp = fluid.layers.elementwise_mul(output, mask) - - return outwarp - - def warp_nomask(self, x, flo): - """ - warp an image/tensor (im2) back to im1, according to the optical flow - x: [B, C, H, W] (im2) - flo: [B, 2, H, W] flow - """ - - B, C, H, W = x.shape - # mesh grid - # xx = fluid.layers.range(0, W, 1, 'float32') - # xx = fluid.layers.reshape(xx, shape=[1, -1]) - # xx = fluid.layers.expand(x=xx, expand_times=[H, 1]) - # xx = fluid.layers.reshape(xx, shape=[1, 1, H, W]) - # xx = fluid.layers.expand(x=xx, expand_times=[B, 1, 1, 1]) - # - # yy = fluid.layers.range(0, H, 1, 'float32') - # yy = fluid.layers.reshape(yy, shape=[-1, 1]) - # yy = fluid.layers.expand(x=yy, expand_times=[1, W]) - # yy = fluid.layers.reshape(x=yy, shape=[1, 1, H, W]) - # yy = fluid.layers.expand(x=yy, expand_times=[B, 1, 1, 1]) - - x_shape = fluid.layers.shape(x) - B, H, W = x_shape[0], x_shape[2], x_shape[3] - bb = fluid.layers.range(0, B, 1, 'float32') - xx = fluid.layers.range(0, W, 1, 'float32') - # xx = fluid.layers.reshape(xx, shape=[1, -1]) - yy = fluid.layers.range(0, H, 1, 'float32') - # yy = fluid.layers.reshape(yy, shape=[1, -1]) - _, yy, xx = paddle.tensor.meshgrid(bb, yy, xx) - yy = fluid.layers.unsqueeze(yy, [1]) - xx = fluid.layers.unsqueeze(xx, [1]) - - grid = fluid.layers.concat(input=[xx, yy], axis=1) - flo = flo - vgrid = fluid.layers.elementwise_add(grid, flo) - #vgrid_0 = 2.0 * fluid.layers.slice(vgrid, axes=[1], starts=[0], ends=[1]) / max(W - 1, 1) - 1.0 - #vgrid_1 = 2.0 * fluid.layers.slice(vgrid, axes=[1], starts=[1], ends=[2]) / max(H - 1, 1) - 1.0 - vgrid_0 = 2.0 * fluid.layers.slice( - vgrid, axes=[1], starts=[0], ends=[1]) / (W - 1.) - 1.0 - vgrid_1 = 2.0 * fluid.layers.slice( - vgrid, axes=[1], starts=[1], ends=[2]) / (H - 1.) 
- 1.0 - vgrid = fluid.layers.concat(input=[vgrid_0, vgrid_1], axis=1) - vgrid = fluid.layers.transpose(vgrid, [0, 2, 3, 1]) - output = fluid.layers.grid_sampler(name='grid_sample', x=x, grid=vgrid) - - return output - - def corr(self, x_1, x_2): - out = correlation(x_1, - x_2, - pad_size=self.md, - kernel_size=1, - max_displacement=self.md, - stride1=1, - stride2=1, - corr_type_multiply=1) - return out - - def forward(self, x, output_more=False): - im1 = fluid.layers.slice(x, axes=[1], starts=[0], ends=[3]) - im2 = fluid.layers.slice(x, axes=[1], starts=[3], ends=[6]) - # print("\n\n********************PWC Net details *************** \n\n") - c11 = fluid.layers.leaky_relu(self.conv1a(im1), 0.1) - c11 = fluid.layers.leaky_relu(self.conv1aa(c11), 0.1) - c11 = fluid.layers.leaky_relu(self.conv1b(c11), 0.1) - - c21 = fluid.layers.leaky_relu(self.conv1a(im2), 0.1) - c21 = fluid.layers.leaky_relu(self.conv1aa(c21), 0.1) - c21 = fluid.layers.leaky_relu(self.conv1b(c21), 0.1) - c12 = fluid.layers.leaky_relu(self.conv2a(c11), 0.1) - c12 = fluid.layers.leaky_relu(self.conv2aa(c12), 0.1) - c12 = fluid.layers.leaky_relu(self.conv2b(c12), 0.1) - - c22 = fluid.layers.leaky_relu(self.conv2a(c21), 0.1) - c22 = fluid.layers.leaky_relu(self.conv2aa(c22), 0.1) - c22 = fluid.layers.leaky_relu(self.conv2b(c22), 0.1) - - c13 = fluid.layers.leaky_relu(self.conv3a(c12), 0.1) - c13 = fluid.layers.leaky_relu(self.conv3aa(c13), 0.1) - c13 = fluid.layers.leaky_relu(self.conv3b(c13), 0.1) - - c23 = fluid.layers.leaky_relu(self.conv3a(c22), 0.1) - c23 = fluid.layers.leaky_relu(self.conv3aa(c23), 0.1) - c23 = fluid.layers.leaky_relu(self.conv3b(c23), 0.1) - - c14 = fluid.layers.leaky_relu(self.conv4a(c13), 0.1) - c14 = fluid.layers.leaky_relu(self.conv4aa(c14), 0.1) - c14 = fluid.layers.leaky_relu(self.conv4b(c14), 0.1) - - c24 = fluid.layers.leaky_relu(self.conv4a(c23), 0.1) - c24 = fluid.layers.leaky_relu(self.conv4aa(c24), 0.1) - c24 = fluid.layers.leaky_relu(self.conv4b(c24), 0.1) - - c15 = fluid.layers.leaky_relu(self.conv5a(c14), 0.1) - c15 = fluid.layers.leaky_relu(self.conv5aa(c15), 0.1) - c15 = fluid.layers.leaky_relu(self.conv5b(c15), 0.1) - - c25 = fluid.layers.leaky_relu(self.conv5a(c24), 0.1) - c25 = fluid.layers.leaky_relu(self.conv5aa(c25), 0.1) - c25 = fluid.layers.leaky_relu(self.conv5b(c25), 0.1) - - c16 = fluid.layers.leaky_relu(self.conv6aa(c15), 0.1) - c16 = fluid.layers.leaky_relu(self.conv6a(c16), 0.1) - c16 = fluid.layers.leaky_relu(self.conv6b(c16), 0.1) - - c26 = fluid.layers.leaky_relu(self.conv6aa(c25), 0.1) - c26 = fluid.layers.leaky_relu(self.conv6a(c26), 0.1) - c26 = fluid.layers.leaky_relu(self.conv6b(c26), 0.1) - - corr6 = self.corr(c16, c26) - corr6 = fluid.layers.leaky_relu(corr6, alpha=0.1) - - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv6_0(corr6), 0.1), corr6], - axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv6_1(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv6_2(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv6_3(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv6_4(x), 0.1), x], axis=1) - - flow6 = self.predict_flow6(x) - up_flow6 = self.deconv6(flow6) - up_feat6 = self.upfeat6(x) - - warp5 = self.warp(c25, up_flow6 * 0.625) - corr5 = self.corr(c15, warp5) - corr5 = fluid.layers.leaky_relu(corr5, alpha=0.1) - - x = fluid.layers.concat(input=[corr5, c15, up_flow6, up_feat6], axis=1) - x = 
fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv5_0(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv5_1(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv5_2(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv5_3(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv5_4(x), 0.1), x], axis=1) - - flow5 = self.predict_flow5(x) - up_flow5 = self.deconv5(flow5) - up_feat5 = self.upfeat5(x) - - warp4 = self.warp(c24, up_flow5 * 1.25) - corr4 = self.corr(c14, warp4) - corr4 = fluid.layers.leaky_relu(corr4, alpha=0.1) - - x = fluid.layers.concat(input=[corr4, c14, up_flow5, up_feat5], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv4_0(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv4_1(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv4_2(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv4_3(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv4_4(x), 0.1), x], axis=1) - - flow4 = self.predict_flow4(x) - up_flow4 = self.deconv4(flow4) - up_feat4 = self.upfeat4(x) - - warp3 = self.warp(c23, up_flow4 * 2.5) - corr3 = self.corr(c13, warp3) - corr3 = fluid.layers.leaky_relu(corr3, alpha=0.1) - - x = fluid.layers.concat(input=[corr3, c13, up_flow4, up_feat4], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv3_0(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv3_1(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv3_2(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv3_3(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv3_4(x), 0.1), x], axis=1) - - flow3 = self.predict_flow3(x) - up_flow3 = self.deconv3(flow3) - up_feat3 = self.upfeat3(x) - - warp2 = self.warp(c22, up_flow3 * 5.0) - corr2 = self.corr(c12, warp2) - corr2 = fluid.layers.leaky_relu(corr2, alpha=0.1) - - x = fluid.layers.concat(input=[corr2, c12, up_flow3, up_feat3], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv2_0(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv2_1(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv2_2(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv2_3(x), 0.1), x], axis=1) - x = fluid.layers.concat( - input=[fluid.layers.leaky_relu(self.conv2_4(x), 0.1), x], axis=1) - - flow2 = self.predict_flow2(x) - - x = fluid.layers.leaky_relu( - self.dc_conv4( - fluid.layers.leaky_relu( - self.dc_conv3( - fluid.layers.leaky_relu( - self.dc_conv2( - fluid.layers.leaky_relu(self.dc_conv1(x), 0.1)), - 0.1)), 0.1)), 0.1) - flow2 += self.dc_conv7( - fluid.layers.leaky_relu( - self.dc_conv6(fluid.layers.leaky_relu(self.dc_conv5(x), 0.1)), - 0.1)) - - if not output_more: - return flow2 - else: - return [flow2, flow3, flow4, flow5, flow6] - - -def pwc_dc_net(path=None): - model = PWCDCNet() - if path is not None: - import pickle - data = pickle.load(open(path, 'rb')) - weight_list = [] - for k, v in data.items(): - weight_list.append(v) - param_dict = {} - for i, param in enumerate(model.parameters()): - param_dict[param.name] = 
weight_list[i] - model.load_dict(param_dict) - - return model diff --git a/applications/DAIN/resblock/__init__.py b/applications/DAIN/resblock/__init__.py deleted file mode 100644 index f11f7b5473aa4b9d28df5f21802885449296f20d..0000000000000000000000000000000000000000 --- a/applications/DAIN/resblock/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .basicblock import * diff --git a/applications/DAIN/resblock/basicblock.py b/applications/DAIN/resblock/basicblock.py deleted file mode 100644 index a8333a41f406377fadabb3fe36ab3b4e8e3a8dc5..0000000000000000000000000000000000000000 --- a/applications/DAIN/resblock/basicblock.py +++ /dev/null @@ -1,94 +0,0 @@ -import paddle.fluid as fluid -from paddle.fluid.dygraph import Conv2D - -__all__ = ['MultipleBasicBlock', 'MultipleBasicBlock_4'] - - -def conv3x3(in_planes, out_planes, dilation=1, stride=1, param_attr=None): - return Conv2D(in_planes, - out_planes, - filter_size=3, - stride=stride, - padding=int(dilation * (3 - 1) / 2), - dilation=dilation, - bias_attr=False, - param_attr=param_attr) - - -class BasicBlock(fluid.dygraph.Layer): - expansion = 1 - - def __init__(self, inplanes, planes, dilation=1, stride=1, downsample=None): - super(BasicBlock, self).__init__() - - param_attr = fluid.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=1.0, seed=0)) - - self.conv1 = conv3x3(inplanes, planes, dilation, stride, param_attr) - self.conv2 = conv3x3(planes, planes, param_attr=param_attr) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - # out = self.bn1(out) - out = fluid.layers.relu(out) - - out = self.conv2(out) - # out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = fluid.layers.relu(out) - - return out - - -class MultipleBasicBlock(fluid.dygraph.Layer): - def __init__(self, - input_feature, - block, - num_blocks, - intermediate_feature=64, - dense=True): - super(MultipleBasicBlock, self).__init__() - self.dense = dense - self.num_block = num_blocks - self.intermediate_feature = intermediate_feature - - param_attr = fluid.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=1.0, seed=0)) - - self.block1 = Conv2D(input_feature, - intermediate_feature, - filter_size=7, - stride=1, - padding=3, - bias_attr=True, - param_attr=param_attr) - - dim = intermediate_feature - self.block2 = block(dim, dim, dilation=1) if num_blocks >= 2 else None - self.block3 = block(dim, dim, dilation=1) if num_blocks >= 3 else None - self.block4 = block(dim, dim, dilation=1) if num_blocks >= 4 else None - self.block5 = Conv2D(dim, 3, 3, 1, 1) - - def forward(self, x): - x = fluid.layers.relu(self.block1(x)) - x = self.block2(x) if self.num_block >= 2 else x - x = self.block3(x) if self.num_block >= 3 else x - x = self.block4(x) if self.num_block >= 4 else x - x = self.block5(x) - return x - - -def MultipleBasicBlock_4(input_feature, intermediate_feature=64): - model = MultipleBasicBlock(input_feature, BasicBlock, 4, - intermediate_feature) - return model diff --git a/applications/DAIN/run.sh b/applications/DAIN/run.sh deleted file mode 100644 index fcf2c7653606f3b0815e57b0b1382124669b91a4..0000000000000000000000000000000000000000 --- a/applications/DAIN/run.sh +++ /dev/null @@ -1,15 +0,0 @@ -cd pwcnet/correlation_op -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python -c 'import paddle; print(paddle.sysconfig.get_lib())'` -export PYTHONPATH=$PYTHONPATH:`pwd` -cd ../../ - 
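# Note: --time_step 0.125 below requests 8x interpolation (1 / 0.125),
# i.e. int(1 / 0.125) - 1 = 7 synthesized frames between every pair of
# input frames, so a 25 fps source comes out at 200 fps.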
-VID_PATH=/paddle/work/github/DAIN/data/CBA.mp4 -OUT_PATH=output -MODEL_PATH=DAIN_paddle_weight - - -CUDA_VISIBLE_DEVICES=2 python predict.py \ - --time_step 0.125 \ - --video_path=$VID_PATH \ - --output_path=$OUT_PATH \ - --saved_model=$MODEL_PATH \ No newline at end of file diff --git a/applications/DAIN/util.py b/applications/DAIN/util.py deleted file mode 100644 index dc343ff20668f0b9db3a426a3789943ea4e28cb5..0000000000000000000000000000000000000000 --- a/applications/DAIN/util.py +++ /dev/null @@ -1,78 +0,0 @@ -import os, sys -import glob -import shutil -import cv2 - - -class AverageMeter(object): - """Computes and stores the average and current value""" - def __init__(self): - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - -def combine_frames(input, interpolated, combined, num_frames): - frames1 = sorted(glob.glob(os.path.join(input, '*.png'))) - frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png'))) - num1 = len(frames1) - num2 = len(frames2) - # assert (num1 - 1) * num_frames == num2 - for i in range(num1): - src = frames1[i] - imgname = int(src.split('/')[-1].split('.')[-2]) - assert i == imgname - dst = os.path.join(combined, '{:08d}.png'.format(i * (num_frames + 1))) - shutil.copy2(src, dst) - if i < num1 - 1: - try: - for k in range(num_frames): - src = frames2[i * num_frames + k] - dst = os.path.join( - combined, - '{:08d}.png'.format(i * (num_frames + 1) + k + 1)) - shutil.copy2(src, dst) - except Exception as e: - print(e) - print(len(frames2), num_frames, i, k, i * num_frames + k) - - -def remove_duplicates(paths): - def dhash(image, hash_size=8): - gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - resized = cv2.resize(gray, (hash_size + 1, hash_size)) - diff = resized[:, 1:] > resized[:, :-1] - return sum([2**i for (i, v) in enumerate(diff.flatten()) if v]) - - hashes = {} - image_paths = sorted(glob.glob(os.path.join(paths, '*.png'))) - for image_path in image_paths: - image = cv2.imread(image_path) - h = dhash(image) - p = hashes.get(h, []) - p.append(image_path) - hashes[h] = p - - for (h, hashed_paths) in hashes.items(): - if len(hashed_paths) > 1: - for p in hashed_paths[1:]: - os.remove(p) - - frames = sorted(glob.glob(os.path.join(paths, '*.png'))) - for fid, frame in enumerate(frames): - new_name = '{:08d}'.format(fid) + '.png' - new_name = os.path.join(paths, new_name) - os.rename(frame, new_name) - - frames = sorted(glob.glob(os.path.join(paths, '*.png'))) - return frames diff --git a/applications/DeepRemaster/utils.py b/applications/DeepRemaster/utils.py deleted file mode 100644 index 11ed9b0f3e3cf6d8387237986b3b3705f43d8cd2..0000000000000000000000000000000000000000 --- a/applications/DeepRemaster/utils.py +++ /dev/null @@ -1,35 +0,0 @@ -import paddle -from skimage import color -import numpy as np -from PIL import Image - -def convertLAB2RGB( lab ): - lab[:, :, 0:1] = lab[:, :, 0:1] * 100 # [0, 1] -> [0, 100] - lab[:, :, 1:3] = np.clip(lab[:, :, 1:3] * 255 - 128, -100, 100) # [0, 1] -> [-128, 128] - rgb = color.lab2rgb( lab.astype(np.float64) ) - return rgb - -def convertRGB2LABTensor( rgb ): - lab = color.rgb2lab( np.asarray( rgb ) ) # RGB -> LAB L[0, 100] a[-127, 128] b[-128, 127] - ab = np.clip(lab[:, :, 1:3] + 128, 0, 255) # AB --> [0, 255] - ab = paddle.to_tensor(ab.astype('float32')) / 255. 
- L = lab[:, :, 0] * 2.55 # L --> [0, 255] - L = Image.fromarray( np.uint8( L ) ) - - L = paddle.to_tensor(np.array(L).astype('float32')[..., np.newaxis] / 255.0) - return L, ab - -def addMergin(img, target_w, target_h, background_color=(0,0,0)): - width, height = img.size - if width==target_w and height==target_h: - return img - scale = max(target_w,target_h)/max(width, height) - width = int(width*scale/16.)*16 - height = int(height*scale/16.)*16 - - img = img.resize((width, height), Image.BICUBIC) - xp = (target_w-width)//2 - yp = (target_h-height)//2 - result = Image.new(img.mode, (target_w, target_h), background_color) - result.paste(img, (xp, yp)) - return result diff --git a/applications/EDVR/data.py b/applications/EDVR/data.py deleted file mode 100644 index ece62cf9fd8bfbee3640cc0bbfbc7567c15dae72..0000000000000000000000000000000000000000 --- a/applications/EDVR/data.py +++ /dev/null @@ -1,89 +0,0 @@ -import cv2 - -import numpy as np - - -def read_img(path, size=None, is_gt=False): - """read image by cv2 - return: Numpy float32, HWC, BGR, [0,1]""" - img = cv2.imread(path, cv2.IMREAD_UNCHANGED) - - img = img.astype(np.float32) / 255. - if img.ndim == 2: - img = np.expand_dims(img, axis=2) - - if img.shape[2] > 3: - img = img[:, :, :3] - return img - - -def get_test_neighbor_frames(crt_i, N, max_n, padding='new_info'): - """Generate an index list for reading N frames from a sequence of images - Args: - crt_i (int): current center index - max_n (int): max number of the sequence of images (calculated from 1) - N (int): reading N frames - padding (str): padding mode, one of replicate | reflection | new_info | circle - Example: crt_i = 0, N = 5 - replicate: [0, 0, 0, 1, 2] - reflection: [2, 1, 0, 1, 2] - new_info: [4, 3, 0, 1, 2] - circle: [3, 4, 0, 1, 2] - - Returns: - return_l (list [int]): a list of indexes - """ - max_n = max_n - 1 - n_pad = N // 2 - return_l = [] - - for i in range(crt_i - n_pad, crt_i + n_pad + 1): - if i < 0: - if padding == 'replicate': - add_idx = 0 - elif padding == 'reflection': - add_idx = -i - elif padding == 'new_info': - add_idx = (crt_i + n_pad) + (-i) - elif padding == 'circle': - add_idx = N + i - else: - raise ValueError('Wrong padding mode') - elif i > max_n: - if padding == 'replicate': - add_idx = max_n - elif padding == 'reflection': - add_idx = max_n * 2 - i - elif padding == 'new_info': - add_idx = (crt_i - n_pad) - (i - max_n) - elif padding == 'circle': - add_idx = i - N - else: - raise ValueError('Wrong padding mode') - else: - add_idx = i - return_l.append(add_idx) - # name_b = '{:08d}'.format(crt_i) - return return_l - - -class EDVRDataset: - def __init__(self, frame_paths): - self.frames = frame_paths - - def __getitem__(self, index): - indexs = get_test_neighbor_frames(index, 5, len(self.frames)) - frame_list = [] - for i in indexs: - img = read_img(self.frames[i]) - frame_list.append(img) - - img_LQs = np.stack(frame_list, axis=0) - # BGR to RGB, HWC to CHW, numpy to tensor - img_LQs = img_LQs[:, :, :, [2, 1, 0]] - img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32') - - return img_LQs, self.frames[index] - - def __len__(self): - return len(self.frames) diff --git a/applications/EDVR/predict.py b/applications/EDVR/predict.py deleted file mode 100644 index 5f95714cea667555d64ddfea83d75c8191b773b0..0000000000000000000000000000000000000000 --- a/applications/EDVR/predict.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import os -import sys - -cur_path = os.path.abspath(os.path.dirname(__file__)) -sys.path.append(cur_path) - -import time -import argparse -import ast -import glob -import numpy as np - -import paddle.fluid as fluid -import cv2 - -from tqdm import tqdm -from data import EDVRDataset -from paddle.utils.download import get_path_from_url -from ppgan.utils.video import frames2video, video2frames - -EDVR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar' - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument('--input', - type=str, - default=None, - help='input video path') - parser.add_argument('--output', - type=str, - default='output', - help='output path') - parser.add_argument('--weight_path', - type=str, - default=None, - help='weight path') - args = parser.parse_args() - return args - - -def get_img(pred): - pred = pred.squeeze() - pred = np.clip(pred, a_min=0., a_max=1.0) - pred = pred * 255 - pred = pred.round() - pred = pred.astype('uint8') - pred = np.transpose(pred, (1, 2, 0)) # chw -> hwc - pred = pred[:, :, ::-1] # rgb -> bgr - return pred - - -def save_img(img, framename): - dirname = os.path.dirname(framename) - if not os.path.exists(dirname): - os.makedirs(dirname) - - cv2.imwrite(framename, img) - - -class EDVRPredictor: - def __init__(self, input, output, weight_path=None): - self.input = input - self.output = os.path.join(output, 'EDVR') - - place = fluid.CUDAPlace( - 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() - self.exe = fluid.Executor(place) - - if weight_path is None: - weight_path = get_path_from_url(EDVR_WEIGHT_URL, cur_path) - - model_filename = 'EDVR_model.pdmodel' - params_filename = 'EDVR_params.pdparams' - - out = fluid.io.load_inference_model(dirname=weight_path, - model_filename=model_filename, - params_filename=params_filename, - executor=self.exe) - self.infer_prog, self.feed_list, self.fetch_list = out - - def run(self): - vid = self.input - base_name = os.path.basename(vid).split('.')[0] - output_path = os.path.join(self.output, base_name) - pred_frame_path = os.path.join(output_path, 'frames_pred') - - if not os.path.exists(output_path): - os.makedirs(output_path) - - if not os.path.exists(pred_frame_path): - os.makedirs(pred_frame_path) - - cap = cv2.VideoCapture(vid) - fps = cap.get(cv2.CAP_PROP_FPS) - - out_path = video2frames(vid, output_path) - - frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) - - dataset = EDVRDataset(frames) - - periods = [] - cur_time = time.time() - for infer_iter, data in enumerate(tqdm(dataset)): - data_feed_in = [data[0]] - - infer_outs = self.exe.run( - self.infer_prog, - fetch_list=self.fetch_list, - feed={self.feed_list[0]: np.array(data_feed_in)}) - infer_result_list = [item for item in infer_outs] - - frame_path = data[1] - - img_i = get_img(infer_result_list[0]) - save_img( - img_i, - os.path.join(pred_frame_path, os.path.basename(frame_path))) - - prev_time = cur_time - cur_time = time.time() - period = 
cur_time - prev_time - periods.append(period) - - # print('Processed {} samples'.format(infer_iter + 1)) - frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png') - vid_out_path = os.path.join(self.output, - '{}_edvr_out.mp4'.format(base_name)) - frames2video(frame_pattern_combined, vid_out_path, str(int(fps))) - - return frame_pattern_combined, vid_out_path - - -if __name__ == "__main__": - args = parse_args() - predictor = EDVRPredictor(args.input, args.output, args.weight_path) - predictor.run() diff --git a/applications/EDVR/run.sh b/applications/EDVR/run.sh deleted file mode 100644 index 271eefe33dd734c09cc7acd82c181e36ad3c33c9..0000000000000000000000000000000000000000 --- a/applications/EDVR/run.sh +++ /dev/null @@ -1,41 +0,0 @@ -# examples of running programs: -# bash ./run.sh inference EDVR ./configs/edvr_L.yaml -# bash ./run.sh predict EDVR ./configs/edvr_L.yaml - -# configs should be ./configs/xxx.yaml - -mode=$1 -name=$2 -configs=$3 - -save_inference_dir="./data/inference_model" -use_gpu=True -fix_random_seed=False -log_interval=1 -valid_interval=1 - -weights="./weights/paddle_state_dict_L.npz" - - -export CUDA_VISIBLE_DEVICES=6 #0,1,5,6 fast, 2,3,4,7 slow -# export FLAGS_fast_eager_deletion_mode=1 -# export FLAGS_eager_delete_tensor_gb=0.0 -# export FLAGS_fraction_of_gpu_memory_to_use=0.98 - -if [ "$mode"x == "predict"x ]; then - echo $mode $name $configs $weights - if [ "$weights"x != ""x ]; then - python predict.py --model_name=$name \ - --config=$configs \ - --log_interval=$log_interval \ - --video_path='' \ - --use_gpu=$use_gpu - else - python predict.py --model_name=$name \ - --config=$configs \ - --log_interval=$log_interval \ - --use_gpu=$use_gpu \ - --video_path='' - fi -fi - diff --git a/applications/first_order_model/configs/vox-256.yaml b/applications/first_order_model/configs/vox-256.yaml deleted file mode 100644 index abfe9a23949aea62f9b4b7772d3e987f253ecd62..0000000000000000000000000000000000000000 --- a/applications/first_order_model/configs/vox-256.yaml +++ /dev/null @@ -1,83 +0,0 @@ -dataset_params: - root_dir: data/vox-png - frame_shape: [256, 256, 3] - id_sampling: True - pairs_list: data/vox256.csv - augmentation_params: - flip_param: - horizontal_flip: True - time_flip: True - jitter_param: - brightness: 0.1 - contrast: 0.1 - saturation: 0.1 - hue: 0.1 - - -model_params: - common_params: - num_kp: 10 - num_channels: 3 - estimate_jacobian: True - kp_detector_params: - temperature: 0.1 - block_expansion: 32 - max_features: 1024 - scale_factor: 0.25 - num_blocks: 5 - generator_params: - block_expansion: 64 - max_features: 512 - num_down_blocks: 2 - num_bottleneck_blocks: 6 - estimate_occlusion_map: True - dense_motion_params: - block_expansion: 64 - max_features: 1024 - num_blocks: 5 - scale_factor: 0.25 - discriminator_params: - scales: [1] - block_expansion: 32 - max_features: 512 - num_blocks: 4 - sn: True - -train_params: - num_epochs: 100 - num_repeats: 75 - epoch_milestones: [60, 90] - lr_generator: 2.0e-4 - lr_discriminator: 2.0e-4 - lr_kp_detector: 2.0e-4 - batch_size: 40 - scales: [1, 0.5, 0.25, 0.125] - checkpoint_freq: 50 - transform_params: - sigma_affine: 0.05 - sigma_tps: 0.005 - points_tps: 5 - loss_weights: - generator_gan: 0 - discriminator_gan: 1 - feature_matching: [10, 10, 10, 10] - perceptual: [10, 10, 10, 10, 10] - equivariance_value: 10 - equivariance_jacobian: 10 - -reconstruction_params: - num_videos: 1000 - format: '.mp4' - -animate_params: - num_pairs: 50 - format: '.mp4' - normalization_params: - adapt_movement_scale: 
False - use_relative_movement: True - use_relative_jacobian: True - -visualizer_params: - kp_size: 5 - draw_border: True - colormap: 'gist_rainbow' diff --git a/applications/run.sh b/applications/run.sh deleted file mode 100644 index 8dcc8192c0e6b6698b052ccb6cd4abfbd106f4a9..0000000000000000000000000000000000000000 --- a/applications/run.sh +++ /dev/null @@ -1,9 +0,0 @@ -# Model descriptions -# Currently includes DAIN (frame interpolation), DeOldify (colorization), DeepRemaster (denoising and colorization), EDVR (super-resolution on consecutive frames, i.e. video), RealSR (image-based super-resolution) -# Argument descriptions -# input: path of the input video -# output: path where the output video is saved -# proccess_order: the models to use and their order - -python tools/video-enhance.py \ ---input input.mp4 --output output --proccess_order DeOldify RealSR diff --git a/applications/tools/first-order-demo.py b/applications/tools/first-order-demo.py index 40e6c1a35549aaeaf1181d23c47c3f968fe94f21..624aea24d46ce9dc917e1461d343130da5f6dc9b 100644 --- a/applications/tools/first-order-demo.py +++ b/applications/tools/first-order-demo.py @@ -1,217 +1,69 @@ -import matplotlib -matplotlib.use('Agg') -import os -import sys +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import argparse -import yaml -import pickle -from argparse import ArgumentParser -from tqdm import tqdm - -import imageio -import numpy as np -from skimage.transform import resize -from skimage import img_as_ubyte import paddle - -from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator -from ppgan.modules.keypoint_detector import KPDetector -from ppgan.utils.animate import normalize_kp -from scipy.spatial import ConvexHull - -paddle.disable_static() - -if sys.version_info[0] < 3: - raise Exception( - "You must use Python 3 or higher. 
Recommended version is Python 3.7") - - -def load_checkpoints(config_path, checkpoint_path, cpu=False): - - with open(config_path) as f: - config = yaml.load(f) - - generator = OcclusionAwareGenerator( - **config['model_params']['generator_params'], - **config['model_params']['common_params']) - - kp_detector = KPDetector(**config['model_params']['kp_detector_params'], - **config['model_params']['common_params']) - - checkpoint = pickle.load(open(checkpoint_path, 'rb')) - generator.set_state_dict(checkpoint['generator']) - - kp_detector.set_state_dict(checkpoint['kp_detector']) - - generator.eval() - kp_detector.eval() - - return generator, kp_detector - - -def make_animation(source_image, - driving_video, - generator, - kp_detector, - relative=True, - adapt_movement_scale=True, - cpu=False): - with paddle.no_grad(): - predictions = [] - source = paddle.to_tensor(source_image[np.newaxis].astype( - np.float32)).transpose([0, 3, 1, 2]) - # if not cpu: - # source = source.cuda() - driving = paddle.to_tensor( - np.array(driving_video)[np.newaxis].astype(np.float32)).transpose( - [0, 4, 1, 2, 3]) - kp_source = kp_detector(source) - kp_driving_initial = kp_detector(driving[:, :, 0]) - - for frame_idx in tqdm(range(driving.shape[2])): - driving_frame = driving[:, :, frame_idx] - kp_driving = kp_detector(driving_frame) - kp_norm = normalize_kp(kp_source=kp_source, - kp_driving=kp_driving, - kp_driving_initial=kp_driving_initial, - use_relative_movement=relative, - use_relative_jacobian=relative, - adapt_movement_scale=adapt_movement_scale) - out = generator(source, kp_source=kp_source, kp_driving=kp_norm) - - predictions.append( - np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0]) - return predictions - - -def find_best_frame(source, driving, cpu=False): - import face_alignment - - def normalize_kp(kp): - kp = kp - kp.mean(axis=0, keepdims=True) - area = ConvexHull(kp[:, :2]).volume - area = np.sqrt(area) - kp[:, :2] = kp[:, :2] / area - return kp - - fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, - flip_input=True, - device='cpu' if cpu else 'cuda') - kp_source = fa.get_landmarks(255 * source)[0] - kp_source = normalize_kp(kp_source) - norm = float('inf') - frame_num = 0 - for i, image in tqdm(enumerate(driving)): - kp_driving = fa.get_landmarks(255 * image)[0] - kp_driving = normalize_kp(kp_driving) - new_norm = (np.abs(kp_source - kp_driving)**2).sum() - if new_norm < norm: - norm = new_norm - frame_num = i - return frame_num - +from ppgan.apps.first_order_predictor import FirstOrderPredictor + +parser = argparse.ArgumentParser() +parser.add_argument("--config", default=None, help="path to config") +parser.add_argument("--weight_path", + default=None, + help="path to checkpoint to restore") +parser.add_argument("--source_image", type=str, help="path to source image") +parser.add_argument("--driving_video", type=str, help="path to driving video") +parser.add_argument("--output", default='output', help="path to output") +parser.add_argument("--relative", + dest="relative", + action="store_true", + help="use relative or absolute keypoint coordinates") +parser.add_argument( + "--adapt_scale", + dest="adapt_scale", + action="store_true", + help="adapt movement scale based on convex hull of keypoints") + +parser.add_argument( + "--find_best_frame", + dest="find_best_frame", + action="store_true", + help= + "Generate from the frame that is the most aligned with source. 
(Only for faces, requires face_alignment lib)" +) + +parser.add_argument("--best_frame", + dest="best_frame", + type=int, + default=None, + help="Set frame to start from.") +parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.") + +parser.set_defaults(relative=False) +parser.set_defaults(adapt_scale=False) if __name__ == "__main__": - parser = ArgumentParser() - parser.add_argument("--config", required=True, help="path to config") - parser.add_argument("--checkpoint", - default='vox-cpk.pth.tar', - help="path to checkpoint to restore") - - parser.add_argument("--source_image", - default='sup-mat/source.png', - help="path to source image") - parser.add_argument("--driving_video", - default='sup-mat/source.png', - help="path to driving video") - parser.add_argument("--result_video", - default='result.mp4', - help="path to output") - - parser.add_argument("--relative", - dest="relative", - action="store_true", - help="use relative or absolute keypoint coordinates") - parser.add_argument( - "--adapt_scale", - dest="adapt_scale", - action="store_true", - help="adapt movement scale based on convex hull of keypoints") - - parser.add_argument( - "--find_best_frame", - dest="find_best_frame", - action="store_true", - help= - "Generate from the frame that is the most aligned with source. (Only for faces, requires face_alignment lib)" - ) - - parser.add_argument("--best_frame", - dest="best_frame", - type=int, - default=None, - help="Set frame to start from.") - - parser.add_argument("--cpu", - dest="cpu", - action="store_true", - help="cpu mode.") - - parser.set_defaults(relative=False) - parser.set_defaults(adapt_scale=False) - - opt = parser.parse_args() - - source_image = imageio.imread(opt.source_image) - reader = imageio.get_reader(opt.driving_video) - fps = reader.get_meta_data()['fps'] - driving_video = [] - try: - for im in reader: - driving_video.append(im) - except RuntimeError: - pass - reader.close() - - source_image = resize(source_image, (256, 256))[..., :3] - driving_video = [ - resize(frame, (256, 256))[..., :3] for frame in driving_video - ] - generator, kp_detector = load_checkpoints(config_path=opt.config, - checkpoint_path=opt.checkpoint, - cpu=opt.cpu) - - if opt.find_best_frame or opt.best_frame is not None: - i = opt.best_frame if opt.best_frame is not None else find_best_frame( - source_image, driving_video, cpu=opt.cpu) - print("Best frame: " + str(i)) - driving_forward = driving_video[i:] - driving_backward = driving_video[:(i + 1)][::-1] - predictions_forward = make_animation( - source_image, - driving_forward, - generator, - kp_detector, - relative=opt.relative, - adapt_movement_scale=opt.adapt_scale, - cpu=opt.cpu) - predictions_backward = make_animation( - source_image, - driving_backward, - generator, - kp_detector, - relative=opt.relative, - adapt_movement_scale=opt.adapt_scale, - cpu=opt.cpu) - predictions = predictions_backward[::-1] + predictions_forward[1:] - else: - predictions = make_animation(source_image, - driving_video, - generator, - kp_detector, - relative=opt.relative, - adapt_movement_scale=opt.adapt_scale, - cpu=opt.cpu) - imageio.mimsave(opt.result_video, - [img_as_ubyte(frame) for frame in predictions], - fps=fps) + args = parser.parse_args() + + if args.cpu: + paddle.set_device('cpu') + + predictor = FirstOrderPredictor(output=args.output, + weight_path=args.weight_path, + config=args.config, + relative=args.relative, + adapt_scale=args.adapt_scale, + find_best_frame=args.find_best_frame, + best_frame=args.best_frame) + 
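# Example invocation of this demo (file names are illustrative):
#   python first-order-demo.py --source_image face.png \
#       --driving_video drive.mp4 --relative --adapt_scale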
predictor.run(args.source_image, args.driving_video) diff --git a/applications/tools/ps_demo.py b/applications/tools/ps_demo.py new file mode 100644 index 0000000000000000000000000000000000000000..f8acb7d3c6b66beeb7d4a6914cf7161f4a5f2f9b --- /dev/null +++ b/applications/tools/ps_demo.py @@ -0,0 +1,211 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import argparse +from pathlib import Path + +from PIL import Image +from fire import Fire +import numpy as np + +import paddle +import paddle.vision.transforms as T +import ppgan.faceutils as futils +from ppgan.utils.options import parse_args +from ppgan.utils.config import get_config +from ppgan.utils.setup import setup +from ppgan.utils.filesystem import load +from ppgan.engine.trainer import Trainer +from ppgan.models.builder import build_model +from ppgan.utils.preprocess import * + + +def toImage(net_output): + img = net_output.squeeze(0).transpose( + (1, 2, 0)).numpy() # [1,c,h,w]->[h,w,c] + img = (img * 255.0).clip(0, 255) + img = np.uint8(img) + img = Image.fromarray(img, mode='RGB') + return img + + +def mask2image(mask: np.array, format="HWC"): + H, W = mask.shape + + canvas = np.zeros((H, W, 3), dtype=np.uint8) + for i in range(int(mask.max())): + color = np.random.rand(1, 1, 3) * 255 + canvas += (mask == i)[:, :, None] * color.astype(np.uint8) + return canvas + + +class PreProcess: + def __init__(self, config, need_parser=True): + self.img_size = 256 + self.transform = transform = T.Compose([ + T.Resize(size=256), + T.Permute(to_rgb=False), + ]) + self.norm = T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5]) + if need_parser: + self.face_parser = futils.mask.FaceParser() + self.up_ratio = 0.6 / 0.85 + self.down_ratio = 0.2 / 0.85 + self.width_ratio = 0.2 / 0.85 + + def __call__(self, image): + face = futils.dlib.detect(image) + + if not face: + return + face_on_image = face[0] + image, face, crop_face = futils.dlib.crop(image, face_on_image, + self.up_ratio, + self.down_ratio, + self.width_ratio) + np_image = np.array(image) + mask = self.face_parser.parse( + np.float32(cv2.resize(np_image, (512, 512)))) + mask = cv2.resize(mask.numpy(), (self.img_size, self.img_size), + interpolation=cv2.INTER_NEAREST) + mask = mask.astype(np.uint8) + mask_color = mask2image(mask) + cv2.imwrite('mask_temp.png', mask_color) + mask_tensor = paddle.to_tensor(mask) + + lms = futils.dlib.landmarks(image, face) * self.img_size / image.width + lms = lms.round() + + P_np = generate_P_from_lmks(lms, self.img_size, self.img_size, + self.img_size) + + mask_aug = generate_mask_aug(mask, lms) + + image = self.transform(np_image) + + return [ + self.norm(image), + np.float32(mask_aug), + np.float32(P_np), + np.float32(mask) + ], face_on_image, crop_face + + +class PostProcess: + def __init__(self, config): + self.denoise = True + self.img_size = 256 + + def __call__(self, source: Image, result: Image): + # TODO: Refract -> name, resize + source = np.array(source) + 
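# NOTE: the resize/subtract/add sequence below is a simple detail-transfer
# step. Downsampling the source to the network's 256x256 working size and
# upsampling it back leaves only low frequencies, so `laplacian_diff`
# isolates the source's high-frequency texture; adding it onto the
# upsampled network output keeps the transferred makeup colors while
# restoring the original photo's fine detail.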
result = np.array(result) + + height, width = source.shape[:2] + small_source = cv2.resize(source, (self.img_size, self.img_size)) + laplacian_diff = source.astype(np.float) - cv2.resize( + small_source, (width, height)).astype(np.float) + result = (cv2.resize(result, + (width, height)) + laplacian_diff).round().clip( + 0, 255).astype(np.uint8) + if self.denoise: + result = cv2.fastNlMeansDenoisingColored(result) + result = Image.fromarray(result).convert('RGB') + return result + + +class Inference: + def __init__(self, config, model_path=''): + self.model = build_model(config) + self.preprocess = PreProcess(config) + self.model_path = model_path + + def transfer(self, source, reference, with_face=False): + source_input, face, crop_face = self.preprocess(source) + reference_input, face, crop_face = self.preprocess(reference) + + consis_mask = np.float32( + calculate_consis_mask(source_input[1], reference_input[1])) + consis_mask = paddle.to_tensor(np.expand_dims(consis_mask, 0)) + + if not (source_input and reference_input): + if with_face: + return None, None + return + for i in range(len(source_input) - 1): + source_input[i] = paddle.to_tensor( + np.expand_dims(source_input[i], 0)) + + for i in range(len(reference_input) - 1): + reference_input[i] = paddle.to_tensor( + np.expand_dims(reference_input[i], 0)) + + input_data = { + 'image_A': source_input[0], + 'image_B': reference_input[0], + 'mask_A_aug': source_input[1], + 'mask_B_aug': reference_input[1], + 'P_A': source_input[2], + 'P_B': reference_input[2], + 'consis_mask': consis_mask + } + state_dicts = load(self.model_path) + net = getattr(self.model, 'netG') + net.set_dict(state_dicts['netG']) + result, _ = self.model.test(input_data) + print('result shape: ', result.shape) + min_, max_ = result.min(), result.max() + result += -min_ + result = paddle.divide(result, max_ - min_ + 1e-5) + img = toImage(result) + + if with_face: + return img, crop_face + img.save('before.png') + + return img + + +def main(args, cfg, save_path='transferred_image.png'): + + setup(args, cfg) + + inference = Inference(cfg, args.model_path) + postprocess = PostProcess(cfg) + + source = Image.open(args.source_path).convert("RGB") + reference_paths = list(Path(args.reference_dir).glob("*")) + np.random.shuffle(reference_paths) + for reference_path in reference_paths: + if not reference_path.is_file(): + print(reference_path, "is not a valid file.") + continue + + reference = Image.open(reference_path).convert("RGB") + + # Transfer the psgan from reference to source. 
+ image, face = inference.transfer(source, reference, with_face=True) + image.save('before.png') + source_crop = source.crop( + (face.left(), face.top(), face.right(), face.bottom())) + image = postprocess(source_crop, image) + image.save(save_path) + + +if __name__ == '__main__': + args = parse_args() + cfg = get_config(args.config_file) + main(args, cfg) diff --git a/applications/tools/video-enhance.py b/applications/tools/video-enhance.py index 04ece7689d33f37c111e5f5acf2c20969f83c2bd..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/applications/tools/video-enhance.py +++ b/applications/tools/video-enhance.py @@ -1,115 +0,0 @@ -import sys -sys.path.append('.') - -import argparse -import paddle - -from DAIN.predict import VideoFrameInterp -from DeepRemaster.predict import DeepReasterPredictor -from DeOldify.predict import DeOldifyPredictor -from RealSR.predict import RealSRPredictor -from EDVR.predict import EDVRPredictor - -parser = argparse.ArgumentParser(description='Fix video') -parser.add_argument('--input', type=str, default=None, help='Input video') -parser.add_argument('--output', type=str, default='output', help='output dir') -parser.add_argument('--DAIN_weight', - type=str, - default=None, - help='Path to model weight') -parser.add_argument('--DeepRemaster_weight', - type=str, - default=None, - help='Path to model weight') -parser.add_argument('--DeOldify_weight', - type=str, - default=None, - help='Path to model weight') -parser.add_argument('--RealSR_weight', - type=str, - default=None, - help='Path to model weight') -parser.add_argument('--EDVR_weight', - type=str, - default=None, - help='Path to model weight') -# DAIN args -parser.add_argument('--time_step', - type=float, - default=0.5, - help='choose the time steps') -# DeepRemaster args -parser.add_argument('--reference_dir', - type=str, - default=None, - help='Path to the reference image directory') -parser.add_argument('--colorization', - action='store_true', - default=False, - help='Remaster with colorization') -parser.add_argument('--mindim', - type=int, - default=360, - help='Length of minimum image edges') -# DeOldify args -parser.add_argument('--render_factor', - type=int, - default=32, - help='model inputsize=render_factor*16') -#process order support model name:[DAIN, DeepRemaster, DeOldify, RealSR, EDVR] -parser.add_argument('--proccess_order', - type=str, - default='none', - nargs='+', - help='Process order') - -if __name__ == "__main__": - args = parser.parse_args() - - orders = args.proccess_order - temp_video_path = None - - for order in orders: - print('Model {} proccess start..'.format(order)) - if temp_video_path is None: - temp_video_path = args.input - if order == 'DAIN': - predictor = VideoFrameInterp(args.time_step, - args.DAIN_weight, - temp_video_path, - output_path=args.output) - frames_path, temp_video_path = predictor.run() - elif order == 'DeepRemaster': - paddle.disable_static() - predictor = DeepReasterPredictor( - temp_video_path, - args.output, - weight_path=args.DeepRemaster_weight, - colorization=args.colorization, - reference_dir=args.reference_dir, - mindim=args.mindim) - frames_path, temp_video_path = predictor.run() - paddle.enable_static() - elif order == 'DeOldify': - paddle.disable_static() - predictor = DeOldifyPredictor(temp_video_path, - args.output, - weight_path=args.DeOldify_weight) - frames_path, temp_video_path = predictor.run() - paddle.enable_static() - elif order == 'RealSR': - paddle.disable_static() - predictor = RealSRPredictor(temp_video_path, - args.output, - 
weight_path=args.RealSR_weight) - frames_path, temp_video_path = predictor.run() - paddle.enable_static() - elif order == 'EDVR': - predictor = EDVRPredictor(temp_video_path, - args.output, - weight_path=args.EDVR_weight) - frames_path, temp_video_path = predictor.run() - - print('Model {} output frames path:'.format(order), frames_path) - print('Model {} output video path:'.format(order), temp_video_path) - print('Model {} proccess done!'.format(order)) diff --git a/configs/cyclegan_cityscapes.yaml b/configs/cyclegan_cityscapes.yaml index f74d9e3bdd35e3a521b3056a5b268f67bba2e406..97bf179e56d435178af468a39ede578878e826f1 100644 --- a/configs/cyclegan_cityscapes.yaml +++ b/configs/cyclegan_cityscapes.yaml @@ -36,16 +36,18 @@ dataset: output_nc: 3 serial_batches: False pool_size: 50 - transform: - load_size: 286 - crop_size: 256 - preprocess: resize_and_crop - no_flip: False - normalize: - mean: - (127.5, 127.5, 127.5) - std: - (127.5, 127.5, 127.5) + transforms: + - name: Resize + size: [286, 286] + interpolation: 2 #cv2.INTER_CUBIC + - name: RandomCrop + output_size: [256, 256] + - name: RandomHorizontalFlip + prob: 0.5 + - name: Permute + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] test: name: SingleDataset dataroot: data/cityscapes/testB @@ -55,17 +57,14 @@ dataset: output_nc: 3 serial_batches: False pool_size: 50 - transform: - load_size: 256 - crop_size: 256 - preprocess: resize_and_crop - no_flip: True - normalize: - mean: - (127.5, 127.5, 127.5) - std: - (127.5, 127.5, 127.5) - + transforms: + - name: Resize + size: [256, 256] + interpolation: 2 #cv2.INTER_CUBIC + - name: Permute + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] optimizer: name: Adam diff --git a/configs/cyclegan_horse2zebra.yaml b/configs/cyclegan_horse2zebra.yaml index 0e845bd5183428f7c166bae300f74757406c07f5..c86cd8c3440203a53269d71b9c627c0dafe351f8 100644 --- a/configs/cyclegan_horse2zebra.yaml +++ b/configs/cyclegan_horse2zebra.yaml @@ -35,16 +35,18 @@ dataset: output_nc: 3 serial_batches: False pool_size: 50 - transform: - load_size: 286 - crop_size: 256 - preprocess: resize_and_crop - no_flip: False - normalize: - mean: - (127.5, 127.5, 127.5) - std: - (127.5, 127.5, 127.5) + transforms: + - name: Resize + size: [286, 286] + interpolation: 2 #cv2.INTER_CUBIC + - name: RandomCrop + output_size: [256, 256] + - name: RandomHorizontalFlip + prob: 0.5 + - name: Permute + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] test: name: SingleDataset dataroot: data/horse2zebra/testA @@ -55,15 +57,14 @@ dataset: serial_batches: False pool_size: 50 transform: - load_size: 256 - crop_size: 256 - preprocess: resize_and_crop - no_flip: True - normalize: - mean: - (127.5, 127.5, 127.5) - std: - (127.5, 127.5, 127.5) + transform: + - name: Resize + size: [256, 256] + interpolation: 2 #cv2.INTER_CUBIC + - name: Permute + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] optimizer: name: Adam diff --git a/configs/makeup.yaml b/configs/makeup.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03bfb066877551dfae5f966c37168d9b48a502c4 --- /dev/null +++ b/configs/makeup.yaml @@ -0,0 +1,54 @@ +epochs: 100 +isTrain: True +output_dir: tmp +checkpoints_dir: checkpoints +lambda_A: 10.0 +lambda_B: 10.0 +lambda_identity: 0.5 + +model: + name: MakeupModel + generator: + name: GeneratorPSGANAttention + conv_dim: 64 + repeat_num: 6 + discriminator: + name: NLayerDiscriminator + ndf: 64 + n_layers: 3 + 
input_nc: 3 + norm_type: spectral + gan_mode: lsgan + +dataset: + train: + name: MakeupDataset + trans_size: 256 + dataroot: MT-Dataset + cls_list: [non-makeup, makeup] + phase: train + pool_size: 16 + test: + name: MakeupDataset + trans_size: 256 + dataroot: MT-Dataset + cls_list: [non-makeup, makeup] + phase: test + pool_size: 16 + +optimizer: + name: Adam + beta1: 0.5 + +lr_scheduler: + name: linear + learning_rate: 0.0002 + start_epoch: 100 + decay_epochs: 100 + +log_config: + interval: 10 + visiual_interval: 500 + +snapshot_config: + interval: 1 diff --git a/configs/pix2pix_cityscapes.yaml b/configs/pix2pix_cityscapes.yaml index 5919ff2e5a5c2c267a9204d117dc7aba5fb245a7..5a6dd3bbce4f49178a12495c294cf1eef2778071 100644 --- a/configs/pix2pix_cityscapes.yaml +++ b/configs/pix2pix_cityscapes.yaml @@ -33,16 +33,23 @@ dataset: output_nc: 3 serial_batches: False pool_size: 0 - transform: - load_size: 286 - crop_size: 256 - preprocess: resize_and_crop - no_flip: False - normalize: - mean: - (127.5, 127.5, 127.5) - std: - (127.5, 127.5, 127.5) + transforms: + - name: Resize + size: [286, 286] + interpolation: 2 #cv2.INTER_CUBIC + keys: [image, image] + - name: PairedRandomCrop + output_size: [256, 256] + keys: [image, image] + - name: PairedRandomHorizontalFlip + prob: 0.5 + keys: [image, image] + - name: Permute + keys: [image, image] + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] + keys: [image, image] test: name: PairedDataset dataroot: data/cityscapes/ @@ -53,16 +60,18 @@ dataset: output_nc: 3 serial_batches: True pool_size: 50 - transform: - load_size: 256 - crop_size: 256 - preprocess: resize_and_crop - no_flip: True - normalize: - mean: - (127.5, 127.5, 127.5) - std: - (127.5, 127.5, 127.5) + transforms: + - name: Resize + size: [256, 256] + interpolation: 2 #cv2.INTER_CUBIC + keys: [image, image] + - name: Permute + keys: [image, image] + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] + keys: [image, image] + optimizer: name: Adam diff --git a/configs/pix2pix_cityscapes_2gpus.yaml b/configs/pix2pix_cityscapes_2gpus.yaml index 20f494c6fb13690254dd2d047df8c8970615ebff..41279f0be0737f0230b6b5539700907437289391 100644 --- a/configs/pix2pix_cityscapes_2gpus.yaml +++ b/configs/pix2pix_cityscapes_2gpus.yaml @@ -32,16 +32,23 @@ dataset: output_nc: 3 serial_batches: False pool_size: 0 - transform: - load_size: 286 - crop_size: 256 - preprocess: resize_and_crop - no_flip: False - normalize: - mean: - (127.5, 127.5, 127.5) - std: - (127.5, 127.5, 127.5) + transforms: + - name: Resize + size: [286, 286] + interpolation: 2 #cv2.INTER_CUBIC + keys: [image, image] + - name: PairedRandomCrop + output_size: [256, 256] + keys: [image, image] + - name: PairedRandomHorizontalFlip + prob: 0.5 + keys: [image, image] + - name: Permute + keys: [image, image] + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] + keys: [image, image] test: name: PairedDataset dataroot: data/cityscapes/ @@ -52,16 +59,17 @@ dataset: output_nc: 3 serial_batches: True pool_size: 50 - transform: - load_size: 256 - crop_size: 256 - preprocess: resize_and_crop - no_flip: True - normalize: - mean: - (127.5, 127.5, 127.5) - std: - (127.5, 127.5, 127.5) + transforms: + - name: Resize + size: [256, 256] + interpolation: 2 #cv2.INTER_CUBIC + keys: [image, image] + - name: Permute + keys: [image, image] + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] + keys: [image, image] optimizer: name: Adam diff --git 
a/configs/pix2pix_facades.yaml b/configs/pix2pix_facades.yaml index 31b5f145dccdfd75bbdcd14c3fa896676d729037..37e89ce120185100a6146fa5f550334f522eef42 100644 --- a/configs/pix2pix_facades.yaml +++ b/configs/pix2pix_facades.yaml @@ -32,16 +32,23 @@ dataset: output_nc: 3 serial_batches: False pool_size: 0 - transform: - load_size: 286 - crop_size: 256 - preprocess: resize_and_crop - no_flip: False - normalize: - mean: - (127.5, 127.5, 127.5) - std: - (127.5, 127.5, 127.5) + transforms: + - name: Resize + size: [286, 286] + interpolation: 2 #cv2.INTER_CUBIC + keys: [image, image] + - name: PairedRandomCrop + output_size: [256, 256] + keys: [image, image] + - name: PairedRandomHorizontalFlip + prob: 0.5 + keys: [image, image] + - name: Permute + keys: [image, image] + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] + keys: [image, image] test: name: PairedDataset dataroot: data/facades/ @@ -52,16 +59,17 @@ dataset: output_nc: 3 serial_batches: True pool_size: 50 - transform: - load_size: 256 - crop_size: 256 - preprocess: resize_and_crop - no_flip: True - normalize: - mean: - (127.5, 127.5, 127.5) - std: - (127.5, 127.5, 127.5) + transforms: + - name: Resize + size: [256, 256] + interpolation: 2 #cv2.INTER_CUBIC + keys: [image, image] + - name: Permute + keys: [image, image] + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] + keys: [image, image] optimizer: name: Adam diff --git a/ppgan/apps/__init__.py b/ppgan/apps/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6e8a7e9db2c17508d47f3092d5861ccc42aa2de9 --- /dev/null +++ b/ppgan/apps/__init__.py @@ -0,0 +1,6 @@ +from .dain_predictor import DAINPredictor +from .deepremaster_predictor import DeepRemasterPredictor +from .deoldify_predictor import DeOldifyPredictor +from .realsr_predictor import RealSRPredictor +from .edvr_predictor import EDVRPredictor +from .first_order_predictor import FirstOrderPredictor diff --git a/ppgan/apps/base_predictor.py b/ppgan/apps/base_predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..e25039147ae18bfb95d9af0a884d10247cf7e6aa --- /dev/null +++ b/ppgan/apps/base_predictor.py @@ -0,0 +1,72 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
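+
+# A minimal sketch of the intended subclassing pattern (illustrative only;
+# ``MyPredictor`` and the weight directory are hypothetical names): a subclass
+# sets ``self.weight_path`` before calling ``build_inference_model()``, then
+# routes inference through ``base_forward()``, which accepts a dict, a
+# list/tuple matching ``feed_names``, or a single array.
+#
+#     class MyPredictor(BasePredictor):
+#         def __init__(self, weight_path='pretrained_weights'):
+#             self.weight_path = weight_path
+#             self.build_inference_model()
+#
+#         def run(self, frame):
+#             return self.base_forward(frame)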
+
+import os
+import cv2
+import paddle
+
+
+class BasePredictor(object):
+    def __init__(self):
+        pass
+
+    def build_inference_model(self):
+        if paddle.in_dynamic_mode():
+            # todo self.model = build_model(self.cfg)
+            pass
+        else:
+            place = paddle.fluid.framework._current_expected_place()
+            self.exe = paddle.fluid.Executor(place)
+            # initialize both names so a malformed weight directory raises a
+            # clear error instead of an UnboundLocalError below
+            model_file = None
+            param_file = None
+            file_names = os.listdir(self.weight_path)
+            for file_name in file_names:
+                if file_name.find('model') > -1:
+                    model_file = file_name
+                elif file_name.find('param') > -1:
+                    param_file = file_name
+            if model_file is None or param_file is None:
+                raise ValueError(
+                    'no model/params file found in {}'.format(
+                        self.weight_path))
+
+            self.program, self.feed_names, self.fetch_targets = paddle.static.load_inference_model(
+                dirname=self.weight_path,
+                executor=self.exe,
+                model_filename=model_file,
+                params_filename=param_file)
+
+    def base_forward(self, inputs):
+        if paddle.in_dynamic_mode():
+            out = self.model(inputs)
+        else:
+            feed_dict = {}
+            if isinstance(inputs, dict):
+                feed_dict = inputs
+            elif isinstance(inputs, (list, tuple)):
+                for i, feed_name in enumerate(self.feed_names):
+                    feed_dict[feed_name] = inputs[i]
+            else:
+                feed_dict[self.feed_names[0]] = inputs
+
+            out = self.exe.run(self.program,
+                               fetch_list=self.fetch_targets,
+                               feed=feed_dict)
+
+        return out
+
+    def is_video(self, input):
+        # cv2.VideoCapture rarely raises, so the former bare try/except
+        # reported almost every path as a video; require an opened,
+        # multi-frame stream instead.
+        cap = cv2.VideoCapture(input)
+        opened = cap.isOpened()
+        num_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT) if opened else 0
+        cap.release()
+        return opened and num_frames > 1
+
+    def run(self):
+        raise NotImplementedError
diff --git a/ppgan/apps/dain_predictor.py b/ppgan/apps/dain_predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..3374e384154d4376be8d0f13ae17526e8993e7de
--- /dev/null
+++ b/ppgan/apps/dain_predictor.py
@@ -0,0 +1,256 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
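+
+# Usage sketch (illustrative; 'input.mp4' is a placeholder path). The weight
+# archive is downloaded automatically when ``weight_path`` is None. Note that
+# ``time_step`` defaults to None but ``run()`` computes ``int(1.0 / timestep)``,
+# so a value such as 0.5 (2x interpolation) must be supplied:
+#
+#     from ppgan.apps import DAINPredictor
+#
+#     predictor = DAINPredictor(output_path='output', time_step=0.5)
+#     frame_pattern, video_path = predictor.run('input.mp4')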
+ +import os +import cv2 +import glob +import shutil +import numpy as np +from tqdm import tqdm +from imageio import imread, imsave + +import paddle +import paddle.fluid as fluid +from paddle.utils.download import get_path_from_url +from ppgan.utils.video import video2frames, frames2video + +from .base_predictor import BasePredictor + +DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar' + + +class DAINPredictor(BasePredictor): + def __init__(self, + output_path='output', + weight_path=None, + time_step=None, + use_gpu=True, + key_frame_thread=0., + remove_duplicates=False): + self.output_path = os.path.join(output_path, 'DAIN') + if weight_path is None: + cur_path = os.path.abspath(os.path.dirname(__file__)) + weight_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path) + + self.weight_path = weight_path + self.time_step = time_step + self.key_frame_thread = key_frame_thread + self.remove_duplicates = remove_duplicates + + self.build_inference_model() + + def run(self, video_path): + frame_path_input = os.path.join(self.output_path, 'frames-input') + frame_path_interpolated = os.path.join(self.output_path, + 'frames-interpolated') + frame_path_combined = os.path.join(self.output_path, 'frames-combined') + video_path_output = os.path.join(self.output_path, 'videos-output') + + if not os.path.exists(self.output_path): + os.makedirs(self.output_path) + if not os.path.exists(frame_path_input): + os.makedirs(frame_path_input) + if not os.path.exists(frame_path_interpolated): + os.makedirs(frame_path_interpolated) + if not os.path.exists(frame_path_combined): + os.makedirs(frame_path_combined) + if not os.path.exists(video_path_output): + os.makedirs(video_path_output) + + timestep = self.time_step + num_frames = int(1.0 / timestep) - 1 + + cap = cv2.VideoCapture(video_path) + fps = cap.get(cv2.CAP_PROP_FPS) + print("Old fps (frame rate): ", fps) + + times_interp = int(1.0 / timestep) + r2 = str(int(fps) * times_interp) + print("New fps (frame rate): ", r2) + + out_path = video2frames(video_path, frame_path_input) + + vidname = video_path.split('/')[-1].split('.')[0] + + frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) + orig_frames = len(frames) + need_frames = orig_frames * times_interp + + if self.remove_duplicates: + frames = self.remove_duplicate_frames(out_path) + left_frames = len(frames) + timestep = left_frames / need_frames + num_frames = int(1.0 / timestep) - 1 + + img = imread(frames[0]) + + int_width = img.shape[1] + int_height = img.shape[0] + channel = img.shape[2] + if not channel == 3: + return + + if int_width != ((int_width >> 7) << 7): + int_width_pad = (((int_width >> 7) + 1) << 7) # more than necessary + padding_left = int((int_width_pad - int_width) / 2) + padding_right = int_width_pad - int_width - padding_left + else: + int_width_pad = int_width + padding_left = 32 + padding_right = 32 + + if int_height != ((int_height >> 7) << 7): + int_height_pad = ( + ((int_height >> 7) + 1) << 7) # more than necessary + padding_top = int((int_height_pad - int_height) / 2) + padding_bottom = int_height_pad - int_height - padding_top + else: + int_height_pad = int_height + padding_top = 32 + padding_bottom = 32 + + frame_num = len(frames) + + if not os.path.exists(os.path.join(frame_path_interpolated, vidname)): + os.makedirs(os.path.join(frame_path_interpolated, vidname)) + if not os.path.exists(os.path.join(frame_path_combined, vidname)): + os.makedirs(os.path.join(frame_path_combined, vidname)) + + for i in tqdm(range(frame_num - 1)): + first = 
frames[i] + second = frames[i + 1] + + img_first = imread(first) + img_second = imread(second) + '''--------------Frame change test------------------------''' + img_first_gray = np.dot(img_first[..., :3], [0.299, 0.587, 0.114]) + img_second_gray = np.dot(img_second[..., :3], [0.299, 0.587, 0.114]) + + img_first_gray = img_first_gray.flatten(order='C') + img_second_gray = img_second_gray.flatten(order='C') + corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1] + key_frame = False + if corr < self.key_frame_thread: + key_frame = True + '''-------------------------------------------------------''' + + X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255 + X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255 + + assert (X0.shape[1] == X1.shape[1]) + assert (X0.shape[2] == X1.shape[2]) + + X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \ + (padding_left, padding_right)), mode='edge') + X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \ + (padding_left, padding_right)), mode='edge') + + X0 = np.expand_dims(X0, axis=0) + X1 = np.expand_dims(X1, axis=0) + + X0 = np.expand_dims(X0, axis=0) + X1 = np.expand_dims(X1, axis=0) + + X = np.concatenate((X0, X1), axis=0) + + o = self.base_forward(X) + + y_ = o[0] + + y_ = [ + np.transpose( + 255.0 * item.clip( + 0, 1.0)[0, :, padding_top:padding_top + int_height, + padding_left:padding_left + int_width], + (1, 2, 0)) for item in y_ + ] + time_offsets = [kk * timestep for kk in range(1, 1 + num_frames, 1)] + + count = 1 + for item, time_offset in zip(y_, time_offsets): + out_dir = os.path.join(frame_path_interpolated, vidname, + "{:0>6d}_{:0>4d}.png".format(i, count)) + count = count + 1 + imsave(out_dir, np.round(item).astype(np.uint8)) + + num_frames = int(1.0 / timestep) - 1 + + input_dir = os.path.join(frame_path_input, vidname) + interpolated_dir = os.path.join(frame_path_interpolated, vidname) + combined_dir = os.path.join(frame_path_combined, vidname) + self.combine_frames(input_dir, interpolated_dir, combined_dir, + num_frames) + + frame_pattern_combined = os.path.join(frame_path_combined, vidname, + '%08d.png') + video_pattern_output = os.path.join(video_path_output, vidname + '.mp4') + if os.path.exists(video_pattern_output): + os.remove(video_pattern_output) + frames2video(frame_pattern_combined, video_pattern_output, r2) + + return frame_pattern_combined, video_pattern_output + + def combine_frames(self, input, interpolated, combined, num_frames): + frames1 = sorted(glob.glob(os.path.join(input, '*.png'))) + frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png'))) + num1 = len(frames1) + num2 = len(frames2) + + for i in range(num1): + src = frames1[i] + imgname = int(src.split('/')[-1].split('.')[-2]) + assert i == imgname + dst = os.path.join(combined, + '{:08d}.png'.format(i * (num_frames + 1))) + shutil.copy2(src, dst) + if i < num1 - 1: + try: + for k in range(num_frames): + src = frames2[i * num_frames + k] + dst = os.path.join( + combined, + '{:08d}.png'.format(i * (num_frames + 1) + k + 1)) + shutil.copy2(src, dst) + except Exception as e: + print(e) + + def remove_duplicate_frames(self, paths): + def dhash(image, hash_size=8): + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + resized = cv2.resize(gray, (hash_size + 1, hash_size)) + diff = resized[:, 1:] > resized[:, :-1] + return sum([2**i for (i, v) in enumerate(diff.flatten()) if v]) + + hashes = {} + image_paths = sorted(glob.glob(os.path.join(paths, '*.png'))) + for image_path in image_paths: + image = cv2.imread(image_path) + h = 
dhash(image) + p = hashes.get(h, []) + p.append(image_path) + hashes[h] = p + + for (h, hashed_paths) in hashes.items(): + if len(hashed_paths) > 1: + for p in hashed_paths[1:]: + os.remove(p) + + frames = sorted(glob.glob(os.path.join(paths, '*.png'))) + for fid, frame in enumerate(frames): + new_name = '{:08d}'.format(fid) + '.png' + new_name = os.path.join(paths, new_name) + os.rename(frame, new_name) + + frames = sorted(glob.glob(os.path.join(paths, '*.png'))) + return frames diff --git a/applications/DeepRemaster/predict.py b/ppgan/apps/deepremaster_predictor.py similarity index 76% rename from applications/DeepRemaster/predict.py rename to ppgan/apps/deepremaster_predictor.py index baa8b7fd68d4629d57f8d5e31b405fe45b049dd1..596abe67cb902b20af40489ec0a69cb79c28924b 100644 --- a/applications/DeepRemaster/predict.py +++ b/ppgan/apps/deepremaster_predictor.py @@ -1,59 +1,88 @@ -import os -import sys - -cur_path = os.path.abspath(os.path.dirname(__file__)) -sys.path.append(cur_path) - -import paddle -import paddle.nn as nn +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. +import os import cv2 -from PIL import Image +import subprocess import numpy as np from tqdm import tqdm -import argparse -import subprocess -import utils +from PIL import Image +from skimage import color + +import paddle from ppgan.models.generators.remaster import NetworkR, NetworkC from paddle.utils.download import get_path_from_url +from .base_predictor import BasePredictor DEEPREMASTER_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams' -parser = argparse.ArgumentParser(description='Remastering') -parser.add_argument('--input', type=str, default=None, help='Input video') -parser.add_argument('--output', type=str, default='output', help='output dir') -parser.add_argument('--reference_dir', - type=str, - default=None, - help='Path to the reference image directory') -parser.add_argument('--colorization', - action='store_true', - default=False, - help='Remaster without colorization') -parser.add_argument('--mindim', - type=int, - default='360', - help='Length of minimum image edges') - - -class DeepReasterPredictor: + +def convertLAB2RGB(lab): + lab[:, :, 0:1] = lab[:, :, 0:1] * 100 # [0, 1] -> [0, 100] + lab[:, :, 1:3] = np.clip(lab[:, :, 1:3] * 255 - 128, -100, + 100) # [0, 1] -> [-128, 128] + rgb = color.lab2rgb(lab.astype(np.float64)) + return rgb + + +def convertRGB2LABTensor(rgb): + lab = color.rgb2lab( + np.asarray(rgb)) # RGB -> LAB L[0, 100] a[-127, 128] b[-128, 127] + ab = np.clip(lab[:, :, 1:3] + 128, 0, 255) # AB --> [0, 255] + ab = paddle.to_tensor(ab.astype('float32')) / 255. 
+ L = lab[:, :, 0] * 2.55 # L --> [0, 255] + L = Image.fromarray(np.uint8(L)) + + L = paddle.to_tensor(np.array(L).astype('float32')[..., np.newaxis] / 255.0) + return L, ab + + +def addMergin(img, target_w, target_h, background_color=(0, 0, 0)): + width, height = img.size + if width == target_w and height == target_h: + return img + scale = max(target_w, target_h) / max(width, height) + width = int(width * scale / 16.) * 16 + height = int(height * scale / 16.) * 16 + + img = img.resize((width, height), Image.BICUBIC) + xp = (target_w - width) // 2 + yp = (target_h - height) // 2 + result = Image.new(img.mode, (target_w, target_h), background_color) + result.paste(img, (xp, yp)) + return result + + +class DeepRemasterPredictor(BasePredictor): def __init__(self, - input, - output, + output='output', weight_path=None, colorization=False, reference_dir=None, mindim=360): - self.input = input self.output = os.path.join(output, 'DeepRemaster') self.colorization = colorization self.reference_dir = reference_dir self.mindim = mindim if weight_path is None: + cur_path = os.path.abspath(os.path.dirname(__file__)) weight_path = get_path_from_url(DEEPREMASTER_WEIGHT_URL, cur_path) - state_dict, _ = paddle.load(weight_path) + self.weight_path = weight_path + + state_dict = paddle.load(weight_path) self.modelR = NetworkR() self.modelR.load_dict(state_dict['modelR']) @@ -63,7 +92,7 @@ class DeepReasterPredictor: self.modelC.load_dict(state_dict['modelC']) self.modelC.eval() - def run(self): + def run(self, video_path): outputdir = self.output outputdir_in = os.path.join(outputdir, 'input/') os.makedirs(outputdir_in, exist_ok=True) @@ -94,9 +123,7 @@ class DeepReasterPredictor: refimgs = [] for i, v in enumerate(refs): - refimg = utils.addMergin(v, - target_w=target_w, - target_h=target_h) + refimg = addMergin(v, target_w=target_w, target_h=target_h) refimg = np.array(refimg).astype('float32').transpose( 2, 0, 1) / 255.0 refimgs.append(refimg) @@ -105,7 +132,7 @@ class DeepReasterPredictor: refimgs = paddle.unsqueeze(refimgs, 0) # Load video - cap = cv2.VideoCapture(self.input) + cap = cv2.VideoCapture(video_path) nframes = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) v_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH) v_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) @@ -156,7 +183,7 @@ class DeepReasterPredictor: elif nchannels == 3: cv2.imwrite(outputdir_in + '%07d.png' % index, frame) frame = frame[:, :, ::-1] ## BGR -> RGB - frame_l, frame_ab = utils.convertRGB2LABTensor(frame) + frame_l, frame_ab = convertRGB2LABTensor(frame) frame_l = frame_l.transpose([2, 0, 1]) frame_ab = frame_ab.transpose([2, 0, 1]) frame_l = frame_l.reshape([ @@ -193,7 +220,7 @@ class DeepReasterPredictor: (out_l, out_ab), axis=0).detach().numpy().transpose((1, 2, 0)) out = Image.fromarray( - np.uint8(utils.convertLAB2RGB(out) * 255)) + np.uint8(convertLAB2RGB(out) * 255)) out.save(outputdir_out + '%07d.png' % (index)) else: raise ValueError('channels of imag3 must be 3!') @@ -214,7 +241,7 @@ class DeepReasterPredictor: output = paddle.concat( (out_l, out_c), axis=0).numpy().transpose((1, 2, 0)) output = Image.fromarray( - np.uint8(utils.convertLAB2RGB(output) * 255)) + np.uint8(convertLAB2RGB(output) * 255)) output.save(outputdir_out + '%07d.png' % index) it = it + 1 @@ -222,7 +249,7 @@ class DeepReasterPredictor: # Save result videos outfile = os.path.join(outputdir, - self.input.split('/')[-1].split('.')[0]) + video_path.split('/')[-1].split('.')[0]) cmd = 'ffmpeg -y -r %d -i %s%%07d.png -vcodec libx264 -pix_fmt yuv420p -r %d %s_in.mp4' % ( fps, 
outputdir_in, fps, outfile) subprocess.call(cmd, shell=True) @@ -236,14 +263,3 @@ class DeepReasterPredictor: cap.release() pbar.close() return outputdir_out, '%s_out.mp4' % outfile - - -if __name__ == "__main__": - args = parser.parse_args() - paddle.disable_static() - predictor = DeepReasterPredictor(args.input, - args.output, - colorization=args.colorization, - reference_dir=args.reference_dir, - mindim=args.mindim) - predictor.run() diff --git a/applications/DeOldify/predict.py b/ppgan/apps/deoldify_predictor.py similarity index 64% rename from applications/DeOldify/predict.py rename to ppgan/apps/deoldify_predictor.py index fd94970f38adac3ed494ab0e3208aaa81cd4755f..d6d337101fb8a4a6e193fbdff00cc3e75eb6bfd0 100644 --- a/applications/DeOldify/predict.py +++ b/ppgan/apps/deoldify_predictor.py @@ -1,53 +1,45 @@ -import os -import sys - -cur_path = os.path.abspath(os.path.dirname(__file__)) -sys.path.append(cur_path) +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. +import os import cv2 import glob -import argparse import numpy as np -import paddle -import pickle - from PIL import Image from tqdm import tqdm -from paddle import fluid + +import paddle from paddle.utils.download import get_path_from_url from ppgan.utils.video import frames2video, video2frames from ppgan.models.generators.deoldify import build_model -parser = argparse.ArgumentParser(description='DeOldify') -parser.add_argument('--input', type=str, default='none', help='Input video') -parser.add_argument('--output', type=str, default='output', help='output dir') -parser.add_argument('--render_factor', - type=int, - default=32, - help='model inputsize=render_factor*16') -parser.add_argument('--weight_path', - type=str, - default=None, - help='Path to the reference image directory') +from .base_predictor import BasePredictor DEOLDIFY_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams' -class DeOldifyPredictor(): - def __init__(self, - input, - output, - batch_size=1, - weight_path=None, - render_factor=32): - self.input = input +class DeOldifyPredictor(BasePredictor): + def __init__(self, output='output', weight_path=None, render_factor=32): + # self.input = input self.output = os.path.join(output, 'DeOldify') self.render_factor = render_factor self.model = build_model() if weight_path is None: + cur_path = os.path.abspath(os.path.dirname(__file__)) weight_path = get_path_from_url(DEOLDIFY_WEIGHT_URL, cur_path) - state_dict, _ = paddle.load(weight_path) + state_dict = paddle.load(weight_path) self.model.load_dict(state_dict) self.model.eval() @@ -85,8 +77,14 @@ class DeOldifyPredictor(): final = Image.fromarray(final) return final - def run_single(self, img_path): - ori_img = Image.open(img_path).convert('LA').convert('RGB') + def run_image(self, img): + if isinstance(img, str): + ori_img = Image.open(img).convert('LA').convert('RGB') + elif isinstance(img, np.ndarray): + ori_img = Image.fromarray(img).convert('LA').convert('RGB') + elif 
isinstance(img, Image.Image):
+            ori_img = img
+        else:
+            raise TypeError(
+                'img must be a path str, numpy.ndarray or PIL.Image')
+
         img = self.norm(ori_img, self.render_factor)
         x = paddle.to_tensor(img[np.newaxis, ...])
         out = self.model(x)
@@ -97,9 +95,8 @@ class DeOldifyPredictor():
         pred_img = self.post_process(pred_img, ori_img)
         return pred_img
 
-    def run(self):
-        vid = self.input
-        base_name = os.path.basename(vid).split('.')[0]
+    def run_video(self, video):
+        base_name = os.path.basename(video).split('.')[0]
         output_path = os.path.join(self.output, base_name)
 
         pred_frame_path = os.path.join(output_path, 'frames_pred')
@@ -109,15 +106,15 @@ class DeOldifyPredictor():
         if not os.path.exists(pred_frame_path):
             os.makedirs(pred_frame_path)
 
-        cap = cv2.VideoCapture(vid)
+        cap = cv2.VideoCapture(video)
         fps = cap.get(cv2.CAP_PROP_FPS)
 
-        out_path = video2frames(vid, output_path)
+        out_path = video2frames(video, output_path)
 
         frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
 
         for frame in tqdm(frames):
-            pred_img = self.run_single(frame)
+            pred_img = self.run_image(frame)
             frame_name = os.path.basename(frame)
             pred_img.save(os.path.join(pred_frame_path, frame_name))
 
@@ -130,15 +127,14 @@ class DeOldifyPredictor():
 
         return frame_pattern_combined, vid_out_path
 
+    def run(self, input):
+        if self.is_video(input):
+            return self.run_video(input)
+        else:
+            pred_img = self.run_image(input)
 
-if __name__ == '__main__':
-    paddle.disable_static()
-    args = parser.parse_args()
-
-    predictor = DeOldifyPredictor(args.input,
-                                  args.output,
-                                  weight_path=args.weight_path,
-                                  render_factor=args.render_factor)
-    frames_path, temp_video_path = predictor.run()
+            if self.output:
+                os.makedirs(self.output, exist_ok=True)
+                # drop the source extension so 'img.jpg' saves as 'img.png'
+                base_name = os.path.splitext(os.path.basename(input))[0]
+                pred_img.save(os.path.join(self.output, base_name + '.png'))
 
-    print('output video path:', temp_video_path)
+            return pred_img
diff --git a/ppgan/apps/edvr_predictor.py b/ppgan/apps/edvr_predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..0e9fe71db450e9a37bc3293bd0a9f196071f9213
--- /dev/null
+++ b/ppgan/apps/edvr_predictor.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
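+
+# Usage sketch (illustrative; 'input.mp4' is a placeholder). EDVR runs through
+# the static-graph branch of BasePredictor, and ``run()`` expects a video path:
+#
+#     from ppgan.apps import EDVRPredictor
+#
+#     predictor = EDVRPredictor(output='output')
+#     frame_pattern, video_path = predictor.run('input.mp4')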
+
+import os
+import cv2
+import time
+import glob
+import numpy as np
+from tqdm import tqdm
+
+from paddle.utils.download import get_path_from_url
+from ppgan.utils.video import frames2video, video2frames
+
+from .base_predictor import BasePredictor
+
+EDVR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar'
+
+
+def get_img(pred):
+    pred = pred.squeeze()
+    pred = np.clip(pred, a_min=0., a_max=1.0)
+    pred = pred * 255
+    pred = pred.round()
+    pred = pred.astype('uint8')
+    pred = np.transpose(pred, (1, 2, 0))  # chw -> hwc
+    pred = pred[:, :, ::-1]  # rgb -> bgr
+    return pred
+
+
+def save_img(img, framename):
+    dirname = os.path.dirname(framename)
+    if not os.path.exists(dirname):
+        os.makedirs(dirname)
+
+    cv2.imwrite(framename, img)
+
+
+def read_img(path, size=None, is_gt=False):
+    """read image by cv2
+    return: Numpy float32, HWC, BGR, [0,1]"""
+    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
+
+    img = img.astype(np.float32) / 255.
+    if img.ndim == 2:
+        img = np.expand_dims(img, axis=2)
+
+    if img.shape[2] > 3:
+        img = img[:, :, :3]
+    return img
+
+
+def get_test_neighbor_frames(crt_i, N, max_n, padding='new_info'):
+    """Generate an index list for reading N frames from a sequence of images
+    Args:
+        crt_i (int): current center index
+        max_n (int): max number of the sequence of images (calculated from 1)
+        N (int): reading N frames
+        padding (str): padding mode, one of replicate | reflection | new_info | circle
+            Example: crt_i = 0, N = 5
+            replicate: [0, 0, 0, 1, 2]
+            reflection: [2, 1, 0, 1, 2]
+            new_info: [4, 3, 0, 1, 2]
+            circle: [3, 4, 0, 1, 2]
+
+    Returns:
+        return_l (list [int]): a list of indexes
+    """
+    max_n = max_n - 1
+    n_pad = N // 2
+    return_l = []
+
+    for i in range(crt_i - n_pad, crt_i + n_pad + 1):
+        if i < 0:
+            if padding == 'replicate':
+                add_idx = 0
+            elif padding == 'reflection':
+                add_idx = -i
+            elif padding == 'new_info':
+                add_idx = (crt_i + n_pad) + (-i)
+            elif padding == 'circle':
+                add_idx = N + i
+            else:
+                raise ValueError('Wrong padding mode')
+        elif i > max_n:
+            if padding == 'replicate':
+                add_idx = max_n
+            elif padding == 'reflection':
+                add_idx = max_n * 2 - i
+            elif padding == 'new_info':
+                add_idx = (crt_i - n_pad) - (i - max_n)
+            elif padding == 'circle':
+                add_idx = i - N
+            else:
+                raise ValueError('Wrong padding mode')
+        else:
+            add_idx = i
+        return_l.append(add_idx)
+
+    return return_l
+
+
+class EDVRDataset:
+    def __init__(self, frame_paths):
+        self.frames = frame_paths
+
+    def __getitem__(self, index):
+        indexs = get_test_neighbor_frames(index, 5, len(self.frames))
+        frame_list = []
+        for i in indexs:
+            img = read_img(self.frames[i])
+            frame_list.append(img)
+
+        img_LQs = np.stack(frame_list, axis=0)
+        # BGR to RGB, HWC to CHW
+        img_LQs = img_LQs[:, :, :, [2, 1, 0]]
+        img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
+
+        return img_LQs, self.frames[index]
+
+    def __len__(self):
+        return len(self.frames)
+
+
+class EDVRPredictor(BasePredictor):
+    def __init__(self, output='output', weight_path=None):
+        self.output = os.path.join(output, 'EDVR')
+
+        if weight_path is None:
+            cur_path = os.path.abspath(os.path.dirname(__file__))
+            weight_path = get_path_from_url(EDVR_WEIGHT_URL, cur_path)
+
+        self.weight_path = weight_path
+
+        self.build_inference_model()
+
+    def run(self, video_path):
+        vid = video_path
+        base_name = os.path.basename(vid).split('.')[0]
+        output_path = os.path.join(self.output, base_name)
+        pred_frame_path = os.path.join(output_path, 'frames_pred')
+
+        if
not os.path.exists(output_path): + os.makedirs(output_path) + + if not os.path.exists(pred_frame_path): + os.makedirs(pred_frame_path) + + cap = cv2.VideoCapture(vid) + fps = cap.get(cv2.CAP_PROP_FPS) + + out_path = video2frames(vid, output_path) + + frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) + + dataset = EDVRDataset(frames) + + periods = [] + cur_time = time.time() + for infer_iter, data in enumerate(tqdm(dataset)): + data_feed_in = [data[0]] + + outs = self.base_forward(np.array(data_feed_in)) + + infer_result_list = [item for item in outs] + + frame_path = data[1] + + img_i = get_img(infer_result_list[0]) + save_img( + img_i, + os.path.join(pred_frame_path, os.path.basename(frame_path))) + + prev_time = cur_time + cur_time = time.time() + period = cur_time - prev_time + periods.append(period) + + # print('Processed {} samples'.format(infer_iter + 1)) + frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png') + vid_out_path = os.path.join(self.output, + '{}_edvr_out.mp4'.format(base_name)) + frames2video(frame_pattern_combined, vid_out_path, str(int(fps))) + + return frame_pattern_combined, vid_out_path diff --git a/ppgan/apps/first_order_predictor.py b/ppgan/apps/first_order_predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..b83d456ee1e85fe1b51a30bbe89bb5d9ba43492a --- /dev/null +++ b/ppgan/apps/first_order_predictor.py @@ -0,0 +1,221 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
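+
+# Usage sketch (illustrative; the file names are placeholders). ``run()``
+# resizes both inputs to 256x256 and writes the animation to
+# ``<output>/result.mp4``:
+#
+#     from ppgan.apps import FirstOrderPredictor
+#
+#     predictor = FirstOrderPredictor(output='output',
+#                                     relative=True,
+#                                     adapt_scale=True)
+#     predictor.run('source.png', 'driving.mp4')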
+
+import os
+
+import yaml
+import imageio
+import numpy as np
+from tqdm import tqdm
+from skimage import img_as_ubyte
+from skimage.transform import resize
+from scipy.spatial import ConvexHull
+
+import paddle
+from paddle.utils.download import get_path_from_url
+from ppgan.utils.animate import normalize_kp
+from ppgan.modules.keypoint_detector import KPDetector
+from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator
+
+from .base_predictor import BasePredictor
+
+
+class FirstOrderPredictor(BasePredictor):
+    def __init__(self,
+                 output='output',
+                 weight_path=None,
+                 config=None,
+                 relative=False,
+                 adapt_scale=False,
+                 find_best_frame=False,
+                 best_frame=None):
+        if config is not None and isinstance(config, str):
+            # yaml.load on the bare path string would parse the file name
+            # itself, not the file; open it and use the safe loader.
+            with open(config) as f:
+                self.cfg = yaml.safe_load(f)
+        elif isinstance(config, dict):
+            self.cfg = config
+        elif config is None:
+            self.cfg = {
+                'model_params': {
+                    'common_params': {
+                        'num_kp': 10,
+                        'num_channels': 3,
+                        'estimate_jacobian': True
+                    },
+                    'kp_detector_params': {
+                        'temperature': 0.1,
+                        'block_expansion': 32,
+                        'max_features': 1024,
+                        'scale_factor': 0.25,
+                        'num_blocks': 5
+                    },
+                    'generator_params': {
+                        'block_expansion': 64,
+                        'max_features': 512,
+                        'num_down_blocks': 2,
+                        'num_bottleneck_blocks': 6,
+                        'estimate_occlusion_map': True,
+                        'dense_motion_params': {
+                            'block_expansion': 64,
+                            'max_features': 1024,
+                            'num_blocks': 5,
+                            'scale_factor': 0.25
+                        }
+                    }
+                }
+            }
+        if weight_path is None:
+            vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-cpk.pdparams'
+            cur_path = os.path.abspath(os.path.dirname(__file__))
+            weight_path = get_path_from_url(vox_cpk_weight_url, cur_path)
+
+        self.weight_path = weight_path
+        self.output = output
+        self.relative = relative
+        self.adapt_scale = adapt_scale
+        self.find_best_frame = find_best_frame
+        self.best_frame = best_frame
+        self.generator, self.kp_detector = self.load_checkpoints(
+            self.cfg, self.weight_path)
+
+    def run(self, source_image, driving_video):
+        source_image = imageio.imread(source_image)
+        reader = imageio.get_reader(driving_video)
+        fps = reader.get_meta_data()['fps']
+        driving_video = []
+        try:
+            for im in reader:
+                driving_video.append(im)
+        except RuntimeError:
+            pass
+        reader.close()
+
+        source_image = resize(source_image, (256, 256))[..., :3]
+        driving_video = [
+            resize(frame, (256, 256))[..., :3] for frame in driving_video
+        ]
+
+        if self.find_best_frame or self.best_frame is not None:
+            i = self.best_frame if self.best_frame is not None else self.find_best_frame_func(
+                source_image, driving_video)
+
+            print("Best frame: " + str(i))
+            driving_forward = driving_video[i:]
+            driving_backward = driving_video[:(i + 1)][::-1]
+            predictions_forward = self.make_animation(
+                source_image,
+                driving_forward,
+                self.generator,
+                self.kp_detector,
+                relative=self.relative,
+                adapt_movement_scale=self.adapt_scale)
+            predictions_backward = self.make_animation(
+                source_image,
+                driving_backward,
+                self.generator,
+                self.kp_detector,
+                relative=self.relative,
+                adapt_movement_scale=self.adapt_scale)
+            predictions = predictions_backward[::-1] + predictions_forward[1:]
+        else:
+            predictions = self.make_animation(
+                source_image,
+                driving_video,
+                self.generator,
+                self.kp_detector,
+                relative=self.relative,
+                adapt_movement_scale=self.adapt_scale)
+        # make sure the output directory exists before writing the result
+        if not os.path.exists(self.output):
+            os.makedirs(self.output)
+        imageio.mimsave(os.path.join(self.output, 'result.mp4'),
+                        [img_as_ubyte(frame) for frame in predictions],
+                        fps=fps)
+
+    def load_checkpoints(self, config, checkpoint_path):
+
+        generator =
OcclusionAwareGenerator( + **config['model_params']['generator_params'], + **config['model_params']['common_params']) + + kp_detector = KPDetector(**config['model_params']['kp_detector_params'], + **config['model_params']['common_params']) + + checkpoint = paddle.load(self.weight_path) + generator.set_state_dict(checkpoint['generator']) + + kp_detector.set_state_dict(checkpoint['kp_detector']) + + generator.eval() + kp_detector.eval() + + return generator, kp_detector + + def make_animation(self, + source_image, + driving_video, + generator, + kp_detector, + relative=True, + adapt_movement_scale=True): + with paddle.no_grad(): + predictions = [] + source = paddle.to_tensor(source_image[np.newaxis].astype( + np.float32)).transpose([0, 3, 1, 2]) + + driving = paddle.to_tensor( + np.array(driving_video)[np.newaxis].astype( + np.float32)).transpose([0, 4, 1, 2, 3]) + kp_source = kp_detector(source) + kp_driving_initial = kp_detector(driving[:, :, 0]) + + for frame_idx in tqdm(range(driving.shape[2])): + driving_frame = driving[:, :, frame_idx] + kp_driving = kp_detector(driving_frame) + kp_norm = normalize_kp( + kp_source=kp_source, + kp_driving=kp_driving, + kp_driving_initial=kp_driving_initial, + use_relative_movement=relative, + use_relative_jacobian=relative, + adapt_movement_scale=adapt_movement_scale) + out = generator(source, kp_source=kp_source, kp_driving=kp_norm) + + predictions.append( + np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0]) + return predictions + + def find_best_frame_func(self, source, driving): + import face_alignment + + def normalize_kp(kp): + kp = kp - kp.mean(axis=0, keepdims=True) + area = ConvexHull(kp[:, :2]).volume + area = np.sqrt(area) + kp[:, :2] = kp[:, :2] / area + return kp + + fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, + flip_input=True) + + kp_source = fa.get_landmarks(255 * source)[0] + kp_source = normalize_kp(kp_source) + norm = float('inf') + frame_num = 0 + for i, image in tqdm(enumerate(driving)): + kp_driving = fa.get_landmarks(255 * image)[0] + kp_driving = normalize_kp(kp_driving) + new_norm = (np.abs(kp_source - kp_driving)**2).sum() + if new_norm < norm: + norm = new_norm + frame_num = i + return frame_num diff --git a/applications/RealSR/predict.py b/ppgan/apps/realsr_predictor.py similarity index 54% rename from applications/RealSR/predict.py rename to ppgan/apps/realsr_predictor.py index d032bc2a78029e174d80d0d124036db75a42d0cb..4c11f4921352d470b885f6ff4f320c81342e6928 100644 --- a/applications/RealSR/predict.py +++ b/ppgan/apps/realsr_predictor.py @@ -1,43 +1,43 @@ -import os -import sys - -cur_path = os.path.abspath(os.path.dirname(__file__)) -sys.path.append(cur_path) +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
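+
+# Usage sketch (illustrative; paths are placeholders). ``run()`` dispatches on
+# the input: videos are upscaled frame by frame via ``run_video()``, single
+# images via ``run_image()``:
+#
+#     from ppgan.apps import RealSRPredictor
+#
+#     sr = RealSRPredictor(output='output')
+#     sr.run('photo.png')                  # saves a PNG under output/RealSR
+#     frames, video = sr.run('input.mp4')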
+import os
 import cv2
 import glob
-import argparse
 import numpy as np
-import paddle
-import pickle
-
 from PIL import Image
 from tqdm import tqdm
 
+import paddle
 from ppgan.models.generators import RRDBNet
 from ppgan.utils.video import frames2video, video2frames
 from paddle.utils.download import get_path_from_url
-
-parser = argparse.ArgumentParser(description='RealSR')
-parser.add_argument('--input', type=str, default='none', help='Input video')
-parser.add_argument('--output', type=str, default='output', help='output dir')
-parser.add_argument('--weight_path',
-                    type=str,
-                    default=None,
-                    help='Path to the reference image directory')
+from .base_predictor import BasePredictor
 
 REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DF2K_JPEG.pdparams'
 
 
-class RealSRPredictor():
-    def __init__(self, input, output, batch_size=1, weight_path=None):
-        self.input = input
+class RealSRPredictor(BasePredictor):
+    def __init__(self, output='output', weight_path=None):
         self.output = os.path.join(output, 'RealSR')
         self.model = RRDBNet(3, 3, 64, 23)
         if weight_path is None:
+            cur_path = os.path.abspath(os.path.dirname(__file__))
             weight_path = get_path_from_url(REALSR_WEIGHT_URL, cur_path)
 
-        state_dict, _ = paddle.load(weight_path)
+        state_dict = paddle.load(weight_path)
         self.model.load_dict(state_dict)
         self.model.eval()
@@ -49,8 +49,14 @@ class RealSRPredictor():
         img = img.transpose((1, 2, 0))
         return (img * 255).clip(0, 255).astype('uint8')
 
-    def run_single(self, img_path):
-        ori_img = Image.open(img_path).convert('RGB')
+    def run_image(self, img):
+        if isinstance(img, str):
+            ori_img = Image.open(img).convert('RGB')
+        elif isinstance(img, np.ndarray):
+            ori_img = Image.fromarray(img).convert('RGB')
+        elif isinstance(img, Image.Image):
+            ori_img = img
+        else:
+            raise TypeError(
+                'img must be a path str, numpy.ndarray or PIL.Image')
+
         img = self.norm(ori_img)
         x = paddle.to_tensor(img[np.newaxis, ...])
         out = self.model(x)
@@ -59,9 +65,8 @@ class RealSRPredictor():
         pred_img = Image.fromarray(pred_img)
         return pred_img
 
-    def run(self):
-        vid = self.input
-        base_name = os.path.basename(vid).split('.')[0]
+    def run_video(self, video):
+        base_name = os.path.basename(video).split('.')[0]
         output_path = os.path.join(self.output, base_name)
 
         pred_frame_path = os.path.join(output_path, 'frames_pred')
@@ -71,15 +76,15 @@ class RealSRPredictor():
         if not os.path.exists(pred_frame_path):
             os.makedirs(pred_frame_path)
 
-        cap = cv2.VideoCapture(vid)
+        cap = cv2.VideoCapture(video)
         fps = cap.get(cv2.CAP_PROP_FPS)
 
-        out_path = video2frames(vid, output_path)
+        out_path = video2frames(video, output_path)
 
         frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
 
         for frame in tqdm(frames):
-            pred_img = self.run_single(frame)
+            pred_img = self.run_image(frame)
             frame_name = os.path.basename(frame)
             pred_img.save(os.path.join(pred_frame_path, frame_name))
 
@@ -92,14 +97,14 @@ class RealSRPredictor():
 
         return frame_pattern_combined, vid_out_path
 
+    def run(self, input):
+        if self.is_video(input):
+            return self.run_video(input)
+        else:
+            pred_img = self.run_image(input)
 
-if __name__ == '__main__':
-    paddle.disable_static()
-    args = parser.parse_args()
-
-    predictor = RealSRPredictor(args.input,
-                                args.output,
-                                weight_path=args.weight_path)
-    frames_path, temp_video_path = predictor.run()
+            if self.output:
+                os.makedirs(self.output, exist_ok=True)
+                # drop the source extension so 'img.jpg' saves as 'img.png'
+                base_name = os.path.splitext(os.path.basename(input))[0]
+                pred_img.save(os.path.join(self.output, base_name + '.png'))
 
-    print('output video path:', temp_video_path)
+            return pred_img
diff --git a/ppgan/datasets/__init__.py b/ppgan/datasets/__init__.py
index
0aeb70936b58125fb92d00ce5905e2608142f728..fcfbbcaa3488f56915efb8be8ba4d244b2e13782 100644 --- a/ppgan/datasets/__init__.py +++ b/ppgan/datasets/__init__.py @@ -1,4 +1,19 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .unpaired_dataset import UnpairedDataset from .single_dataset import SingleDataset from .paired_dataset import PairedDataset -from .sr_image_dataset import SRImageDataset \ No newline at end of file +from .sr_image_dataset import SRImageDataset +from .makeup_dataset import MakeupDataset diff --git a/ppgan/datasets/makeup_dataset.py b/ppgan/datasets/makeup_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..cf8ac8d3aedbf4ec70de74a15f8a4a766df8a6a8 --- /dev/null +++ b/ppgan/datasets/makeup_dataset.py @@ -0,0 +1,180 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cv2 +import os.path +from .base_dataset import BaseDataset, get_transform +from .transforms.makeup_transforms import get_makeup_transform +import paddle.vision.transforms as T +from PIL import Image +import random +import numpy as np +from ..utils.preprocess import * + +from .builder import DATASETS + + +@DATASETS.register() +class MakeupDataset(BaseDataset): + def __init__(self, cfg): + """Initialize this dataset class. 
+
+        Parameters:
+            cfg (dict) -- stores all the experiment flags
+        """
+        BaseDataset.__init__(self, cfg)
+        self.image_path = cfg.dataroot
+        self.mode = cfg.phase
+        self.transform = get_makeup_transform(cfg)
+
+        self.norm = T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])
+        self.transform_mask = get_makeup_transform(cfg, pic="mask")
+        self.trans_size = cfg.trans_size
+        self.cls_list = cfg.cls_list
+        self.cls_A = self.cls_list[0]
+        self.cls_B = self.cls_list[1]
+        for cls in self.cls_list:
+            setattr(
+                self, cls + "_list_path",
+                os.path.join(self.image_path, self.mode + '_' + cls + ".txt"))
+            setattr(self, cls + "_lines",
+                    open(getattr(self, cls + "_list_path"), 'r').readlines())
+            setattr(self, "num_of_" + cls + "_data",
+                    len(getattr(self, cls + "_lines")))
+        print('Start preprocessing dataset..!')
+        self.preprocess()
+        print('Finished preprocessing dataset..!')
+
+    def preprocess(self):
+        """preprocess image"""
+        for cls in self.cls_list:
+            setattr(self, cls + "_filenames", [])
+            setattr(self, cls + "_mask_filenames", [])
+            setattr(self, cls + "_lmks_filenames", [])
+
+            lines = getattr(self, cls + "_lines")
+            random.shuffle(lines)
+
+            for i, line in enumerate(lines):
+                splits = line.split()
+                getattr(self, cls + "_filenames").append(splits[0])
+                getattr(self, cls + "_mask_filenames").append(splits[1])
+                getattr(self, cls + "_lmks_filenames").append(splits[2])
+
+    def __getitem__(self, index):
+        """Return MANet and MDNet needed params.
+
+        Parameters:
+            index (int) -- a random integer for data indexing
+
+        Returns a dictionary that contains needed params.
+        """
+        try:
+            # random.randint is inclusive at both ends, so subtract 1 to keep
+            # the sampled index inside the file list
+            index_A = random.randint(
+                0, getattr(self, "num_of_" + self.cls_A + "_data") - 1)
+            index_B = random.randint(
+                0, getattr(self, "num_of_" + self.cls_B + "_data") - 1)
+
+            if self.mode == 'test':
+                num_b = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
+                index_A = int(index / num_b)
+                index_B = int(index % num_b)
+            image_A = Image.open(
+                os.path.join(self.image_path,
+                             getattr(self, self.cls_A +
+                                     "_filenames")[index_A])).convert("RGB")
+
+            image_B = Image.open(
+                os.path.join(self.image_path,
+                             getattr(self, self.cls_B +
+                                     "_filenames")[index_B])).convert("RGB")
+            mask_A = np.array(
+                Image.open(
+                    os.path.join(
+                        self.image_path,
+                        getattr(self,
+                                self.cls_A + "_mask_filenames")[index_A])))
+            mask_B = np.array(
+                Image.open(
+                    os.path.join(
+                        self.image_path,
+                        getattr(self, self.cls_B +
+                                "_mask_filenames")[index_B])).convert('L'))
+            image_A = np.array(image_A)
+            image_B = np.array(image_B)
+
+            image_A = self.transform(image_A)
+            image_B = self.transform(image_B)
+
+            mask_A = cv2.resize(mask_A, (256, 256),
+                                interpolation=cv2.INTER_NEAREST)
+            mask_B = cv2.resize(mask_B, (256, 256),
+                                interpolation=cv2.INTER_NEAREST)
+
+            lmks_A = np.loadtxt(
+                os.path.join(
+                    self.image_path,
+                    getattr(self, self.cls_A + "_lmks_filenames")[index_A]))
+            lmks_B = np.loadtxt(
+                os.path.join(
+                    self.image_path,
+                    getattr(self, self.cls_B + "_lmks_filenames")[index_B]))
+            lmks_A = lmks_A / image_A.shape[:2] * self.trans_size
+            lmks_B = lmks_B / image_B.shape[:2] * self.trans_size
+
+            P_A = generate_P_from_lmks(lmks_A, self.trans_size,
+                                       image_A.shape[0], image_A.shape[1])
+
+            P_B = generate_P_from_lmks(lmks_B, self.trans_size,
+                                       image_B.shape[0], image_B.shape[1])
+
+            mask_A_aug = generate_mask_aug(mask_A, lmks_A)
+            mask_B_aug = generate_mask_aug(mask_B, lmks_B)
+
+            consis_mask = calculate_consis_mask(mask_A_aug, mask_B_aug)
+            # identity consistency masks pair each image with itself
+            consis_mask_idt_A = calculate_consis_mask(mask_A_aug, mask_A_aug)
+            consis_mask_idt_B = calculate_consis_mask(mask_B_aug, mask_B_aug)
+
+        except Exception as e:
+            print(e)
+            return self.__getitem__(index + 1)
+        return {
+            'image_A': self.norm(image_A),
+            'image_B': self.norm(image_B),
+            'mask_A': np.float32(mask_A),
+            'mask_B': np.float32(mask_B),
+            'consis_mask': np.float32(consis_mask),
+            'P_A': np.float32(P_A),
+            'P_B': np.float32(P_B),
+            'consis_mask_idt_A': np.float32(consis_mask_idt_A),
+            'consis_mask_idt_B': np.float32(consis_mask_idt_B),
+            'mask_A_aug': np.float32(mask_A_aug),
+            'mask_B_aug': np.float32(mask_B_aug)
+        }
+
+    def __len__(self):
+        """Return the total number of images in the dataset.
+
+        As we have two datasets with potentially different numbers of images,
+        we take the maximum of the two in train mode and the product of the
+        two in test mode.
+        """
+        if self.mode == 'train':
+            num_A = getattr(self, 'num_of_' + self.cls_list[0] + '_data')
+            num_B = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
+            return max(num_A, num_B)
+        elif self.mode == "test":
+            num_A = getattr(self, 'num_of_' + self.cls_list[0] + '_data')
+            num_B = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
+            return num_A * num_B
diff --git a/ppgan/datasets/paired_dataset.py b/ppgan/datasets/paired_dataset.py
index 368f8371178ab771d3139103992a97abc3ee0fe8..4a68bfab210736c256389ca02c6db804ac608fe4 100644
--- a/ppgan/datasets/paired_dataset.py
+++ b/ppgan/datasets/paired_dataset.py
@@ -5,13 +5,13 @@
 from .base_dataset import BaseDataset, get_params, get_transform
 from .image_folder import make_dataset
 
 from .builder import DATASETS
+from .transforms.builder import build_transforms
 
 
 @DATASETS.register()
 class PairedDataset(BaseDataset):
     """A dataset class for paired image dataset.
     """
-
     def __init__(self, cfg):
         """Initialize this dataset class.
 
@@ -19,11 +19,14 @@ class PairedDataset(BaseDataset):
             cfg (dict) -- stores all the experiment flags
         """
         BaseDataset.__init__(self, cfg)
-        self.dir_AB = os.path.join(cfg.dataroot, cfg.phase)  # get the image directory
-        self.AB_paths = sorted(make_dataset(self.dir_AB, cfg.max_dataset_size))  # get image paths
-        assert(self.cfg.transform.load_size >= self.cfg.transform.crop_size)  # crop_size should be smaller than the size of loaded image
+        self.dir_AB = os.path.join(cfg.dataroot,
+                                   cfg.phase)  # get the image directory
+        self.AB_paths = sorted(make_dataset(
+            self.dir_AB, cfg.max_dataset_size))  # get image paths
+
         self.input_nc = self.cfg.output_nc if self.cfg.direction == 'BtoA' else self.cfg.input_nc
         self.output_nc = self.cfg.input_nc if self.cfg.direction == 'BtoA' else self.cfg.output_nc
+        self.transforms = build_transforms(cfg.transforms)
 
     def __getitem__(self, index):
         """Return a data point and its metadata information.
@@ -49,27 +52,11 @@ class PairedDataset(BaseDataset):
         A = AB[:h, :w2, :]
         B = AB[:h, w2:, :]
 
-        # apply the same transform to both A and B
-        # transform_params = get_params(self.opt, A.size)
-        transform_params = get_params(self.cfg.transform, (w2, h))
-
-        A_transform = get_transform(self.cfg.transform, transform_params, grayscale=(self.input_nc == 1))
-        B_transform = get_transform(self.cfg.transform, transform_params, grayscale=(self.output_nc == 1))
-
-        A = A_transform(A)
-        B = B_transform(B)
+        A, B = self.transforms((A, B))
 
         return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}
 
     def __len__(self):
         """Return the total number of images in the dataset."""
         return len(self.AB_paths)
-
-    def get_path_by_indexs(self, indexs):
-        if isinstance(indexs, paddle.Variable):
-            indexs = indexs.numpy()
-        current_paths = []
-        for index in indexs:
-            current_paths.append(self.AB_paths[index])
-        return current_paths
diff --git a/ppgan/datasets/transforms/__init__.py b/ppgan/datasets/transforms/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7734481de315882ca29235ddb4b63aa8d4e7a58c
--- /dev/null
+++ b/ppgan/datasets/transforms/__init__.py
@@ -0,0 +1 @@
+from .transforms import RandomCrop, Resize, RandomHorizontalFlip, PairedRandomCrop, PairedRandomHorizontalFlip, Normalize, Permute
diff --git a/ppgan/datasets/transforms/builder.py b/ppgan/datasets/transforms/builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..6dfc88a2827a82f563571d002428b49a0ce9e977
--- /dev/null
+++ b/ppgan/datasets/transforms/builder.py
@@ -0,0 +1,46 @@
+import copy
+import traceback
+
+from ...utils.registry import Registry
+
+TRANSFORMS = Registry("TRANSFORMS")
+
+
+class Compose(object):
+    """
+    Composes several transforms together; used to apply a list of dataset
+    transforms as a single callable.
+
+    Args:
+        transforms (list): List of transforms to compose.
+
+    Returns:
+        A compose object which is callable, __call__ for this Compose
+        object will call each given :attr:`transforms` sequentially.
+
+    """
+    def __init__(self, transforms):
+        self.transforms = transforms
+
+    def __call__(self, data):
+        for f in self.transforms:
+            try:
+                data = f(data)
+            except Exception as e:
+                stack_info = traceback.format_exc()
+                print("fail to perform transform [{}] with error: "
+                      "{} and stack:\n{}".format(f, e, str(stack_info)))
+                raise e
+        return data
+
+
+def build_transforms(cfg):
+    transforms = []
+
+    for trans_cfg in cfg:
+        temp_trans_cfg = copy.deepcopy(trans_cfg)
+        name = temp_trans_cfg.pop('name')
+        transforms.append(TRANSFORMS.get(name)(**temp_trans_cfg))
+
+    transforms = Compose(transforms)
+    return transforms
diff --git a/ppgan/datasets/transforms/makeup_transforms.py b/ppgan/datasets/transforms/makeup_transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..6253a6400aa44ac2c653a4d333c83066decd0402
--- /dev/null
+++ b/ppgan/datasets/transforms/makeup_transforms.py
@@ -0,0 +1,29 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.vision.transforms as T +import cv2 + + +def get_makeup_transform(cfg, pic="image"): + if pic == "image": + transform = T.Compose([ + T.Resize(size=cfg.trans_size), + T.Permute(to_rgb=False), + ]) + else: + transform = T.Resize(size=cfg.trans_size, + interpolation=cv2.INTER_NEAREST) + + return transform diff --git a/ppgan/datasets/transforms/transforms.py b/ppgan/datasets/transforms/transforms.py index fa54da01bb23b50f71ea47b795cb67d595ce76ef..f2b21564dc9d651e793028656f98d0360687a7bd 100644 --- a/ppgan/datasets/transforms/transforms.py +++ b/ppgan/datasets/transforms/transforms.py @@ -1,9 +1,95 @@ +import sys import random +import numbers +import collections +import numpy as np +from paddle.utils import try_import +import paddle.vision.transforms.functional as F -class RandomCrop(object): +from .builder import TRANSFORMS - def __init__(self, output_size): +if sys.version_info < (3, 3): + Sequence = collections.Sequence + Iterable = collections.Iterable +else: + Sequence = collections.abc.Sequence + Iterable = collections.abc.Iterable + + +class Transform(): + def _set_attributes(self, args): + """ + Set attributes from the input list of parameters. + + Args: + args (list): list of parameters. + """ + if args: + for k, v in args.items(): + if k != "self" and not k.startswith("_"): + setattr(self, k, v) + + def apply_image(self, input): + raise NotImplementedError + + def __call__(self, inputs): + if isinstance(inputs, tuple): + inputs = list(inputs) + if self.keys is not None: + for i, key in enumerate(self.keys): + if isinstance(inputs, dict): + inputs[key] = getattr(self, 'apply_' + key)(inputs[key]) + elif isinstance(inputs, (list, tuple)): + inputs[i] = getattr(self, 'apply_' + key)(inputs[i]) + else: + inputs = self.apply_image(inputs) + + if isinstance(inputs, list): + inputs = tuple(inputs) + + return inputs + + +@TRANSFORMS.register() +class Resize(Transform): + """Resize the input Image to the given size. + + Args: + size (int|list|tuple): Desired output size. If size is a sequence like + (h, w), output size will be matched to this. If size is an int, + smaller edge of the image will be matched to this number. + i.e, if height > width, then image will be rescaled to + (size * height / width, size) + interpolation (int, optional): Interpolation mode of resize. Default: 1. 
+            0 : cv2.INTER_NEAREST
+            1 : cv2.INTER_LINEAR
+            2 : cv2.INTER_CUBIC
+            3 : cv2.INTER_AREA
+            4 : cv2.INTER_LANCZOS4
+            5 : cv2.INTER_LINEAR_EXACT
+            7 : cv2.INTER_MAX
+            8 : cv2.WARP_FILL_OUTLIERS
+            16: cv2.WARP_INVERSE_MAP
+
+    """
+    def __init__(self, size, interpolation=1, keys=None):
+        super().__init__()
+        assert isinstance(size, int) or (isinstance(size, Iterable)
+                                         and len(size) == 2)
+        self._set_attributes(locals())
+        if isinstance(self.size, Iterable):
+            self.size = tuple(size)
+
+    def apply_image(self, img):
+        return F.resize(img, self.size, self.interpolation)
+
+
+@TRANSFORMS.register()
+class RandomCrop(Transform):
+    def __init__(self, output_size, keys=None):
+        super().__init__()
+        self._set_attributes(locals())
         if isinstance(output_size, int):
             self.output_size = (output_size, output_size)
         else:
@@ -19,12 +105,162 @@ class RandomCrop(object):
         j = random.randint(0, w - tw)
         return i, j, th, tw
 
-    def __call__(self, img):
+    def apply_image(self, img):
         i, j, h, w = self._get_params(img)
         cropped_img = img[i:i + h, j:j + w]
         return cropped_img
 
 
+@TRANSFORMS.register()
+class PairedRandomCrop(RandomCrop):
+    def __init__(self, output_size, keys=None):
+        super().__init__(output_size, keys)
+
+        if isinstance(output_size, int):
+            self.output_size = (output_size, output_size)
+        else:
+            self.output_size = output_size
+
+    def apply_image(self, img, crop_params=None):
+        if crop_params is not None:
+            i, j, h, w = crop_params
+        else:
+            i, j, h, w = self._get_params(img)
+        cropped_img = img[i:i + h, j:j + w]
+        return cropped_img
+
+    def __call__(self, inputs):
+        if isinstance(inputs, tuple):
+            inputs = list(inputs)
+        if self.keys is not None:
+            if isinstance(inputs, dict):
+                crop_params = self._get_params(inputs[self.keys[0]])
+            elif isinstance(inputs, (list, tuple)):
+                crop_params = self._get_params(inputs[0])
+
+            for i, key in enumerate(self.keys):
+                if isinstance(inputs, dict):
+                    inputs[key] = getattr(self, 'apply_' + key)(inputs[key],
+                                                                crop_params)
+                elif isinstance(inputs, (list, tuple)):
+                    inputs[i] = getattr(self, 'apply_' + key)(inputs[i],
+                                                              crop_params)
+        else:
+            crop_params = self._get_params(inputs)
+            inputs = self.apply_image(inputs, crop_params)
+
+        if isinstance(inputs, list):
+            inputs = tuple(inputs)
+        return inputs
+
+
+@TRANSFORMS.register()
+class RandomHorizontalFlip(Transform):
+    """Horizontally flip the input data randomly with a given probability.
+
+    Args:
+        prob (float): Probability of the input data being flipped.
Default: 0.5 + """ + def __init__(self, prob=0.5, keys=None): + super().__init__() + self._set_attributes(locals()) + + def apply_image(self, img): + if np.random.random() < self.prob: + return F.flip(img, code=1) + return img + + +@TRANSFORMS.register() +class PairedRandomHorizontalFlip(RandomHorizontalFlip): + def __init__(self, prob=0.5, keys=None): + super().__init__() + self._set_attributes(locals()) + + def apply_image(self, img, flip): + if flip: + return F.flip(img, code=1) + return img + + def __call__(self, inputs): + if isinstance(inputs, tuple): + inputs = list(inputs) + flip = np.random.random() < self.prob + if self.keys is not None: + + for i, key in enumerate(self.keys): + if isinstance(inputs, dict): + inputs[key] = getattr(self, 'apply_' + key)(inputs[key], + flip) + elif isinstance(inputs, (list, tuple)): + inputs[i] = getattr(self, 'apply_' + key)(inputs[i], flip) + else: + inputs = self.apply_image(inputs, flip) + + if isinstance(inputs, list): + inputs = tuple(inputs) + + return inputs + + +@TRANSFORMS.register() +class Normalize(Transform): + """Normalize the input data with mean and standard deviation. + Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, + this transform will normalize each channel of the input data. + ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` + + Args: + mean (int|float|list): Sequence of means for each channel. + std (int|float|list): Sequence of standard deviations for each channel. + + """ + def __init__(self, mean=0.0, std=1.0, keys=None): + super().__init__() + self._set_attributes(locals()) + + if isinstance(mean, numbers.Number): + mean = [mean, mean, mean] + + if isinstance(std, numbers.Number): + std = [std, std, std] + + self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1) + self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1) + + def apply_image(self, img): + return (img - self.mean) / self.std + + +@TRANSFORMS.register() +class Permute(Transform): + """Change input data to a target mode. + For example, most transforms use HWC mode image, + while the Neural Network might use CHW mode input tensor. + Input image should be HWC mode and an instance of numpy.ndarray. + + Args: + mode (str): Output mode of input. Default: "CHW". + to_rgb (bool): Convert 'bgr' image to 'rgb'. Default: True. 
+ + """ + def __init__(self, mode="CHW", to_rgb=True, keys=None): + super().__init__() + self._set_attributes(locals()) + assert mode in [ + "CHW" + ], "Only support 'CHW' mode, but received mode: {}".format(mode) + self.mode = mode + self.to_rgb = to_rgb + + def apply_image(self, img): + if self.to_rgb: + img = img[..., ::-1] + if self.mode == "CHW": + return img.transpose((2, 0, 1)) + return img + + class Crop(): def __init__(self, pos, size): self.pos = pos @@ -35,6 +271,6 @@ class Crop(): x, y = self.pos th = tw = self.size if (ow > tw or oh > th): - return img[y: y + th, x: x + tw] + return img[y:y + th, x:x + tw] - return img \ No newline at end of file + return img diff --git a/ppgan/datasets/unpaired_dataset.py b/ppgan/datasets/unpaired_dataset.py index 5cabc5391b84e9f6aa55e0925d4202c7b3d09418..232f7bdbbecb3c2d2d8aebe76081156567e0816d 100644 --- a/ppgan/datasets/unpaired_dataset.py +++ b/ppgan/datasets/unpaired_dataset.py @@ -5,13 +5,13 @@ from .base_dataset import BaseDataset, get_transform from .image_folder import make_dataset from .builder import DATASETS +from .transforms.builder import build_transforms @DATASETS.register() class UnpairedDataset(BaseDataset): """ """ - def __init__(self, cfg): """Initialize this dataset class. @@ -19,18 +19,25 @@ class UnpairedDataset(BaseDataset): cfg (dict) -- stores all the experiment flags """ BaseDataset.__init__(self, cfg) - self.dir_A = os.path.join(cfg.dataroot, cfg.phase + 'A') # create a path '/path/to/data/trainA' - self.dir_B = os.path.join(cfg.dataroot, cfg.phase + 'B') # create a path '/path/to/data/trainB' + self.dir_A = os.path.join(cfg.dataroot, cfg.phase + + 'A') # create a path '/path/to/data/trainA' + self.dir_B = os.path.join(cfg.dataroot, cfg.phase + + 'B') # create a path '/path/to/data/trainB' - self.A_paths = sorted(make_dataset(self.dir_A, cfg.max_dataset_size)) # load images from '/path/to/data/trainA' - self.B_paths = sorted(make_dataset(self.dir_B, cfg.max_dataset_size)) # load images from '/path/to/data/trainB' + self.A_paths = sorted(make_dataset( + self.dir_A, + cfg.max_dataset_size)) # load images from '/path/to/data/trainA' + self.B_paths = sorted(make_dataset( + self.dir_B, + cfg.max_dataset_size)) # load images from '/path/to/data/trainB' self.A_size = len(self.A_paths) # get the size of dataset A self.B_size = len(self.B_paths) # get the size of dataset B btoA = self.cfg.direction == 'BtoA' - input_nc = self.cfg.output_nc if btoA else self.cfg.input_nc # get the number of channels of input image - output_nc = self.cfg.input_nc if btoA else self.cfg.output_nc # get the number of channels of output image - self.transform_A = get_transform(self.cfg.transform, grayscale=(input_nc == 1)) - self.transform_B = get_transform(self.cfg.transform, grayscale=(output_nc == 1)) + input_nc = self.cfg.output_nc if btoA else self.cfg.input_nc # get the number of channels of input image + output_nc = self.cfg.input_nc if btoA else self.cfg.output_nc # get the number of channels of output image + + self.transform_A = build_transforms(self.cfg.transforms) + self.transform_B = build_transforms(self.cfg.transforms) self.reset_paths() @@ -49,10 +56,11 @@ class UnpairedDataset(BaseDataset): A_paths (str) -- image paths B_paths (str) -- image paths """ - A_path = self.A_paths[index % self.A_size] # make sure index is within then range - if self.cfg.serial_batches: # make sure index is within then range + A_path = self.A_paths[ + index % self.A_size] # make sure index is within then range + if self.cfg.serial_batches: # make sure 
index is within then range index_B = index % self.B_size - else: # randomize the index for domain B to avoid fixed pairs. + else: # randomize the index for domain B to avoid fixed pairs. index_B = random.randint(0, self.B_size - 1) B_path = self.B_paths[index_B] diff --git a/ppgan/faceutils/__init__.py b/ppgan/faceutils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..59f58a197c9a86fdd345c0303ab0e0b826f4888a --- /dev/null +++ b/ppgan/faceutils/__init__.py @@ -0,0 +1,17 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import dlibutils as dlib +from . import mask +from . import image diff --git a/ppgan/faceutils/dlibutils/__init__.py b/ppgan/faceutils/dlibutils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b56699f32f682fbf38b04d2052e3f560c1a933f5 --- /dev/null +++ b/ppgan/faceutils/dlibutils/__init__.py @@ -0,0 +1 @@ +from .dlib_utils import detect, crop, landmarks, crop_from_array diff --git a/ppgan/faceutils/dlibutils/dlib_utils.py b/ppgan/faceutils/dlibutils/dlib_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8f1fb87d786acf9eddc38ffb1430c219181e2b04 --- /dev/null +++ b/ppgan/faceutils/dlibutils/dlib_utils.py @@ -0,0 +1,169 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
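To make the intended call pattern concrete, here is a minimal, hypothetical usage sketch for the helpers this package exports (detect, crop, landmarks); the input file name and the crop ratios are illustrative assumptions, not values defined in this diff:

    from PIL import Image
    import ppgan.faceutils as futils

    image = Image.open('portrait.png').convert('RGB')  # hypothetical input
    faces = futils.dlib.detect(image)  # boxes mapped back to full resolution

    if faces:
        # Expand the detected box by the given ratios, then square-crop it.
        image, face, crop_face = futils.dlib.crop(
            image, faces[0], up_ratio=0.6, down_ratio=0.2, width_ratio=0.2)
        points = futils.dlib.landmarks(image, face)  # (N, 2) array, (y, x) order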
+ +import os.path as osp + +import numpy as np +from PIL import Image +import dlib +import cv2 +from ..image import resize_by_max + +detector = dlib.get_frontal_face_detector() +predictor = dlib.shape_predictor( + osp.split(osp.realpath(__file__))[0] + '/lms.dat') + + +def detect(image: Image): + image = np.asarray(image) + h, w = image.shape[:2] + image = resize_by_max(image, 361) + actual_h, actual_w = image.shape[:2] + faces_on_small = detector(image, 1) + faces = dlib.rectangles() + for face in faces_on_small: + faces.append( + dlib.rectangle(int(face.left() / actual_w * w + 0.5), + int(face.top() / actual_h * h + 0.5), + int(face.right() / actual_w * w + 0.5), + int(face.bottom() / actual_h * h + 0.5))) + return faces + + +def crop(image: Image, face, up_ratio, down_ratio, width_ratio): + width, height = image.size + face_height = face.height() + face_width = face.width() + delta_up = up_ratio * face_height + delta_down = down_ratio * face_height + delta_width = width_ratio * width + + img_left = int(max(0, face.left() - delta_width)) + img_top = int(max(0, face.top() - delta_up)) + img_right = int(min(width, face.right() + delta_width)) + img_bottom = int(min(height, face.bottom() + delta_down)) + image = image.crop((img_left, img_top, img_right, img_bottom)) + face = dlib.rectangle(face.left() - img_left, + face.top() - img_top, + face.right() - img_left, + face.bottom() - img_top) + face_expand = dlib.rectangle(img_left, img_top, img_right, img_bottom) + center = face_expand.center() + width, height = image.size + crop_left = img_left + crop_top = img_top + crop_right = img_right + crop_bottom = img_bottom + if width > height: + left = int(center.x - height / 2) + right = int(center.x + height / 2) + if left < 0: + left, right = 0, height + elif right > width: + left, right = width - height, width + image = image.crop((left, 0, right, height)) + face = dlib.rectangle(face.left() - left, face.top(), + face.right() - left, face.bottom()) + crop_left += left + crop_right = crop_left + height + elif width < height: + top = int(center.y - width / 2) + bottom = int(center.y + width / 2) + if top < 0: + top, bottom = 0, width + elif bottom > height: + top, bottom = height - width, height + image = image.crop((0, top, width, bottom)) + face = dlib.rectangle(face.left(), + face.top() - top, face.right(), + face.bottom() - top) + crop_top += top + crop_bottom = crop_top + width + crop_face = dlib.rectangle(crop_left, crop_top, crop_right, crop_bottom) + return image, face, crop_face + + +def crop_by_image_size(image: Image, face): + center = face.center() + width, height = image.size + if width > height: + left = int(center.x - height / 2) + right = int(center.x + height / 2) + if left < 0: + left, right = 0, height + elif right > width: + left, right = width - height, width + image = image.crop((left, 0, right, height)) + face = dlib.rectangle(face.left() - left, face.top(), + face.right() - left, face.bottom()) + elif width < height: + top = int(center.y - width / 2) + bottom = int(center.y + width / 2) + if top < 0: + top, bottom = 0, width + elif bottom > height: + top, bottom = height - width, height + image = image.crop((0, top, width, bottom)) + face = dlib.rectangle(face.left(), + face.top() - top, face.right(), + face.bottom() - top) + return image, face + + +def landmarks(image: Image, face): + shape = predictor(np.asarray(image), face).parts() + return np.array([[p.y, p.x] for p in shape]) + + +def crop_from_array(image: np.array, face): + ratio = 0.20 / 0.85 # delta_size / 
face_size + height, width = image.shape[:2] + face_height = face.height() + face_width = face.width() + delta_height = ratio * face_height + delta_width = ratio * width + + img_left = int(max(0, face.left() - delta_width)) + img_top = int(max(0, face.top() - delta_height)) + img_right = int(min(width, face.right() + delta_width)) + img_bottom = int(min(height, face.bottom() + delta_height)) + image = image[img_top:img_bottom, img_left:img_right] + face = dlib.rectangle(face.left() - img_left, + face.top() - img_top, + face.right() - img_left, + face.bottom() - img_top) + center = face.center() + height, width = image.shape[:2] + if width > height: + left = int(center.x - height / 2) + right = int(center.x + height / 2) + if left < 0: + left, right = 0, height + elif right > width: + left, right = width - height, width + image = image[0:height, left:right] + face = dlib.rectangle(face.left() - left, face.top(), + face.right() - left, face.bottom()) + elif width < height: + top = int(center.y - width / 2) + bottom = int(center.y + width / 2) + if top < 0: + top, bottom = 0, width + elif bottom > height: + top, bottom = height - width, height + image = image[top:bottom, 0:width] + face = dlib.rectangle(face.left(), + face.top() - top, face.right(), + face.bottom() - top) + return image, face diff --git a/ppgan/faceutils/image.py b/ppgan/faceutils/image.py new file mode 100644 index 0000000000000000000000000000000000000000..aed144f2746a7400ba3e4c540490fba5d148140b --- /dev/null +++ b/ppgan/faceutils/image.py @@ -0,0 +1,14 @@ +import numpy as np +import cv2 +from io import BytesIO + + +def resize_by_max(image, max_side=512, force=False): + h, w = image.shape[:2] + if max(h, w) < max_side and not force: + return image + ratio = max(h, w) / max_side + + w = int(w / ratio + 0.5) + h = int(h / ratio + 0.5) + return cv2.resize(image, (w, h)) diff --git a/ppgan/faceutils/mask/__init__.py b/ppgan/faceutils/mask/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..15c2e99dbf311d622e7e64d21954dfbed0e7369a --- /dev/null +++ b/ppgan/faceutils/mask/__init__.py @@ -0,0 +1,15 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
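As a quick behavioral note on resize_by_max from ppgan/faceutils/image.py above: it preserves aspect ratio, rounds to the nearest pixel, and by default only shrinks. A small self-contained check:

    import numpy as np
    from ppgan.faceutils.image import resize_by_max

    big = np.zeros((1024, 768, 3), dtype=np.uint8)
    print(resize_by_max(big, max_side=512).shape)  # (512, 384, 3): long side capped

    small = np.zeros((300, 200, 3), dtype=np.uint8)
    print(resize_by_max(small).shape)  # (300, 200, 3): untouched unless force=True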
+ +from .face_parser import FaceParser diff --git a/ppgan/faceutils/mask/face_parser.py b/ppgan/faceutils/mask/face_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..94e58e9f8791a3159b511b3b1b51f660ff5e0697 --- /dev/null +++ b/ppgan/faceutils/mask/face_parser.py @@ -0,0 +1,71 @@ +import os.path as osp + +import numpy as np +import cv2 +from PIL import Image +import paddle +import paddle.vision.transforms as T +import pickle +from .model import BiSeNet + + +class FaceParser: + def __init__(self, device="cpu"): + self.mapper = { + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 0, + 7: 11, + 8: 12, + 9: 0, + 10: 6, + 11: 8, + 12: 7, + 13: 9, + 14: 13, + 15: 0, + 16: 0, + 17: 10, + 18: 0 + } + #self.dict = paddle.to_tensor(mapper) + self.save_pth = osp.split( + osp.realpath(__file__))[0] + '/resnet.pdparams' + + self.net = BiSeNet(n_classes=19) + + self.transforms = T.Compose([ + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ]) + + def parse(self, image): + assert image.shape[:2] == (512, 512) + image = image / 255.0 + image = image.transpose((2, 0, 1)) + image = self.transforms(image) + + state_dict, _ = paddle.load(self.save_pth) + self.net.set_dict(state_dict) + self.net.eval() + + with paddle.no_grad(): + image = paddle.to_tensor(image) + image = image.unsqueeze(0) + out = self.net(image)[0] + parsing = out.squeeze(0).argmax(0) #argmax(0).astype('float32') + + #parsing = paddle.nn.functional.embedding(x=self.dict, weight=parsing) + + parse_np = parsing.numpy() + h, w = parse_np.shape + result = np.zeros((h, w)) + for i in range(h): + for j in range(w): + result[i][j] = self.mapper[parse_np[i][j]] + + result = paddle.to_tensor(result).astype('float32') + return result diff --git a/ppgan/faceutils/mask/model.py b/ppgan/faceutils/mask/model.py new file mode 100644 index 0000000000000000000000000000000000000000..d74c9ccf69422523779da67a6a40e288f24f6f35 --- /dev/null +++ b/ppgan/faceutils/mask/model.py @@ -0,0 +1,192 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
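A side note on FaceParser.parse above: the per-pixel Python loop over self.mapper can be vectorized with a NumPy lookup table; a sketch that produces the same values:

    import numpy as np

    mapper = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 0, 7: 11, 8: 12, 9: 0,
              10: 6, 11: 8, 12: 7, 13: 9, 14: 13, 15: 0, 16: 0, 17: 10, 18: 0}
    # Build the table once: lut[label] == mapper[label] for the 19 BiSeNet classes.
    lut = np.array([mapper[i] for i in range(19)], dtype=np.float32)

    parse_np = np.random.randint(0, 19, size=(512, 512))  # stand-in for out.argmax(0)
    result = lut[parse_np]  # equals the nested-loop result, in one indexing op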
+ +import paddle +from paddle import nn +import paddle.nn.functional as F + +from paddle.utils.download import get_weights_path_from_url +import numpy as np + +from .resnet import resnet18 + + +class ConvBNReLU(paddle.nn.Layer): + def __init__(self, + in_chan, + out_chan, + ks=3, + stride=1, + padding=1, + *args, + **kwargs): + super(ConvBNReLU, self).__init__() + self.conv = nn.Conv2d(in_chan, + out_chan, + kernel_size=ks, + stride=stride, + padding=padding, + bias_attr=False) + self.bn = nn.BatchNorm2d(out_chan) + self.relu = nn.ReLU() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class BiSeNetOutput(paddle.nn.Layer): + def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs): + super(BiSeNetOutput, self).__init__() + self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1) + self.conv_out = nn.Conv2d(mid_chan, + n_classes, + kernel_size=1, + bias_attr=False) + + def forward(self, x): + x = self.conv(x) + x = self.conv_out(x) + return x + + +class AttentionRefinementModule(paddle.nn.Layer): + def __init__(self, in_chan, out_chan, *args, **kwargs): + super(AttentionRefinementModule, self).__init__() + self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1) + self.conv_atten = nn.Conv2d(out_chan, + out_chan, + kernel_size=1, + bias_attr=False) + self.bn_atten = nn.BatchNorm(out_chan) + self.sigmoid_atten = nn.Sigmoid() + + def forward(self, x): + feat = self.conv(x) + atten = F.avg_pool2d(feat, feat.shape[2:]) + atten = self.conv_atten(atten) + atten = self.bn_atten(atten) + atten = self.sigmoid_atten(atten) + out = feat * atten + return out + + +class ContextPath(paddle.nn.Layer): + def __init__(self, *args, **kwargs): + super(ContextPath, self).__init__() + self.resnet = resnet18() + self.arm16 = AttentionRefinementModule(256, 128) + self.arm32 = AttentionRefinementModule(512, 128) + self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) + self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1) + self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0) + + def forward(self, x): + H0, W0 = x.shape[2:] + feat8, feat16, feat32 = self.resnet(x) + H8, W8 = feat8.shape[2:] + H16, W16 = feat16.shape[2:] + H32, W32 = feat32.shape[2:] + + avg = F.avg_pool2d(feat32, feat32.shape[2:]) + avg = self.conv_avg(avg) + avg_up = F.interpolate(avg, size=(H32, W32), mode='nearest') + + feat32_arm = self.arm32(feat32) + feat32_sum = feat32_arm + avg_up + feat32_up = F.interpolate(feat32_sum, size=(H16, W16), mode='nearest') + feat32_up = self.conv_head32(feat32_up) + + feat16_arm = self.arm16(feat16) + feat16_sum = feat16_arm + feat32_up + feat16_up = F.interpolate(feat16_sum, size=(H8, W8), mode='nearest') + feat16_up = self.conv_head16(feat16_up) + + return feat8, feat16_up, feat32_up # x8, x8, x16 + + +class SpatialPath(paddle.nn.Layer): + def __init__(self, *args, **kwargs): + super(SpatialPath, self).__init__() + self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3) + self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) + self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1) + self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0) + + def forward(self, x): + feat = self.conv1(x) + feat = self.conv2(feat) + feat = self.conv3(feat) + feat = self.conv_out(feat) + return feat + + +class FeatureFusionModule(paddle.nn.Layer): + def __init__(self, in_chan, out_chan, *args, **kwargs): + super(FeatureFusionModule, self).__init__() + self.convblk = ConvBNReLU(in_chan, out_chan, 
ks=1, stride=1, padding=0) + self.conv1 = nn.Conv2d(out_chan, + out_chan // 4, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + self.conv2 = nn.Conv2d(out_chan // 4, + out_chan, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + self.relu = nn.ReLU() + self.sigmoid = nn.Sigmoid() + + def forward(self, fsp, fcp): + fcat = paddle.concat([fsp, fcp], axis=1) + feat = self.convblk(fcat) + atten = F.avg_pool2d(feat, feat.shape[2:]) + atten = self.conv1(atten) + atten = self.relu(atten) + atten = self.conv2(atten) + atten = self.sigmoid(atten) + feat_atten = feat * atten + feat_out = feat_atten + feat + return feat_out + + +class BiSeNet(paddle.nn.Layer): + def __init__(self, n_classes, *args, **kwargs): + super(BiSeNet, self).__init__() + self.cp = ContextPath() + self.ffm = FeatureFusionModule(256, 256) + self.conv_out = BiSeNetOutput(256, 256, n_classes) + self.conv_out16 = BiSeNetOutput(128, 64, n_classes) + self.conv_out32 = BiSeNetOutput(128, 64, n_classes) + + def forward(self, x): + H, W = x.shape[2:] + feat_res8, feat_cp8, feat_cp16 = self.cp( + x) # here return res3b1 feature + feat_sp = feat_res8 # use res3b1 feature to replace spatial path feature + feat_fuse = self.ffm(feat_sp, feat_cp8) + + feat_out = self.conv_out(feat_fuse) + feat_out16 = self.conv_out16(feat_cp8) + feat_out32 = self.conv_out32(feat_cp16) + + feat_out = F.interpolate(feat_out, size=(H, W)) + feat_out16 = F.interpolate(feat_out16, size=(H, W)) + feat_out32 = F.interpolate(feat_out32, size=(H, W)) + return feat_out, feat_out16, feat_out32 diff --git a/ppgan/faceutils/mask/resnet.py b/ppgan/faceutils/mask/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..5d181bacca7375d0be9b983bc20ac85a622eea34 --- /dev/null +++ b/ppgan/faceutils/mask/resnet.py @@ -0,0 +1,119 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
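For orientation, the ResNet-18 backbone defined below feeds ContextPath with stride-8/16/32 feature maps; a quick shape check, assuming a working paddle 2.x environment:

    import paddle
    from ppgan.faceutils.mask.resnet import resnet18

    net = resnet18()
    feat8, feat16, feat32 = net(paddle.randn([1, 3, 512, 512]))
    # Expected shapes: [1, 128, 64, 64], [1, 256, 32, 32], [1, 512, 16, 16]
    print(feat8.shape, feat16.shape, feat32.shape)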
+ +from __future__ import division +from __future__ import print_function + +import paddle +from paddle import nn +import paddle.nn.functional as F + +from paddle.utils.download import get_weights_path_from_url +import numpy as np +import math + +model_urls = { + 'resnet18': ('https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams', + '0ba53eea9bc970962d0ef96f7b94057e'), +} + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias_attr=False) + + +class BasicBlock(paddle.nn.Layer): + def __init__(self, in_chan, out_chan, stride=1): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(in_chan, out_chan, stride) + self.bn1 = nn.BatchNorm(out_chan) + self.conv2 = conv3x3(out_chan, out_chan) + self.bn2 = nn.BatchNorm(out_chan) + self.relu = nn.ReLU() + self.downsample = None + if in_chan != out_chan or stride != 1: + self.downsample = nn.Sequential( + nn.Conv2d(in_chan, + out_chan, + kernel_size=1, + stride=stride, + bias_attr=False), + nn.BatchNorm(out_chan), + ) + + def forward(self, x): + residual = self.conv1(x) + residual = self.relu(self.bn1(residual)) + residual = self.conv2(residual) + residual = self.bn2(residual) + + shortcut = x + if self.downsample is not None: + shortcut = self.downsample(x) + + out = shortcut + residual + out = self.relu(out) + return out + + +def create_layer_basic(in_chan, out_chan, bnum, stride=1): + layers = [BasicBlock(in_chan, out_chan, stride=stride)] + for i in range(bnum - 1): + layers.append(BasicBlock(out_chan, out_chan, stride=1)) + return nn.Sequential(*layers) + + +class Resnet18(paddle.nn.Layer): + def __init__(self): + super(Resnet18, self).__init__() + self.conv1 = nn.Conv2d(3, + 64, + kernel_size=7, + stride=2, + padding=3, + bias_attr=False) + self.bn1 = nn.BatchNorm(64) + self.relu = nn.ReLU() + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1) + self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2) + self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2) + self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2) + + def forward(self, x): + x = self.conv1(x) + x = self.relu(self.bn1(x)) + x = self.maxpool(x) + + x = self.layer1(x) + feat8 = self.layer2(x) # 1/8 + feat16 = self.layer3(feat8) # 1/16 + feat32 = self.layer4(feat16) # 1/32 + return feat8, feat16, feat32 + + +def resnet18(pretrained=False, **kwargs): + model = Resnet18() + arch = 'resnet18' + if pretrained: + weight_path = './resnet.pdparams' + param, _ = paddle.load(weight_path) + model.set_dict(param) + + return model diff --git a/ppgan/models/__init__.py b/ppgan/models/__init__.py index 1fb4e96098b6ea230c029c8c0f0ff7ad2eb5b139..b15ff66970ee418abff43ca15dea7729536be252 100644 --- a/ppgan/models/__init__.py +++ b/ppgan/models/__init__.py @@ -1,6 +1,21 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from .base_model import BaseModel
 from .cycle_gan_model import CycleGANModel
 from .pix2pix_model import Pix2PixModel
 from .srgan_model import SRGANModel
 from .sr_model import SRModel
-
+from .makeup_model import MakeupModel
+from .vgg import vgg16
diff --git a/ppgan/models/base_model.py b/ppgan/models/base_model.py
index bfff7d224fc734285a852f12f26f7dc6451058b6..144afe05227964573dd0ed4da8469b8d71d4761a 100644
--- a/ppgan/models/base_model.py
+++ b/ppgan/models/base_model.py
@@ -26,7 +26,7 @@ class BaseModel(ABC):
     When creating your custom class, you need to implement your own initialization.
     In this function, you should first call `BaseModel.__init__(self, opt)`.
     Then, you need to define four lists:
-        -- self.loss_names (str list): specify the training losses that you want to plot and save.
+        -- self.losses (OrderedDict): specify the training losses that you want to plot and save.
         -- self.model_names (str list): define networks used in our training.
         -- self.visual_names (str list): specify the images that you want to display and save.
         -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example.
@@ -37,7 +37,7 @@ class BaseModel(ABC):
             opt.output_dir, opt.model.name)  # save all the checkpoints to save_dir
 
-        self.loss_names = []
+        self.losses = OrderedDict()
         self.model_names = []
         self.visual_names = []
         self.optimizers = []
@@ -115,13 +115,7 @@ class BaseModel(ABC):
     def get_current_losses(self):
         """Return training losses / errors. train.py will print out these errors on console, and save them to a file"""
-        errors_ret = OrderedDict()
-        for name in self.loss_names:
-            if isinstance(name, str):
-                errors_ret[name] = float(
-                    getattr(self, 'loss_' + name)
-                )  # float(...) works for both scalar tensor and float number
-        return errors_ret
+        return self.losses
 
     def set_requires_grad(self, nets, requires_grad=False):
         """Set requires_grad=False for all the networks to avoid unnecessary computations
diff --git a/ppgan/models/builder.py b/ppgan/models/builder.py
index bd2ed58f096679a00549d53b82e9e603e7208433..607f4e915f43eb85b0032981e70081ba03cb2a8c 100644
--- a/ppgan/models/builder.py
+++ b/ppgan/models/builder.py
@@ -2,18 +2,9 @@
 import paddle
 
 from ..utils.registry import Registry
-
 MODELS = Registry("MODEL")
 
 
 def build_model(cfg):
-    # dataset = MODELS.get(cfg.MODEL.name)(cfg.MODEL)
-    # place = paddle.CUDAPlace(0)
-    # dataloader = paddle.io.DataLoader(dataset,
-    #                                   batch_size=1, #opt.batch_size,
-    #                                   places=place,
-    #                                   shuffle=True, #not opt.serial_batches,
-    #                                   num_workers=0)#int(opt.num_threads))
     model = MODELS.get(cfg.model.name)(cfg)
     return model
-    # pass
\ No newline at end of file
diff --git a/ppgan/models/cycle_gan_model.py b/ppgan/models/cycle_gan_model.py
index 65247a74e759c91c9ed9ae03b53e13d792b8f63c..6246ee1fd5f149dde2d05be84f0c917d973d9f33 100644
--- a/ppgan/models/cycle_gan_model.py
+++ b/ppgan/models/cycle_gan_model.py
@@ -31,10 +31,6 @@ class CycleGANModel(BaseModel):
             opt (config)-- stores all the experiment flags; needs to be a subclass of Dict
         """
         BaseModel.__init__(self, opt)
-        # specify the training losses you want to print out. The training/test scripts will call
-        self.loss_names = [
-            'D_A', 'G_A', 'cycle_A', 'idt_A', 'D_B', 'G_B', 'cycle_B', 'idt_B'
-        ]
         # specify the images you want to save/display.
The training/test scripts will call visual_names_A = ['real_A', 'fake_B', 'rec_A'] visual_names_B = ['real_B', 'fake_A', 'rec_B'] @@ -165,11 +161,13 @@ class CycleGANModel(BaseModel): """Calculate GAN loss for discriminator D_A""" fake_B = self.fake_B_pool.query(self.fake_B) self.loss_D_A = self.backward_D_basic(self.netD_A, self.real_B, fake_B) + self.losses['D_A_loss'] = self.loss_D_A def backward_D_B(self): """Calculate GAN loss for discriminator D_B""" fake_A = self.fake_A_pool.query(self.fake_A) self.loss_D_B = self.backward_D_basic(self.netD_B, self.real_A, fake_A) + self.losses['D_B_loss'] = self.loss_D_B def backward_G(self): """Calculate the loss for generators G_A and G_B""" @@ -200,6 +198,13 @@ class CycleGANModel(BaseModel): # Backward cycle loss || G_A(G_B(B)) - B|| self.loss_cycle_B = self.criterionCycle(self.rec_B, self.real_B) * lambda_B + + self.losses['G_idt_A_loss'] = self.loss_idt_A + self.losses['G_idt_B_loss'] = self.loss_idt_B + self.losses['G_A_adv_loss'] = self.loss_G_A + self.losses['G_B_adv_loss'] = self.loss_G_B + self.losses['G_A_cycle_loss'] = self.loss_cycle_A + self.losses['G_B_cycle_loss'] = self.loss_cycle_B # combined loss and calculate gradients self.loss_G = self.loss_G_A + self.loss_G_B + self.loss_cycle_A + self.loss_cycle_B + self.loss_idt_A + self.loss_idt_B diff --git a/ppgan/models/discriminators/nlayers.py b/ppgan/models/discriminators/nlayers.py index 9a718115e4c41f491ef92900e76c50d15237f4c4..ffad8b9e2b0c040447c42e4a108a1fb95c7c0be8 100644 --- a/ppgan/models/discriminators/nlayers.py +++ b/ppgan/models/discriminators/nlayers.py @@ -1,8 +1,24 @@ -import functools -import numpy as np +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
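The PatchGAN discriminator below outputs a grid of per-patch real/fake logits rather than a single scalar; a rough shape sketch (direct instantiation, assuming the module's imports resolve in this snapshot of the codebase):

    import paddle
    from ppgan.models.discriminators.nlayers import NLayerDiscriminator

    netD = NLayerDiscriminator(input_nc=3, ndf=64, n_layers=3)
    x = paddle.randn([1, 3, 256, 256])
    # Three stride-2 convs then two stride-1 convs: 256 -> 128 -> 64 -> 32 -> 31 -> 30
    print(netD(x).shape)  # expected: [1, 1, 30, 30]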
import paddle +import functools +import numpy as np import paddle.nn as nn +import paddle.nn.functional as F + +from ...modules.nn import Spectralnorm from ...modules.norm import build_norm_layer from .builder import DISCRIMINATORS @@ -14,7 +30,7 @@ class NLayerDiscriminator(nn.Layer): def __init__(self, input_nc, ndf=64, n_layers=3, norm_type='instance'): """Construct a PatchGAN discriminator - Args: + Parameters: input_nc (int) -- the number of channels in input images ndf (int) -- the number of filters in the last conv layer n_layers (int) -- the number of conv layers in the discriminator @@ -22,49 +38,107 @@ class NLayerDiscriminator(nn.Layer): """ super(NLayerDiscriminator, self).__init__() norm_layer = build_norm_layer(norm_type) - if type(norm_layer) == functools.partial: - use_bias = norm_layer.func == nn.InstanceNorm + if type( + norm_layer + ) == functools.partial: # no need to use bias as BatchNorm2d has affine parameters + use_bias = norm_layer.func == nn.InstanceNorm2d else: - use_bias = norm_layer == nn.InstanceNorm + use_bias = norm_layer == nn.InstanceNorm2d kw = 4 padw = 1 - sequence = [ - nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), - nn.LeakyReLU(0.2) - ] + + if norm_type == 'spectral': + sequence = [ + Spectralnorm( + nn.Conv2d(input_nc, + ndf, + kernel_size=kw, + stride=2, + padding=padw)), + nn.LeakyReLU(0.01) + ] + else: + sequence = [ + nn.Conv2d(input_nc, + ndf, + kernel_size=kw, + stride=2, + padding=padw, + bias_attr=use_bias), + nn.LeakyReLU(0.2) + ] nf_mult = 1 nf_mult_prev = 1 - for n in range(1, n_layers): + for n in range(1, n_layers): # gradually increase the number of filters nf_mult_prev = nf_mult nf_mult = min(2**n, 8) + if norm_type == 'spectral': + sequence += [ + Spectralnorm( + nn.Conv2d(ndf * nf_mult_prev, + ndf * nf_mult, + kernel_size=kw, + stride=2, + padding=padw)), + nn.LeakyReLU(0.01) + ] + else: + sequence += [ + nn.Conv2d(ndf * nf_mult_prev, + ndf * nf_mult, + kernel_size=kw, + stride=2, + padding=padw, + bias_attr=use_bias), + norm_layer(ndf * nf_mult), + nn.LeakyReLU(0.2) + ] + + nf_mult_prev = nf_mult + nf_mult = min(2**n_layers, 8) + if norm_type == 'spectral': + sequence += [ + Spectralnorm( + nn.Conv2d(ndf * nf_mult_prev, + ndf * nf_mult, + kernel_size=kw, + stride=1, + padding=padw)), + nn.LeakyReLU(0.01) + ] + else: sequence += [ nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, - stride=2, + stride=1, padding=padw, bias_attr=use_bias), norm_layer(ndf * nf_mult), nn.LeakyReLU(0.2) ] - nf_mult_prev = nf_mult - nf_mult = min(2**n_layers, 8) - sequence += [ - nn.Conv2d(ndf * nf_mult_prev, - ndf * nf_mult, - kernel_size=kw, - stride=1, - padding=padw, - bias_attr=use_bias), - norm_layer(ndf * nf_mult), - nn.LeakyReLU(0.2) - ] + if norm_type == 'spectral': + sequence += [ + Spectralnorm( + nn.Conv2d(ndf * nf_mult, + 1, + kernel_size=kw, + stride=1, + padding=padw, + bias_attr=False)) + ] # output 1 channel prediction map + else: + sequence += [ + nn.Conv2d(ndf * nf_mult, + 1, + kernel_size=kw, + stride=1, + padding=padw, + bias_attr=False) + ] # output 1 channel prediction map - sequence += [ - nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw) - ] self.model = nn.Sequential(*sequence) def forward(self, input): diff --git a/ppgan/models/generators/__init__.py b/ppgan/models/generators/__init__.py index 15ac59d156f852e10fa4263fb4fd5b1fe9f7a976..4429d9891e133db9239545ce8916b9ec6a508f5f 100644 --- a/ppgan/models/generators/__init__.py +++ b/ppgan/models/generators/__init__.py @@ -1,3 +1,18 @@ 
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .resnet import ResnetGenerator from .unet import UnetGenerator -from .rrdb_net import RRDBNet \ No newline at end of file +from .rrdb_net import RRDBNet +from .makeup import GeneratorPSGANAttention diff --git a/ppgan/models/generators/makeup.py b/ppgan/models/generators/makeup.py new file mode 100644 index 0000000000000000000000000000000000000000..82c3057469d01b9f49afe12e126edb5835e4adb8 --- /dev/null +++ b/ppgan/models/generators/makeup.py @@ -0,0 +1,351 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +import functools +import numpy as np + +from ...modules.norm import build_norm_layer + +from .builder import GENERATORS + + +class PONO(paddle.nn.Layer): + def __init__(self, eps=1e-5): + super(PONO, self).__init__() + self.eps = eps + + def forward(self, x): + mean = paddle.mean(x, axis=1, keepdim=True) + var = paddle.mean(paddle.square(x - mean), axis=1, keepdim=True) + tmp = (x - mean) / paddle.sqrt(var + self.eps) + + return tmp + + +class ResidualBlock(paddle.nn.Layer): + """Residual Block with instance normalization.""" + def __init__(self, dim_in, dim_out, mode=None): + super(ResidualBlock, self).__init__() + if mode == 't': + weight_attr = False + bias_attr = False + elif mode == 'p' or (mode is None): + weight_attr = None + bias_attr = None + + self.main = nn.Sequential( + nn.Conv2d(dim_in, + dim_out, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False), + nn.InstanceNorm2d(dim_out, + weight_attr=weight_attr, + bias_attr=bias_attr), nn.ReLU(), + nn.Conv2d(dim_out, + dim_out, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False), + nn.InstanceNorm2d(dim_out, + weight_attr=weight_attr, + bias_attr=bias_attr)) + + def forward(self, x): + """forward""" + return x + self.main(x) + + +class StyleResidualBlock(paddle.nn.Layer): + """Residual Block with instance normalization.""" + def __init__(self, dim_in, dim_out): + super(StyleResidualBlock, self).__init__() + self.block1 = nn.Sequential( + nn.Conv2d(dim_in, + dim_out, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False), PONO()) + ks = 3 + pw = ks // 2 + self.beta1 = nn.Conv2d(dim_in, dim_out, kernel_size=ks, padding=pw) + self.gamma1 = nn.Conv2d(dim_in, dim_out, kernel_size=ks, padding=pw) + self.block2 = nn.Sequential( + nn.ReLU(), + nn.Conv2d(dim_out, + 
dim_out, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False), PONO()) + self.beta2 = nn.Conv2d(dim_in, dim_out, kernel_size=ks, padding=pw) + self.gamma2 = nn.Conv2d(dim_in, dim_out, kernel_size=ks, padding=pw) + + def forward(self, x, y): + """forward""" + x_ = self.block1(x) + b = self.beta1(y) + g = self.gamma1(y) + x_ = (g + 1) * x_ + b + x_ = self.block2(x_) + b = self.beta2(y) + g = self.gamma2(y) + x_ = (g + 1) * x_ + b + return x + x_ + + +class MDNet(paddle.nn.Layer): + """MDNet in PSGAN""" + def __init__(self, conv_dim=64, repeat_num=3): + super(MDNet, self).__init__() + + layers = [] + layers.append( + nn.Conv2d(3, + conv_dim, + kernel_size=7, + stride=1, + padding=3, + bias_attr=False)) + layers.append( + nn.InstanceNorm2d(conv_dim, weight_attr=None, bias_attr=None)) + + layers.append(nn.ReLU()) + + # Down-Sampling + curr_dim = conv_dim + for i in range(2): + layers.append( + nn.Conv2d(curr_dim, + curr_dim * 2, + kernel_size=4, + stride=2, + padding=1, + bias_attr=False)) + layers.append( + nn.InstanceNorm2d(curr_dim * 2, + weight_attr=None, + bias_attr=None)) + layers.append(nn.ReLU()) + curr_dim = curr_dim * 2 + + # Bottleneck + for i in range(repeat_num): + layers.append(ResidualBlock(dim_in=curr_dim, dim_out=curr_dim)) + + self.main = nn.Sequential(*layers) + + def forward(self, x): + """forward""" + out = self.main(x) + return out + + +class TNetDown(paddle.nn.Layer): + """MDNet in PSGAN""" + def __init__(self, conv_dim=64, repeat_num=3): + super(TNetDown, self).__init__() + + layers = [] + layers.append( + nn.Conv2d(3, + conv_dim, + kernel_size=7, + stride=1, + padding=3, + bias_attr=False)) + layers.append( + nn.InstanceNorm2d(conv_dim, weight_attr=False, bias_attr=False)) + + layers.append(nn.ReLU()) + + # Down-Sampling + curr_dim = conv_dim + for i in range(2): + layers.append( + nn.Conv2d(curr_dim, + curr_dim * 2, + kernel_size=4, + stride=2, + padding=1, + bias_attr=False)) + layers.append( + nn.InstanceNorm2d(curr_dim * 2, + weight_attr=False, + bias_attr=False)) + layers.append(nn.ReLU()) + curr_dim = curr_dim * 2 + + # Bottleneck + for i in range(repeat_num): + layers.append( + ResidualBlock(dim_in=curr_dim, dim_out=curr_dim, mode='t')) + + self.main = nn.Sequential(*layers) + + def forward(self, x): + """forward""" + out = self.main(x) + return out + + +class GetMatrix(paddle.fluid.dygraph.Layer): + def __init__(self, dim_in, dim_out): + super(GetMatrix, self).__init__() + self.get_gamma = nn.Conv2d(dim_in, + dim_out, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + self.get_beta = nn.Conv2d(dim_in, + dim_out, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + + def forward(self, x): + gamma = self.get_gamma(x) + beta = self.get_beta(x) + return gamma, beta + + +class MANet(paddle.nn.Layer): + """MANet in PSGAN""" + def __init__(self, conv_dim=64, repeat_num=3, w=0.01): + super(MANet, self).__init__() + self.encoder = TNetDown(conv_dim=conv_dim, repeat_num=repeat_num) + curr_dim = conv_dim * 4 + self.w = w + self.beta = nn.Conv2d(curr_dim, curr_dim, kernel_size=3, padding=1) + self.gamma = nn.Conv2d(curr_dim, curr_dim, kernel_size=3, padding=1) + self.simple_spade = GetMatrix(curr_dim, 1) # get the makeup matrix + self.repeat_num = repeat_num + for i in range(repeat_num): + setattr(self, "bottlenecks_" + str(i), + ResidualBlock(dim_in=curr_dim, dim_out=curr_dim, mode='t')) + # Up-Sampling + self.upsamplers = [] + self.up_betas = [] + self.up_gammas = [] + self.up_acts = [] + y_dim = curr_dim + for i in range(2): + layers = [] + 
layers.append( + nn.ConvTranspose2d(curr_dim, + curr_dim // 2, + kernel_size=4, + stride=2, + padding=1, + bias_attr=False)) + layers.append( + nn.InstanceNorm2d(curr_dim // 2, + weight_attr=False, + bias_attr=False)) + + setattr(self, "up_acts_" + str(i), nn.ReLU()) + setattr( + self, "up_betas_" + str(i), + nn.ConvTranspose2d(y_dim, + curr_dim // 2, + kernel_size=4, + stride=2, + padding=1)) + setattr( + self, "up_gammas_" + str(i), + nn.ConvTranspose2d(y_dim, + curr_dim // 2, + kernel_size=4, + stride=2, + padding=1)) + setattr(self, "up_samplers_" + str(i), nn.Sequential(*layers)) + curr_dim = curr_dim // 2 + self.img_reg = [ + nn.Conv2d(curr_dim, + 3, + kernel_size=7, + stride=1, + padding=3, + bias_attr=False) + ] + self.img_reg = nn.Sequential(*self.img_reg) + + def forward(self, x, y, x_p, y_p, consistency_mask, mask_x, mask_y): + """forward""" + # y -> ref feature + # x -> src img + x = self.encoder(x) + _, c, h, w = x.shape + x_flat = x.reshape([-1, c, h * w]) + x_flat = self.w * x_flat + if x_p is not None: + x_flat = paddle.concat([x_flat, x_p], axis=1) + + _, c2, h2, w2 = y.shape + y_flat = y.reshape([-1, c2, h2 * w2]) + y_flat = self.w * y_flat + if y_p is not None: + y_flat = paddle.concat([y_flat, y_p], axis=1) + a_ = paddle.matmul(x_flat, y_flat, transpose_x=True) * 200.0 + + # mask softmax + if consistency_mask is not None: + a_ = a_ - 100.0 * (1 - consistency_mask) + a = F.softmax(a_, axis=-1) + + gamma, beta = self.simple_spade(y) + + beta = beta.reshape([-1, h2 * w2, 1]) + beta = paddle.matmul(a, beta) + beta = beta.reshape([-1, 1, h2, w2]) + gamma = gamma.reshape([-1, h2 * w2, 1]) + gamma = paddle.matmul(a, gamma) + gamma = gamma.reshape([-1, 1, h2, w2]) + x = x * (1 + gamma) + beta + + for i in range(self.repeat_num): + layer = getattr(self, "bottlenecks_" + str(i)) + x = layer(x) + + for idx in range(2): + layer = getattr(self, "up_samplers_" + str(idx)) + x = layer(x) + layer = getattr(self, "up_acts_" + str(idx)) + x = layer(x) + x = self.img_reg(x) + x = paddle.tanh(x) + return x, a + + +@GENERATORS.register() +class GeneratorPSGANAttention(paddle.nn.Layer): + def __init__(self, conv_dim=64, repeat_num=3): + super(GeneratorPSGANAttention, self).__init__() + self.ma_net = MANet(conv_dim=conv_dim, repeat_num=repeat_num) + self.md_net = MDNet(conv_dim=conv_dim, repeat_num=repeat_num) + + def forward(self, x, y, x_p, y_p, consistency_mask, mask_x, mask_y): + """forward""" + y = self.md_net(y) + out, a = self.ma_net(x, y, x_p, y_p, consistency_mask, mask_x, mask_y) + return out, a diff --git a/ppgan/models/losses.py b/ppgan/models/losses.py index 28c77ca4c49ab5c0aae718830868ecc396236cb8..baa67fdadb02ee72f82984867002d50335216477 100644 --- a/ppgan/models/losses.py +++ b/ppgan/models/losses.py @@ -45,19 +45,18 @@ class GANLoss(nn.Layer): Returns: A label tensor filled with ground truth label, and with the size of the input """ - if target_is_real: if not hasattr(self, 'target_real_tensor'): - self.target_real_tensor = paddle.fill_constant( + self.target_real_tensor = paddle.full( shape=paddle.shape(prediction), - value=self.target_real_label, + fill_value=self.target_real_label, dtype='float32') target_tensor = self.target_real_tensor else: if not hasattr(self, 'target_fake_tensor'): - self.target_fake_tensor = paddle.fill_constant( + self.target_fake_tensor = paddle.full( shape=paddle.shape(prediction), - value=self.target_fake_label, + fill_value=self.target_fake_label, dtype='float32') target_tensor = self.target_fake_tensor diff --git a/ppgan/models/makeup_model.py 
b/ppgan/models/makeup_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..b0f60cd7524005d62c46e17b4b4458a14bdbf77d
--- /dev/null
+++ b/ppgan/models/makeup_model.py
@@ -0,0 +1,407 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from .base_model import BaseModel
+
+from .builder import MODELS
+from .generators.builder import build_generator
+from .discriminators.builder import build_discriminator
+from .losses import GANLoss
+from ..modules.init import init_weights
+from ..solver import build_optimizer
+from ..utils.image_pool import ImagePool
+from ..utils.preprocess import *
+from ..datasets.makeup_dataset import MakeupDataset
+import numpy as np
+from .vgg import vgg16
+
+
+@MODELS.register()
+class MakeupModel(BaseModel):
+    """
+    This class implements the PSGAN makeup transfer model, which learns
+    makeup transfer between unpaired face images with a CycleGAN-style
+    objective (adversarial, cycle, identity, histogram and VGG losses).
+
+    PSGAN paper: https://arxiv.org/pdf/1909.06956.pdf
+    """
+    def __init__(self, opt):
+        """Initialize the MakeupModel class.
+
+        Parameters:
+            opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions
+        """
+        BaseModel.__init__(self, opt)
+        # specify the training losses you want to print out. The training/test scripts will call get_current_losses.
+        # specify the images you want to save/display. The training/test scripts will call get_current_visuals.
+        visual_names_A = ['real_A', 'fake_A', 'rec_A']
+        visual_names_B = ['real_B', 'fake_B', 'rec_B']
+        if self.isTrain and self.opt.lambda_identity > 0.0:  # if identity loss is used, we also visualize idt_A=G(A, A) and idt_B=G(B, B)
+            visual_names_A.append('idt_B')
+            visual_names_B.append('idt_A')
+
+        self.visual_names = visual_names_A + visual_names_B  # combine visualizations for A and B
+        self.vgg = vgg16(pretrained=True)
+        # specify the models you want to save to the disk. The training/test scripts will call save_networks and load_networks.
+        if self.isTrain:
+            self.model_names = ['G', 'D_A', 'D_B']
+        else:  # during test time, only load Gs
+            self.model_names = ['G']
+
+        # define networks (both Generators and discriminators)
+        # The naming is different from those used in the paper.
+        # Code (vs.
paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X) + self.netG = build_generator(opt.model.generator) + init_weights(self.netG, init_type='xavier', init_gain=1.0) + + if self.isTrain: # define discriminators + self.netD_A = build_discriminator(opt.model.discriminator) + self.netD_B = build_discriminator(opt.model.discriminator) + init_weights(self.netD_A, init_type='xavier', init_gain=1.0) + init_weights(self.netD_B, init_type='xavier', init_gain=1.0) + + if self.isTrain: + self.fake_A_pool = ImagePool( + opt.dataset.train.pool_size + ) # create image buffer to store previously generated images + self.fake_B_pool = ImagePool( + opt.dataset.train.pool_size + ) # create image buffer to store previously generated images + # define loss functions + self.criterionGAN = GANLoss( + opt.model.gan_mode) #.to(self.device) # define GAN loss. + self.criterionCycle = paddle.nn.L1Loss() + self.criterionIdt = paddle.nn.L1Loss() + self.criterionL1 = paddle.nn.L1Loss() + self.criterionL2 = paddle.nn.MSELoss() + + self.build_lr_scheduler() + self.optimizer_G = build_optimizer( + opt.optimizer, + self.lr_scheduler, + parameter_list=self.netG.parameters()) + # self.optimizer_D = paddle.optimizer.Adam(learning_rate=lr_scheduler_d, parameter_list=self.netD_A.parameters() + self.netD_B.parameters(), beta1=opt.beta1) + self.optimizer_DA = build_optimizer( + opt.optimizer, + self.lr_scheduler, + parameter_list=self.netD_A.parameters()) + self.optimizer_DB = build_optimizer( + opt.optimizer, + self.lr_scheduler, + parameter_list=self.netD_B.parameters()) + self.optimizers.append(self.optimizer_G) + # self.optimizers.append(self.optimizer_D) + self.optimizers.append(self.optimizer_DA) + self.optimizers.append(self.optimizer_DB) + self.optimizer_names.extend( + ['optimizer_G', 'optimizer_DA', 'optimizer_DB']) + + def set_input(self, input): + """Unpack input data from the dataloader and perform necessary pre-processing steps. + + Parameters: + input (dict): include the data itself and its metadata information. + + The option 'direction' can be used to swap domain A and domain B. 
+ """ + self.real_A = paddle.to_tensor(input['image_A']) + self.real_B = paddle.to_tensor(input['image_B']) + self.c_m = paddle.to_tensor(input['consis_mask']) + self.P_A = paddle.to_tensor(input['P_A']) + self.P_B = paddle.to_tensor(input['P_B']) + self.mask_A_aug = paddle.to_tensor(input['mask_A_aug']) + self.mask_B_aug = paddle.to_tensor(input['mask_B_aug']) + self.c_m_t = paddle.transpose(self.c_m, perm=[0, 2, 1]) + if self.isTrain: + self.mask_A = paddle.to_tensor(input['mask_A']) + self.mask_B = paddle.to_tensor(input['mask_B']) + self.c_m_idt_a = paddle.to_tensor(input['consis_mask_idt_A']) + self.c_m_idt_b = paddle.to_tensor(input['consis_mask_idt_B']) + + #self.hm_gt_A = self.hm_gt_A_lip + self.hm_gt_A_skin + self.hm_gt_A_eye + #self.hm_gt_B = self.hm_gt_B_lip + self.hm_gt_B_skin + self.hm_gt_B_eye + + def forward(self): + """Run forward pass; called by both functions and .""" + self.fake_A, amm = self.netG(self.real_A, self.real_B, self.P_A, + self.P_B, self.c_m, self.mask_A_aug, + self.mask_B_aug) # G_A(A) + self.fake_B, _ = self.netG(self.real_B, self.real_A, self.P_B, self.P_A, + self.c_m_t, self.mask_A_aug, + self.mask_B_aug) # G_A(A) + self.rec_A, _ = self.netG(self.fake_A, self.real_A, self.P_A, self.P_A, + self.c_m_idt_a, self.mask_A_aug, + self.mask_B_aug) # G_A(A) + self.rec_B, _ = self.netG(self.fake_B, self.real_B, self.P_B, self.P_B, + self.c_m_idt_b, self.mask_A_aug, + self.mask_B_aug) # G_A(A) + + def forward_test(self, input): + ''' + not implement now + ''' + return self.netG(input['image_A'], input['image_B'], input['P_A'], + input['P_B'], input['consis_mask'], + input['mask_A_aug'], input['mask_B_aug']) + + def test(self, input): + """Forward function used in test time. + + This function wraps function in no_grad() so we don't save intermediate steps for backprop + It also calls to produce additional visualization results + """ + with paddle.no_grad(): + return self.forward_test(input) + + def backward_D_basic(self, netD, real, fake): + """Calculate GAN loss for the discriminator + + Parameters: + netD (network) -- the discriminator D + real (tensor array) -- real images + fake (tensor array) -- images generated by a generator + + Return the discriminator loss. + We also call loss_D.backward() to calculate the gradients. + """ + # Real + pred_real = netD(real) + loss_D_real = self.criterionGAN(pred_real, True) + # Fake + pred_fake = netD(fake.detach()) + loss_D_fake = self.criterionGAN(pred_fake, False) + # Combined loss and calculate gradients + loss_D = (loss_D_real + loss_D_fake) * 0.5 + loss_D.backward() + return loss_D + + def backward_D_A(self): + """Calculate GAN loss for discriminator D_A""" + fake_B = self.fake_B_pool.query(self.fake_B) + self.loss_D_A = self.backward_D_basic(self.netD_A, self.real_B, fake_B) + self.losses['D_A_loss'] = self.loss_D_A + + def backward_D_B(self): + """Calculate GAN loss for discriminator D_B""" + fake_A = self.fake_A_pool.query(self.fake_A) + self.loss_D_B = self.backward_D_basic(self.netD_B, self.real_A, fake_A) + self.losses['D_B_loss'] = self.loss_D_B + + def backward_G(self): + """Calculate the loss for generators G_A and G_B""" + ''' + self.loss_names = [ + 'G_A_vgg', + 'G_B_vgg', + 'G_bg_consis' + ] + # specify the images you want to save/display. 
The training/test scripts will call + visual_names_A = ['real_A', 'fake_B', 'rec_A', 'amm_a'] + visual_names_B = ['real_B', 'fake_A', 'rec_B', 'amm_b'] + ''' + lambda_idt = self.opt.lambda_identity + lambda_A = self.opt.lambda_A + lambda_B = self.opt.lambda_B + lambda_vgg = 5e-3 + # Identity loss + if lambda_idt > 0: + self.idt_A, _ = self.netG(self.real_A, self.real_A, self.P_A, + self.P_A, self.c_m_idt_a, self.mask_A_aug, + self.mask_B_aug) # G_A(A) + self.loss_idt_A = self.criterionIdt( + self.idt_A, self.real_A) * lambda_A * lambda_idt + self.idt_B, _ = self.netG(self.real_B, self.real_B, self.P_B, + self.P_B, self.c_m_idt_b, self.mask_A_aug, + self.mask_B_aug) # G_A(A) + self.loss_idt_B = self.criterionIdt( + self.idt_B, self.real_B) * lambda_B * lambda_idt + else: + self.loss_idt_A = 0 + self.loss_idt_B = 0 + + # GAN loss D_A(G_A(A)) + self.loss_G_A = self.criterionGAN(self.netD_A(self.fake_A), True) + # GAN loss D_B(G_B(B)) + self.loss_G_B = self.criterionGAN(self.netD_B(self.fake_B), True) + # Forward cycle loss || G_B(G_A(A)) - A|| + self.loss_cycle_A = self.criterionCycle(self.rec_A, + self.real_A) * lambda_A + # Backward cycle loss || G_A(G_B(B)) - B|| + self.loss_cycle_B = self.criterionCycle(self.rec_B, + self.real_B) * lambda_B + + self.losses['G_A_adv_loss'] = self.loss_G_A + self.losses['G_B_adv_loss'] = self.loss_G_B + + mask_A_lip = self.mask_A_aug[:, 0].unsqueeze(1) + mask_B_lip = self.mask_B_aug[:, 0].unsqueeze(1) + + mask_A_lip_np = mask_A_lip.numpy().squeeze() + mask_B_lip_np = mask_B_lip.numpy().squeeze() + mask_A_lip_np, mask_B_lip_np, index_A_lip, index_B_lip = mask_preprocess( + mask_A_lip_np, mask_B_lip_np) + real_A = paddle.nn.clip((self.real_A + 1.0) / 2.0, 0.0, 1.0) * 255.0 + real_A_np = real_A.numpy().squeeze() + real_B = paddle.nn.clip((self.real_B + 1.0) / 2.0, 0.0, 1.0) * 255.0 + real_B_np = real_B.numpy().squeeze() + fake_A = paddle.nn.clip((self.fake_A + 1.0) / 2.0, 0.0, 1.0) * 255.0 + fake_A_np = fake_A.numpy().squeeze() + fake_B = paddle.nn.clip((self.fake_B + 1.0) / 2.0, 0.0, 1.0) * 255.0 + fake_B_np = fake_B.numpy().squeeze() + + fake_match_lip_A = hisMatch(fake_A_np, real_B_np, mask_A_lip_np, + mask_B_lip_np, index_A_lip) + fake_match_lip_B = hisMatch(fake_B_np, real_A_np, mask_B_lip_np, + mask_A_lip_np, index_B_lip) + fake_match_lip_A = paddle.to_tensor(fake_match_lip_A) + fake_match_lip_A.stop_gradient = True + fake_match_lip_A = fake_match_lip_A.unsqueeze(0) + fake_match_lip_B = paddle.to_tensor(fake_match_lip_B) + fake_match_lip_B.stop_gradient = True + fake_match_lip_B = fake_match_lip_B.unsqueeze(0) + fake_A_lip_masked = fake_A * mask_A_lip + fake_B_lip_masked = fake_B * mask_B_lip + g_A_lip_loss_his = self.criterionL1(fake_A_lip_masked, fake_match_lip_A) + g_B_lip_loss_his = self.criterionL1(fake_B_lip_masked, fake_match_lip_B) + + #skin + mask_A_skin = self.mask_A_aug[:, 1].unsqueeze(1) + mask_B_skin = self.mask_B_aug[:, 1].unsqueeze(1) + + mask_A_skin_np = mask_A_skin.numpy().squeeze() + mask_B_skin_np = mask_B_skin.numpy().squeeze() + mask_A_skin_np, mask_B_skin_np, index_A_skin, index_B_skin = mask_preprocess( + mask_A_skin_np, mask_B_skin_np) + + fake_match_skin_A = hisMatch(fake_A_np, real_B_np, mask_A_skin_np, + mask_B_skin_np, index_A_skin) + fake_match_skin_B = hisMatch(fake_B_np, real_A_np, mask_B_skin_np, + mask_A_skin_np, index_B_skin) + fake_match_skin_A = paddle.to_tensor(fake_match_skin_A) + fake_match_skin_A.stop_gradient = True + fake_match_skin_A = fake_match_skin_A.unsqueeze(0) + fake_match_skin_B = 
+        # skin
+        mask_A_skin = self.mask_A_aug[:, 1].unsqueeze(1)
+        mask_B_skin = self.mask_B_aug[:, 1].unsqueeze(1)
+
+        mask_A_skin_np = mask_A_skin.numpy().squeeze()
+        mask_B_skin_np = mask_B_skin.numpy().squeeze()
+        mask_A_skin_np, mask_B_skin_np, index_A_skin, index_B_skin = mask_preprocess(
+            mask_A_skin_np, mask_B_skin_np)
+
+        fake_match_skin_A = hisMatch(fake_A_np, real_B_np, mask_A_skin_np,
+                                     mask_B_skin_np, index_A_skin)
+        fake_match_skin_B = hisMatch(fake_B_np, real_A_np, mask_B_skin_np,
+                                     mask_A_skin_np, index_B_skin)
+        fake_match_skin_A = paddle.to_tensor(fake_match_skin_A)
+        fake_match_skin_A.stop_gradient = True
+        fake_match_skin_A = fake_match_skin_A.unsqueeze(0)
+        fake_match_skin_B = paddle.to_tensor(fake_match_skin_B)
+        fake_match_skin_B.stop_gradient = True
+        fake_match_skin_B = fake_match_skin_B.unsqueeze(0)
+        fake_A_skin_masked = fake_A * mask_A_skin
+        fake_B_skin_masked = fake_B * mask_B_skin
+        g_A_skin_loss_his = self.criterionL1(fake_A_skin_masked,
+                                             fake_match_skin_A)
+        g_B_skin_loss_his = self.criterionL1(fake_B_skin_masked,
+                                             fake_match_skin_B)
+
+        # eye
+        mask_A_eye = self.mask_A_aug[:, 2].unsqueeze(1)
+        mask_B_eye = self.mask_B_aug[:, 2].unsqueeze(1)
+
+        mask_A_eye_np = mask_A_eye.numpy().squeeze()
+        mask_B_eye_np = mask_B_eye.numpy().squeeze()
+        mask_A_eye_np, mask_B_eye_np, index_A_eye, index_B_eye = mask_preprocess(
+            mask_A_eye_np, mask_B_eye_np)
+
+        fake_match_eye_A = hisMatch(fake_A_np, real_B_np, mask_A_eye_np,
+                                    mask_B_eye_np, index_A_eye)
+        fake_match_eye_B = hisMatch(fake_B_np, real_A_np, mask_B_eye_np,
+                                    mask_A_eye_np, index_B_eye)
+        fake_match_eye_A = paddle.to_tensor(fake_match_eye_A)
+        fake_match_eye_A.stop_gradient = True
+        fake_match_eye_A = fake_match_eye_A.unsqueeze(0)
+        fake_match_eye_B = paddle.to_tensor(fake_match_eye_B)
+        fake_match_eye_B.stop_gradient = True
+        fake_match_eye_B = fake_match_eye_B.unsqueeze(0)
+        fake_A_eye_masked = fake_A * mask_A_eye
+        fake_B_eye_masked = fake_B * mask_B_eye
+        g_A_eye_loss_his = self.criterionL1(fake_A_eye_masked, fake_match_eye_A)
+        g_B_eye_loss_his = self.criterionL1(fake_B_eye_masked, fake_match_eye_B)
+
+        self.loss_G_A_his = (g_A_eye_loss_his + g_A_lip_loss_his +
+                             g_A_skin_loss_his * 0.1) * 0.01
+        self.loss_G_B_his = (g_B_eye_loss_his + g_B_lip_loss_his +
+                             g_B_skin_loss_his * 0.1) * 0.01
+
+        self.losses['G_A_his_loss'] = self.loss_G_A_his
+        self.losses['G_B_his_loss'] = self.loss_G_B_his
+
+        # vgg perceptual loss
+        vgg_s = self.vgg(self.real_A)
+        vgg_s.stop_gradient = True
+        vgg_fake_A = self.vgg(self.fake_A)
+        self.loss_A_vgg = self.criterionL2(vgg_fake_A,
+                                           vgg_s) * lambda_A * lambda_vgg
+
+        vgg_r = self.vgg(self.real_B)
+        vgg_r.stop_gradient = True
+        vgg_fake_B = self.vgg(self.fake_B)
+        self.loss_B_vgg = self.criterionL2(vgg_fake_B,
+                                           vgg_r) * lambda_B * lambda_vgg
+
+        self.loss_rec = (self.loss_cycle_A + self.loss_cycle_B +
+                         self.loss_A_vgg + self.loss_B_vgg) * 0.2
+        self.loss_idt = (self.loss_idt_A + self.loss_idt_B) * 0.2
+
+        self.losses['G_A_vgg_loss'] = self.loss_A_vgg
+        self.losses['G_B_vgg_loss'] = self.loss_B_vgg
+        self.losses['G_rec_loss'] = self.loss_rec
+        self.losses['G_idt_loss'] = self.loss_idt
+
+        # background consistency loss: pixels with parsing labels 0, 8 and 10
+        # (non-makeup regions) should be unchanged between real_A and fake_A
+        mask_A_consis = paddle.cast(
+            (self.mask_A == 0), dtype='float32') + paddle.cast(
+                (self.mask_A == 10), dtype='float32') + paddle.cast(
+                    (self.mask_A == 8), dtype='float32')
+        mask_A_consis = paddle.unsqueeze(paddle.clip(mask_A_consis, 0, 1), 1)
+        self.loss_G_bg_consis = self.criterionL1(
+            self.real_A * mask_A_consis, self.fake_A * mask_A_consis) * 0.1
+
+        # combined loss and calculate gradients
+        self.loss_G = (self.loss_G_A + self.loss_G_B + self.loss_rec +
+                       self.loss_idt + self.loss_G_A_his +
+                       self.loss_G_B_his + self.loss_G_bg_consis)
+        self.loss_G.backward()
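The lip, skin and eye blocks above repeat one pattern: extract the region mask, histogram-match the generated region to the reference image, and take a masked L1 loss against that fixed target. A hypothetical helper (not in the patch) capturing the pattern:

```python
import paddle
from ppgan.utils.preprocess import hisMatch  # added later in this same diff

def region_his_loss(fake, fake_np, real_np, mask, mask_np, mask_other_np,
                    index, criterion_l1):
    """Masked L1 loss between a fake image and its histogram-matched target."""
    matched = hisMatch(fake_np, real_np, mask_np, mask_other_np, index)
    matched = paddle.to_tensor(matched).unsqueeze(0)
    matched.stop_gradient = True  # the matched image is a fixed target
    return criterion_l1(fake * mask, matched)

# e.g. g_A_lip_loss_his = region_his_loss(fake_A, fake_A_np, real_B_np,
#          mask_A_lip, mask_A_lip_np, mask_B_lip_np, index_A_lip, self.criterionL1)
```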
+    def optimize_parameters(self):
+        """Calculate losses, gradients, and update network weights; called in every training iteration"""
+        # forward
+        self.forward()  # compute fake images and reconstruction images
+        # G_A and G_B
+        self.set_requires_grad(
+            [self.netD_A, self.netD_B],
+            False)  # Ds require no gradients when optimizing Gs
+        self.backward_G()  # calculate gradients for G_A and G_B
+        self.optimizer_G.minimize(self.loss_G)  # update G's weights
+        self.optimizer_G.clear_gradients()
+        # D_A
+        self.set_requires_grad(self.netD_A, True)
+        self.backward_D_A()  # calculate gradients for D_A
+        self.optimizer_DA.minimize(self.loss_D_A)  # update D_A's weights
+        self.optimizer_DA.clear_gradients()
+        # D_B
+        self.set_requires_grad(self.netD_B, True)
+        self.backward_D_B()  # calculate gradients for D_B
+        self.optimizer_DB.minimize(self.loss_D_B)  # update D_B's weights
+        self.optimizer_DB.clear_gradients()
diff --git a/ppgan/models/pix2pix_model.py b/ppgan/models/pix2pix_model.py
index c68926fdd20ade2f1d4cc02297371ac42fd3eacd..7801d8ca51f5527c9a05200a68466f82295f401f 100644
--- a/ppgan/models/pix2pix_model.py
+++ b/ppgan/models/pix2pix_model.py
@@ -31,7 +31,6 @@ class Pix2PixModel(BaseModel):
         """
         BaseModel.__init__(self, opt)
         # specify the training losses you want to print out. The training/test scripts will call
-        self.loss_names = ['G_GAN', 'G_L1', 'D_real', 'D_fake']
         # specify the images you want to save/display. The training/test scripts will call
         self.visual_names = ['real_A', 'fake_B', 'real_B']
         # specify the models you want to save to the disk.
@@ -81,8 +80,8 @@ class Pix2PixModel(BaseModel):
         """
         AtoB = self.opt.dataset.train.direction == 'AtoB'
-        self.real_A = paddle.to_variable(input['A' if AtoB else 'B'])
-        self.real_B = paddle.to_variable(input['B' if AtoB else 'A'])
+        self.real_A = paddle.to_tensor(input['A' if AtoB else 'B'])
+        self.real_B = paddle.to_tensor(input['B' if AtoB else 'A'])
         self.image_paths = input['A_paths' if AtoB else 'B_paths']
 
@@ -114,6 +113,9 @@ class Pix2PixModel(BaseModel):
         else:
             self.loss_D.backward()
 
+        self.losses['D_fake_loss'] = self.loss_D_fake
+        self.losses['D_real_loss'] = self.loss_D_real
+
     def backward_G(self):
         """Calculate GAN and L1 loss for the generator"""
         # First, G(A) should fake the discriminator
@@ -134,6 +136,9 @@ class Pix2PixModel(BaseModel):
         else:
             self.loss_G.backward()
 
+        self.losses['G_adv_loss'] = self.loss_G_GAN
+        self.losses['G_L1_loss'] = self.loss_G_L1
+
     def optimize_parameters(self):
         # compute fake images: G(A)
         self.forward()
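Both models follow the same update order: freeze the discriminators, step the generator, then unfreeze and step each discriminator on detached fakes. A sketch of that alternation (the nets, optimizers and criterion are assumed arguments, mirroring the code above):

```python
import paddle

def train_step(net_g, net_d, opt_g, opt_d, criterion_gan, real_src, real_ref):
    """One GAN iteration: G step with D frozen, then D step on detached fakes."""
    fake = net_g(real_src)

    # 1) generator update, discriminator frozen
    for p in net_d.parameters():
        p.stop_gradient = True
    loss_g = criterion_gan(net_d(fake), True)
    loss_g.backward()
    opt_g.minimize(loss_g)  # the 2.0-rc dygraph API used throughout this patch
    opt_g.clear_gradients()

    # 2) discriminator update on real vs. detached fake
    for p in net_d.parameters():
        p.stop_gradient = False
    loss_d = 0.5 * (criterion_gan(net_d(real_ref), True) +
                    criterion_gan(net_d(fake.detach()), False))
    loss_d.backward()
    opt_d.minimize(loss_d)
    opt_d.clear_gradients()
    return loss_g, loss_d
```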
diff --git a/ppgan/models/vgg.py b/ppgan/models/vgg.py
new file mode 100644
index 0000000000000000000000000000000000000000..38337f9e9d51806a9d241c06b2d317fe93e55942
--- /dev/null
+++ b/ppgan/models/vgg.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+from paddle.utils.download import get_weights_path_from_url
+from paddle.vision.models.vgg import make_layers
+
+cfg = [
+    64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512,
+    512, 512, 'M'
+]
+
+model_urls = {
+    'vgg16': ('https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams',
+              '89bbffc0f87d260be9b8cdc169c991c4')
+}
+
+
+class VGG(nn.Layer):
+    def __init__(self, features):
+        super(VGG, self).__init__()
+        self.features = features
+
+    def forward(self, x):
+        x = self.features(x)
+        return x
+
+
+def vgg16(pretrained=False):
+    features = make_layers(cfg)
+    model = VGG(features)
+
+    if pretrained:
+        weight_path = get_weights_path_from_url(model_urls['vgg16'][0],
+                                                model_urls['vgg16'][1])
+        param = paddle.load(weight_path)
+        model.load_dict(param)
+
+    return model
diff --git a/ppgan/modules/init.py b/ppgan/modules/init.py
index 37e4257a1706d9cc2cdd7fcb50719ef8bdb577bc..7f85687a9e28c814d24ef73715b3af9315d871ff 100644
--- a/ppgan/modules/init.py
+++ b/ppgan/modules/init.py
@@ -80,7 +80,7 @@ def calculate_gain(nonlinearity, param=None):
 
 @paddle.no_grad()
 def constant_(x, value):
-    temp_value = paddle.fill_constant(x.shape, x.dtype, value)
+    temp_value = paddle.full(x.shape, value, x.dtype)
     x.set_value(temp_value)
     return x
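The new vgg.py keeps only the convolutional stack, which is exactly what the perceptual loss in backward_G consumes. A quick sanity-check of how it is used (a sketch; input normalization is assumed to match whatever the training pipeline feeds the network, and pretrained=True would download the hapi weights):

```python
import paddle
from ppgan.models.vgg import vgg16

model = vgg16(pretrained=False)  # pretrained=True loads the vgg16.pdparams weights
model.eval()

x = paddle.rand([1, 3, 256, 256])  # dummy image batch
feats = model(x)                   # conv features only, no classifier head
print(feats.shape)                 # [1, 512, 8, 8]: 256 / 2^5 after the five 'M' pools
```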
diff --git a/ppgan/modules/nn.py b/ppgan/modules/nn.py
index 81178b12b0e774aba6c62c96bd6260311e1c42d5..65ba6ed9203d4fd7f39c8256dc6d11d1b4552c3b 100644
--- a/ppgan/modules/nn.py
+++ b/ppgan/modules/nn.py
@@ -1,5 +1,20 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import paddle
 import paddle.nn as nn
+import math
 
 
 class _SpectralNorm(nn.SpectralNorm):
diff --git a/ppgan/modules/norm.py b/ppgan/modules/norm.py
index 66833fcf01e93266fd920d7e9072856f95de6c78..67b7b43729dec3e635b0d2f21c4497906ed270b8 100644
--- a/ppgan/modules/norm.py
+++ b/ppgan/modules/norm.py
@@ -1,6 +1,7 @@
 import paddle
 import functools
 import paddle.nn as nn
+from .nn import Spectralnorm
 
 
 class Identity(nn.Layer):
@@ -35,6 +36,8 @@ def build_norm_layer(norm_type='instance'):
                              bias_attr=paddle.ParamAttr(initializer=nn.initializer.Constant(0.0),
                                                         learning_rate=0.0,
                                                         trainable=False))
+    elif norm_type == 'spectral':
+        norm_layer = functools.partial(Spectralnorm)
     elif norm_type == 'none':
 
         def norm_layer(x):
diff --git a/ppgan/solver/lr_scheduler.py b/ppgan/solver/lr_scheduler.py
index 3c17e3da0f06848ec446a5fbf141762eeae0b918..39b6389ebd3ea09186ee1d04e497a8bc27a3165b 100644
--- a/ppgan/solver/lr_scheduler.py
+++ b/ppgan/solver/lr_scheduler.py
@@ -12,25 +12,8 @@ def build_lr_scheduler(cfg):
                 0, epoch + 1 - cfg.start_epoch) / float(cfg.decay_epochs + 1)
             return lr_l
 
-        scheduler = paddle.optimizer.lr_scheduler.LambdaLR(
-            cfg.learning_rate, lr_lambda=lambda_rule)
+        scheduler = paddle.optimizer.lr.LambdaDecay(cfg.learning_rate,
+                                                    lr_lambda=lambda_rule)
         return scheduler
     else:
         raise NotImplementedError
-
-
-# paddle.optimizer.lr_scheduler
-class LinearDecay(paddle.optimizer.lr_scheduler._LRScheduler):
-    def __init__(self, learning_rate, step_per_epoch, start_epoch,
-                 decay_epochs):
-        super(LinearDecay, self).__init__()
-        self.learning_rate = learning_rate
-        self.start_epoch = start_epoch
-        self.decay_epochs = decay_epochs
-        self.step_per_epoch = step_per_epoch
-
-    def step(self):
-        cur_epoch = int(self.step_num // self.step_per_epoch)
-        decay_rate = 1.0 - max(
-            0, cur_epoch + 1 - self.start_epoch) / float(self.decay_epochs + 1)
-        return self.create_lr_var(decay_rate * self.learning_rate)
diff --git a/ppgan/utils/animate.py b/ppgan/utils/animate.py
index 3ac08d9c33c5045f1b1cf725abc3a8e5ef9bff4a..df3a0e71caab05f86cd6a6fa113c43f9b3308a34 100644
--- a/ppgan/utils/animate.py
+++ b/ppgan/utils/animate.py
@@ -1,12 +1,8 @@
-import os
-from tqdm import tqdm
+import numpy as np
+from scipy.spatial import ConvexHull
 
 import paddle
 
-import imageio
-from scipy.spatial import ConvexHull
-import numpy as np
-
 
 def normalize_kp(kp_source,
                  kp_driving,
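The lr_scheduler change is a rename to the stable API (paddle.optimizer.lr_scheduler.LambdaLR becomes paddle.optimizer.lr.LambdaDecay); the linear-decay rule itself is unchanged. A small sketch of what it produces, with assumed config values standing in for cfg.start_epoch and cfg.decay_epochs:

```python
import paddle

start_epoch, decay_epochs = 100, 100  # assumed config values

def lambda_rule(epoch):
    # constant LR until start_epoch, then linear decay toward zero
    return 1.0 - max(0, epoch + 1 - start_epoch) / float(decay_epochs + 1)

sched = paddle.optimizer.lr.LambdaDecay(0.0002, lr_lambda=lambda_rule)
for epoch in range(200):
    sched.step()
print(sched.get_lr())  # effectively 0 by the end of the decay window
```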
diff --git a/ppgan/utils/options.py b/ppgan/utils/options.py
index 682f4ac47fe5052677f661ee777aa957d366df9a..dd953359c177b5fcd30b9ef0074d068715bb0796 100644
--- a/ppgan/utils/options.py
+++ b/ppgan/utils/options.py
@@ -1,25 +1,64 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import argparse
 
 
 def parse_args():
     parser = argparse.ArgumentParser(description='Segmentron')
-    parser.add_argument('--config-file', metavar="FILE",
+    parser.add_argument('--config-file',
+                        metavar="FILE",
                         help='config file path')
     # cuda setting
-    parser.add_argument('--no-cuda', action='store_true', default=False,
+    parser.add_argument('--no-cuda',
+                        action='store_true',
+                        default=False,
                         help='disables CUDA training')
     # checkpoint and log
-    parser.add_argument('--resume', type=str, default=None,
+    parser.add_argument('--resume',
+                        type=str,
+                        default=None,
                         help='put the path to resuming file if needed')
-    parser.add_argument('--load', type=str, default=None,
+    parser.add_argument('--load',
+                        type=str,
+                        default=None,
                         help='put the path to resuming file if needed')
     # for evaluation
-    parser.add_argument('--val-interval', type=int, default=1,
+    parser.add_argument('--val-interval',
+                        type=int,
+                        default=1,
                         help='run validation every interval')
-    parser.add_argument('--evaluate-only', action='store_true', default=False,
+    parser.add_argument('--evaluate-only',
+                        action='store_true',
+                        default=False,
                         help='skip validation during training')
     # config options
-    parser.add_argument('opts', help='See config for all options',
-                        default=None, nargs=argparse.REMAINDER)
+    parser.add_argument('opts',
+                        help='See config for all options',
+                        default=None,
+                        nargs=argparse.REMAINDER)
+
+    # for inference
+    parser.add_argument("--source_path",
+                        default="",
+                        metavar="FILE",
+                        help="path to source image")
+    parser.add_argument("--reference_dir",
+                        default="",
+                        help="path to reference images")
+    parser.add_argument("--model_path", default="", help="model for loading")
+
     args = parser.parse_args()
-    return args
\ No newline at end of file
+    return args
diff --git a/ppgan/utils/preprocess.py b/ppgan/utils/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb4f90a2cabbbfadeea7e356cf3c173c8fe510a7
--- /dev/null
+++ b/ppgan/utils/preprocess.py
@@ -0,0 +1,222 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cv2
+import numpy as np
+
+
+def generate_P_from_lmks(lmks, resize, w, h):
+    """Generate the pixel-to-landmark relative position feature P from 68 landmarks."""
+    diff_size = (64, 64)
+    xs, ys = np.meshgrid(np.linspace(0, resize - 1, resize),
+                         np.linspace(0, resize - 1, resize))
+    xs = xs[None].repeat(68, axis=0)
+    ys = ys[None].repeat(68, axis=0)
+    fix = np.concatenate([ys, xs], axis=0)
+
+    lmks = lmks.transpose(1, 0).reshape(-1, 1, 1)
+
+    diff = fix - lmks
+    diff = diff.transpose(1, 2, 0)
+    diff = cv2.resize(diff, diff_size, interpolation=cv2.INTER_NEAREST)
+    diff = diff.transpose(2, 0, 1).reshape(136, -1)
+    norm = np.linalg.norm(diff, axis=0)
+    P_np = diff / norm
+
+    return P_np
+
+
+def copy_area(tar, src, lms):
+    """Copy the 16-pixel-padded landmark bounding box from src into tar, zeroing it in src."""
+    rect = [
+        int(min(lms[:, 1])) - 16,
+        int(min(lms[:, 0])) - 16,
+        int(max(lms[:, 1])) + 16 + 1,
+        int(max(lms[:, 0])) + 16 + 1
+    ]
+    tar[rect[1]:rect[3], rect[0]:rect[2]] = \
+        src[rect[1]:rect[3], rect[0]:rect[2]]
+    src[rect[1]:rect[3], rect[0]:rect[2]] = 0
+
+
+def rebound_box(mask, mask_B, mask_face):
+    """Expand the nonzero bounding box of each mask by 16 pixels, filled from mask_face."""
+    index_tmp = mask.nonzero()
+    x_index = index_tmp[0]
+    y_index = index_tmp[1]
+    index_tmp = mask_B.nonzero()
+    x_B_index = index_tmp[0]
+    y_B_index = index_tmp[1]
+    mask_temp = np.copy(mask)
+    mask_B_temp = np.copy(mask_B)
+    mask_temp[min(x_index) - 16:max(x_index) + 17,
+              min(y_index) - 16:max(y_index) + 17] = \
+        mask_face[min(x_index) - 16:max(x_index) + 17,
+                  min(y_index) - 16:max(y_index) + 17]
+    mask_B_temp[min(x_B_index) - 16:max(x_B_index) + 17,
+                min(y_B_index) - 16:max(y_B_index) + 17] = \
+        mask_face[min(x_B_index) - 16:max(x_B_index) + 17,
+                  min(y_B_index) - 16:max(y_B_index) + 17]
+    return mask_temp, mask_B_temp
+
+
+def calculate_consis_mask(mask, mask_B):
+    """Calculate the consistency mask between two parsing masks.
+
+    Both masks are downsampled by 4 and turned into per-pixel (skin, eye, lip)
+    indicator vectors; their outer product marks every pixel pair that shares
+    a class, giving a (h_a*w_a, h_b*w_b) 0/1 matrix.
+    """
+    h_a, w_a = mask.shape[1:]
+    h_b, w_b = mask_B.shape[1:]
+    mask_transpose = np.transpose(mask, (1, 2, 0))
+    mask_B_transpose = np.transpose(mask_B, (1, 2, 0))
+    mask = cv2.resize(mask_transpose,
+                      dsize=(w_a // 4, h_a // 4),
+                      interpolation=cv2.INTER_NEAREST)
+    mask = np.transpose(mask, (2, 0, 1))
+    mask_B = cv2.resize(mask_B_transpose,
+                        dsize=(w_b // 4, h_b // 4),
+                        interpolation=cv2.INTER_NEAREST)
+    mask_B = np.transpose(mask_B, (2, 0, 1))
+    h_a, w_a = mask.shape[1:]
+    h_b, w_b = mask_B.shape[1:]
+
+    mask_lip = mask[0]
+    mask_skin = mask[1]
+    mask_eye = mask[2]
+
+    mask_B_lip = mask_B[0]
+    mask_B_skin = mask_B[1]
+    mask_B_eye = mask_B[2]
+
+    maskA_one_hot = np.zeros((h_a * w_a, 3))
+    maskA_one_hot[:, 0] = mask_skin.flatten()
+    maskA_one_hot[:, 1] = mask_eye.flatten()
+    maskA_one_hot[:, 2] = mask_lip.flatten()
+
+    maskB_one_hot = np.zeros((h_b * w_b, 3))
+    maskB_one_hot[:, 0] = mask_B_skin.flatten()
+    maskB_one_hot[:, 1] = mask_B_eye.flatten()
+    maskB_one_hot[:, 2] = mask_B_lip.flatten()
+
+    con_mask = np.matmul(maskA_one_hot.reshape((h_a * w_a, 3)),
+                         np.transpose(maskB_one_hot.reshape((h_b * w_b, 3))))
+    con_mask = np.clip(con_mask, 0, 1)
+    return con_mask
+
+
+def cal_hist(image):
+    """
+    Calculate the cumulative histogram (CDF) for each of the 3 channels.
+    """
+    hists = []
+    for i in range(0, 3):
+        channel = image[i]
+        hist, _ = np.histogram(channel, bins=256, range=(0, 255))
+        total = hist.sum()
+        pdf = [v / total for v in hist]
+        for j in range(1, 256):
+            pdf[j] = pdf[j - 1] + pdf[j]
+        hists.append(pdf)
+    return hists
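cal_hist builds each channel's CDF by hand (histogram, normalize, running sum). A vectorized equivalent for reference, just as a sanity-check sketch rather than part of the patch:

```python
import numpy as np

def cal_hist_vectorized(image):
    """Per-channel CDF via np.cumsum; equivalent to cal_hist above."""
    cdfs = []
    for channel in image[:3]:
        hist, _ = np.histogram(channel, bins=256, range=(0, 255))
        cdfs.append(np.cumsum(hist / hist.sum()))
    return cdfs
```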
+
+
+def cal_trans(ref, adj):
+    """
+    Calculate the transfer function between two cumulative histograms,
+    following the standard histogram matching algorithm (see the
+    Wikipedia entry "Histogram matching").
+    """
+    table = list(range(0, 256))
+    for i in list(range(1, 256)):
+        for j in list(range(1, 256)):
+            if ref[i] >= adj[j - 1] and ref[i] <= adj[j]:
+                table[i] = j
+                break
+    table[255] = 255
+    return table
+
+
+def histogram_matching(dstImg, refImg, index):
+    """
+    Perform histogram matching: dstImg is transformed to have the same
+    histogram as refImg over the indexed pixels.
+    index[0], index[1]: the indices of pixels to transform in dstImg
+    index[2], index[3]: the indices of pixels whose histogram is computed in refImg
+    """
+    dst_align = [dstImg[i, index[0], index[1]] for i in range(0, 3)]
+    ref_align = [refImg[i, index[2], index[3]] for i in range(0, 3)]
+    hist_ref = cal_hist(ref_align)
+    hist_dst = cal_hist(dst_align)
+    tables = [cal_trans(hist_dst[i], hist_ref[i]) for i in range(0, 3)]
+
+    mid = dst_align.copy()
+    for i in range(0, 3):
+        for k in range(0, len(index[0])):
+            dst_align[i][k] = tables[i][int(mid[i][k])]
+
+    for i in range(0, 3):
+        dstImg[i, index[0], index[1]] = dst_align[i]
+
+    return dstImg
+
+
+def hisMatch(input_data, target_data, mask_src, mask_tar, index):
+    """Histogram-match the masked region of input_data to that of target_data."""
+    mask_src = np.float32(np.clip(mask_src, 0, 1))
+    mask_tar = np.float32(np.clip(mask_tar, 0, 1))
+    input_masked = np.float32(input_data) * mask_src
+    target_masked = np.float32(target_data) * mask_tar
+    input_match = histogram_matching(input_masked, target_masked, index)
+    return input_match
+
+
+def mask_preprocess(mask, mask_B):
+    """Collect the nonzero pixel indices of both masks, in both pairings."""
+    index_tmp = mask.nonzero()
+    x_index = index_tmp[0]
+    y_index = index_tmp[1]
+    index_tmp = mask_B.nonzero()
+    x_B_index = index_tmp[0]
+    y_B_index = index_tmp[1]
+    index = [x_index, y_index, x_B_index, y_B_index]
+    index_2 = [x_B_index, y_B_index, x_index, y_index]
+    return [mask, mask_B, index, index_2]
+
+
+def generate_mask_aug(mask, lmks):
+    """Build the 3-channel (lip, skin, eye) augmented mask from a parsing map and landmarks."""
+    lms_eye_left = lmks[42:48]
+    lms_eye_right = lmks[36:42]
+
+    mask_eye_left = np.zeros_like(mask)
+    mask_eye_right = np.zeros_like(mask)
+
+    mask_face = np.float32(mask == 1) + np.float32(mask == 6)
+
+    copy_area(mask_eye_left, mask_face, lms_eye_left)
+    copy_area(mask_eye_right, mask_face, lms_eye_right)
+
+    mask_skin = mask_face
+
+    mask_lip = np.float32(mask == 7) + np.float32(mask == 9)
+
+    mask_eye = mask_eye_left + mask_eye_right
+
+    mask_aug = np.concatenate(
+        (np.expand_dims(mask_lip, 0), np.expand_dims(
+            mask_skin, 0), np.expand_dims(mask_eye, 0)), 0)
+
+    return mask_aug
diff --git a/tools/main.py b/tools/main.py
index 064b984faf3ac6db56f933241225c128d309d2d3..e1dac1f53a548135113bf48572324b7d7958982d 100644
--- a/tools/main.py
+++ b/tools/main.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import os
 import sys
@@ -30,7 +31,7 @@ def main(args, cfg):
 
     # build trainer
     trainer = Trainer(cfg)
-
+    # continue train or evaluate; the checkpoint needs to contain epoch and optimizer info
     if args.resume:
         trainer.resume(args.resume)
@@ -50,4 +51,3 @@ if __name__ == '__main__':
     cfg = get_config(args.config_file)
 
     main(args, cfg)
-
\ No newline at end of file
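To exercise the new preprocess utilities end to end, here is a small self-contained check on dummy data (not part of the patch) that recolors one masked region of a random image to follow another image's histogram:

```python
import numpy as np
from ppgan.utils.preprocess import hisMatch, mask_preprocess

rng = np.random.default_rng(0)
src = rng.integers(0, 256, size=(3, 64, 64)).astype(np.float32)  # CHW image
ref = rng.integers(0, 256, size=(3, 64, 64)).astype(np.float32)

mask_src = np.zeros((64, 64), dtype=np.float32)
mask_ref = np.zeros((64, 64), dtype=np.float32)
mask_src[16:48, 16:48] = 1  # region to recolor in src
mask_ref[8:40, 8:40] = 1    # region whose palette is copied from ref

_, _, index, _ = mask_preprocess(mask_src, mask_ref)
matched = hisMatch(src, ref, mask_src, mask_ref, index)
print(matched.shape)  # (3, 64, 64); masked pixels now follow ref's histogram
```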