Commit e4932b70 authored by LielinJiang

Merge branch 'master' of https://github.com/PaddlePaddle/PaddleGAN into readme

import os
import datetime
import argparse
import numpy
import networks
modelnames = networks.__all__
# import datasets
datasetNames = ('Vimeo_90K_interp', )  # datasets.__all__
parser = argparse.ArgumentParser(description='DAIN')
parser.add_argument('--debug', action='store_true', help='Enable debug mode')
parser.add_argument('--netName',
type=str,
default='DAIN',
choices=modelnames,
help='model architecture: ' + ' | '.join(modelnames) +
' (default: DAIN)')
parser.add_argument('--datasetName',
default='Vimeo_90K_interp',
choices=datasetNames,
nargs='+',
help='dataset type : ' + ' | '.join(datasetNames) +
' (default: Vimeo_90K_interp)')
parser.add_argument('--video_path',
default='',
help='the path of selected videos')
parser.add_argument('--output_path', default='', help='the output root path')
parser.add_argument('--seed',
type=int,
default=1,
help='random seed (default: 1)')
parser.add_argument('--batch_size',
'-b',
type=int,
default=1,
help='batch size (default:1)')
parser.add_argument('--channels',
'-c',
type=int,
default=3,
choices=[1, 3],
help='channels of images (default:3)')
parser.add_argument('--filter_size',
'-f',
type=int,
default=4,
help='the size of filters used (default: 4)',
choices=[2, 4, 6, 5, 51])
parser.add_argument('--time_step',
type=float,
default=0.5,
help='time step of the interpolation (e.g. 0.5 doubles the frame rate)')
parser.add_argument(
'--alpha',
type=float,
nargs='+',
default=[0.0, 1.0],
help=
'the ratio of losses for the interpolated and rectified results (default: [0.0, 1.0])'
)
parser.add_argument('--frame_rate',
type=int,
default=None,
help='frame rate of the input video')
parser.add_argument('--patience',
type=int,
default=5,
help='the patience of reduce-on-plateau')
parser.add_argument('--factor',
type=float,
default=0.2,
help='the factor of reduce-on-plateau')
parser.add_argument('--saved_model',
type=str,
default='',
help='path to the model weights')
parser.add_argument('--no-date',
action='store_true',
help='don\'t append date timestamp to folder')
parser.add_argument('--use_cuda',
default=True,
type=bool,
help='use cuda or not')
parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not')
parser.add_argument('--remove_duplicates',
default=True,
type=bool,
help='remove duplicate frames or not')
from .dain import DAIN
from .dain_slowmotion import DAIN_slowmotion
__all__ = ('DAIN', 'DAIN_slowmotion')
import paddle.fluid as fluid
import resblock
import pwcnet
class DAIN(fluid.dygraph.Layer):
def __init__(self, channel=3, filter_size=4, timestep=0.5, training=True):
# base class initialization
super(DAIN, self).__init__()
self.filter_size = filter_size
self.training = training
self.timestep = timestep
assert (timestep == 0.5)
self.numFrames = int(1.0 / timestep) - 1
ctx_ch = 3 * 64 + 3
inplanes = 3 + 3 + 3 + 2 * 1 + 2 * 2 + 16 * 2 + 2 * ctx_ch
self.rectifyNet = resblock.__dict__['MultipleBasicBlock_4'](inplanes,
64)
self.flownets = pwcnet.__dict__['pwc_dc_net']()
self.div_flow = 20.0
def forward(self, input):
"""
Parameters
----------
input: shape (3, batch, 3, width, height)
-----------
"""
losses = []
offsets = []
'''
STEP 1: squeeze the input
'''
if self.training == True:
assert input.shape[0] == 3
input_0 = input[0]
input_1 = input[1]
input_2 = input[2]
else:
# print(input.shape[0])
assert input.shape[0] == 2
input_0 = input[0]
input_2 = input[1]
#prepare the input data of current scale
cur_input_0 = input_0
if self.training == True:
cur_input_1 = input_1
cur_input_2 = input_2
'''
STEP 3.2: concatenating the inputs.
'''
cur_offset_input = fluid.layers.concat([cur_input_0, cur_input_2],
axis=1)
'''
STEP 3.3: perform the estimation
'''
time_offsets = [
kk * self.timestep for kk in range(1, 1 + self.numFrames, 1)
]
cur_offset_outputs = [
self.forward_flownets(self.flownets,
cur_offset_input,
time_offsets=time_offsets),
self.forward_flownets(self.flownets,
fluid.layers.concat(
[cur_input_2, cur_input_0], axis=1),
time_offsets=time_offsets[::-1])
]
cur_offset_output = [cur_offset_outputs[0][0], cur_offset_outputs[1][0]]
# Warp image use warp-op in PWC-Net
ref0 = self.flownets.warp_nomask(cur_input_0, cur_offset_output[0])
ref2 = self.flownets.warp_nomask(cur_input_2, cur_offset_output[1])
cur_output = (ref0 + ref2) / 2.0
rectify_input = fluid.layers.concat([
cur_output, ref0, ref2, cur_offset_output[0], cur_offset_output[1]
],
axis=1)
cur_output_rectified = self.rectifyNet(rectify_input) + cur_output
'''
STEP 3.5: for training phase, we collect the variables to be penalized.
'''
if self.training == True:
losses += [cur_output - cur_input_1]
losses += [cur_output_rectified - cur_input_1]
offsets += [cur_offset_output]
'''
STEP 4: return the results
'''
if self.training == True:
# if in the training phase, we output the losses to be minimized.
# return losses, loss_occlusion
return losses, offsets
else:
cur_outputs = [cur_output, cur_output_rectified]
return cur_outputs, cur_offset_output
def forward_flownets(self, model, input, time_offsets=None):
if time_offsets == None:
time_offsets = [0.5]
elif type(time_offsets) == float:
time_offsets = [time_offsets]
elif type(time_offsets) == list:
pass
# the flow network predicts motion in a single direction only, not bidirectionally
temp = model(input)
# scale the single-direction flow by each time offset to get the flow for every intermediate frame
temps = [
self.div_flow * temp * time_offset for time_offset in time_offsets
]
# upsample the flow with bilinear interpolation (nearest-neighbor would not be better)
temps = [fluid.layers.resize_bilinear(temp, scale=4) for temp in temps]
return temps
import paddle.fluid as fluid
import resblock
import time
import pwcnet
class DAIN_slowmotion(fluid.dygraph.Layer):
def __init__(self, channel=3, filter_size=4, timestep=0.5, training=True):
# base class initialization
super(DAIN_slowmotion, self).__init__()
self.filter_size = filter_size
self.training = training
self.timestep = timestep
self.num_frames = int(1.0 / timestep) - 1
ctx_ch = 3 * 64 + 3
# inplanes = 3 + 3 + 3 + 2*1 + 2*2 + 2
inplanes = 13
self.flownets = pwcnet.__dict__['pwc_dc_net']()
self.rectifyNet = resblock.__dict__['MultipleBasicBlock_4'](inplanes,
64)
self.div_flow = 20.0
def forward(self, input):
"""
Parameters
----------
input: shape (3, batch, 3, width, height)
-----------
"""
losses = []
offsets = []
'''
STEP 1: squeeze the input
'''
if self.training == True:
assert input.shape[0] == 3
input_0 = input[0]
input_1 = input[1]
input_2 = input[2]
else:
assert input.shape[0] == 2
input_0 = input[0]
input_2 = input[1]
#prepare the input data of current scale
cur_input_0 = input_0
if self.training == True:
cur_input_1 = input_1
cur_input_2 = input_2
'''
STEP 3.2: concatenating the inputs.
'''
cur_offset_input = fluid.layers.concat([cur_input_0, cur_input_2],
axis=1)
'''
STEP 3.3: perform the estimation
'''
time_offsets = [
kk * self.timestep for kk in range(1, 1 + self.num_frames, 1)
]
cur_offset_outputs = [
self.forward_flownets(self.flownets,
cur_offset_input,
time_offsets=time_offsets),
self.forward_flownets(self.flownets,
fluid.layers.concat(
[cur_input_2, cur_input_0], axis=1),
time_offsets=time_offsets[::-1])
]
'''
STEP 3.4: perform the frame interpolation process
'''
count = 0
for temp_0, temp_1, timeoffset in zip(cur_offset_outputs[0],
cur_offset_outputs[1],
time_offsets):
cur_offset_output = [temp_0, temp_1]
ref0 = self.flownets.warp_nomask(cur_input_0, cur_offset_output[0])
ref2 = self.flownets.warp_nomask(cur_input_2, cur_offset_output[1])
cur_output_temp = (ref0 + ref2) / 2.0
if count == 0:
cur_output = fluid.layers.unsqueeze(cur_output_temp, axes=0)
else:
cur_output_ = fluid.layers.unsqueeze(cur_output_temp, axes=0)
cur_output = fluid.layers.concat([cur_output, cur_output_],
axis=0)
rectify_input = fluid.layers.concat([
cur_output_temp, ref0, ref2, cur_offset_output[0],
cur_offset_output[1]
],
axis=1)
cur_output_rectified_temp = self.rectifyNet(
rectify_input) + cur_output_temp
if count == 0:
cur_output_rectified = fluid.layers.unsqueeze(
cur_output_rectified_temp, axes=0)
else:
cur_output_rectified_ = fluid.layers.unsqueeze(
cur_output_rectified_temp, axes=0)
cur_output_rectified = fluid.layers.concat(
[cur_output_rectified, cur_output_rectified_], axis=0)
count += 1
'''
STEP 3.5: for training phase, we collect the variables to be penalized.
'''
if self.training == True:
losses += [cur_output - cur_input_1]
losses += [cur_output_rectified - cur_input_1]
offsets += [cur_offset_output]
'''
STEP 4: return the results
'''
if self.training == True:
# if in the training phase, we output the losses to be minimized.
# return losses, loss_occlusion
return losses, offsets
else:
cur_outputs = [cur_output, cur_output_rectified]
return cur_outputs, cur_offset_output
def forward_flownets(self, model, input, time_offsets=None):
if time_offsets == None:
time_offsets = [0.5]
elif type(time_offsets) == float:
time_offsets = [time_offsets]
elif type(time_offsets) == list:
pass
# the flow network predicts motion in a single direction only, not bidirectionally
temp = model(input)
# scale the single-direction flow by each time offset to get the flow for every intermediate frame
temps = [
self.div_flow * temp * time_offset for time_offset in time_offsets
]
# upsample the flow with bilinear interpolation (nearest-neighbor would not be better)
temps = [fluid.layers.resize_bilinear(temp, scale=4) for temp in temps]
return temps
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import time
import glob
import numpy as np
from imageio import imread, imsave
from tqdm import tqdm
import cv2
import paddle.fluid as fluid
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import video2frames, frames2video
from util import *
from my_args import parser
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
def infer_engine(model_dir,
run_mode='fluid',
batch_size=1,
use_gpu=False,
min_subgraph_size=3):
if not use_gpu and not run_mode == 'fluid':
raise ValueError(
"Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
.format(run_mode, use_gpu))
precision_map = {
'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
}
config = fluid.core.AnalysisConfig(os.path.join(model_dir, 'model'),
os.path.join(model_dir, 'params'))
if use_gpu:
# initial GPU memory(M), device ID
config.enable_use_gpu(100, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
else:
config.disable_gpu()
if run_mode in precision_map.keys():
config.enable_tensorrt_engine(workspace_size=1 << 10,
max_batch_size=batch_size,
min_subgraph_size=min_subgraph_size,
precision_mode=precision_map[run_mode],
use_static=False,
use_calib_mode=False)
# disable print log when predict
config.disable_glog_info()
# enable shared memory
config.enable_memory_optim()
# disable feed, fetch OP, needed by zero_copy_run
config.switch_use_feed_fetch_ops(False)
predictor = fluid.core.create_paddle_predictor(config)
return predictor
def executor(model_dir, use_gpu=False):
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
program, feed_names, fetch_targets = fluid.io.load_inference_model(
dirname=model_dir,
executor=exe,
model_filename='model',
params_filename='params')
return exe, program, fetch_targets
class VideoFrameInterp(object):
def __init__(self,
time_step,
model_path,
video_path,
use_gpu=True,
key_frame_thread=0.,
output_path='output',
remove_duplicates=True):
self.video_path = video_path
self.output_path = os.path.join(output_path, 'DAIN')
if model_path is None:
model_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)
self.model_path = model_path
self.time_step = time_step
self.key_frame_thread = key_frame_thread
# store the flag so run() can honor --remove_duplicates
self.remove_duplicates = remove_duplicates
self.exe, self.program, self.fetch_targets = executor(model_path,
use_gpu=use_gpu)
def run(self):
frame_path_input = os.path.join(self.output_path, 'frames-input')
frame_path_interpolated = os.path.join(self.output_path,
'frames-interpolated')
frame_path_combined = os.path.join(self.output_path, 'frames-combined')
video_path_output = os.path.join(self.output_path, 'videos-output')
if not os.path.exists(self.output_path):
os.makedirs(self.output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
timestep = self.time_step
num_frames = int(1.0 / timestep) - 1
if self.video_path.endswith('.mp4'):
videos = [self.video_path]
else:
videos = sorted(glob.glob(os.path.join(self.video_path, '*.mp4')))
for cnt, vid in enumerate(videos):
print("Interpolating video:", vid)
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS)
print("Old fps (frame rate): ", fps)
times_interp = int(1.0 / timestep)
r2 = str(int(fps) * times_interp)
print("New fps (frame rate): ", r2)
out_path = video2frames(vid, frame_path_input)
vidname = vid.split('/')[-1].split('.')[0]
tot_timer = AverageMeter()
proc_timer = AverageMeter()
end = time.time()
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
if self.remove_duplicates:
frames = remove_duplicates(out_path)
img = imread(frames[0])
int_width = img.shape[1]
int_height = img.shape[0]
channel = img.shape[2]
if not channel == 3:
continue
if int_width != ((int_width >> 7) << 7):
int_width_pad = (
((int_width >> 7) + 1) << 7) # more than necessary
padding_left = int((int_width_pad - int_width) / 2)
padding_right = int_width_pad - int_width - padding_left
else:
int_width_pad = int_width
padding_left = 32
padding_right = 32
if int_height != ((int_height >> 7) << 7):
int_height_pad = (
((int_height >> 7) + 1) << 7) # more than necessary
padding_top = int((int_height_pad - int_height) / 2)
padding_bottom = int_height_pad - int_height - padding_top
else:
int_height_pad = int_height
padding_top = 32
padding_bottom = 32
frame_num = len(frames)
print('processing {} frames, from video: {}'.format(frame_num, vid))
if not os.path.exists(os.path.join(frame_path_interpolated,
vidname)):
os.makedirs(os.path.join(frame_path_interpolated, vidname))
if not os.path.exists(os.path.join(frame_path_combined, vidname)):
os.makedirs(os.path.join(frame_path_combined, vidname))
for i in tqdm(range(frame_num - 1)):
first = frames[i]
second = frames[i + 1]
img_first = imread(first)
img_second = imread(second)
'''--------------Frame change test------------------------'''
img_first_gray = np.dot(img_first[..., :3],
[0.299, 0.587, 0.114])
img_second_gray = np.dot(img_second[..., :3],
[0.299, 0.587, 0.114])
img_first_gray = img_first_gray.flatten(order='C')
img_second_gray = img_second_gray.flatten(order='C')
corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1]
key_frame = False
if corr < self.key_frame_thread:
key_frame = True
'''-------------------------------------------------------'''
X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
proc_end = time.time()
o = self.exe.run(self.program,
fetch_list=self.fetch_targets,
feed={"image": X})
y_ = o[0]
proc_timer.update(time.time() - proc_end)
tot_timer.update(time.time() - end)
end = time.time()
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>6d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
num_frames = int(1.0 / timestep) - 1
input_dir = os.path.join(frame_path_input, vidname)
interpolated_dir = os.path.join(frame_path_interpolated, vidname)
combined_dir = os.path.join(frame_path_combined, vidname)
combine_frames(input_dir, interpolated_dir, combined_dir,
num_frames)
frame_pattern_combined = os.path.join(frame_path_combined, vidname,
'%08d.png')
video_pattern_output = os.path.join(video_path_output,
vidname + '.mp4')
if os.path.exists(video_pattern_output):
os.remove(video_pattern_output)
frames2video(frame_pattern_combined, video_pattern_output, r2)
return frame_pattern_combined, video_pattern_output
if __name__ == '__main__':
args = parser.parse_args()
predictor = VideoFrameInterp(args.time_step,
args.saved_model,
args.video_path,
output_path=args.output_path,
remove_duplicates=args.remove_duplicates)
predictor.run()
Custom op compilation:
2. Run `sh make.sh` to build the correlation_lib.so shared library.
3. Add the shared library path to LD_LIBRARY_PATH:
```
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python3.7 -c 'import paddle; print(paddle.sysconfig.get_lib())'`
```
4. Add the Python path of the correlation op:
```
export PYTHONPATH=$PYTHONPATH:`pwd`
```
5. Run the unit test with `python test_correlation.py` to verify that the op loads successfully.
PS: If the paddle whl package was downloaded from the official website, gcc 4.8 is required, i.e. change `g++` in make.sh to `g++-4.8`.
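Once the library is built and both paths are exported, the op can also be loaded by hand from Python. This is a minimal sketch, assuming `fluid.load_op_library` is available in the installed Paddle version and that `correlation_lib.so` sits in the current directory; the `correlation.py` wrapper shipped with this repo may already do the loading for you.
```
import paddle.fluid as fluid

# Register the custom `correlation` operator from the compiled shared library
# (a hypothetical manual load; adjust the path to wherever make.sh put the .so).
fluid.load_op_library('./correlation_lib.so')
```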
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
inline std::vector<int64_t> CorrelationOutputSize(int batch, int input_height, int input_width, int stride1, int stride2, int kernel_size, int pad_size, int max_displacement) {
std::vector<int64_t> output_shape({batch});
int kernel_radius = (kernel_size - 1) / 2;
int border_radius = kernel_radius + max_displacement;
int padded_input_height = input_height + 2 * pad_size;
int padded_input_width = input_width + 2 * pad_size;
int output_channel = ((max_displacement/stride2) * 2 + 1) * ((max_displacement/stride2) * 2 + 1);
output_shape.push_back(output_channel);
int output_height = std::ceil(static_cast<float>(padded_input_height - 2 * border_radius) / static_cast<float>(stride1));
int output_width = std::ceil(static_cast<float>(padded_input_width - 2 * border_radius) / static_cast<float>(stride1));
output_shape.push_back(output_height);
output_shape.push_back(output_width);
return output_shape;
}
class CorrelationOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override{
AddInput("Input1", "input1");
AddInput("Input2", "input2");
AddOutput("Output", "output");
AddAttr<int>("pad_size", "pad size for input1 and input2");
AddAttr<int>("kernel_size", "kernel size of input1 and input2");
AddAttr<int>("max_displacement", "max displacement of input1 and input2");
AddAttr<int>("stride1", "Input1 stride");
AddAttr<int>("stride2", "Input2 stride");
AddAttr<int>("corr_type_multiply", "correlation coefficient").SetDefault(1);
AddComment(R"DOC(Correlation of two feature map. Only support NCHW data format.)DOC");
}
};
class CorrelationOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(input1) cannot be null");
PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(input2) cannot be null");
int stride1 = ctx->Attrs().Get<int>("stride1");
int stride2 = ctx->Attrs().Get<int>("stride2");
int max_displacement = ctx->Attrs().Get<int>("max_displacement");
int pad_size = ctx->Attrs().Get<int>("pad_size");
int kernel_size = ctx->Attrs().Get<int>("kernel_size");
auto in_dims = ctx->GetInputDim("Input1");
auto in2_dims = ctx->GetInputDim("Input2");
PADDLE_ENFORCE_EQ(in_dims.size() == 4, true, "input1 must be 4-dims");
PADDLE_ENFORCE_EQ(in2_dims.size() == 4, true, "input2 must be 4-dims");
std::vector<int64_t> output_shape = CorrelationOutputSize(in_dims[0], in_dims[2], in_dims[3], stride1, stride2, kernel_size, pad_size, max_displacement);
ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override{
auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input1");
PADDLE_ENFORCE_EQ(input_data_type, ctx.Input<Tensor>("Input2")->type(), "Input1 and Input2 should have the same type");
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
};
template <typename T>
class CorrelationOpGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("correlation_grad");
op->SetInput("Input1", this->Input("Input1"));
op->SetInput("Input2", this->Input("Input2"));
op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));
op->SetOutput(framework::GradVarName("Input1"), this->InputGrad("Input1"));
op->SetOutput(framework::GradVarName("Input2"), this->InputGrad("Input2"));
op->SetAttrMap(this->Attrs());
}
};
class CorrelationOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(Input1) should not be null");
PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(Input2) should not be null");
PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Output")), true, "Input(Output@GRAD) should not be null");
auto in1_dims = ctx->GetInputDim("Input1");
auto in2_dims = ctx->GetInputDim("Input2");
ctx->SetOutputDim(framework::GradVarName("Input1"), in1_dims);
ctx->SetOutputDim(framework::GradVarName("Input2"), in1_dims);
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override{
const auto* var = ctx.InputVar(framework::GradVarName("Output"));
if (var == nullptr) {
PADDLE_THROW("cannot find Output@GRAD");
}
return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(ctx, "Input1"), ctx.GetPlace());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(correlation, ops::CorrelationOp, ops::CorrelationOpMaker,
ops::CorrelationOpGradMaker<paddle::framework::OpDesc>,
ops::CorrelationOpGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(correlation_grad, ops::CorrelationOpGrad);
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#define THREADS_PER_BLOCK 32
#define FULL_MASK 0xffffffff
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T>
__forceinline__ __device__ T warpReduceSum(T val) {
for (int offset = 16; offset > 0; offset /= 2) {
val += __shfl_down_sync(FULL_MASK, val, offset);
}
return val;
}
template <typename T>
__forceinline__ __device__ T blockReduceSum(T val) {
static __shared__ T shared[32];
int lane = threadIdx.x % warpSize;
int wid = threadIdx.x / warpSize;
val = warpReduceSum(val);
if (lane == 0)
shared[wid] = val;
__syncthreads();
val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0;
if (wid == 0)
val = warpReduceSum(val);
return val;
}
template <typename T>
__global__ void set_zero(T *x, int num) {
for(int i = blockIdx.x * blockDim.x + threadIdx.x; i < num; i += blockDim.x * gridDim.x)
x[i] = static_cast<T>(0);
}
template <typename T>
__global__ void channel_first(const T *input, T *rinput, const int channel, const int height, const int width, const int pad_size) {
int n = blockIdx.x;
int h = blockIdx.y;
int w = blockIdx.z;
int ch_off = threadIdx.x;
T value;
int dimchw = channel * height * width;
int dimhw = height * width;
int p_dimw = (width + 2 * pad_size);
int p_dimh = (height + 2 * pad_size);
int p_dimchw = channel * p_dimw * p_dimh;
int p_dimcw = channel * p_dimw;
for (int c = ch_off; c < channel; c += THREADS_PER_BLOCK) {
value = input[n * dimchw + c * dimhw + h * width + w];
rinput[n * p_dimchw + (h + pad_size) * p_dimcw + (w + pad_size) * channel + c] = value;
}
}
template <typename T>
__global__ void correlation_forward(T *output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int input_channel, const int input_height, const int input_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int n = blockIdx.x;
int h1 = blockIdx.y * stride1 + max_displacement;
int w1 = blockIdx.z * stride1 + max_displacement;
int c = threadIdx.x;
int p_dimchw = p_input_height * p_input_width * input_channel;
int p_dimcw = p_input_width * input_channel;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int nelems = kernel_size * kernel_size * p_dimc;
for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) {
for(int ti = -displacement_rad; ti <= displacement_rad; ++ti) {
int w2 = w1 + ti * stride2;
int h2 = h1 + tj * stride2;
T acc0 = 0;
for(int j = -kernel_rad; j <= kernel_rad; ++j) {
for(int i = -kernel_rad; i <= kernel_rad; ++i) {
for(int ch = c; ch < p_dimc; ch += blockDim.x) {
int index1 = n * p_dimchw + (h1 + j) * p_dimcw + (w1 + i) * p_dimc + ch;
int index2 = n * p_dimchw + (h2 + j) * p_dimcw + (w2 + i) * p_dimc + ch;
acc0 += static_cast<T>(rinput1[index1] * rinput2[index2]);
}
}
}
if (blockDim.x == warpSize) {
__syncwarp();
acc0 = warpReduceSum(acc0);
} else {
__syncthreads();
acc0 = blockReduceSum(acc0);
}
if (threadIdx.x == 0) {
int tc = (tj + displacement_rad) * displacement_size + (ti + displacement_rad);
const int t_index = n * t_dimchw + tc * t_dimhw + blockIdx.y * t_dimw + blockIdx.z;
output[t_index] = static_cast<T>(acc0 / nelems);
}
}
}
}
//class CorrelationKernel<platform::CUDADeviceContext, T>
template <typename T>
class CorrelationKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must be CUDAPlace");
auto *input1 = ctx.Input<Tensor>("Input1");
auto *input2 = ctx.Input<Tensor>("Input2");
int pad_size = ctx.Attr<int>("pad_size");
int kernel_size = ctx.Attr<int>("kernel_size");
int stride1 = ctx.Attr<int>("stride1");
int stride2 = ctx.Attr<int>("stride2");
int max_displacement = ctx.Attr<int>("max_displacement");
int corr_type_multiply = ctx.Attr<int>("corr_type_multiply");
auto *output = ctx.Output<Tensor>("Output");
output->mutable_data<T>(ctx.GetPlace());
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
// based on input1, which is NCHW
auto in_dims = input1->dims();
int N = in_dims[0];
int C = in_dims[1];
int H = in_dims[2];
int W = in_dims[3];
int padded_input_height = H + 2 * pad_size;
int padded_input_width = W + 2 * pad_size;
Tensor rinput1 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput1.mutable_data<T>(ctx.GetPlace());
Tensor rinput2 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput2.mutable_data<T>(ctx.GetPlace());
set_zero<<<(rinput1.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput1.data<T>(), rinput1.numel());
set_zero<<<(rinput2.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput2.data<T>(), rinput2.numel());
set_zero<<<(output->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(output->data<T>(), output->numel());
auto out_dims = output->dims();
int OC = out_dims[1];
int OH = out_dims[2];
int OW = out_dims[3];
dim3 blocks_grid(N, H, W);
dim3 threads_block(THREADS_PER_BLOCK);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input1->data<T>(), rinput1.data<T>(), C, H, W, pad_size);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input2->data<T>(), rinput2.data<T>(), C, H, W, pad_size);
dim3 threadsPerBlock(THREADS_PER_BLOCK);
dim3 totalBlocksCorr(N, OH, OW);
correlation_forward<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(output->data<T>(), OC, OH, OW, rinput1.data<T>(),
C, H, W, rinput2.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
};
template <typename T>
__global__ void correlation_backward_input1(int item, T *grad_input1, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
int n = item;
int h = blockIdx.x * stride1 + pad_size;
int w = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int xmin = (w - kernel_rad - max_displacement) / stride1;
int ymin = (h - kernel_rad - max_displacement) / stride1;
int xmax = (w + kernel_rad - max_displacement) / stride1;
int ymax = (h + kernel_rad - max_displacement) / stride1;
if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) {
return;
}
if (xmin > xmax || ymin > ymax) {
return;
}
xmin = max(0, xmin);
xmax = min(output_width - 1, xmax);
ymin = max(0, ymin);
ymax = min(output_height - 1, ymax);
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int p_dimchw = input_channel * p_input_height * p_input_width;
int p_dimcw = input_channel * p_input_width;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int o_dimchw = input_channel * input_height * input_width;
int o_dimhw = input_height * input_width;
int o_dimw = input_width;
int nelems = kernel_size * kernel_size * input_channel;
__shared__ T prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) {
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
int index2 = n * p_dimchw + (h + j2) * p_dimcw + (w + i2) * p_dimc + c;
T val2 = rinput2[index2];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i;
prod_sum[tch_off] += grad_output[t_index] * val2;
}
}
}
__syncthreads();
if (tch_off == 0) {
T reduce_sum = 0;
for (int index = 0; index < THREADS_PER_BLOCK; index++) {
reduce_sum += prod_sum[index];
}
const int index1 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size);
grad_input1[index1] = static_cast<T>(reduce_sum / nelems);
}
}
template <typename T>
__global__ void correlation_backward_input2(int item, T *grad_input2, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2){
int n = item;
int h = blockIdx.x * stride1 + pad_size;
int w = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int p_dimchw = input_channel * p_input_height * p_input_width;
int p_dimcw = input_channel * p_input_width;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int o_dimchw = input_channel * input_height * input_width;
int o_dimhw = input_height * input_width;
int o_dimw = input_width;
int nelems = kernel_size * kernel_size * input_channel;
__shared__ T prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) {
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
int xmin = (w - kernel_rad - max_displacement - i2) / stride1;
int ymin = (h - kernel_rad - max_displacement - j2) / stride1;
int xmax = (w + kernel_rad - max_displacement - i2) / stride1;
int ymax = (h + kernel_rad - max_displacement - j2) / stride1;
if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) {
continue;
}
if (xmin > xmax || ymin > ymax) {
continue;
}
xmin = max(0, xmin);
xmax = min(output_width - 1, xmax);
ymin = max(0, ymin);
ymax = min(output_height - 1, ymax);
int index1 = n * p_dimchw + (h - j2) * p_dimcw + (w - i2) * p_dimc + c;
T val1 = rinput1[index1];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i;
prod_sum[tch_off] += grad_output[t_index] * val1;
}
}
}
__syncthreads();
if (tch_off == 0) {
T reduce_sum = 0;
for (int index = 0; index < THREADS_PER_BLOCK; index++) {
reduce_sum += prod_sum[index];
}
const int index2 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size);
grad_input2[index2] = static_cast<T>(reduce_sum / nelems);
}
}
template <typename T>
class CorrelationGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must use CUDAPlace.");
const auto *input1 = ctx.Input<Tensor>("Input1");
const auto *input2 = ctx.Input<Tensor>("Input2");
const auto *grad_output = ctx.Input<Tensor>(framework::GradVarName("Output"));
const int pad_size = ctx.Attr<int>("pad_size");
const int kernel_size = ctx.Attr<int>("kernel_size");
const int stride1 = ctx.Attr<int>("stride1");
const int stride2 = ctx.Attr<int>("stride2");
const int max_displacement = ctx.Attr<int>("max_displacement");
const int corr_type_multiply = ctx.Attr<int>("corr_type_multiply");
auto *grad_input1 = ctx.Output<Tensor>(framework::GradVarName("Input1"));
grad_input1->mutable_data<T>(ctx.GetPlace());
auto *grad_input2 = ctx.Output<Tensor>(framework::GradVarName("Input2"));
grad_input2->mutable_data<T>(ctx.GetPlace());
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
auto in_dims = input1->dims();
int N = in_dims[0];
int C = in_dims[1];
int H = in_dims[2];
int W = in_dims[3];
int padded_input_height = H + 2 * pad_size;
int padded_input_width = W + 2 * pad_size;
Tensor rinput1 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput1.mutable_data<T>(ctx.GetPlace());
Tensor rinput2 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput2.mutable_data<T>(ctx.GetPlace());
set_zero<<<(rinput1.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput1.data<T>(), rinput1.numel());
set_zero<<<(rinput2.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput2.data<T>(), rinput2.numel());
set_zero<<<(grad_input1->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(grad_input1->data<T>(), grad_input1->numel());
set_zero<<<(grad_input2->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(grad_input2->data<T>(), grad_input2->numel());
auto grad_out_dims = grad_output->dims();
int GOC = grad_out_dims[1];
int GOH = grad_out_dims[2];
int GOW = grad_out_dims[3];
dim3 blocks_grid(N, H, W);
dim3 threads_block(THREADS_PER_BLOCK);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input1->data<T>(), rinput1.data<T>(), C, H, W, pad_size);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input2->data<T>(), rinput2.data<T>(), C, H, W, pad_size);
dim3 threadsPerBlock(THREADS_PER_BLOCK);
dim3 totalBlocksCorr(H, W, C);
for (int n = 0; n < N; n++) {
correlation_backward_input1<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(n, grad_input1->data<T>(), C, H, W, grad_output->data<T>(), GOC, GOH, GOW, rinput2.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
for (int n = 0; n < N; n++) {
correlation_backward_input2<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(n, grad_input2->data<T>(), C, H, W, grad_output->data<T>(), GOC, GOH, GOW, rinput1.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
correlation, ops::CorrelationKernel<float>,
ops::CorrelationKernel<double>);
REGISTER_OP_CUDA_KERNEL(
correlation_grad, ops::CorrelationGradKernel<float>,
ops::CorrelationGradKernel<double>);
# source /ssd1/vis/liufanglong/.bashrc
#export PATH=/home/work/cuda-9.0/bin:$PATH
#export PATH=/home/work/cuda-9.0/bin:$PATH
#export LD_LIBRARY_PATH="/home/work/cuda-9.0/lib64:$LD_LIBRARY_PATH"
#export LD_LIBRARY_PATH=/home/vis/chao/local/cudnn_v7.6/cuda/lib64:$LD_LIBRARY_PATH
#export CPLUS_INCLUDE_PATH=/home/vis/chao/local/cudnn_v7.6/cuda/include:/ssd1/vis/liufanglong/local/fluid_1.1.0_for_slurm/nccl_2.3.5/include:$CPLUS_INCLUDE_PATH
#export LD_LIBRARY_PATH=/ssd1/vis/liufanglong/local/fluid_1.1.0_for_slurm/nccl_2.3.5/lib:$LD_LIBRARY_PATH
include_dir=$( python -c 'import paddle; print(paddle.sysconfig.get_include())' )
lib_dir=$( python -c 'import paddle; print(paddle.sysconfig.get_lib())' )
echo $include_dir
echo $lib_dir
OPS='correlation_op'
for op in ${OPS}
do
nvcc ${op}.cu -c -o ${op}.cu.o -ccbin cc -DPADDLE_WITH_CUDA -DEIGEN_USE_GPU -DPADDLE_USE_DSO -DPADDLE_WITH_MKLDNN -Xcompiler -fPIC -std=c++11 -Xcompiler -fPIC -w --expt-relaxed-constexpr -O0 -g -DNVCC \
-I ${include_dir}/third_party/ \
-I ${include_dir}
done
# g++-4.8 correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
# g++ ${OPS}.cu.o ${OPS}.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
g++ correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
-I ${include_dir}/third_party/ \
-I ${include_dir} \
-L ${lib_dir} \
-L /usr/local/cuda/lib64/ -lpaddle_framework -lcudart
# rm *.cu.o
import unittest
from correlation import correlation
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
def corr(x_1,
x_2,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1,
corr_multiply=1):
K = kernel_size
# rinput1 = np.pad(x_1, tuple([pad_size for _ in range(4)]), mode='constant').transpose(1, 2).transpose(2, 3)
# rinput2 = np.pad(x_2, tuple([pad_size for _ in range(4)]), mode='constant').transpose(1, 2).transpose(2, 3)
rinput1 = np.pad(x_1, ((0, 0), (0, 0), (pad_size, pad_size),
(pad_size, pad_size)),
mode='constant')
rinput2 = np.pad(x_2, ((0, 0), (0, 0), (pad_size, pad_size),
(pad_size, pad_size)),
mode='constant')
rinput1 = np.transpose(rinput1, (0, 2, 3, 1))
rinput2 = np.transpose(rinput2, (0, 2, 3, 1))
B = int(rinput1.shape[0])
H = int(x_1.shape[2])
W = int(x_2.shape[3])
d = max_displacement
D = 2 * d + 1
output = np.zeros((B, D * D, H, W), dtype=np.float32)
for b in range(B):
for i in range(H):
for j in range(W):
for k in range(-d, d + 1):
for l in range(-d, d + 1):
x1_index = i + pad_size
y1_index = j + pad_size
x2_index = x1_index + k
y2_index = y1_index + l
output[b, l + d + D * (k + d), i,
j] = np.mean(rinput1[b, x1_index:x1_index + K,
y1_index:y1_index + K] *
rinput2[b, x2_index:x2_index + K,
y2_index:y2_index + K])
return output
class TestCorrelationOp(unittest.TestCase):
def test_check_output(self):
#x_shape = (1, 196, 3, 3)
np.random.seed(13)
np.set_printoptions(threshold=np.inf)
x_shape = (2, 10, 3, 3)
x_type = 'float32'
x1 = fluid.layers.data(name='x1',
shape=x_shape,
dtype=x_type,
append_batch_size=False)
x2 = fluid.layers.data(name='x2',
shape=x_shape,
dtype=x_type,
append_batch_size=False)
x1_np = np.random.randn(2, 3, 4, 5).astype(x_type)
x2_np = np.random.randn(2, 3, 4, 5).astype(x_type)
out_np = corr(x1_np,
x2_np,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
out = correlation(x1,
x2,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
res = exe.run(feed={'x1': x1_np, 'x2': x2_np}, fetch_list=[out.name])
self.assertTrue(np.allclose(res[0], out_np))
class Net(fluid.dygraph.Layer):
def __init__(self, name_scope):
super(Net, self).__init__(name_scope)
def forward(self, x1, x2):
y = correlation(x1,
x2,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
return y
class TestCorrelationOpDyGraph(unittest.TestCase):
def test_check_output(self):
np.random.seed(13)
np.set_printoptions(threshold=np.inf)
x_shape = (2, 10, 3, 3)
x_type = 'float32'
place = fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
x1_np = np.random.randn(2, 3, 4, 5).astype(x_type)
x2_np = np.random.randn(2, 3, 4, 5).astype(x_type)
out_np = corr(x1_np,
x2_np,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
x1 = to_variable(x1_np)
x2 = to_variable(x2_np)
corr_pd = Net('corr_pd')
y = corr_pd(x1, x2)
out = y.numpy()
self.assertTrue(np.allclose(out, out_np))
if __name__ == '__main__':
unittest.main()
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D
__all__ = ['MultipleBasicBlock', 'MultipleBasicBlock_4']
def conv3x3(in_planes, out_planes, dilation=1, stride=1, param_attr=None):
return Conv2D(in_planes,
out_planes,
filter_size=3,
stride=stride,
padding=int(dilation * (3 - 1) / 2),
dilation=dilation,
bias_attr=False,
param_attr=param_attr)
class BasicBlock(fluid.dygraph.Layer):
expansion = 1
def __init__(self, inplanes, planes, dilation=1, stride=1, downsample=None):
super(BasicBlock, self).__init__()
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=1.0, seed=0))
self.conv1 = conv3x3(inplanes, planes, dilation, stride, param_attr)
self.conv2 = conv3x3(planes, planes, param_attr=param_attr)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
# out = self.bn1(out)
out = fluid.layers.relu(out)
out = self.conv2(out)
# out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = fluid.layers.relu(out)
return out
class MultipleBasicBlock(fluid.dygraph.Layer):
def __init__(self,
input_feature,
block,
num_blocks,
intermediate_feature=64,
dense=True):
super(MultipleBasicBlock, self).__init__()
self.dense = dense
self.num_block = num_blocks
self.intermediate_feature = intermediate_feature
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=1.0, seed=0))
self.block1 = Conv2D(input_feature,
intermediate_feature,
filter_size=7,
stride=1,
padding=3,
bias_attr=True,
param_attr=param_attr)
dim = intermediate_feature
self.block2 = block(dim, dim, dilation=1) if num_blocks >= 2 else None
self.block3 = block(dim, dim, dilation=1) if num_blocks >= 3 else None
self.block4 = block(dim, dim, dilation=1) if num_blocks >= 4 else None
self.block5 = Conv2D(dim, 3, 3, 1, 1)
def forward(self, x):
x = fluid.layers.relu(self.block1(x))
x = self.block2(x) if self.num_block >= 2 else x
x = self.block3(x) if self.num_block >= 3 else x
x = self.block4(x) if self.num_block >= 4 else x
x = self.block5(x)
return x
def MultipleBasicBlock_4(input_feature, intermediate_feature=64):
model = MultipleBasicBlock(input_feature, BasicBlock, 4,
intermediate_feature)
return model
cd pwcnet/correlation_op
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python -c 'import paddle; print(paddle.sysconfig.get_lib())'`
export PYTHONPATH=$PYTHONPATH:`pwd`
cd ../../
VID_PATH=/paddle/work/github/DAIN/data/CBA.mp4
OUT_PATH=output
MODEL_PATH=DAIN_paddle_weight
CUDA_VISIBLE_DEVICES=2 python predict.py \
--time_step 0.125 \
--video_path=$VID_PATH \
--output_path=$OUT_PATH \
--saved_model=$MODEL_PATH
import os, sys
import glob
import shutil
import cv2
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def combine_frames(input, interpolated, combined, num_frames):
frames1 = sorted(glob.glob(os.path.join(input, '*.png')))
frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png')))
num1 = len(frames1)
num2 = len(frames2)
# assert (num1 - 1) * num_frames == num2
for i in range(num1):
src = frames1[i]
imgname = int(src.split('/')[-1].split('.')[-2])
assert i == imgname
dst = os.path.join(combined, '{:08d}.png'.format(i * (num_frames + 1)))
shutil.copy2(src, dst)
if i < num1 - 1:
try:
for k in range(num_frames):
src = frames2[i * num_frames + k]
dst = os.path.join(
combined,
'{:08d}.png'.format(i * (num_frames + 1) + k + 1))
shutil.copy2(src, dst)
except Exception as e:
print(e)
print(len(frames2), num_frames, i, k, i * num_frames + k)
def remove_duplicates(paths):
def dhash(image, hash_size=8):
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
resized = cv2.resize(gray, (hash_size + 1, hash_size))
diff = resized[:, 1:] > resized[:, :-1]
return sum([2**i for (i, v) in enumerate(diff.flatten()) if v])
hashes = {}
image_paths = sorted(glob.glob(os.path.join(paths, '*.png')))
for image_path in image_paths:
image = cv2.imread(image_path)
h = dhash(image)
p = hashes.get(h, [])
p.append(image_path)
hashes[h] = p
for (h, hashed_paths) in hashes.items():
if len(hashed_paths) > 1:
for p in hashed_paths[1:]:
os.remove(p)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
for fid, frame in enumerate(frames):
new_name = '{:08d}'.format(fid) + '.png'
new_name = os.path.join(paths, new_name)
os.rename(frame, new_name)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
return frames
import paddle
from skimage import color
import numpy as np
from PIL import Image
def convertLAB2RGB( lab ):
lab[:, :, 0:1] = lab[:, :, 0:1] * 100 # [0, 1] -> [0, 100]
lab[:, :, 1:3] = np.clip(lab[:, :, 1:3] * 255 - 128, -100, 100) # [0, 1] -> [-128, 128]
rgb = color.lab2rgb( lab.astype(np.float64) )
return rgb
def convertRGB2LABTensor( rgb ):
lab = color.rgb2lab( np.asarray( rgb ) ) # RGB -> LAB L[0, 100] a[-127, 128] b[-128, 127]
ab = np.clip(lab[:, :, 1:3] + 128, 0, 255) # AB --> [0, 255]
ab = paddle.to_tensor(ab.astype('float32')) / 255.
L = lab[:, :, 0] * 2.55 # L --> [0, 255]
L = Image.fromarray( np.uint8( L ) )
L = paddle.to_tensor(np.array(L).astype('float32')[..., np.newaxis] / 255.0)
return L, ab
def addMergin(img, target_w, target_h, background_color=(0,0,0)):
width, height = img.size
if width==target_w and height==target_h:
return img
scale = max(target_w,target_h)/max(width, height)
width = int(width*scale/16.)*16
height = int(height*scale/16.)*16
img = img.resize((width, height), Image.BICUBIC)
xp = (target_w-width)//2
yp = (target_h-height)//2
result = Image.new(img.mode, (target_w, target_h), background_color)
result.paste(img, (xp, yp))
return result
import cv2
import numpy as np
def read_img(path, size=None, is_gt=False):
"""read image by cv2
return: Numpy float32, HWC, BGR, [0,1]"""
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
img = img.astype(np.float32) / 255.
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
if img.shape[2] > 3:
img = img[:, :, :3]
return img
def get_test_neighbor_frames(crt_i, N, max_n, padding='new_info'):
"""Generate an index list for reading N frames from a sequence of images
Args:
crt_i (int): current center index
max_n (int): total number of images in the sequence (counted from 1)
N (int): reading N frames
padding (str): padding mode, one of replicate | reflection | new_info | circle
Example: crt_i = 0, N = 5
replicate: [0, 0, 0, 1, 2]
reflection: [2, 1, 0, 1, 2]
new_info: [4, 3, 0, 1, 2]
circle: [3, 4, 0, 1, 2]
Returns:
return_l (list [int]): a list of indexes
"""
max_n = max_n - 1
n_pad = N // 2
return_l = []
for i in range(crt_i - n_pad, crt_i + n_pad + 1):
if i < 0:
if padding == 'replicate':
add_idx = 0
elif padding == 'reflection':
add_idx = -i
elif padding == 'new_info':
add_idx = (crt_i + n_pad) + (-i)
elif padding == 'circle':
add_idx = N + i
else:
raise ValueError('Wrong padding mode')
elif i > max_n:
if padding == 'replicate':
add_idx = max_n
elif padding == 'reflection':
add_idx = max_n * 2 - i
elif padding == 'new_info':
add_idx = (crt_i - n_pad) - (i - max_n)
elif padding == 'circle':
add_idx = i - N
else:
raise ValueError('Wrong padding mode')
else:
add_idx = i
return_l.append(add_idx)
# name_b = '{:08d}'.format(crt_i)
return return_l
class EDVRDataset:
def __init__(self, frame_paths):
self.frames = frame_paths
def __getitem__(self, index):
indexs = get_test_neighbor_frames(index, 5, len(self.frames))
frame_list = []
for i in indexs:
img = read_img(self.frames[i])
frame_list.append(img)
img_LQs = np.stack(frame_list, axis=0)
# BGR to RGB, HWC to CHW, numpy to tensor
img_LQs = img_LQs[:, :, :, [2, 1, 0]]
img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
return img_LQs, self.frames[index]
def __len__(self):
return len(self.frames)
# examples of running programs:
# bash ./run.sh inference EDVR ./configs/edvr_L.yaml
# bash ./run.sh predict EDVR ./configs/edvr_L.yaml
# configs should be ./configs/xxx.yaml
mode=$1
name=$2
configs=$3
save_inference_dir="./data/inference_model"
use_gpu=True
fix_random_seed=False
log_interval=1
valid_interval=1
weights="./weights/paddle_state_dict_L.npz"
export CUDA_VISIBLE_DEVICES=6 #0,1,5,6 fast, 2,3,4,7 slow
# export FLAGS_fast_eager_deletion_mode=1
# export FLAGS_eager_delete_tensor_gb=0.0
# export FLAGS_fraction_of_gpu_memory_to_use=0.98
if [ "$mode"x == "predict"x ]; then
echo $mode $name $configs $weights
if [ "$weights"x != ""x ]; then
python predict.py --model_name=$name \
--config=$configs \
--log_interval=$log_interval \
--video_path='' \
--use_gpu=$use_gpu
else
python predict.py --model_name=$name \
--config=$configs \
--log_interval=$log_interval \
--use_gpu=$use_gpu \
--video_path=''
fi
fi
dataset_params:
root_dir: data/vox-png
frame_shape: [256, 256, 3]
id_sampling: True
pairs_list: data/vox256.csv
augmentation_params:
flip_param:
horizontal_flip: True
time_flip: True
jitter_param:
brightness: 0.1
contrast: 0.1
saturation: 0.1
hue: 0.1
model_params:
common_params:
num_kp: 10
num_channels: 3
estimate_jacobian: True
kp_detector_params:
temperature: 0.1
block_expansion: 32
max_features: 1024
scale_factor: 0.25
num_blocks: 5
generator_params:
block_expansion: 64
max_features: 512
num_down_blocks: 2
num_bottleneck_blocks: 6
estimate_occlusion_map: True
dense_motion_params:
block_expansion: 64
max_features: 1024
num_blocks: 5
scale_factor: 0.25
discriminator_params:
scales: [1]
block_expansion: 32
max_features: 512
num_blocks: 4
sn: True
train_params:
num_epochs: 100
num_repeats: 75
epoch_milestones: [60, 90]
lr_generator: 2.0e-4
lr_discriminator: 2.0e-4
lr_kp_detector: 2.0e-4
batch_size: 40
scales: [1, 0.5, 0.25, 0.125]
checkpoint_freq: 50
transform_params:
sigma_affine: 0.05
sigma_tps: 0.005
points_tps: 5
loss_weights:
generator_gan: 0
discriminator_gan: 1
feature_matching: [10, 10, 10, 10]
perceptual: [10, 10, 10, 10, 10]
equivariance_value: 10
equivariance_jacobian: 10
reconstruction_params:
num_videos: 1000
format: '.mp4'
animate_params:
num_pairs: 50
format: '.mp4'
normalization_params:
adapt_movement_scale: False
use_relative_movement: True
use_relative_jacobian: True
visualizer_params:
kp_size: 5
draw_border: True
colormap: 'gist_rainbow'
# Model description
# Currently supported models: DAIN (frame interpolation), DeOldify (colorization), DeepRemaster (denoising and colorization), EDVR (video super-resolution based on consecutive frames), RealSR (single-image super-resolution)
# Parameter description
# input           path of the input video
# output          directory where the output video is saved
# proccess_order  models to apply and the order in which they run
python tools/video-enhance.py \
--input input.mp4 --output output --proccess_order DeOldify RealSR
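# For example (a hypothetical variant, assuming the DAIN weights are also installed),
# frame interpolation can be chained before colorization in a single pass:
python tools/video-enhance.py \
--input input.mp4 --output output --proccess_order DAIN DeOldify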
import matplotlib
matplotlib.use('Agg')
import os
import sys
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import argparse
import yaml
import pickle
from argparse import ArgumentParser
from tqdm import tqdm
import imageio
import numpy as np
from skimage.transform import resize
from skimage import img_as_ubyte
import paddle
from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator
from ppgan.modules.keypoint_detector import KPDetector
from ppgan.utils.animate import normalize_kp
from scipy.spatial import ConvexHull
paddle.disable_static()
if sys.version_info[0] < 3:
raise Exception(
"You must use Python 3 or higher. Recommended version is Python 3.7")
def load_checkpoints(config_path, checkpoint_path, cpu=False):
with open(config_path) as f:
config = yaml.load(f)
generator = OcclusionAwareGenerator(
**config['model_params']['generator_params'],
**config['model_params']['common_params'])
kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
**config['model_params']['common_params'])
checkpoint = pickle.load(open(checkpoint_path, 'rb'))
generator.set_state_dict(checkpoint['generator'])
kp_detector.set_state_dict(checkpoint['kp_detector'])
generator.eval()
kp_detector.eval()
return generator, kp_detector
def make_animation(source_image,
driving_video,
generator,
kp_detector,
relative=True,
adapt_movement_scale=True,
cpu=False):
with paddle.no_grad():
predictions = []
source = paddle.to_tensor(source_image[np.newaxis].astype(
np.float32)).transpose([0, 3, 1, 2])
# if not cpu:
# source = source.cuda()
driving = paddle.to_tensor(
np.array(driving_video)[np.newaxis].astype(np.float32)).transpose(
[0, 4, 1, 2, 3])
kp_source = kp_detector(source)
kp_driving_initial = kp_detector(driving[:, :, 0])
for frame_idx in tqdm(range(driving.shape[2])):
driving_frame = driving[:, :, frame_idx]
kp_driving = kp_detector(driving_frame)
kp_norm = normalize_kp(kp_source=kp_source,
kp_driving=kp_driving,
kp_driving_initial=kp_driving_initial,
use_relative_movement=relative,
use_relative_jacobian=relative,
adapt_movement_scale=adapt_movement_scale)
out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
predictions.append(
np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0])
return predictions
def find_best_frame(source, driving, cpu=False):
import face_alignment
def normalize_kp(kp):
kp = kp - kp.mean(axis=0, keepdims=True)
area = ConvexHull(kp[:, :2]).volume
area = np.sqrt(area)
kp[:, :2] = kp[:, :2] / area
return kp
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
flip_input=True,
device='cpu' if cpu else 'cuda')
kp_source = fa.get_landmarks(255 * source)[0]
kp_source = normalize_kp(kp_source)
norm = float('inf')
frame_num = 0
for i, image in tqdm(enumerate(driving)):
kp_driving = fa.get_landmarks(255 * image)[0]
kp_driving = normalize_kp(kp_driving)
new_norm = (np.abs(kp_source - kp_driving)**2).sum()
if new_norm < norm:
norm = new_norm
frame_num = i
return frame_num
from ppgan.apps.first_order_predictor import FirstOrderPredictor
parser = argparse.ArgumentParser()
parser.add_argument("--config", default=None, help="path to config")
parser.add_argument("--weight_path",
default=None,
help="path to checkpoint to restore")
parser.add_argument("--source_image", type=str, help="path to source image")
parser.add_argument("--driving_video", type=str, help="path to driving video")
parser.add_argument("--output", default='output', help="path to output")
parser.add_argument("--relative",
dest="relative",
action="store_true",
help="use relative or absolute keypoint coordinates")
parser.add_argument(
"--adapt_scale",
dest="adapt_scale",
action="store_true",
help="adapt movement scale based on convex hull of keypoints")
parser.add_argument(
"--find_best_frame",
dest="find_best_frame",
action="store_true",
help=
"Generate from the frame that is the most alligned with source. (Only for faces, requires face_aligment lib)"
)
parser.add_argument("--best_frame",
dest="best_frame",
type=int,
default=None,
help="Set frame to start from.")
parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")
parser.set_defaults(relative=False)
parser.set_defaults(adapt_scale=False)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--config", required=True, help="path to config")
parser.add_argument("--checkpoint",
default='vox-cpk.pth.tar',
help="path to checkpoint to restore")
parser.add_argument("--source_image",
default='sup-mat/source.png',
help="path to source image")
parser.add_argument("--driving_video",
default='sup-mat/source.png',
help="path to driving video")
parser.add_argument("--result_video",
default='result.mp4',
help="path to output")
parser.add_argument("--relative",
dest="relative",
action="store_true",
help="use relative or absolute keypoint coordinates")
parser.add_argument(
"--adapt_scale",
dest="adapt_scale",
action="store_true",
help="adapt movement scale based on convex hull of keypoints")
parser.add_argument(
"--find_best_frame",
dest="find_best_frame",
action="store_true",
help=
"Generate from the frame that is the most alligned with source. (Only for faces, requires face_aligment lib)"
)
parser.add_argument("--best_frame",
dest="best_frame",
type=int,
default=None,
help="Set frame to start from.")
parser.add_argument("--cpu",
dest="cpu",
action="store_true",
help="cpu mode.")
parser.set_defaults(relative=False)
parser.set_defaults(adapt_scale=False)
opt = parser.parse_args()
source_image = imageio.imread(opt.source_image)
reader = imageio.get_reader(opt.driving_video)
fps = reader.get_meta_data()['fps']
driving_video = []
try:
for im in reader:
driving_video.append(im)
except RuntimeError:
pass
reader.close()
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [
resize(frame, (256, 256))[..., :3] for frame in driving_video
]
generator, kp_detector = load_checkpoints(config_path=opt.config,
checkpoint_path=opt.checkpoint,
cpu=opt.cpu)
if opt.find_best_frame or opt.best_frame is not None:
i = opt.best_frame if opt.best_frame is not None else find_best_frame(
source_image, driving_video, cpu=opt.cpu)
print("Best frame: " + str(i))
driving_forward = driving_video[i:]
driving_backward = driving_video[:(i + 1)][::-1]
predictions_forward = make_animation(
source_image,
driving_forward,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
predictions_backward = make_animation(
source_image,
driving_backward,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
predictions = predictions_backward[::-1] + predictions_forward[1:]
else:
predictions = make_animation(source_image,
driving_video,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
imageio.mimsave(opt.result_video,
[img_as_ubyte(frame) for frame in predictions],
fps=fps)
args = parser.parse_args()
if args.cpu:
paddle.set_device('cpu')
predictor = FirstOrderPredictor(output=args.output,
weight_path=args.weight_path,
config=args.config,
relative=args.relative,
adapt_scale=args.adapt_scale,
find_best_frame=args.find_best_frame,
best_frame=args.best_frame)
predictor.run(args.source_image, args.driving_video)
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import argparse
from pathlib import Path
from PIL import Image
from fire import Fire
import cv2
import numpy as np
import paddle
import paddle.vision.transforms as T
import ppgan.faceutils as futils
from ppgan.utils.options import parse_args
from ppgan.utils.config import get_config
from ppgan.utils.setup import setup
from ppgan.utils.filesystem import load
from ppgan.engine.trainer import Trainer
from ppgan.models.builder import build_model
from ppgan.utils.preprocess import *
def toImage(net_output):
img = net_output.squeeze(0).transpose(
(1, 2, 0)).numpy() # [1,c,h,w]->[h,w,c]
img = (img * 255.0).clip(0, 255)
img = np.uint8(img)
img = Image.fromarray(img, mode='RGB')
return img
def mask2image(mask: np.ndarray, format="HWC"):
H, W = mask.shape
canvas = np.zeros((H, W, 3), dtype=np.uint8)
for i in range(int(mask.max())):
color = np.random.rand(1, 1, 3) * 255
canvas += (mask == i)[:, :, None] * color.astype(np.uint8)
return canvas
class PreProcess:
def __init__(self, config, need_parser=True):
self.img_size = 256
self.transform = transform = T.Compose([
T.Resize(size=256),
T.Permute(to_rgb=False),
])
self.norm = T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])
if need_parser:
self.face_parser = futils.mask.FaceParser()
self.up_ratio = 0.6 / 0.85
self.down_ratio = 0.2 / 0.85
self.width_ratio = 0.2 / 0.85
def __call__(self, image):
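# Preprocess one face image for makeup transfer: detect and crop the face
# with dlib, parse a 512x512 face-segmentation mask, compute landmarks scaled
# to img_size, and build the landmark position map P and augmented masks that
# the makeup generator consumes.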
face = futils.dlib.detect(image)
if not face:
return
face_on_image = face[0]
image, face, crop_face = futils.dlib.crop(image, face_on_image,
self.up_ratio,
self.down_ratio,
self.width_ratio)
np_image = np.array(image)
mask = self.face_parser.parse(
np.float32(cv2.resize(np_image, (512, 512))))
mask = cv2.resize(mask.numpy(), (self.img_size, self.img_size),
interpolation=cv2.INTER_NEAREST)
mask = mask.astype(np.uint8)
mask_color = mask2image(mask)
cv2.imwrite('mask_temp.png', mask_color)
mask_tensor = paddle.to_tensor(mask)
lms = futils.dlib.landmarks(image, face) * self.img_size / image.width
lms = lms.round()
P_np = generate_P_from_lmks(lms, self.img_size, self.img_size,
self.img_size)
mask_aug = generate_mask_aug(mask, lms)
image = self.transform(np_image)
return [
self.norm(image),
np.float32(mask_aug),
np.float32(P_np),
np.float32(mask)
], face_on_image, crop_face
class PostProcess:
def __init__(self, config):
self.denoise = True
self.img_size = 256
def __call__(self, source: Image, result: Image):
# TODO: Refactor -> name, resize
source = np.array(source)
result = np.array(result)
height, width = source.shape[:2]
small_source = cv2.resize(source, (self.img_size, self.img_size))
laplacian_diff = source.astype(np.float64) - cv2.resize(
small_source, (width, height)).astype(np.float64)
result = (cv2.resize(result,
(width, height)) + laplacian_diff).round().clip(
0, 255).astype(np.uint8)
if self.denoise:
result = cv2.fastNlMeansDenoisingColored(result)
result = Image.fromarray(result).convert('RGB')
return result
class Inference:
def __init__(self, config, model_path=''):
self.model = build_model(config)
self.preprocess = PreProcess(config)
self.model_path = model_path
def transfer(self, source, reference, with_face=False):
source_input, face, crop_face = self.preprocess(source)
reference_input, face, crop_face = self.preprocess(reference)
consis_mask = np.float32(
calculate_consis_mask(source_input[1], reference_input[1]))
consis_mask = paddle.to_tensor(np.expand_dims(consis_mask, 0))
if not (source_input and reference_input):
if with_face:
return None, None
return
for i in range(len(source_input) - 1):
source_input[i] = paddle.to_tensor(
np.expand_dims(source_input[i], 0))
for i in range(len(reference_input) - 1):
reference_input[i] = paddle.to_tensor(
np.expand_dims(reference_input[i], 0))
input_data = {
'image_A': source_input[0],
'image_B': reference_input[0],
'mask_A_aug': source_input[1],
'mask_B_aug': reference_input[1],
'P_A': source_input[2],
'P_B': reference_input[2],
'consis_mask': consis_mask
}
state_dicts = load(self.model_path)
net = getattr(self.model, 'netG')
net.set_dict(state_dicts['netG'])
result, _ = self.model.test(input_data)
print('result shape: ', result.shape)
min_, max_ = result.min(), result.max()
result += -min_
result = paddle.divide(result, max_ - min_ + 1e-5)
img = toImage(result)
if with_face:
return img, crop_face
img.save('before.png')
return img
def main(args, cfg, save_path='transferred_image.png'):
setup(args, cfg)
inference = Inference(cfg, args.model_path)
postprocess = PostProcess(cfg)
source = Image.open(args.source_path).convert("RGB")
reference_paths = list(Path(args.reference_dir).glob("*"))
np.random.shuffle(reference_paths)
for reference_path in reference_paths:
if not reference_path.is_file():
print(reference_path, "is not a valid file.")
continue
reference = Image.open(reference_path).convert("RGB")
# Transfer the psgan from reference to source.
image, face = inference.transfer(source, reference, with_face=True)
image.save('before.png')
source_crop = source.crop(
(face.left(), face.top(), face.right(), face.bottom()))
image = postprocess(source_crop, image)
image.save(save_path)
if __name__ == '__main__':
args = parse_args()
cfg = get_config(args.config_file)
main(args, cfg)
import sys
sys.path.append('.')
import argparse
import paddle
from DAIN.predict import VideoFrameInterp
from DeepRemaster.predict import DeepReasterPredictor
from DeOldify.predict import DeOldifyPredictor
from RealSR.predict import RealSRPredictor
from EDVR.predict import EDVRPredictor
parser = argparse.ArgumentParser(description='Fix video')
parser.add_argument('--input', type=str, default=None, help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--DAIN_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--DeepRemaster_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--DeOldify_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--RealSR_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--EDVR_weight',
type=str,
default=None,
help='Path to model weight')
# DAIN args
parser.add_argument('--time_step',
type=float,
default=0.5,
help='choose the time steps')
# DeepRemaster args
parser.add_argument('--reference_dir',
type=str,
default=None,
help='Path to the reference image directory')
parser.add_argument('--colorization',
action='store_true',
default=False,
help='Remaster with colorization')
parser.add_argument('--mindim',
type=int,
default=360,
help='Length of minimum image edges')
# DeOldify args
parser.add_argument('--render_factor',
type=int,
default=32,
help='model inputsize=render_factor*16')
# proccess_order supports the following model names: [DAIN, DeepRemaster, DeOldify, RealSR, EDVR]
parser.add_argument('--proccess_order',
type=str,
default='none',
nargs='+',
help='Process order')
if __name__ == "__main__":
args = parser.parse_args()
orders = args.proccess_order
temp_video_path = None
for order in orders:
print('Model {} process start...'.format(order))
if temp_video_path is None:
temp_video_path = args.input
if order == 'DAIN':
predictor = VideoFrameInterp(args.time_step,
args.DAIN_weight,
temp_video_path,
output_path=args.output)
frames_path, temp_video_path = predictor.run()
elif order == 'DeepRemaster':
paddle.disable_static()
predictor = DeepReasterPredictor(
temp_video_path,
args.output,
weight_path=args.DeepRemaster_weight,
colorization=args.colorization,
reference_dir=args.reference_dir,
mindim=args.mindim)
frames_path, temp_video_path = predictor.run()
paddle.enable_static()
elif order == 'DeOldify':
paddle.disable_static()
predictor = DeOldifyPredictor(temp_video_path,
args.output,
weight_path=args.DeOldify_weight)
frames_path, temp_video_path = predictor.run()
paddle.enable_static()
elif order == 'RealSR':
paddle.disable_static()
predictor = RealSRPredictor(temp_video_path,
args.output,
weight_path=args.RealSR_weight)
frames_path, temp_video_path = predictor.run()
paddle.enable_static()
elif order == 'EDVR':
predictor = EDVRPredictor(temp_video_path,
args.output,
weight_path=args.EDVR_weight)
frames_path, temp_video_path = predictor.run()
print('Model {} output frames path:'.format(order), frames_path)
print('Model {} output video path:'.format(order), temp_video_path)
print('Model {} process done!'.format(order))
......@@ -36,16 +36,18 @@ dataset:
output_nc: 3
serial_batches: False
pool_size: 50
transform:
load_size: 286
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [286, 286]
interpolation: 2 #cv2.INTER_CUBIC
- name: RandomCrop
output_size: [256, 256]
- name: RandomHorizontalFlip
prob: 0.5
- name: Permute
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
test:
name: SingleDataset
dataroot: data/cityscapes/testB
......@@ -55,17 +57,14 @@ dataset:
output_nc: 3
serial_batches: False
pool_size: 50
transform:
load_size: 256
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [256, 256]
interpolation: 2 #cv2.INTER_CUBIC
- name: Permute
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
optimizer:
name: Adam
......
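# The `transforms` entries above are a declarative form of an ordinary
# composed preprocessing pipeline. A rough equivalent in plain
# paddle.vision.transforms is sketched below (a sketch only: exact class
# names and the interpolation argument vary across Paddle versions, and
# `Transpose` stands in for the step the config calls `Permute`).
import paddle.vision.transforms as T

# Training pipeline: resize to 286, random-crop to 256, random flip,
# HWC -> CHW, then scale to roughly [-1, 1] via mean/std of 127.5.
train_transform = T.Compose([
    T.Resize(size=(286, 286)),
    T.RandomCrop(size=(256, 256)),
    T.RandomHorizontalFlip(prob=0.5),
    T.Transpose(),  # HWC -> CHW
    T.Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5]),
])

# Test pipeline: deterministic resize, permute and normalize only.
test_transform = T.Compose([
    T.Resize(size=(256, 256)),
    T.Transpose(),
    T.Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5]),
])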
......@@ -35,16 +35,18 @@ dataset:
output_nc: 3
serial_batches: False
pool_size: 50
transform:
load_size: 286
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [286, 286]
interpolation: 2 #cv2.INTER_CUBIC
- name: RandomCrop
output_size: [256, 256]
- name: RandomHorizontalFlip
prob: 0.5
- name: Permute
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
test:
name: SingleDataset
dataroot: data/horse2zebra/testA
......@@ -55,15 +57,14 @@ dataset:
serial_batches: False
pool_size: 50
transform:
load_size: 256
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [256, 256]
interpolation: 2 #cv2.INTER_CUBIC
- name: Permute
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
optimizer:
name: Adam
......
epochs: 100
isTrain: True
output_dir: tmp
checkpoints_dir: checkpoints
lambda_A: 10.0
lambda_B: 10.0
lambda_identity: 0.5
model:
name: MakeupModel
generator:
name: GeneratorPSGANAttention
conv_dim: 64
repeat_num: 6
discriminator:
name: NLayerDiscriminator
ndf: 64
n_layers: 3
input_nc: 3
norm_type: spectral
gan_mode: lsgan
dataset:
train:
name: MakeupDataset
trans_size: 256
dataroot: MT-Dataset
cls_list: [non-makeup, makeup]
phase: train
pool_size: 16
test:
name: MakeupDataset
trans_size: 256
dataroot: MT-Dataset
cls_list: [non-makeup, makeup]
phase: test
pool_size: 16
optimizer:
name: Adam
beta1: 0.5
lr_scheduler:
name: linear
learning_rate: 0.0002
start_epoch: 100
decay_epochs: 100
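# With a `linear` scheduler these settings typically mean: hold the learning
# rate at 0.0002 for the first 100 epochs, then decay it linearly towards
# zero over the following 100 epochs.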
log_config:
interval: 10
visiual_interval: 500
snapshot_config:
interval: 1
......@@ -33,16 +33,23 @@ dataset:
output_nc: 3
serial_batches: False
pool_size: 0
transform:
load_size: 286
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [286, 286]
interpolation: 2 #cv2.INTER_CUBIC
keys: [image, image]
- name: PairedRandomCrop
output_size: [256, 256]
keys: [image, image]
- name: PairedRandomHorizontalFlip
prob: 0.5
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
test:
name: PairedDataset
dataroot: data/cityscapes/
......@@ -53,16 +60,18 @@ dataset:
output_nc: 3
serial_batches: True
pool_size: 50
transform:
load_size: 256
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [256, 256]
interpolation: 2 #cv2.INTER_CUBIC
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
optimizer:
name: Adam
......
......@@ -32,16 +32,23 @@ dataset:
output_nc: 3
serial_batches: False
pool_size: 0
transform:
load_size: 286
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [286, 286]
interpolation: 2 #cv2.INTER_CUBIC
keys: [image, image]
- name: PairedRandomCrop
output_size: [256, 256]
keys: [image, image]
- name: PairedRandomHorizontalFlip
prob: 0.5
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
test:
name: PairedDataset
dataroot: data/cityscapes/
......@@ -52,16 +59,17 @@ dataset:
output_nc: 3
serial_batches: True
pool_size: 50
transform:
load_size: 256
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [256, 256]
interpolation: 2 #cv2.INTER_CUBIC
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
optimizer:
name: Adam
......
......@@ -32,16 +32,23 @@ dataset:
output_nc: 3
serial_batches: False
pool_size: 0
transform:
load_size: 286
crop_size: 256
preprocess: resize_and_crop
no_flip: False
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [286, 286]
interpolation: 2 #cv2.INTER_CUBIC
keys: [image, image]
- name: PairedRandomCrop
output_size: [256, 256]
keys: [image, image]
- name: PairedRandomHorizontalFlip
prob: 0.5
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
test:
name: PairedDataset
dataroot: data/facades/
......@@ -52,16 +59,17 @@ dataset:
output_nc: 3
serial_batches: True
pool_size: 50
transform:
load_size: 256
crop_size: 256
preprocess: resize_and_crop
no_flip: True
normalize:
mean:
(127.5, 127.5, 127.5)
std:
(127.5, 127.5, 127.5)
transforms:
- name: Resize
size: [256, 256]
interpolation: 2 #cv2.INTER_CUBIC
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
optimizer:
name: Adam
......
from .dain_predictor import DAINPredictor
from .deepremaster_predictor import DeepRemasterPredictor
from .deoldify_predictor import DeOldifyPredictor
from .realsr_predictor import RealSRPredictor
from .edvr_predictor import EDVRPredictor
from .first_order_predictor import FirstOrderPredictor
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import paddle
class BasePredictor(object):
def __init__(self):
pass
def build_inference_model(self):
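# In dynamic-graph mode the subclass is expected to build its own dygraph
# model; in static-graph mode this loads an exported inference model
# (program, feed names and fetch targets) from self.weight_path, picking the
# model/param files by name.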
if paddle.in_dynamic_mode():
# todo self.model = build_model(self.cfg)
pass
else:
place = paddle.fluid.framework._current_expected_place()
self.exe = paddle.fluid.Executor(place)
file_names = os.listdir(self.weight_path)
for file_name in file_names:
if file_name.find('model') > -1:
model_file = file_name
elif file_name.find('param') > -1:
param_file = file_name
self.program, self.feed_names, self.fetch_targets = paddle.static.load_inference_model(
dirname=self.weight_path,
executor=self.exe,
model_filename=model_file,
params_filename=param_file)
print(self.feed_names)
def base_forward(self, inputs):
if paddle.in_dynamic_mode():
out = self.model(inputs)
else:
feed_dict = {}
if isinstance(inputs, dict):
feed_dict = inputs
elif isinstance(inputs, (list, tuple)):
for i, feed_name in enumerate(self.feed_names):
feed_dict[feed_name] = inputs[i]
else:
feed_dict[self.feed_names[0]] = inputs
out = self.exe.run(self.program,
fetch_list=self.fetch_targets,
feed=feed_dict)
return out
def is_video(self, input):
try:
cv2.VideoCapture(input)
return True
except:
return False
def run(self):
raise NotImplementedError
import os, sys
import math
import random
import time
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import glob
import shutil
import numpy as np
from tqdm import tqdm
from imageio import imread, imsave
import cv2
import paddle
import paddle.fluid as fluid
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import video2frames, frames2video
from .base_predictor import BasePredictor
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
class DAINPredictor(BasePredictor):
def __init__(self,
output_path='output',
weight_path=None,
time_step=None,
use_gpu=True,
key_frame_thread=0.,
remove_duplicates=False):
self.output_path = os.path.join(output_path, 'DAIN')
if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)
self.weight_path = weight_path
self.time_step = time_step
self.key_frame_thread = key_frame_thread
self.remove_duplicates = remove_duplicates
self.build_inference_model()
def run(self, video_path):
frame_path_input = os.path.join(self.output_path, 'frames-input')
frame_path_interpolated = os.path.join(self.output_path,
'frames-interpolated')
frame_path_combined = os.path.join(self.output_path, 'frames-combined')
video_path_output = os.path.join(self.output_path, 'videos-output')
if not os.path.exists(self.output_path):
os.makedirs(self.output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
timestep = self.time_step
num_frames = int(1.0 / timestep) - 1
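# e.g. time_step=0.5 -> 1 interpolated frame per input pair (2x frame rate),
# time_step=0.25 -> 3 interpolated frames per pair (4x frame rate).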
import networks
from util import *
from my_args import args
if __name__ == '__main__':
DO_MiddleBurryOther = True
video_path = args.video_path
output_path = args.output_path
frame_path_input = os.path.join(output_path, 'frames-input')
frame_path_interpolated = os.path.join(output_path, 'frames-interpolated')
frame_path_combined = os.path.join(output_path, 'frames-combined')
video_path_input = os.path.join(output_path, 'videos-input')
video_path_output = os.path.join(output_path, 'videos-output')
if not os.path.exists(output_path):
os.makedirs(output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_input):
os.makedirs(video_path_input)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
args.KEY_FRAME_THREAD = 0.
saved_model = args.saved_model
timestep = args.time_step
num_frames = int(1.0 / timestep) - 1
image = fluid.data(name='image',
shape=[2, 1, args.channels, -1, -1],
dtype='float32')
DAIN = networks.__dict__["DAIN_slowmotion"](channel=args.channels,
filter_size=args.filter_size,
timestep=args.time_step,
training=False)
out = DAIN(image)
out = out[0][1]
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
fetch_list = [out.name]
inference_program = fluid.default_main_program().clone(for_test=True)
inference_program = fluid.io.load_persistables(exe, saved_model,
inference_program)
if not DO_MiddleBurryOther:
sys.exit()
if video_path.endswith('.mp4'):
videos = [video_path]
else:
videos = sorted(glob.glob(os.path.join(video_path, '*.mp4')))
for cnt, vid in enumerate(videos):
print("Interpolating video:", vid)
cap = cv2.VideoCapture(vid)
cap = cv2.VideoCapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS)
print("Old fps (frame rate): ", fps)
timestep = args.time_step
times_interp = int(1.0 / timestep)
r2 = str(int(fps) * times_interp)
print("New fps (frame rate): ", r2)
# set start and end of video
#ss = 0
#t = 10
#ss = time.strftime('%H:%M:%S', time.gmtime(ss))
#t = time.strftime('%H:%M:%S', time.gmtime(t))
#print(r, ss, t)
r = None
ss = None
t = None
out_path = video2frames(video_path, frame_path_input)
out_path = dump_frames_ffmpeg(vid, frame_path_input, r, ss, t)
vidname = vid.split('/')[-1].split('.')[0]
tot_timer = AverageMeter()
proc_timer = AverageMeter()
end = time.time()
vidname = video_path.split('/')[-1].split('.')[0]
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
orig_frames = len(frames)
need_frames = orig_frames * times_interp
if self.remove_duplicates:
frames = self.remove_duplicate_frames(out_path)
left_frames = len(frames)
timestep = left_frames / need_frames
num_frames = int(1.0 / timestep) - 1
img = imread(frames[0])
......@@ -110,7 +99,7 @@ if __name__ == '__main__':
int_height = img.shape[0]
channel = img.shape[2]
if not channel == 3:
continue
return
if int_width != ((int_width >> 7) << 7):
int_width_pad = (((int_width >> 7) + 1) << 7) # more than necessary
......@@ -132,16 +121,13 @@ if __name__ == '__main__':
padding_bottom = 32
frame_num = len(frames)
print(os.path.join(frame_path_input, vidname, '*.png'))
print('processing {} frames, from video: {}'.format(frame_num, vid))
if not os.path.exists(os.path.join(frame_path_interpolated, vidname)):
os.makedirs(os.path.join(frame_path_interpolated, vidname))
if not os.path.exists(os.path.join(frame_path_combined, vidname)):
os.makedirs(os.path.join(frame_path_combined, vidname))
for i in range(frame_num - 1):
print(frames[i])
for i in tqdm(range(frame_num - 1)):
first = frames[i]
second = frames[i + 1]
......@@ -155,79 +141,116 @@ if __name__ == '__main__':
img_second_gray = img_second_gray.flatten(order='C')
corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1]
key_frame = False
if corr < args.KEY_FRAME_THREAD:
if corr < self.key_frame_thread:
key_frame = True
'''-------------------------------------------------------'''
X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
if key_frame:
y_ = [
np.transpose(255.0 * X0.clip(0, 1.0), (1, 2, 0))
for i in range(num_frames)
]
else:
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
print("size before padding ", X0.shape)
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
print("size after padding ", X0.shape)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
proc_end = time.time()
o = exe.run(inference_program,
fetch_list=fetch_list,
feed={"image": X})
y_ = o[0]
proc_timer.update(time.time() - proc_end)
tot_timer.update(time.time() - end)
end = time.time()
print("*******current image process time \t " +
str(time.time() - proc_end) + "s ******")
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>4d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
timestep = args.time_step
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
o = self.base_forward(X)
y_ = o[0]
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [kk * timestep for kk in range(1, 1 + num_frames, 1)]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(frame_path_interpolated, vidname,
"{:0>6d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
num_frames = int(1.0 / timestep) - 1
input_dir = os.path.join(frame_path_input, vidname)
interpolated_dir = os.path.join(frame_path_interpolated, vidname)
combined_dir = os.path.join(frame_path_combined, vidname)
combine_frames(input_dir, interpolated_dir, combined_dir, num_frames)
self.combine_frames(input_dir, interpolated_dir, combined_dir,
num_frames)
frame_pattern_combined = os.path.join(frame_path_combined, vidname,
'%08d.png')
video_pattern_output = os.path.join(video_path_output, vidname + '.mp4')
if os.path.exists(video_pattern_output):
os.remove(video_pattern_output)
frames_to_video_ffmpeg(frame_pattern_combined, video_pattern_output, r2)
frames2video(frame_pattern_combined, video_pattern_output, r2)
return frame_pattern_combined, video_pattern_output
def combine_frames(self, input, interpolated, combined, num_frames):
frames1 = sorted(glob.glob(os.path.join(input, '*.png')))
frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png')))
num1 = len(frames1)
num2 = len(frames2)
for i in range(num1):
src = frames1[i]
imgname = int(src.split('/')[-1].split('.')[-2])
assert i == imgname
dst = os.path.join(combined,
'{:08d}.png'.format(i * (num_frames + 1)))
shutil.copy2(src, dst)
if i < num1 - 1:
try:
for k in range(num_frames):
src = frames2[i * num_frames + k]
dst = os.path.join(
combined,
'{:08d}.png'.format(i * (num_frames + 1) + k + 1))
shutil.copy2(src, dst)
except Exception as e:
print(e)
def remove_duplicate_frames(self, paths):
def dhash(image, hash_size=8):
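# Difference hash: shrink to a (hash_size+1) x hash_size grayscale image,
# compare each pixel with its horizontal neighbour, and pack the resulting
# booleans into an integer; frames sharing a hash are treated as duplicates.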
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
resized = cv2.resize(gray, (hash_size + 1, hash_size))
diff = resized[:, 1:] > resized[:, :-1]
return sum([2**i for (i, v) in enumerate(diff.flatten()) if v])
hashes = {}
image_paths = sorted(glob.glob(os.path.join(paths, '*.png')))
for image_path in image_paths:
image = cv2.imread(image_path)
h = dhash(image)
p = hashes.get(h, [])
p.append(image_path)
hashes[h] = p
for (h, hashed_paths) in hashes.items():
if len(hashed_paths) > 1:
for p in hashed_paths[1:]:
os.remove(p)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
for fid, frame in enumerate(frames):
new_name = '{:08d}'.format(fid) + '.png'
new_name = os.path.join(paths, new_name)
os.rename(frame, new_name)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
return frames
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import paddle
import paddle.nn as nn
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
from PIL import Image
import subprocess
import numpy as np
from tqdm import tqdm
import argparse
import subprocess
import utils
from PIL import Image
from skimage import color
import paddle
from ppgan.models.generators.remaster import NetworkR, NetworkC
from paddle.utils.download import get_path_from_url
from .base_predictor import BasePredictor
DEEPREMASTER_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams'
parser = argparse.ArgumentParser(description='Remastering')
parser.add_argument('--input', type=str, default=None, help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--reference_dir',
type=str,
default=None,
help='Path to the reference image directory')
parser.add_argument('--colorization',
action='store_true',
default=False,
help='Remaster with colorization')
parser.add_argument('--mindim',
type=int,
default=360,
help='Length of minimum image edges')
class DeepReasterPredictor:
def convertLAB2RGB(lab):
lab[:, :, 0:1] = lab[:, :, 0:1] * 100 # [0, 1] -> [0, 100]
lab[:, :, 1:3] = np.clip(lab[:, :, 1:3] * 255 - 128, -100,
100) # [0, 1] -> [-128, 128]
rgb = color.lab2rgb(lab.astype(np.float64))
return rgb
def convertRGB2LABTensor(rgb):
lab = color.rgb2lab(
np.asarray(rgb)) # RGB -> LAB L[0, 100] a[-127, 128] b[-128, 127]
ab = np.clip(lab[:, :, 1:3] + 128, 0, 255) # AB --> [0, 255]
ab = paddle.to_tensor(ab.astype('float32')) / 255.
L = lab[:, :, 0] * 2.55 # L --> [0, 255]
L = Image.fromarray(np.uint8(L))
L = paddle.to_tensor(np.array(L).astype('float32')[..., np.newaxis] / 255.0)
return L, ab
def addMergin(img, target_w, target_h, background_color=(0, 0, 0)):
width, height = img.size
if width == target_w and height == target_h:
return img
scale = max(target_w, target_h) / max(width, height)
width = int(width * scale / 16.) * 16
height = int(height * scale / 16.) * 16
img = img.resize((width, height), Image.BICUBIC)
xp = (target_w - width) // 2
yp = (target_h - height) // 2
result = Image.new(img.mode, (target_w, target_h), background_color)
result.paste(img, (xp, yp))
return result
class DeepRemasterPredictor(BasePredictor):
def __init__(self,
input,
output,
output='output',
weight_path=None,
colorization=False,
reference_dir=None,
mindim=360):
self.input = input
self.output = os.path.join(output, 'DeepRemaster')
self.colorization = colorization
self.reference_dir = reference_dir
self.mindim = mindim
if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(DEEPREMASTER_WEIGHT_URL, cur_path)
state_dict, _ = paddle.load(weight_path)
self.weight_path = weight_path
state_dict = paddle.load(weight_path)
self.modelR = NetworkR()
self.modelR.load_dict(state_dict['modelR'])
......@@ -63,7 +92,7 @@ class DeepReasterPredictor:
self.modelC.load_dict(state_dict['modelC'])
self.modelC.eval()
def run(self):
def run(self, video_path):
outputdir = self.output
outputdir_in = os.path.join(outputdir, 'input/')
os.makedirs(outputdir_in, exist_ok=True)
......@@ -94,9 +123,7 @@ class DeepReasterPredictor:
refimgs = []
for i, v in enumerate(refs):
refimg = utils.addMergin(v,
target_w=target_w,
target_h=target_h)
refimg = addMergin(v, target_w=target_w, target_h=target_h)
refimg = np.array(refimg).astype('float32').transpose(
2, 0, 1) / 255.0
refimgs.append(refimg)
......@@ -105,7 +132,7 @@ class DeepReasterPredictor:
refimgs = paddle.unsqueeze(refimgs, 0)
# Load video
cap = cv2.VideoCapture(self.input)
cap = cv2.VideoCapture(video_path)
nframes = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
v_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
v_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
......@@ -156,7 +183,7 @@ class DeepReasterPredictor:
elif nchannels == 3:
cv2.imwrite(outputdir_in + '%07d.png' % index, frame)
frame = frame[:, :, ::-1] ## BGR -> RGB
frame_l, frame_ab = utils.convertRGB2LABTensor(frame)
frame_l, frame_ab = convertRGB2LABTensor(frame)
frame_l = frame_l.transpose([2, 0, 1])
frame_ab = frame_ab.transpose([2, 0, 1])
frame_l = frame_l.reshape([
......@@ -193,7 +220,7 @@ class DeepReasterPredictor:
(out_l, out_ab),
axis=0).detach().numpy().transpose((1, 2, 0))
out = Image.fromarray(
np.uint8(utils.convertLAB2RGB(out) * 255))
np.uint8(convertLAB2RGB(out) * 255))
out.save(outputdir_out + '%07d.png' % (index))
else:
raise ValueError('channels of image must be 3!')
......@@ -214,7 +241,7 @@ class DeepReasterPredictor:
output = paddle.concat(
(out_l, out_c), axis=0).numpy().transpose((1, 2, 0))
output = Image.fromarray(
np.uint8(utils.convertLAB2RGB(output) * 255))
np.uint8(convertLAB2RGB(output) * 255))
output.save(outputdir_out + '%07d.png' % index)
it = it + 1
......@@ -222,7 +249,7 @@ class DeepReasterPredictor:
# Save result videos
outfile = os.path.join(outputdir,
self.input.split('/')[-1].split('.')[0])
video_path.split('/')[-1].split('.')[0])
cmd = 'ffmpeg -y -r %d -i %s%%07d.png -vcodec libx264 -pix_fmt yuv420p -r %d %s_in.mp4' % (
fps, outputdir_in, fps, outfile)
subprocess.call(cmd, shell=True)
......@@ -236,14 +263,3 @@ class DeepReasterPredictor:
cap.release()
pbar.close()
return outputdir_out, '%s_out.mp4' % outfile
if __name__ == "__main__":
args = parser.parse_args()
paddle.disable_static()
predictor = DeepReasterPredictor(args.input,
args.output,
colorization=args.colorization,
reference_dir=args.reference_dir,
mindim=args.mindim)
predictor.run()
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import glob
import argparse
import numpy as np
import paddle
import pickle
from PIL import Image
from tqdm import tqdm
from paddle import fluid
import paddle
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import frames2video, video2frames
from ppgan.models.generators.deoldify import build_model
parser = argparse.ArgumentParser(description='DeOldify')
parser.add_argument('--input', type=str, default='none', help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--render_factor',
type=int,
default=32,
help='model inputsize=render_factor*16')
parser.add_argument('--weight_path',
type=str,
default=None,
help='Path to the reference image directory')
from .base_predictor import BasePredictor
DEOLDIFY_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams'
class DeOldifyPredictor():
def __init__(self,
input,
output,
batch_size=1,
weight_path=None,
render_factor=32):
self.input = input
class DeOldifyPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None, render_factor=32):
# self.input = input
self.output = os.path.join(output, 'DeOldify')
self.render_factor = render_factor
self.model = build_model()
if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(DEOLDIFY_WEIGHT_URL, cur_path)
state_dict, _ = paddle.load(weight_path)
state_dict = paddle.load(weight_path)
self.model.load_dict(state_dict)
self.model.eval()
......@@ -85,8 +77,14 @@ class DeOldifyPredictor():
final = Image.fromarray(final)
return final
def run_single(self, img_path):
ori_img = Image.open(img_path).convert('LA').convert('RGB')
def run_image(self, img):
if isinstance(img, str):
ori_img = Image.open(img).convert('LA').convert('RGB')
elif isinstance(img, np.ndarray):
ori_img = Image.fromarray(img).convert('LA').convert('RGB')
elif isinstance(img, Image.Image):
ori_img = img
img = self.norm(ori_img, self.render_factor)
x = paddle.to_tensor(img[np.newaxis, ...])
out = self.model(x)
......@@ -97,9 +95,8 @@ class DeOldifyPredictor():
pred_img = self.post_process(pred_img, ori_img)
return pred_img
def run(self):
vid = self.input
base_name = os.path.basename(vid).split('.')[0]
def run_video(self, video):
base_name = os.path.basename(video).split('.')[0]
output_path = os.path.join(self.output, base_name)
pred_frame_path = os.path.join(output_path, 'frames_pred')
......@@ -109,15 +106,15 @@ class DeOldifyPredictor():
if not os.path.exists(pred_frame_path):
os.makedirs(pred_frame_path)
cap = cv2.VideoCapture(vid)
cap = cv2.VideoCapture(video)
fps = cap.get(cv2.CAP_PROP_FPS)
out_path = video2frames(vid, output_path)
out_path = video2frames(video, output_path)
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
for frame in tqdm(frames):
pred_img = self.run_single(frame)
pred_img = self.run_image(frame)
frame_name = os.path.basename(frame)
pred_img.save(os.path.join(pred_frame_path, frame_name))
......@@ -130,15 +127,14 @@ class DeOldifyPredictor():
return frame_pattern_combined, vid_out_path
def run(self, input):
if self.is_video(input):
return self.run_video(input)
else:
pred_img = self.run_image(input)
if __name__ == '__main__':
paddle.disable_static()
args = parser.parse_args()
predictor = DeOldifyPredictor(args.input,
args.output,
weight_path=args.weight_path,
render_factor=args.render_factor)
frames_path, temp_video_path = predictor.run()
if self.output:
base_name = os.path.basename(input)
pred_img.save(os.path.join(self.output, base_name + '.png'))
print('output video path:', temp_video_path)
return pred_img
......@@ -13,44 +13,18 @@
#limitations under the License.
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import cv2
import time
import argparse
import ast
import glob
import numpy as np
import paddle.fluid as fluid
import cv2
from tqdm import tqdm
from data import EDVRDataset
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import frames2video, video2frames
EDVR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar'
from .base_predictor import BasePredictor
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--input',
type=str,
default=None,
help='input video path')
parser.add_argument('--output',
type=str,
default='output',
help='output path')
parser.add_argument('--weight_path',
type=str,
default=None,
help='weight path')
args = parser.parse_args()
return args
EDVR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar'
def get_img(pred):
......@@ -72,29 +46,107 @@ def save_img(img, framename):
cv2.imwrite(framename, img)
class EDVRPredictor:
def __init__(self, input, output, weight_path=None):
def read_img(path, size=None, is_gt=False):
"""read image by cv2
return: Numpy float32, HWC, BGR, [0,1]"""
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
img = img.astype(np.float32) / 255.
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
if img.shape[2] > 3:
img = img[:, :, :3]
return img
def get_test_neighbor_frames(crt_i, N, max_n, padding='new_info'):
"""Generate an index list for reading N frames from a sequence of images
Args:
crt_i (int): current center index
max_n (int): number of images in the sequence (counted from 1)
N (int): reading N frames
padding (str): padding mode, one of replicate | reflection | new_info | circle
Example: crt_i = 0, N = 5
replicate: [0, 0, 0, 1, 2]
reflection: [2, 1, 0, 1, 2]
new_info: [4, 3, 0, 1, 2]
circle: [3, 4, 0, 1, 2]
Returns:
return_l (list [int]): a list of indexes
"""
max_n = max_n - 1
n_pad = N // 2
return_l = []
for i in range(crt_i - n_pad, crt_i + n_pad + 1):
if i < 0:
if padding == 'replicate':
add_idx = 0
elif padding == 'reflection':
add_idx = -i
elif padding == 'new_info':
add_idx = (crt_i + n_pad) + (-i)
elif padding == 'circle':
add_idx = N + i
else:
raise ValueError('Wrong padding mode')
elif i > max_n:
if padding == 'replicate':
add_idx = max_n
elif padding == 'reflection':
add_idx = max_n * 2 - i
elif padding == 'new_info':
add_idx = (crt_i - n_pad) - (i - max_n)
elif padding == 'circle':
add_idx = i - N
else:
raise ValueError('Wrong padding mode')
else:
add_idx = i
return_l.append(add_idx)
return return_l
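# Quick check of the docstring examples (N=5, 100-frame sequence):
#   get_test_neighbor_frames(0, 5, 100, padding='new_info')   -> [4, 3, 0, 1, 2]
#   get_test_neighbor_frames(99, 5, 100, padding='replicate') -> [97, 98, 99, 99, 99]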
class EDVRDataset:
def __init__(self, frame_paths):
self.frames = frame_paths
def __getitem__(self, index):
indexs = get_test_neighbor_frames(index, 5, len(self.frames))
frame_list = []
for i in indexs:
img = read_img(self.frames[i])
frame_list.append(img)
img_LQs = np.stack(frame_list, axis=0)
# BGR to RGB, HWC to CHW, numpy to tensor
img_LQs = img_LQs[:, :, :, [2, 1, 0]]
img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
return img_LQs, self.frames[index]
def __len__(self):
return len(self.frames)
class EDVRPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None):
self.input = input
self.output = os.path.join(output, 'EDVR')
place = fluid.CUDAPlace(
0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
self.exe = fluid.Executor(place)
if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(EDVR_WEIGHT_URL, cur_path)
model_filename = 'EDVR_model.pdmodel'
params_filename = 'EDVR_params.pdparams'
self.weight_path = weight_path
out = fluid.io.load_inference_model(dirname=weight_path,
model_filename=model_filename,
params_filename=params_filename,
executor=self.exe)
self.infer_prog, self.feed_list, self.fetch_list = out
self.build_inference_model()
def run(self):
vid = self.input
def run(self, video_path):
vid = video_path
base_name = os.path.basename(vid).split('.')[0]
output_path = os.path.join(self.output, base_name)
pred_frame_path = os.path.join(output_path, 'frames_pred')
......@@ -119,11 +171,9 @@ class EDVRPredictor:
for infer_iter, data in enumerate(tqdm(dataset)):
data_feed_in = [data[0]]
infer_outs = self.exe.run(
self.infer_prog,
fetch_list=self.fetch_list,
feed={self.feed_list[0]: np.array(data_feed_in)})
infer_result_list = [item for item in infer_outs]
outs = self.base_forward(np.array(data_feed_in))
infer_result_list = [item for item in outs]
frame_path = data[1]
......@@ -144,9 +194,3 @@ class EDVRPredictor:
frames2video(frame_pattern_combined, vid_out_path, str(int(fps)))
return frame_pattern_combined, vid_out_path
if __name__ == "__main__":
args = parse_args()
predictor = EDVRPredictor(args.input, args.output, args.weight_path)
predictor.run()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import yaml
import pickle
import imageio
import numpy as np
from tqdm import tqdm
from skimage import img_as_ubyte
from skimage.transform import resize
from scipy.spatial import ConvexHull
import paddle
from paddle.utils.download import get_path_from_url
from ppgan.utils.animate import normalize_kp
from ppgan.modules.keypoint_detector import KPDetector
from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator
from .base_predictor import BasePredictor
class FirstOrderPredictor(BasePredictor):
def __init__(self,
output='output',
weight_path=None,
config=None,
relative=False,
adapt_scale=False,
find_best_frame=False,
best_frame=None):
if config is not None and isinstance(config, str):
with open(config) as f:
    self.cfg = yaml.load(f, Loader=yaml.SafeLoader)
elif isinstance(config, dict):
self.cfg = config
elif config is None:
self.cfg = {
'model_params': {
'common_params': {
'num_kp': 10,
'num_channels': 3,
'estimate_jacobian': True
},
'kp_detector_params': {
'temperature': 0.1,
'block_expansion': 32,
'max_features': 1024,
'scale_factor': 0.25,
'num_blocks': 5
},
'generator_params': {
'block_expansion': 64,
'max_features': 512,
'num_down_blocks': 2,
'num_bottleneck_blocks': 6,
'estimate_occlusion_map': True,
'dense_motion_params': {
'block_expansion': 64,
'max_features': 1024,
'num_blocks': 5,
'scale_factor': 0.25
}
}
}
}
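# The defaults above are only used when no config is supplied; they appear to
# describe the architecture expected by the vox-cpk weights downloaded below
# when weight_path is None.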
if weight_path is None:
vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-cpk.pdparams'
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(vox_cpk_weight_url, cur_path)
self.weight_path = weight_path
self.output = output
self.relative = relative
self.adapt_scale = adapt_scale
self.find_best_frame = find_best_frame
self.best_frame = best_frame
self.generator, self.kp_detector = self.load_checkpoints(
self.cfg, self.weight_path)
def run(self, source_image, driving_video):
source_image = imageio.imread(source_image)
reader = imageio.get_reader(driving_video)
fps = reader.get_meta_data()['fps']
driving_video = []
try:
for im in reader:
driving_video.append(im)
except RuntimeError:
pass
reader.close()
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [
resize(frame, (256, 256))[..., :3] for frame in driving_video
]
if self.find_best_frame or self.best_frame is not None:
i = self.best_frame if self.best_frame is not None else self.find_best_frame_func(
source_image, driving_video)
print("Best frame: " + str(i))
driving_forward = driving_video[i:]
driving_backward = driving_video[:(i + 1)][::-1]
predictions_forward = self.make_animation(
source_image,
driving_forward,
self.generator,
self.kp_detector,
relative=self.relative,
adapt_movement_scale=self.adapt_scale)
predictions_backward = self.make_animation(
source_image,
driving_backward,
self.generator,
self.kp_detector,
relative=self.relative,
adapt_movement_scale=self.adapt_scale)
predictions = predictions_backward[::-1] + predictions_forward[1:]
else:
predictions = self.make_animation(
source_image,
driving_video,
self.generator,
self.kp_detector,
relative=self.relative,
adapt_movement_scale=self.adapt_scale)
imageio.mimsave(os.path.join(self.output, 'result.mp4'),
[img_as_ubyte(frame) for frame in predictions],
fps=fps)
def load_checkpoints(self, config, checkpoint_path):
generator = OcclusionAwareGenerator(
**config['model_params']['generator_params'],
**config['model_params']['common_params'])
kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
**config['model_params']['common_params'])
checkpoint = paddle.load(self.weight_path)
generator.set_state_dict(checkpoint['generator'])
kp_detector.set_state_dict(checkpoint['kp_detector'])
generator.eval()
kp_detector.eval()
return generator, kp_detector
def make_animation(self,
source_image,
driving_video,
generator,
kp_detector,
relative=True,
adapt_movement_scale=True):
with paddle.no_grad():
predictions = []
source = paddle.to_tensor(source_image[np.newaxis].astype(
np.float32)).transpose([0, 3, 1, 2])
driving = paddle.to_tensor(
np.array(driving_video)[np.newaxis].astype(
np.float32)).transpose([0, 4, 1, 2, 3])
kp_source = kp_detector(source)
kp_driving_initial = kp_detector(driving[:, :, 0])
for frame_idx in tqdm(range(driving.shape[2])):
driving_frame = driving[:, :, frame_idx]
kp_driving = kp_detector(driving_frame)
kp_norm = normalize_kp(
kp_source=kp_source,
kp_driving=kp_driving,
kp_driving_initial=kp_driving_initial,
use_relative_movement=relative,
use_relative_jacobian=relative,
adapt_movement_scale=adapt_movement_scale)
out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
predictions.append(
np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0])
return predictions
def find_best_frame_func(self, source, driving):
import face_alignment
def normalize_kp(kp):
kp = kp - kp.mean(axis=0, keepdims=True)
area = ConvexHull(kp[:, :2]).volume
area = np.sqrt(area)
kp[:, :2] = kp[:, :2] / area
return kp
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
flip_input=True)
kp_source = fa.get_landmarks(255 * source)[0]
kp_source = normalize_kp(kp_source)
norm = float('inf')
frame_num = 0
for i, image in tqdm(enumerate(driving)):
kp_driving = fa.get_landmarks(255 * image)[0]
kp_driving = normalize_kp(kp_driving)
new_norm = (np.abs(kp_source - kp_driving)**2).sum()
if new_norm < norm:
norm = new_norm
frame_num = i
return frame_num
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import glob
import argparse
import numpy as np
import paddle
import pickle
from PIL import Image
from tqdm import tqdm
import paddle
from ppgan.models.generators import RRDBNet
from ppgan.utils.video import frames2video, video2frames
from paddle.utils.download import get_path_from_url
parser = argparse.ArgumentParser(description='RealSR')
parser.add_argument('--input', type=str, default='none', help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--weight_path',
type=str,
default=None,
help='Path to the reference image directory')
from .base_predictor import BasePredictor
REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DF2K_JPEG.pdparams'
class RealSRPredictor(BasePredictor):
    def __init__(self, output='output', weight_path=None):
        self.output = os.path.join(output, 'RealSR')
        self.model = RRDBNet(3, 3, 64, 23)
        if weight_path is None:
            cur_path = os.path.abspath(os.path.dirname(__file__))
            weight_path = get_path_from_url(REALSR_WEIGHT_URL, cur_path)

        state_dict = paddle.load(weight_path)
        self.model.load_dict(state_dict)
        self.model.eval()

    # ... (lines collapsed in the source diff)

        img = img.transpose((1, 2, 0))
        return (img * 255).clip(0, 255).astype('uint8')

    def run_image(self, img):
        if isinstance(img, str):
            ori_img = Image.open(img).convert('RGB')
        elif isinstance(img, np.ndarray):
            ori_img = Image.fromarray(img).convert('RGB')
        elif isinstance(img, Image.Image):
            ori_img = img

        img = self.norm(ori_img)
        x = paddle.to_tensor(img[np.newaxis, ...])
        out = self.model(x)

        # ... (lines collapsed in the source diff)

        pred_img = Image.fromarray(pred_img)
        return pred_img

    def run_video(self, video):
        base_name = os.path.basename(video).split('.')[0]
        output_path = os.path.join(self.output, base_name)
        pred_frame_path = os.path.join(output_path, 'frames_pred')

        # ... (lines collapsed in the source diff)

        if not os.path.exists(pred_frame_path):
            os.makedirs(pred_frame_path)

        cap = cv2.VideoCapture(video)
        fps = cap.get(cv2.CAP_PROP_FPS)

        out_path = video2frames(video, output_path)

        frames = sorted(glob.glob(os.path.join(out_path, '*.png')))

        for frame in tqdm(frames):
            pred_img = self.run_image(frame)
            frame_name = os.path.basename(frame)
            pred_img.save(os.path.join(pred_frame_path, frame_name))

        # ... (lines collapsed in the source diff)

        return frame_pattern_combined, vid_out_path

    def run(self, input):
        if self.is_video(input):
            return self.run_video(input)
        else:
            pred_img = self.run_image(input)

            if self.output:
                base_name = os.path.basename(input)
                pred_img.save(os.path.join(self.output, base_name + '.png'))

            return pred_img
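# Usage sketch (illustrative, not part of the original file): the predictor can
# be pointed at either an image path or a video path; 'input.png' below is a
# placeholder, and with weight_path=None the DF2K weights are downloaded from
# REALSR_WEIGHT_URL on first use.
if __name__ == '__main__':
    paddle.disable_static()
    predictor = RealSRPredictor(output='output', weight_path=None)
    result = predictor.run('input.png')  # dispatches to run_image / run_video
    print('upscaled image size:', result.size)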
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .unpaired_dataset import UnpairedDataset
from .single_dataset import SingleDataset
from .paired_dataset import PairedDataset
from .sr_image_dataset import SRImageDataset
from .makeup_dataset import MakeupDataset
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import os.path
from .base_dataset import BaseDataset, get_transform
from .transforms.makeup_transforms import get_makeup_transform
import paddle.vision.transforms as T
from PIL import Image
import random
import numpy as np
from ..utils.preprocess import *
from .builder import DATASETS
@DATASETS.register()
class MakeupDataset(BaseDataset):
def __init__(self, cfg):
"""Initialize this dataset class.
Parameters:
opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
"""
BaseDataset.__init__(self, cfg)
self.image_path = cfg.dataroot
self.mode = cfg.phase
self.transform = get_makeup_transform(cfg)
self.norm = T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])
self.transform_mask = get_makeup_transform(cfg, pic="mask")
self.trans_size = cfg.trans_size
self.cls_list = cfg.cls_list
self.cls_A = self.cls_list[0]
self.cls_B = self.cls_list[1]
for cls in self.cls_list:
setattr(
self, cls + "_list_path",
os.path.join(self.image_path, self.mode + '_' + cls + ".txt"))
setattr(self, cls + "_lines",
open(getattr(self, cls + "_list_path"), 'r').readlines())
setattr(self, "num_of_" + cls + "_data",
len(getattr(self, cls + "_lines")))
print('Start preprocessing dataset..!')
self.preprocess()
print('Finished preprocessing dataset..!')
    def preprocess(self):
        """Parse the split files into per-class lists of image, mask and landmark paths."""
for cls in self.cls_list:
setattr(self, cls + "_filenames", [])
setattr(self, cls + "_mask_filenames", [])
setattr(self, cls + "_lmks_filenames", [])
lines = getattr(self, cls + "_lines")
random.shuffle(lines)
for i, line in enumerate(lines):
splits = line.split()
getattr(self, cls + "_filenames").append(splits[0])
getattr(self, cls + "_mask_filenames").append(splits[1])
getattr(self, cls + "_lmks_filenames").append(splits[2])
def __getitem__(self, index):
"""Return MANet and MDNet needed params.
Parameters:
index (int) -- a random integer for data indexing
Returns a dictionary that contains needed params.
"""
try:
index_A = random.randint(
0, getattr(self, "num_of_" + self.cls_A + "_data"))
index_B = random.randint(
0, getattr(self, "num_of_" + self.cls_B + "_data"))
if self.mode == 'test':
num_b = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
index_A = int(index / num_b)
index_B = int(index % num_b)
image_A = Image.open(
os.path.join(self.image_path,
getattr(self, self.cls_A +
"_filenames")[index_A])).convert("RGB")
image_B = Image.open(
os.path.join(self.image_path,
getattr(self, self.cls_B +
"_filenames")[index_B])).convert("RGB")
mask_A = np.array(
Image.open(
os.path.join(
self.image_path,
getattr(self,
self.cls_A + "_mask_filenames")[index_A])))
mask_B = np.array(
Image.open(
os.path.join(
self.image_path,
getattr(self, self.cls_B +
"_mask_filenames")[index_B])).convert('L'))
image_A = np.array(image_A)
image_B = np.array(image_B)
image_A = self.transform(image_A)
image_B = self.transform(image_B)
mask_A = cv2.resize(mask_A, (256, 256),
interpolation=cv2.INTER_NEAREST)
mask_B = cv2.resize(mask_B, (256, 256),
interpolation=cv2.INTER_NEAREST)
lmks_A = np.loadtxt(
os.path.join(
self.image_path,
getattr(self, self.cls_A + "_lmks_filenames")[index_A]))
lmks_B = np.loadtxt(
os.path.join(
self.image_path,
getattr(self, self.cls_B + "_lmks_filenames")[index_B]))
lmks_A = lmks_A / image_A.shape[:2] * self.trans_size
lmks_B = lmks_B / image_B.shape[:2] * self.trans_size
P_A = generate_P_from_lmks(lmks_A, self.trans_size,
image_A.shape[0], image_A.shape[1])
P_B = generate_P_from_lmks(lmks_B, self.trans_size,
image_B.shape[0], image_B.shape[1])
mask_A_aug = generate_mask_aug(mask_A, lmks_A)
mask_B_aug = generate_mask_aug(mask_B, lmks_B)
consis_mask = calculate_consis_mask(mask_A_aug, mask_B_aug)
consis_mask_idt_A = calculate_consis_mask(mask_A_aug, mask_A_aug)
            consis_mask_idt_B = calculate_consis_mask(mask_B_aug, mask_B_aug)
except Exception as e:
print(e)
return self.__getitem__(index + 1)
return {
'image_A': self.norm(image_A),
'image_B': self.norm(image_B),
'mask_A': np.float32(mask_A),
'mask_B': np.float32(mask_B),
'consis_mask': np.float32(consis_mask),
'P_A': np.float32(P_A),
'P_B': np.float32(P_B),
'consis_mask_idt_A': np.float32(consis_mask_idt_A),
'consis_mask_idt_B': np.float32(consis_mask_idt_B),
'mask_A_aug': np.float32(mask_A_aug),
'mask_B_aug': np.float32(mask_B_aug)
}
    def __len__(self):
        """Return the total number of images in the dataset.

        The two classes can contain different numbers of images: training
        iterates over the larger of the two counts, while testing enumerates
        every source/reference pair.
        """
        num_A = getattr(self, 'num_of_' + self.cls_list[0] + '_data')
        num_B = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
        if self.mode == "test":
            return num_A * num_B
        return max(num_A, num_B)
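# Data-layout sketch (illustrative, not from the original file): preprocess()
# above expects a <phase>_<class>.txt split file with one sample per line, each
# line holding three whitespace-separated relative paths: image, segmentation
# mask and facial-landmark file. The concrete paths below are placeholders.
if __name__ == '__main__':
    example_line = 'images/makeup/0001.png segs/makeup/0001.png lmks/makeup/0001.txt'
    image_rel, mask_rel, lmks_rel = example_line.split()
    print('image:', image_rel)
    print('mask :', mask_rel)
    print('lmks :', lmks_rel)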
import os.path

from .base_dataset import BaseDataset, get_params, get_transform
from .image_folder import make_dataset
from .builder import DATASETS
from .transforms.builder import build_transforms


@DATASETS.register()
class PairedDataset(BaseDataset):
    """A dataset class for paired image dataset.
    """
    def __init__(self, cfg):
        """Initialize this dataset class.

        Parameters:
            cfg (dict) -- stores all the experiment flags
        """
        BaseDataset.__init__(self, cfg)
        self.dir_AB = os.path.join(cfg.dataroot,
                                   cfg.phase)  # get the image directory
        self.AB_paths = sorted(make_dataset(
            self.dir_AB, cfg.max_dataset_size))  # get image paths
        self.input_nc = self.cfg.output_nc if self.cfg.direction == 'BtoA' else self.cfg.input_nc
        self.output_nc = self.cfg.input_nc if self.cfg.direction == 'BtoA' else self.cfg.output_nc
        self.transforms = build_transforms(cfg.transforms)

    def __getitem__(self, index):
        """Return a data point and its metadata information."""
        # ... (loading of the combined AB image collapsed in the source diff)
        A = AB[:h, :w2, :]
        B = AB[:h, w2:, :]

        # apply the same transform to both A and B
        A, B = self.transforms((A, B))

        return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}

    def __len__(self):
        """Return the total number of images in the dataset."""
        return len(self.AB_paths)
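# Layout sketch (illustrative, not from the original file): each file under
# dataroot/<phase> stores A and B side by side in a single image; __getitem__
# above splits it down the middle before applying the paired transforms. The
# dummy array below just demonstrates that split.
if __name__ == '__main__':
    import numpy as np

    AB = np.zeros((256, 512, 3), dtype=np.uint8)  # placeholder combined image
    h, w = AB.shape[:2]
    w2 = w // 2
    A, B = AB[:h, :w2, :], AB[:h, w2:, :]
    print('A:', A.shape, 'B:', B.shape)  # (256, 256, 3) each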
from .transforms import RandomCrop, Resize, RandomHorizontalFlip, PairedRandomCrop, PairedRandomHorizontalFlip, Normalize, Permute
from .dlib_utils import detect, crop, landmarks, crop_from_array
import paddle


# ... (calculate_gain and earlier lines are collapsed in the source diff)


@paddle.no_grad()
def constant_(x, value):
    # Fill the tensor/parameter `x` in place with a constant value.
    # paddle.full is the Paddle 2.x replacement for the old fluid-style fill_constant.
    temp_value = paddle.full(x.shape, value, x.dtype)
    x.set_value(temp_value)
    return x
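# Usage sketch (illustrative, not from the original file): constant_ can be used
# to fill a layer parameter in place, e.g. zero-initialising a bias; the Conv2D
# layer below is only an example.
if __name__ == '__main__':
    import paddle.nn as nn

    conv = nn.Conv2D(3, 16, kernel_size=3)
    constant_(conv.bias, 0.0)
    print(float(conv.bias.sum()))  # -> 0.0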