Commit e4932b70 authored by LielinJiang

Merge branch 'master' of https://github.com/PaddlePaddle/PaddleGAN into readme

import os
import datetime
import argparse
import numpy
import networks
modelnames = networks.__all__
# import datasets
datasetNames = ('Vimeo_90K_interp', )  # datasets.__all__
parser = argparse.ArgumentParser(description='DAIN')
parser.add_argument('--debug', action='store_true', help='Enable debug mode')
parser.add_argument('--netName',
type=str,
default='DAIN',
choices=modelnames,
help='model architecture: ' + ' | '.join(modelnames) +
' (default: DAIN)')
parser.add_argument('--datasetName',
default='Vimeo_90K_interp',
choices=datasetNames,
nargs='+',
help='dataset type : ' + ' | '.join(datasetNames) +
' (default: Vimeo_90K_interp)')
parser.add_argument('--video_path',
default='',
help='the path of selected videos')
parser.add_argument('--output_path', default='', help='the output root path')
parser.add_argument('--seed',
type=int,
default=1,
help='random seed (default: 1)')
parser.add_argument('--batch_size',
'-b',
type=int,
default=1,
help='batch size (default:1)')
parser.add_argument('--channels',
'-c',
type=int,
default=3,
choices=[1, 3],
help='channels of images (default:3)')
parser.add_argument('--filter_size',
'-f',
type=int,
default=4,
help='the size of filters used (default: 4)',
choices=[2, 4, 6, 5, 51])
parser.add_argument('--time_step',
type=float,
default=0.5,
help='choose the time steps')
parser.add_argument(
'--alpha',
type=float,
nargs='+',
default=[0.0, 1.0],
help=
'the ratio of loss for interpolated and rectified result (default: [0.0, 1.0])'
)
parser.add_argument('--frame_rate',
type=int,
default=None,
help='frame rate of the input video')
parser.add_argument('--patience',
type=int,
default=5,
help='the patience of reduce-on-plateau')
parser.add_argument('--factor',
type=float,
default=0.2,
help='the factor of reduce-on-plateau')
parser.add_argument('--saved_model',
type=str,
default='',
help='path to the model weights')
parser.add_argument('--no-date',
action='store_true',
help='don\'t append date timestamp to folder')
parser.add_argument('--use_cuda',
default=True,
type=bool,
help='use cuda or not')
parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not')
parser.add_argument('--remove_duplicates',
default=True,
type=bool,
help='remove duplicate frames or not')
from .dain import DAIN
from .dain_slowmotion import DAIN_slowmotion
__all__ = ('DAIN', 'DAIN_slowmotion')
import paddle.fluid as fluid
import resblock
import pwcnet
class DAIN(fluid.dygraph.Layer):
def __init__(self, channel=3, filter_size=4, timestep=0.5, training=True):
# base class initialization
super(DAIN, self).__init__()
self.filter_size = filter_size
self.training = training
self.timestep = timestep
assert (timestep == 0.5)
self.numFrames = int(1.0 / timestep) - 1
ctx_ch = 3 * 64 + 3
# channels of the rectify input: cur_output(3) + ref0(3) + ref2(3) + two 2-channel flows,
# matching the concat built in forward (same value as DAIN_slowmotion)
inplanes = 13
self.rectifyNet = resblock.__dict__['MultipleBasicBlock_4'](inplanes,
64)
self.flownets = pwcnet.__dict__['pwc_dc_net']()
self.div_flow = 20.0
def forward(self, input):
"""
Parameters
----------
input: shape (num_frames, batch, 3, height, width); num_frames is 3 during training and 2 at inference
-----------
"""
losses = []
offsets = []
'''
STEP 1: squeeze the input
'''
if self.training == True:
assert input.shape[0] == 3
input_0 = input[0]
input_1 = input[1]
input_2 = input[2]
else:
# print(input.shape[0])
assert input.shape[0] == 2
input_0 = input[0]
input_2 = input[1]
#prepare the input data of current scale
cur_input_0 = input_0
if self.training == True:
cur_input_1 = input_1
cur_input_2 = input_2
'''
STEP 3.2: concatenating the inputs.
'''
cur_offset_input = fluid.layers.concat([cur_input_0, cur_input_2],
axis=1)
'''
STEP 3.3: perform the estimation
'''
time_offsets = [
kk * self.timestep for kk in range(1, 1 + self.numFrames, 1)
]
cur_offset_outputs = [
self.forward_flownets(self.flownets,
cur_offset_input,
time_offsets=time_offsets),
self.forward_flownets(self.flownets,
fluid.layers.concat(
[cur_input_2, cur_input_0], axis=1),
time_offsets=time_offsets[::-1])
]
cur_offset_output = [cur_offset_outputs[0][0], cur_offset_outputs[1][0]]
# Warp image use warp-op in PWC-Net
ref0 = self.flownets.warp_nomask(cur_input_0, cur_offset_output[0])
ref2 = self.flownets.warp_nomask(cur_input_2, cur_offset_output[1])
cur_output = (ref0 + ref2) / 2.0
rectify_input = fluid.layers.concat([
cur_output, ref0, ref2, cur_offset_output[0], cur_offset_output[1]
],
axis=1)
cur_output_rectified = self.rectifyNet(rectify_input) + cur_output
'''
STEP 3.5: for training phase, we collect the variables to be penalized.
'''
if self.training == True:
losses += [cur_output - cur_input_1]
losses += [cur_output_rectified - cur_input_1]
offsets += [cur_offset_output]
'''
STEP 4: return the results
'''
if self.training == True:
# if in the training phase, we output the losses to be minimized.
# return losses, loss_occlusion
return losses, offsets
else:
cur_outputs = [cur_output, cur_output_rectified]
return cur_outputs, cur_offset_output
def forward_flownets(self, model, input, time_offsets=None):
if time_offsets is None:
time_offsets = [0.5]
elif isinstance(time_offsets, float):
time_offsets = [time_offsets]
elif isinstance(time_offsets, list):
pass
# this is a single-direction motion result, not a bidirectional one
temp = model(input)
# turn the single-direction flow into both directions by scaling with each time offset
temps = [
self.div_flow * temp * time_offset for time_offset in time_offsets
]
# upsample the flow with bilinear interpolation; nearest-neighbor is unlikely to be better here
temps = [fluid.layers.resize_bilinear(temp, scale=4) for temp in temps]
return temps
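A minimal, hypothetical sketch of running the `DAIN` class above for inference in dygraph mode. It assumes a CUDA device, a Paddle build that provides the correlation layer used by `pwcnet.py`, and that the file is importable as `dain.py`; real use would load pretrained weights with `model.load_dict(...)` first.
```
import numpy as np
import paddle.fluid as fluid

from dain import DAIN  # assumption: this file is on the import path as dain.py

with fluid.dygraph.guard(fluid.CUDAPlace(0)):
    model = DAIN(channel=3, filter_size=4, timestep=0.5, training=False)
    # two RGB frames stacked on axis 0: (num_frames=2, batch=1, 3, H, W);
    # H and W are multiples of 128, as predict.py pads them
    frames = fluid.dygraph.to_variable(
        np.random.rand(2, 1, 3, 256, 384).astype('float32'))
    (coarse, rectified), flows = model(frames)  # rectified is the interpolated frame
```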
import paddle.fluid as fluid
import resblock
import time
import pwcnet
class DAIN_slowmotion(fluid.dygraph.Layer):
def __init__(self, channel=3, filter_size=4, timestep=0.5, training=True):
# base class initialization
super(DAIN_slowmotion, self).__init__()
self.filter_size = filter_size
self.training = training
self.timestep = timestep
self.num_frames = int(1.0 / timestep) - 1
ctx_ch = 3 * 64 + 3
# inplanes = 3 + 3 + 3 + 2*1 + 2*2 + 2
inplanes = 13
self.flownets = pwcnet.__dict__['pwc_dc_net']()
self.rectifyNet = resblock.__dict__['MultipleBasicBlock_4'](inplanes,
64)
self.div_flow = 20.0
def forward(self, input):
"""
Parameters
----------
input: shape (num_frames, batch, 3, height, width); num_frames is 3 during training and 2 at inference
-----------
"""
losses = []
offsets = []
'''
STEP 1: squeeze the input
'''
if self.training == True:
assert input.shape[0] == 3
input_0 = input[0]
input_1 = input[1]
input_2 = input[2]
else:
assert input.shape[0] == 2
input_0 = input[0]
input_2 = input[1]
#prepare the input data of current scale
cur_input_0 = input_0
if self.training == True:
cur_input_1 = input_1
cur_input_2 = input_2
'''
STEP 3.2: concatenating the inputs.
'''
cur_offset_input = fluid.layers.concat([cur_input_0, cur_input_2],
axis=1)
'''
STEP 3.3: perform the estimation
'''
time_offsets = [
kk * self.timestep for kk in range(1, 1 + self.num_frames, 1)
]
cur_offset_outputs = [
self.forward_flownets(self.flownets,
cur_offset_input,
time_offsets=time_offsets),
self.forward_flownets(self.flownets,
fluid.layers.concat(
[cur_input_2, cur_input_0], axis=1),
time_offsets=time_offsets[::-1])
]
'''
STEP 3.4: perform the frame interpolation process
'''
count = 0
for temp_0, temp_1, timeoffset in zip(cur_offset_outputs[0],
cur_offset_outputs[1],
time_offsets):
cur_offset_output = [temp_0, temp_1]
ref0 = self.flownets.warp_nomask(cur_input_0, cur_offset_output[0])
ref2 = self.flownets.warp_nomask(cur_input_2, cur_offset_output[1])
cur_output_temp = (ref0 + ref2) / 2.0
if count == 0:
cur_output = fluid.layers.unsqueeze(cur_output_temp, axes=0)
else:
cur_output_ = fluid.layers.unsqueeze(cur_output_temp, axes=0)
cur_output = fluid.layers.concat([cur_output, cur_output_],
axis=0)
rectify_input = fluid.layers.concat([
cur_output_temp, ref0, ref2, cur_offset_output[0],
cur_offset_output[1]
],
axis=1)
cur_output_rectified_temp = self.rectifyNet(
rectify_input) + cur_output_temp
if count == 0:
cur_output_rectified = fluid.layers.unsqueeze(
cur_output_rectified_temp, axes=0)
else:
cur_output_rectified_ = fluid.layers.unsqueeze(
cur_output_rectified_temp, axes=0)
cur_output_rectified = fluid.layers.concat(
[cur_output_rectified, cur_output_rectified_], axis=0)
count += 1
'''
STEP 3.5: for training phase, we collect the variables to be penalized.
'''
if self.training == True:
losses += [cur_output - cur_input_1]
losses += [cur_output_rectified - cur_input_1]
offsets += [cur_offset_output]
'''
STEP 4: return the results
'''
if self.training == True:
# if in the training phase, we output the losses to be minimized.
# return losses, loss_occlusion
return losses, offsets
else:
cur_outputs = [cur_output, cur_output_rectified]
return cur_outputs, cur_offset_output
def forward_flownets(self, model, input, time_offsets=None):
if time_offsets is None:
time_offsets = [0.5]
elif isinstance(time_offsets, float):
time_offsets = [time_offsets]
elif isinstance(time_offsets, list):
pass
# this is a single-direction motion result, not a bidirectional one
temp = model(input)
# turn the single-direction flow into both directions by scaling with each time offset
temps = [
self.div_flow * temp * time_offset for time_offset in time_offsets
]
# upsample the flow with bilinear interpolation; nearest-neighbor is unlikely to be better here
temps = [fluid.layers.resize_bilinear(temp, scale=4) for temp in temps]
return temps
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import time
import glob
import numpy as np
from imageio import imread, imsave
from tqdm import tqdm
import cv2
import paddle.fluid as fluid
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import video2frames, frames2video
from util import *
from my_args import parser
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
def infer_engine(model_dir,
run_mode='fluid',
batch_size=1,
use_gpu=False,
min_subgraph_size=3):
if not use_gpu and not run_mode == 'fluid':
raise ValueError(
"Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
.format(run_mode, use_gpu))
precision_map = {
'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
}
config = fluid.core.AnalysisConfig(os.path.join(model_dir, 'model'),
os.path.join(model_dir, 'params'))
if use_gpu:
# initial GPU memory(M), device ID
config.enable_use_gpu(100, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
else:
config.disable_gpu()
if run_mode in precision_map.keys():
config.enable_tensorrt_engine(workspace_size=1 << 10,
max_batch_size=batch_size,
min_subgraph_size=min_subgraph_size,
precision_mode=precision_map[run_mode],
use_static=False,
use_calib_mode=False)
# disable print log when predict
config.disable_glog_info()
# enable shared memory
config.enable_memory_optim()
# disable feed, fetch OP, needed by zero_copy_run
config.switch_use_feed_fetch_ops(False)
predictor = fluid.core.create_paddle_predictor(config)
return predictor
def executor(model_dir, use_gpu=False):
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
program, feed_names, fetch_targets = fluid.io.load_inference_model(
dirname=model_dir,
executor=exe,
model_filename='model',
params_filename='params')
return exe, program, fetch_targets
class VideoFrameInterp(object):
def __init__(self,
time_step,
model_path,
video_path,
use_gpu=True,
key_frame_thread=0.,
output_path='output',
remove_duplicates=True):
self.video_path = video_path
self.output_path = os.path.join(output_path, 'DAIN')
if not model_path:
model_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)
self.model_path = model_path
self.time_step = time_step
self.key_frame_thread = key_frame_thread
self.remove_duplicates = remove_duplicates
self.exe, self.program, self.fetch_targets = executor(model_path,
use_gpu=use_gpu)
def run(self):
frame_path_input = os.path.join(self.output_path, 'frames-input')
frame_path_interpolated = os.path.join(self.output_path,
'frames-interpolated')
frame_path_combined = os.path.join(self.output_path, 'frames-combined')
video_path_output = os.path.join(self.output_path, 'videos-output')
if not os.path.exists(self.output_path):
os.makedirs(self.output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
timestep = self.time_step
num_frames = int(1.0 / timestep) - 1
if self.video_path.endswith('.mp4'):
videos = [self.video_path]
else:
videos = sorted(glob.glob(os.path.join(self.video_path, '*.mp4')))
for cnt, vid in enumerate(videos):
print("Interpolating video:", vid)
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS)
print("Old fps (frame rate): ", fps)
times_interp = int(1.0 / timestep)
r2 = str(int(fps) * times_interp)
print("New fps (frame rate): ", r2)
out_path = video2frames(vid, frame_path_input)
vidname = vid.split('/')[-1].split('.')[0]
tot_timer = AverageMeter()
proc_timer = AverageMeter()
end = time.time()
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
if self.remove_duplicates:
frames = remove_duplicates(out_path)
img = imread(frames[0])
int_width = img.shape[1]
int_height = img.shape[0]
channel = img.shape[2]
if not channel == 3:
continue
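# pad width/height: if a dimension is not a multiple of 128, pad it (centered) up to the
# next multiple; otherwise add a fixed 32-pixel border on each side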
if int_width != ((int_width >> 7) << 7):
int_width_pad = (
((int_width >> 7) + 1) << 7) # more than necessary
padding_left = int((int_width_pad - int_width) / 2)
padding_right = int_width_pad - int_width - padding_left
else:
int_width_pad = int_width
padding_left = 32
padding_right = 32
if int_height != ((int_height >> 7) << 7):
int_height_pad = (
((int_height >> 7) + 1) << 7) # more than necessary
padding_top = int((int_height_pad - int_height) / 2)
padding_bottom = int_height_pad - int_height - padding_top
else:
int_height_pad = int_height
padding_top = 32
padding_bottom = 32
frame_num = len(frames)
print('processing {} frames, from video: {}'.format(frame_num, vid))
if not os.path.exists(os.path.join(frame_path_interpolated,
vidname)):
os.makedirs(os.path.join(frame_path_interpolated, vidname))
if not os.path.exists(os.path.join(frame_path_combined, vidname)):
os.makedirs(os.path.join(frame_path_combined, vidname))
for i in tqdm(range(frame_num - 1)):
first = frames[i]
second = frames[i + 1]
img_first = imread(first)
img_second = imread(second)
'''--------------Frame change test------------------------'''
img_first_gray = np.dot(img_first[..., :3],
[0.299, 0.587, 0.114])
img_second_gray = np.dot(img_second[..., :3],
[0.299, 0.587, 0.114])
img_first_gray = img_first_gray.flatten(order='C')
img_second_gray = img_second_gray.flatten(order='C')
corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1]
key_frame = False
if corr < self.key_frame_thread:
key_frame = True
'''-------------------------------------------------------'''
X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
proc_end = time.time()
o = self.exe.run(self.program,
fetch_list=self.fetch_targets,
feed={"image": X})
y_ = o[0]
proc_timer.update(time.time() - proc_end)
tot_timer.update(time.time() - end)
end = time.time()
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>6d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
num_frames = int(1.0 / timestep) - 1
input_dir = os.path.join(frame_path_input, vidname)
interpolated_dir = os.path.join(frame_path_interpolated, vidname)
combined_dir = os.path.join(frame_path_combined, vidname)
combine_frames(input_dir, interpolated_dir, combined_dir,
num_frames)
frame_pattern_combined = os.path.join(frame_path_combined, vidname,
'%08d.png')
video_pattern_output = os.path.join(video_path_output,
vidname + '.mp4')
if os.path.exists(video_pattern_output):
os.remove(video_pattern_output)
frames2video(frame_pattern_combined, video_pattern_output, r2)
return frame_pattern_combined, video_pattern_output
if __name__ == '__main__':
args = parser.parse_args()
predictor = VideoFrameInterp(args.time_step,
                             args.saved_model,
                             args.video_path,
                             output_path=args.output_path,
                             remove_duplicates=args.remove_duplicates)
predictor.run()
Building the custom OP:
2. Run `sh make.sh` to compile the `correlation_lib.so` shared library
3. Add the shared library path to LD_LIBRARY_PATH:
```
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python3.7 -c 'import paddle; print(paddle.sysconfig.get_lib())'`
```
4. Add the Python path of the correlation op:
```
export PYTHONPATH=$PYTHONPATH:`pwd`
```
5. Run the unit test with `python test_correlation.py` to verify that the op loads successfully.
PS: If the paddle wheel was downloaded from the official website, gcc 4.8 is required, i.e. change `g++` to `g++-4.8` in make.sh.
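Once the library is built and on the search paths, the op is used through a small Python wrapper module (the `correlation` module imported by `test_correlation.py` below). The real wrapper ships with the repo; the following is only a minimal, hypothetical sketch of what such a wrapper could look like, assuming `fluid.load_op_library` is available in this Paddle version:
```
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper

# assumption: correlation_lib.so is discoverable via LD_LIBRARY_PATH / the current directory
fluid.load_op_library('correlation_lib.so')


def correlation(x1, x2, pad_size, kernel_size, max_displacement,
                stride1, stride2, corr_type_multiply=1):
    # input/output/attribute names mirror CorrelationOpMaker below
    helper = LayerHelper('correlation', **locals())
    out = helper.create_variable_for_type_inference(dtype=x1.dtype)
    helper.append_op(type='correlation',
                     inputs={'Input1': x1, 'Input2': x2},
                     outputs={'Output': out},
                     attrs={
                         'pad_size': pad_size,
                         'kernel_size': kernel_size,
                         'max_displacement': max_displacement,
                         'stride1': stride1,
                         'stride2': stride2,
                         'corr_type_multiply': corr_type_multiply
                     })
    return out
```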
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
inline std::vector<int64_t> CorrelationOutputSize(int batch, int input_height, int input_width, int stride1, int stride2, int kernel_size, int pad_size, int max_displacement) {
std::vector<int64_t> output_shape({batch});
int kernel_radius = (kernel_size - 1) / 2;
int border_radius = kernel_radius + max_displacement;
int padded_input_height = input_height + 2 * pad_size;
int padded_input_width = input_width + 2 * pad_size;
int output_channel = ((max_displacement/stride2) * 2 + 1) * ((max_displacement/stride2) * 2 + 1);
output_shape.push_back(output_channel);
int output_height = std::ceil(static_cast<float>(padded_input_height - 2 * border_radius) / static_cast<float>(stride1));
int output_width = std::ceil(static_cast<float>(padded_input_width - 2 * border_radius) / static_cast<float>(stride1));
output_shape.push_back(output_height);
output_shape.push_back(output_width);
return output_shape;
}
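// Worked example with the parameters used elsewhere in this repo
// (pad_size = 4, kernel_size = 1, max_displacement = 4, stride1 = stride2 = 1):
//   kernel_radius = 0, border_radius = 4, padded size = input size + 8,
//   output_channel = (4 * 2 + 1) * (4 * 2 + 1) = 81,
//   output_height = ceil((input_height + 8 - 8) / 1) = input_height (same for width),
// so the output shape is {batch, 81, input_height, input_width}.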
class CorrelationOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override{
AddInput("Input1", "input1");
AddInput("Input2", "input2");
AddOutput("Output", "output");
AddAttr<int>("pad_size", "pad size for input1 and input2");
AddAttr<int>("kernel_size", "kernel size of input1 and input2");
AddAttr<int>("max_displacement", "max displacement of input1 and input2");
AddAttr<int>("stride1", "Input1 stride");
AddAttr<int>("stride2", "Input2 stride");
AddAttr<int>("corr_type_multiply", "correlation coefficient").SetDefault(1);
AddComment(R"DOC(Correlation of two feature map. Only support NCHW data format.)DOC");
}
};
class CorrelationOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(input1) cannot be null");
PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(input2) cannot be null");
int stride1 = ctx->Attrs().Get<int>("stride1");
int stride2 = ctx->Attrs().Get<int>("stride2");
int max_displacement = ctx->Attrs().Get<int>("max_displacement");
int pad_size = ctx->Attrs().Get<int>("pad_size");
int kernel_size = ctx->Attrs().Get<int>("kernel_size");
auto in_dims = ctx->GetInputDim("Input1");
auto in2_dims = ctx->GetInputDim("Input2");
PADDLE_ENFORCE_EQ(in_dims.size() == 4, true, "input1 must be 4-dims");
PADDLE_ENFORCE_EQ(in2_dims.size() == 4, true, "input2 must be 4-dims");
std::vector<int64_t> output_shape = CorrelationOutputSize(in_dims[0], in_dims[2], in_dims[3], stride1, stride2, kernel_size, pad_size, max_displacement);
ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override{
auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input1");
PADDLE_ENFORCE_EQ(input_data_type, ctx.Input<Tensor>("Input2")->type(), "Input1 and Input2 should have the same type");
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
};
template <typename T>
class CorrelationOpGradMaker : public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("correlation_grad");
op->SetInput("Input1", this->Input("Input1"));
op->SetInput("Input2", this->Input("Input2"));
op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output"));
op->SetOutput(framework::GradVarName("Input1"), this->InputGrad("Input1"));
op->SetOutput(framework::GradVarName("Input2"), this->InputGrad("Input2"));
op->SetAttrMap(this->Attrs());
}
};
class CorrelationOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override{
PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(Input1) should not be null");
PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(Input2) should not be null");
PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Output")), true, "Input(Output@GRAD) should not be null");
auto in1_dims = ctx->GetInputDim("Input1");
auto in2_dims = ctx->GetInputDim("Input2");
ctx->SetOutputDim(framework::GradVarName("Input1"), in1_dims);
ctx->SetOutputDim(framework::GradVarName("Input2"), in1_dims);
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override{
const auto* var = ctx.InputVar(framework::GradVarName("Output"));
if (var == nullptr) {
PADDLE_THROW("cannot find Output@GRAD");
}
return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(ctx, "Input1"), ctx.GetPlace());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(correlation, ops::CorrelationOp, ops::CorrelationOpMaker,
ops::CorrelationOpGradMaker<paddle::framework::OpDesc>,
ops::CorrelationOpGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(correlation_grad, ops::CorrelationOpGrad);
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#define THREADS_PER_BLOCK 32
#define FULL_MASK 0xffffffff
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T>
__forceinline__ __device__ T warpReduceSum(T val) {
for (int offset = 16; offset > 0; offset /= 2) {
val += __shfl_down_sync(FULL_MASK, val, offset);
}
return val;
}
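// blockReduceSum: each warp reduces its own values first; lane 0 of every warp writes its
// partial sum to shared memory, then the first warp reduces those partials.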
template <typename T>
__forceinline__ __device__ T blockReduceSum(T val) {
static __shared__ T shared[32];
int lane = threadIdx.x % warpSize;
int wid = threadIdx.x / warpSize;
val = warpReduceSum(val);
if (lane == 0)
shared[wid] = val;
__syncthreads();
val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0;
if (wid == 0)
val = warpReduceSum(val);
return val;
}
template <typename T>
__global__ void set_zero(T *x, int num) {
for(int i = blockIdx.x * blockDim.x + threadIdx.x; i < num; i += blockDim.x * gridDim.x)
x[i] = static_cast<T>(0);
}
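// channel_first: copy the NCHW input into a zero-padded NHWC buffer (rinput) so the
// correlation kernels can read all channels of a pixel contiguously.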
template <typename T>
__global__ void channel_first(const T *input, T *rinput, const int channel, const int height, const int width, const int pad_size) {
int n = blockIdx.x;
int h = blockIdx.y;
int w = blockIdx.z;
int ch_off = threadIdx.x;
T value;
int dimchw = channel * height * width;
int dimhw = height * width;
int p_dimw = (width + 2 * pad_size);
int p_dimh = (height + 2 * pad_size);
int p_dimchw = channel * p_dimw * p_dimh;
int p_dimcw = channel * p_dimw;
for (int c = ch_off; c < channel; c += THREADS_PER_BLOCK) {
value = input[n * dimchw + c * dimhw + h * width + w];
rinput[n * p_dimchw + (h + pad_size) * p_dimcw + (w + pad_size) * channel + c] = value;
}
}
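// correlation_forward: one block per output location (n, y, x). For every displacement
// (ti, tj) within max_displacement, threads split the channels, accumulate the elementwise
// product of the kernel_size x kernel_size patch of rinput1 around (h1, w1) with the
// displaced patch of rinput2, reduce across the block, and thread 0 writes the mean to the
// matching output channel.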
template <typename T>
__global__ void correlation_forward(T *output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int input_channel, const int input_height, const int input_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int n = blockIdx.x;
int h1 = blockIdx.y * stride1 + max_displacement;
int w1 = blockIdx.z * stride1 + max_displacement;
int c = threadIdx.x;
int p_dimchw = p_input_height * p_input_width * input_channel;
int p_dimcw = p_input_width * input_channel;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int nelems = kernel_size * kernel_size * p_dimc;
for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) {
for(int ti = -displacement_rad; ti <= displacement_rad; ++ti) {
int w2 = w1 + ti * stride2;
int h2 = h1 + tj * stride2;
T acc0 = 0;
for(int j = -kernel_rad; j <= kernel_rad; ++j) {
for(int i = -kernel_rad; i <= kernel_rad; ++i) {
for(int ch = c; ch < p_dimc; ch += blockDim.x) {
int index1 = n * p_dimchw + (h1 + j) * p_dimcw + (w1 + i) * p_dimc + ch;
int index2 = n * p_dimchw + (h2 + j) * p_dimcw + (w2 + i) * p_dimc + ch;
acc0 += static_cast<T>(rinput1[index1] * rinput2[index2]);
}
}
}
if (blockDim.x == warpSize) {
__syncwarp();
acc0 = warpReduceSum(acc0);
} else {
__syncthreads();
acc0 = blockReduceSum(acc0);
}
if (threadIdx.x == 0) {
int tc = (tj + displacement_rad) * displacement_size + (ti + displacement_rad);
const int t_index = n * t_dimchw + tc * t_dimhw + blockIdx.y * t_dimw + blockIdx.z;
output[t_index] = static_cast<T>(acc0 / nelems);
}
}
}
}
//class CorrelationKernel<platform::CUDADeviceContext, T>
template <typename T>
class CorrelationKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must be CUDAPlace");
auto *input1 = ctx.Input<Tensor>("Input1");
auto *input2 = ctx.Input<Tensor>("Input2");
int pad_size = ctx.Attr<int>("pad_size");
int kernel_size = ctx.Attr<int>("kernel_size");
int stride1 = ctx.Attr<int>("stride1");
int stride2 = ctx.Attr<int>("stride2");
int max_displacement = ctx.Attr<int>("max_displacement");
int corr_type_multiply = ctx.Attr<int>("corr_type_multiply");
auto *output = ctx.Output<Tensor>("Output");
output->mutable_data<T>(ctx.GetPlace());
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
// base on input1, NCHW
auto in_dims = input1->dims();
int N = in_dims[0];
int C = in_dims[1];
int H = in_dims[2];
int W = in_dims[3];
int padded_input_height = H + 2 * pad_size;
int padded_input_width = W + 2 * pad_size;
Tensor rinput1 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput1.mutable_data<T>(ctx.GetPlace());
Tensor rinput2 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput2.mutable_data<T>(ctx.GetPlace());
set_zero<<<(rinput1.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput1.data<T>(), rinput1.numel());
set_zero<<<(rinput2.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput2.data<T>(), rinput2.numel());
set_zero<<<(output->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(output->data<T>(), output->numel());
auto out_dims = output->dims();
int OC = out_dims[1];
int OH = out_dims[2];
int OW = out_dims[3];
dim3 blocks_grid(N, H, W);
dim3 threads_block(THREADS_PER_BLOCK);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input1->data<T>(), rinput1.data<T>(), C, H, W, pad_size);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input2->data<T>(), rinput2.data<T>(), C, H, W, pad_size);
dim3 threadsPerBlock(THREADS_PER_BLOCK);
dim3 totalBlocksCorr(N, OH, OW);
correlation_forward<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(output->data<T>(), OC, OH, OW, rinput1.data<T>(),
C, H, W, rinput2.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
};
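// correlation_backward_input1: gradient w.r.t. Input1 for one sample (n). One block per
// padded input location (h, w) and channel c; threads split the output channels, accumulate
// grad_output * rinput2 over all output positions whose correlation window covered (h, w),
// and thread 0 writes the normalized sum into grad_input1.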
template <typename T>
__global__ void correlation_backward_input1(int item, T *grad_input1, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) {
int n = item;
int h = blockIdx.x * stride1 + pad_size;
int w = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int xmin = (w - kernel_rad - max_displacement) / stride1;
int ymin = (h - kernel_rad - max_displacement) / stride1;
int xmax = (w + kernel_rad - max_displacement) / stride1;
int ymax = (h + kernel_rad - max_displacement) / stride1;
if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) {
return;
}
if (xmin > xmax || ymin > ymax) {
return;
}
xmin = max(0, xmin);
xmax = min(output_width - 1, xmax);
ymin = max(0, ymin);
ymax = min(output_height - 1, ymax);
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int p_dimchw = input_channel * p_input_height * p_input_width;
int p_dimcw = input_channel * p_input_width;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int o_dimchw = input_channel * input_height * input_width;
int o_dimhw = input_height * input_width;
int o_dimw = input_width;
int nelems = kernel_size * kernel_size * input_channel;
__shared__ T prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) {
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
int index2 = n * p_dimchw + (h + j2) * p_dimcw + (w + i2) * p_dimc + c;
T val2 = rinput2[index2];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i;
prod_sum[tch_off] += grad_output[t_index] * val2;
}
}
}
__syncthreads();
if (tch_off == 0) {
T reduce_sum = 0;
for (int index = 0; index < THREADS_PER_BLOCK; index++) {
reduce_sum += prod_sum[index];
}
const int index1 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size);
grad_input1[index1] = static_cast<T>(reduce_sum / nelems);
}
}
template <typename T>
__global__ void correlation_backward_input2(int item, T *grad_input2, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2){
int n = item;
int h = blockIdx.x * stride1 + pad_size;
int w = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int p_input_width = input_width + 2 * pad_size;
int p_input_height = input_height + 2 * pad_size;
int p_dimchw = input_channel * p_input_height * p_input_width;
int p_dimcw = input_channel * p_input_width;
int p_dimc = input_channel;
int t_dimchw = output_channel * output_height * output_width;
int t_dimhw = output_height * output_width;
int t_dimw = output_width;
int o_dimchw = input_channel * input_height * input_width;
int o_dimhw = input_height * input_width;
int o_dimw = input_width;
int nelems = kernel_size * kernel_size * input_channel;
__shared__ T prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) {
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
int xmin = (w - kernel_rad - max_displacement - i2) / stride1;
int ymin = (h - kernel_rad - max_displacement - j2) / stride1;
int xmax = (w + kernel_rad - max_displacement - i2) / stride1;
int ymax = (h + kernel_rad - max_displacement - j2) / stride1;
if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) {
continue;
}
if (xmin > xmax || ymin > ymax) {
continue;
}
xmin = max(0, xmin);
xmax = min(output_width - 1, xmax);
ymin = max(0, ymin);
ymax = min(output_height - 1, ymax);
int index1 = n * p_dimchw + (h - j2) * p_dimcw + (w - i2) * p_dimc + c;
T val1 = rinput1[index1];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i;
prod_sum[tch_off] += grad_output[t_index] * val1;
}
}
}
__syncthreads();
if (tch_off == 0) {
T reduce_sum = 0;
for (int index = 0; index < THREADS_PER_BLOCK; index++) {
reduce_sum += prod_sum[index];
}
const int index2 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size);
grad_input2[index2] = static_cast<T>(reduce_sum / nelems);
}
}
template <typename T>
class CorrelationGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must use CUDAPlace.");
const auto *input1 = ctx.Input<Tensor>("Input1");
const auto *input2 = ctx.Input<Tensor>("Input2");
const auto *grad_output = ctx.Input<Tensor>(framework::GradVarName("Output"));
const int pad_size = ctx.Attr<int>("pad_size");
const int kernel_size = ctx.Attr<int>("kernel_size");
const int stride1 = ctx.Attr<int>("stride1");
const int stride2 = ctx.Attr<int>("stride2");
const int max_displacement = ctx.Attr<int>("max_displacement");
const int corr_type_multiply = ctx.Attr<int>("corr_type_multiply");
auto *grad_input1 = ctx.Output<Tensor>(framework::GradVarName("Input1"));
grad_input1->mutable_data<T>(ctx.GetPlace());
auto *grad_input2 = ctx.Output<Tensor>(framework::GradVarName("Input2"));
grad_input2->mutable_data<T>(ctx.GetPlace());
auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
auto in_dims = input1->dims();
int N = in_dims[0];
int C = in_dims[1];
int H = in_dims[2];
int W = in_dims[3];
int padded_input_height = H + 2 * pad_size;
int padded_input_width = W + 2 * pad_size;
Tensor rinput1 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput1.mutable_data<T>(ctx.GetPlace());
Tensor rinput2 = ctx.AllocateTmpTensor<T, platform::CUDADeviceContext>({N, padded_input_height, padded_input_width, C}, dev_ctx);
rinput2.mutable_data<T>(ctx.GetPlace());
set_zero<<<(rinput1.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput1.data<T>(), rinput1.numel());
set_zero<<<(rinput2.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput2.data<T>(), rinput2.numel());
set_zero<<<(grad_input1->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(grad_input1->data<T>(), grad_input1->numel());
set_zero<<<(grad_input2->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(grad_input2->data<T>(), grad_input2->numel());
auto grad_out_dims = grad_output->dims();
int GOC = grad_out_dims[1];
int GOH = grad_out_dims[2];
int GOW = grad_out_dims[3];
dim3 blocks_grid(N, H, W);
dim3 threads_block(THREADS_PER_BLOCK);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input1->data<T>(), rinput1.data<T>(), C, H, W, pad_size);
channel_first<T><<<blocks_grid, threads_block, 0, dev_ctx.stream()>>>(input2->data<T>(), rinput2.data<T>(), C, H, W, pad_size);
dim3 threadsPerBlock(THREADS_PER_BLOCK);
dim3 totalBlocksCorr(H, W, C);
for (int n = 0; n < N; n++) {
correlation_backward_input1<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(n, grad_input1->data<T>(), C, H, W, grad_output->data<T>(), GOC, GOH, GOW, rinput2.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
for (int n = 0; n < N; n++) {
correlation_backward_input2<T><<<totalBlocksCorr, threadsPerBlock, 0, dev_ctx.stream()>>>(n, grad_input2->data<T>(), C, H, W, grad_output->data<T>(), GOC, GOH, GOW, rinput1.data<T>(), pad_size, kernel_size, max_displacement, stride1, stride2);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
correlation, ops::CorrelationKernel<float>,
ops::CorrelationKernel<double>);
REGISTER_OP_CUDA_KERNEL(
correlation_grad, ops::CorrelationGradKernel<float>,
ops::CorrelationGradKernel<double>);
# source /ssd1/vis/liufanglong/.bashrc
#export PATH=/home/work/cuda-9.0/bin:$PATH
#export LD_LIBRARY_PATH="/home/work/cuda-9.0/lib64:$LD_LIBRARY_PATH"
#export LD_LIBRARY_PATH=/home/vis/chao/local/cudnn_v7.6/cuda/lib64:$LD_LIBRARY_PATH
#export CPLUS_INCLUDE_PATH=/home/vis/chao/local/cudnn_v7.6/cuda/include:/ssd1/vis/liufanglong/local/fluid_1.1.0_for_slurm/nccl_2.3.5/include:$CPLUS_INCLUDE_PATH
#export LD_LIBRARY_PATH=/ssd1/vis/liufanglong/local/fluid_1.1.0_for_slurm/nccl_2.3.5/lib:$LD_LIBRARY_PATH
include_dir=$( python -c 'import paddle; print(paddle.sysconfig.get_include())' )
lib_dir=$( python -c 'import paddle; print(paddle.sysconfig.get_lib())' )
echo $include_dir
echo $lib_dir
OPS='correlation_op'
for op in ${OPS}
do
nvcc ${op}.cu -c -o ${op}.cu.o -ccbin cc -DPADDLE_WITH_CUDA -DEIGEN_USE_GPU -DPADDLE_USE_DSO -DPADDLE_WITH_MKLDNN -Xcompiler -fPIC -std=c++11 -Xcompiler -fPIC -w --expt-relaxed-constexpr -O0 -g -DNVCC \
-I ${include_dir}/third_party/ \
-I ${include_dir}
done
# g++-4.8 correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
# g++ ${OPS}.cu.o ${OPS}.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
g++ correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \
-I ${include_dir}/third_party/ \
-I ${include_dir} \
-L ${lib_dir} \
-L /usr/local/cuda/lib64/ -lpaddle_framework -lcudart
# rm *.cu.o
import unittest
from correlation import correlation
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
def corr(x_1,
x_2,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1,
corr_multiply=1):
K = kernel_size
# rinput1 = np.pad(x_1, tuple([pad_size for _ in range(4)]), mode='constant').transpose(1, 2).transpose(2, 3)
# rinput2 = np.pad(x_2, tuple([pad_size for _ in range(4)]), mode='constant').transpose(1, 2).transpose(2, 3)
rinput1 = np.pad(x_1, ((0, 0), (0, 0), (pad_size, pad_size),
(pad_size, pad_size)),
mode='constant')
rinput2 = np.pad(x_2, ((0, 0), (0, 0), (pad_size, pad_size),
(pad_size, pad_size)),
mode='constant')
rinput1 = np.transpose(rinput1, (0, 2, 3, 1))
rinput2 = np.transpose(rinput2, (0, 2, 3, 1))
B = int(rinput1.shape[0])
H = int(x_1.shape[2])
W = int(x_2.shape[3])
d = max_displacement
D = 2 * d + 1
output = np.zeros((B, D * D, H, W), dtype=np.float32)
for b in range(B):
for i in range(H):
for j in range(W):
for k in range(-d, d + 1):
for l in range(-d, d + 1):
x1_index = i + pad_size
y1_index = j + pad_size
x2_index = x1_index + k
y2_index = y1_index + l
output[b, l + d + D * (k + d), i,
j] = np.mean(rinput1[b, x1_index:x1_index + K,
y1_index:y1_index + K] *
rinput2[b, x2_index:x2_index + K,
y2_index:y2_index + K])
return output
class TestCorrelationOp(unittest.TestCase):
def test_check_output(self):
#x_shape = (1, 196, 3, 3)
np.random.seed(13)
np.set_printoptions(threshold=np.inf)
x_shape = (2, 10, 3, 3)
x_type = 'float32'
x1 = fluid.layers.data(name='x1',
shape=x_shape,
dtype=x_type,
append_batch_size=False)
x2 = fluid.layers.data(name='x2',
shape=x_shape,
dtype=x_type,
append_batch_size=False)
x1_np = np.random.randn(2, 3, 4, 5).astype(x_type)
x2_np = np.random.randn(2, 3, 4, 5).astype(x_type)
out_np = corr(x1_np,
x2_np,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
out = correlation(x1,
x2,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
res = exe.run(feed={'x1': x1_np, 'x2': x2_np}, fetch_list=[out.name])
self.assertTrue(np.allclose(res[0], out_np))
class Net(fluid.dygraph.Layer):
def __init__(self, name_scope):
super(Net, self).__init__(name_scope)
def forward(self, x1, x2):
y = correlation(x1,
x2,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
return y
class TestCorrelationOpDyGraph(unittest.TestCase):
def test_check_output(self):
np.random.seed(13)
np.set_printoptions(threshold=np.inf)
x_shape = (2, 10, 3, 3)
x_type = 'float32'
place = fluid.CUDAPlace(0)
with fluid.dygraph.guard(place):
x1_np = np.random.randn(2, 3, 4, 5).astype(x_type)
x2_np = np.random.randn(2, 3, 4, 5).astype(x_type)
out_np = corr(x1_np,
x2_np,
pad_size=4,
kernel_size=1,
max_displacement=4,
stride1=1,
stride2=1)
x1 = to_variable(x1_np)
x2 = to_variable(x2_np)
corr_pd = Net('corr_pd')
y = corr_pd(x1, x2)
out = y.numpy()
self.assertTrue(np.allclose(out, out_np))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D, Conv2DTranspose
from paddle.fluid.contrib import correlation
__all__ = ['pwc_dc_net']
class PWCDCNet(fluid.dygraph.Layer):
def __init__(self, md=4):
super(PWCDCNet, self).__init__()
self.md = md
self.param_attr = fluid.ParamAttr(
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0004),
initializer=fluid.initializer.MSRAInitializer(uniform=True,
fan_in=None,
seed=0))
self.conv1a = Conv2D(3, 16, 3, 2, 1, param_attr=self.param_attr)
self.conv1aa = Conv2D(16, 16, 3, 1, 1, param_attr=self.param_attr)
self.conv1b = Conv2D(16, 16, 3, 1, 1, param_attr=self.param_attr)
self.conv2a = Conv2D(16, 32, 3, 2, 1, param_attr=self.param_attr)
self.conv2aa = Conv2D(32, 32, 3, 1, 1, param_attr=self.param_attr)
self.conv2b = Conv2D(32, 32, 3, 1, 1, param_attr=self.param_attr)
self.conv3a = Conv2D(32, 64, 3, 2, 1, param_attr=self.param_attr)
self.conv3aa = Conv2D(64, 64, 3, 1, 1, param_attr=self.param_attr)
self.conv3b = Conv2D(64, 64, 3, 1, 1, param_attr=self.param_attr)
self.conv4a = Conv2D(64, 96, 3, 2, 1, param_attr=self.param_attr)
self.conv4aa = Conv2D(96, 96, 3, 1, 1, param_attr=self.param_attr)
self.conv4b = Conv2D(96, 96, 3, 1, 1, param_attr=self.param_attr)
self.conv5a = Conv2D(96, 128, 3, 2, 1, param_attr=self.param_attr)
self.conv5aa = Conv2D(128, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv5b = Conv2D(128, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv6aa = Conv2D(128, 196, 3, 2, 1, param_attr=self.param_attr)
self.conv6a = Conv2D(196, 196, 3, 1, 1, param_attr=self.param_attr)
self.conv6b = Conv2D(196, 196, 3, 1, 1, param_attr=self.param_attr)
nd = (2 * self.md + 1)**2
dd = np.cumsum([128, 128, 96, 64, 32], dtype=np.int32).astype(np.int)
dd = [int(d) for d in dd]
od = nd
self.conv6_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv6_1 = Conv2D(od + dd[0],
128,
3,
1,
1,
param_attr=self.param_attr)
self.conv6_2 = Conv2D(od + dd[1],
96,
3,
1,
1,
param_attr=self.param_attr)
self.conv6_3 = Conv2D(od + dd[2],
64,
3,
1,
1,
param_attr=self.param_attr)
self.conv6_4 = Conv2D(od + dd[3],
32,
3,
1,
1,
param_attr=self.param_attr)
self.predict_flow6 = Conv2D(od + dd[4],
2,
3,
1,
1,
param_attr=self.param_attr)
self.deconv6 = Conv2DTranspose(2,
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
self.upfeat6 = Conv2DTranspose(od + dd[4],
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
od = nd + 128 + 4
self.conv5_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv5_1 = Conv2D(od + dd[0],
128,
3,
1,
1,
param_attr=self.param_attr)
self.conv5_2 = Conv2D(od + dd[1],
96,
3,
1,
1,
param_attr=self.param_attr)
self.conv5_3 = Conv2D(od + dd[2],
64,
3,
1,
1,
param_attr=self.param_attr)
self.conv5_4 = Conv2D(od + dd[3],
32,
3,
1,
1,
param_attr=self.param_attr)
self.predict_flow5 = Conv2D(od + dd[4],
2,
3,
1,
1,
param_attr=self.param_attr)
self.deconv5 = Conv2DTranspose(2,
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
self.upfeat5 = Conv2DTranspose(od + dd[4],
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
od = nd + 96 + 4
self.conv4_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv4_1 = Conv2D(od + dd[0],
128,
3,
1,
1,
param_attr=self.param_attr)
self.conv4_2 = Conv2D(od + dd[1],
96,
3,
1,
1,
param_attr=self.param_attr)
self.conv4_3 = Conv2D(od + dd[2],
64,
3,
1,
1,
param_attr=self.param_attr)
self.conv4_4 = Conv2D(od + dd[3],
32,
3,
1,
1,
param_attr=self.param_attr)
self.predict_flow4 = Conv2D(od + dd[4],
2,
3,
1,
1,
param_attr=self.param_attr)
self.deconv4 = Conv2DTranspose(2,
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
self.upfeat4 = Conv2DTranspose(od + dd[4],
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
od = nd + 64 + 4
self.conv3_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv3_1 = Conv2D(od + dd[0],
128,
3,
1,
1,
param_attr=self.param_attr)
self.conv3_2 = Conv2D(od + dd[1],
96,
3,
1,
1,
param_attr=self.param_attr)
self.conv3_3 = Conv2D(od + dd[2],
64,
3,
1,
1,
param_attr=self.param_attr)
self.conv3_4 = Conv2D(od + dd[3],
32,
3,
1,
1,
param_attr=self.param_attr)
self.predict_flow3 = Conv2D(od + dd[4],
2,
3,
1,
1,
param_attr=self.param_attr)
self.deconv3 = Conv2DTranspose(2,
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
self.upfeat3 = Conv2DTranspose(od + dd[4],
2,
4,
stride=2,
padding=1,
param_attr=self.param_attr)
od = nd + 32 + 4
self.conv2_0 = Conv2D(od, 128, 3, 1, 1, param_attr=self.param_attr)
self.conv2_1 = Conv2D(od + dd[0],
128,
3,
1,
1,
param_attr=self.param_attr)
self.conv2_2 = Conv2D(od + dd[1],
96,
3,
1,
1,
param_attr=self.param_attr)
self.conv2_3 = Conv2D(od + dd[2],
64,
3,
1,
1,
param_attr=self.param_attr)
self.conv2_4 = Conv2D(od + dd[3],
32,
3,
1,
1,
param_attr=self.param_attr)
self.predict_flow2 = Conv2D(od + dd[4],
2,
3,
1,
1,
param_attr=self.param_attr)
# self.deconv2 = Conv2DTranspose(2, 2, 4, stride=2, padding=1, param_attr=self.param_attr)
self.dc_conv1 = Conv2D(od + dd[4],
128,
3,
1,
1,
dilation=1,
param_attr=self.param_attr)
self.dc_conv2 = Conv2D(128,
128,
3,
1,
2,
dilation=2,
param_attr=self.param_attr)
self.dc_conv3 = Conv2D(128,
128,
3,
1,
4,
dilation=4,
param_attr=self.param_attr)
self.dc_conv4 = Conv2D(128,
96,
3,
1,
8,
dilation=8,
param_attr=self.param_attr)
self.dc_conv5 = Conv2D(96,
64,
3,
1,
16,
dilation=16,
param_attr=self.param_attr)
self.dc_conv6 = Conv2D(64,
32,
3,
1,
1,
dilation=1,
param_attr=self.param_attr)
self.dc_conv7 = Conv2D(32, 2, 3, 1, 1, param_attr=self.param_attr)
def warp(self, x, flo):
"""
warp an image/tensor (im2) back to im1, according to the optical flow
x: [B, C, H, W] (im2)
flo: [B, 2, H, W] flow
"""
x_shape = fluid.layers.shape(x)
B, H, W = x_shape[0], x_shape[2], x_shape[3]
bb = fluid.layers.range(0, B, 1, 'float32')
xx = fluid.layers.range(0, W, 1, 'float32')
yy = fluid.layers.range(0, H, 1, 'float32')
_, yy, xx = paddle.tensor.meshgrid(bb, yy, xx)
yy = fluid.layers.unsqueeze(yy, [1])
xx = fluid.layers.unsqueeze(xx, [1])
grid = fluid.layers.concat(input=[xx, yy], axis=1)
flo = flo
vgrid = fluid.layers.elementwise_add(grid, flo)
vgrid_0 = 2.0 * fluid.layers.slice(
vgrid, axes=[1], starts=[0], ends=[1]) / (W - 1.) - 1.0
vgrid_1 = 2.0 * fluid.layers.slice(
vgrid, axes=[1], starts=[1], ends=[2]) / (H - 1.) - 1.0
vgrid = fluid.layers.concat(input=[vgrid_0, vgrid_1], axis=1)
vgrid = fluid.layers.transpose(vgrid, [0, 2, 3, 1])
output = fluid.layers.grid_sampler(name='grid_sample', x=x, grid=vgrid)
mask = fluid.layers.zeros_like(x)
mask = mask + 1.0
mask = fluid.layers.grid_sampler(name='grid_sample', x=mask, grid=vgrid)
mask_temp1 = fluid.layers.cast(mask < 0.9990, 'float32')
mask = mask * (1 - mask_temp1)
mask = fluid.layers.cast(mask > 0, 'float32')
outwarp = fluid.layers.elementwise_mul(output, mask)
return outwarp
def warp_nomask(self, x, flo):
"""
warp an image/tensor (im2) back to im1, according to the optical flow
x: [B, C, H, W] (im2)
flo: [B, 2, H, W] flow
"""
B, C, H, W = x.shape
# mesh grid
# xx = fluid.layers.range(0, W, 1, 'float32')
# xx = fluid.layers.reshape(xx, shape=[1, -1])
# xx = fluid.layers.expand(x=xx, expand_times=[H, 1])
# xx = fluid.layers.reshape(xx, shape=[1, 1, H, W])
# xx = fluid.layers.expand(x=xx, expand_times=[B, 1, 1, 1])
#
# yy = fluid.layers.range(0, H, 1, 'float32')
# yy = fluid.layers.reshape(yy, shape=[-1, 1])
# yy = fluid.layers.expand(x=yy, expand_times=[1, W])
# yy = fluid.layers.reshape(x=yy, shape=[1, 1, H, W])
# yy = fluid.layers.expand(x=yy, expand_times=[B, 1, 1, 1])
x_shape = fluid.layers.shape(x)
B, H, W = x_shape[0], x_shape[2], x_shape[3]
bb = fluid.layers.range(0, B, 1, 'float32')
xx = fluid.layers.range(0, W, 1, 'float32')
# xx = fluid.layers.reshape(xx, shape=[1, -1])
yy = fluid.layers.range(0, H, 1, 'float32')
# yy = fluid.layers.reshape(yy, shape=[1, -1])
_, yy, xx = paddle.tensor.meshgrid(bb, yy, xx)
yy = fluid.layers.unsqueeze(yy, [1])
xx = fluid.layers.unsqueeze(xx, [1])
grid = fluid.layers.concat(input=[xx, yy], axis=1)
flo = flo
vgrid = fluid.layers.elementwise_add(grid, flo)
#vgrid_0 = 2.0 * fluid.layers.slice(vgrid, axes=[1], starts=[0], ends=[1]) / max(W - 1, 1) - 1.0
#vgrid_1 = 2.0 * fluid.layers.slice(vgrid, axes=[1], starts=[1], ends=[2]) / max(H - 1, 1) - 1.0
vgrid_0 = 2.0 * fluid.layers.slice(
vgrid, axes=[1], starts=[0], ends=[1]) / (W - 1.) - 1.0
vgrid_1 = 2.0 * fluid.layers.slice(
vgrid, axes=[1], starts=[1], ends=[2]) / (H - 1.) - 1.0
vgrid = fluid.layers.concat(input=[vgrid_0, vgrid_1], axis=1)
vgrid = fluid.layers.transpose(vgrid, [0, 2, 3, 1])
output = fluid.layers.grid_sampler(name='grid_sample', x=x, grid=vgrid)
return output
def corr(self, x_1, x_2):
out = correlation(x_1,
x_2,
pad_size=self.md,
kernel_size=1,
max_displacement=self.md,
stride1=1,
stride2=1,
corr_type_multiply=1)
return out
def forward(self, x, output_more=False):
im1 = fluid.layers.slice(x, axes=[1], starts=[0], ends=[3])
im2 = fluid.layers.slice(x, axes=[1], starts=[3], ends=[6])
# print("\n\n********************PWC Net details *************** \n\n")
c11 = fluid.layers.leaky_relu(self.conv1a(im1), 0.1)
c11 = fluid.layers.leaky_relu(self.conv1aa(c11), 0.1)
c11 = fluid.layers.leaky_relu(self.conv1b(c11), 0.1)
c21 = fluid.layers.leaky_relu(self.conv1a(im2), 0.1)
c21 = fluid.layers.leaky_relu(self.conv1aa(c21), 0.1)
c21 = fluid.layers.leaky_relu(self.conv1b(c21), 0.1)
c12 = fluid.layers.leaky_relu(self.conv2a(c11), 0.1)
c12 = fluid.layers.leaky_relu(self.conv2aa(c12), 0.1)
c12 = fluid.layers.leaky_relu(self.conv2b(c12), 0.1)
c22 = fluid.layers.leaky_relu(self.conv2a(c21), 0.1)
c22 = fluid.layers.leaky_relu(self.conv2aa(c22), 0.1)
c22 = fluid.layers.leaky_relu(self.conv2b(c22), 0.1)
c13 = fluid.layers.leaky_relu(self.conv3a(c12), 0.1)
c13 = fluid.layers.leaky_relu(self.conv3aa(c13), 0.1)
c13 = fluid.layers.leaky_relu(self.conv3b(c13), 0.1)
c23 = fluid.layers.leaky_relu(self.conv3a(c22), 0.1)
c23 = fluid.layers.leaky_relu(self.conv3aa(c23), 0.1)
c23 = fluid.layers.leaky_relu(self.conv3b(c23), 0.1)
c14 = fluid.layers.leaky_relu(self.conv4a(c13), 0.1)
c14 = fluid.layers.leaky_relu(self.conv4aa(c14), 0.1)
c14 = fluid.layers.leaky_relu(self.conv4b(c14), 0.1)
c24 = fluid.layers.leaky_relu(self.conv4a(c23), 0.1)
c24 = fluid.layers.leaky_relu(self.conv4aa(c24), 0.1)
c24 = fluid.layers.leaky_relu(self.conv4b(c24), 0.1)
c15 = fluid.layers.leaky_relu(self.conv5a(c14), 0.1)
c15 = fluid.layers.leaky_relu(self.conv5aa(c15), 0.1)
c15 = fluid.layers.leaky_relu(self.conv5b(c15), 0.1)
c25 = fluid.layers.leaky_relu(self.conv5a(c24), 0.1)
c25 = fluid.layers.leaky_relu(self.conv5aa(c25), 0.1)
c25 = fluid.layers.leaky_relu(self.conv5b(c25), 0.1)
c16 = fluid.layers.leaky_relu(self.conv6aa(c15), 0.1)
c16 = fluid.layers.leaky_relu(self.conv6a(c16), 0.1)
c16 = fluid.layers.leaky_relu(self.conv6b(c16), 0.1)
c26 = fluid.layers.leaky_relu(self.conv6aa(c25), 0.1)
c26 = fluid.layers.leaky_relu(self.conv6a(c26), 0.1)
c26 = fluid.layers.leaky_relu(self.conv6b(c26), 0.1)
corr6 = self.corr(c16, c26)
corr6 = fluid.layers.leaky_relu(corr6, alpha=0.1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv6_0(corr6), 0.1), corr6],
axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv6_1(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv6_2(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv6_3(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv6_4(x), 0.1), x], axis=1)
flow6 = self.predict_flow6(x)
up_flow6 = self.deconv6(flow6)
up_feat6 = self.upfeat6(x)
warp5 = self.warp(c25, up_flow6 * 0.625)
corr5 = self.corr(c15, warp5)
corr5 = fluid.layers.leaky_relu(corr5, alpha=0.1)
x = fluid.layers.concat(input=[corr5, c15, up_flow6, up_feat6], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv5_0(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv5_1(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv5_2(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv5_3(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv5_4(x), 0.1), x], axis=1)
flow5 = self.predict_flow5(x)
up_flow5 = self.deconv5(flow5)
up_feat5 = self.upfeat5(x)
warp4 = self.warp(c24, up_flow5 * 1.25)
corr4 = self.corr(c14, warp4)
corr4 = fluid.layers.leaky_relu(corr4, alpha=0.1)
x = fluid.layers.concat(input=[corr4, c14, up_flow5, up_feat5], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv4_0(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv4_1(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv4_2(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv4_3(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv4_4(x), 0.1), x], axis=1)
flow4 = self.predict_flow4(x)
up_flow4 = self.deconv4(flow4)
up_feat4 = self.upfeat4(x)
warp3 = self.warp(c23, up_flow4 * 2.5)
corr3 = self.corr(c13, warp3)
corr3 = fluid.layers.leaky_relu(corr3, alpha=0.1)
x = fluid.layers.concat(input=[corr3, c13, up_flow4, up_feat4], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv3_0(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv3_1(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv3_2(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv3_3(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv3_4(x), 0.1), x], axis=1)
flow3 = self.predict_flow3(x)
up_flow3 = self.deconv3(flow3)
up_feat3 = self.upfeat3(x)
warp2 = self.warp(c22, up_flow3 * 5.0)
corr2 = self.corr(c12, warp2)
corr2 = fluid.layers.leaky_relu(corr2, alpha=0.1)
x = fluid.layers.concat(input=[corr2, c12, up_flow3, up_feat3], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv2_0(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv2_1(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv2_2(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv2_3(x), 0.1), x], axis=1)
x = fluid.layers.concat(
input=[fluid.layers.leaky_relu(self.conv2_4(x), 0.1), x], axis=1)
flow2 = self.predict_flow2(x)
x = fluid.layers.leaky_relu(
self.dc_conv4(
fluid.layers.leaky_relu(
self.dc_conv3(
fluid.layers.leaky_relu(
self.dc_conv2(
fluid.layers.leaky_relu(self.dc_conv1(x), 0.1)),
0.1)), 0.1)), 0.1)
flow2 += self.dc_conv7(
fluid.layers.leaky_relu(
self.dc_conv6(fluid.layers.leaky_relu(self.dc_conv5(x), 0.1)),
0.1))
if not output_more:
return flow2
else:
return [flow2, flow3, flow4, flow5, flow6]
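# Note on the warp scaling constants above: the factors 0.625, 1.25, 2.5 and
# 5.0 appear to follow the usual PWC-Net convention of div_flow / stride
# (20/32, 20/16, 20/8, 20/4), rescaling the normalized flow prediction into
# pixel units at the resolution of the pyramid level whose features are warped.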
def pwc_dc_net(path=None):
model = PWCDCNet()
if path is not None:
import pickle
data = pickle.load(open(path, 'rb'))
weight_list = []
for k, v in data.items():
weight_list.append(v)
param_dict = {}
for i, param in enumerate(model.parameters()):
param_dict[param.name] = weight_list[i]
model.load_dict(param_dict)
return model
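# A small, hypothetical sanity check (not part of the repo): pwc_dc_net() above
# assumes the pickled weight dict iterates in the same order as
# model.parameters(), so comparing shapes before loading can catch a
# mismatched checkpoint early.
def check_weight_order(model, path):
    import pickle
    import numpy as np
    data = pickle.load(open(path, 'rb'))
    values = list(data.values())
    assert len(values) == len(model.parameters()), 'parameter count mismatch'
    for param, value in zip(model.parameters(), values):
        if tuple(param.shape) != tuple(np.asarray(value).shape):
            print('shape mismatch:', param.name, tuple(param.shape),
                  tuple(np.asarray(value).shape))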
import paddle.fluid as fluid
from paddle.fluid.dygraph import Conv2D
__all__ = ['MultipleBasicBlock', 'MultipleBasicBlock_4']
def conv3x3(in_planes, out_planes, dilation=1, stride=1, param_attr=None):
return Conv2D(in_planes,
out_planes,
filter_size=3,
stride=stride,
padding=int(dilation * (3 - 1) / 2),
dilation=dilation,
bias_attr=False,
param_attr=param_attr)
class BasicBlock(fluid.dygraph.Layer):
expansion = 1
def __init__(self, inplanes, planes, dilation=1, stride=1, downsample=None):
super(BasicBlock, self).__init__()
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=1.0, seed=0))
self.conv1 = conv3x3(inplanes, planes, dilation, stride, param_attr)
self.conv2 = conv3x3(planes, planes, param_attr=param_attr)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
# out = self.bn1(out)
out = fluid.layers.relu(out)
out = self.conv2(out)
# out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = fluid.layers.relu(out)
return out
class MultipleBasicBlock(fluid.dygraph.Layer):
def __init__(self,
input_feature,
block,
num_blocks,
intermediate_feature=64,
dense=True):
super(MultipleBasicBlock, self).__init__()
self.dense = dense
self.num_block = num_blocks
self.intermediate_feature = intermediate_feature
param_attr = fluid.ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=1.0, seed=0))
self.block1 = Conv2D(input_feature,
intermediate_feature,
filter_size=7,
stride=1,
padding=3,
bias_attr=True,
param_attr=param_attr)
dim = intermediate_feature
self.block2 = block(dim, dim, dilation=1) if num_blocks >= 2 else None
self.block3 = block(dim, dim, dilation=1) if num_blocks >= 3 else None
self.block4 = block(dim, dim, dilation=1) if num_blocks >= 4 else None
self.block5 = Conv2D(dim, 3, 3, 1, 1)
def forward(self, x):
x = fluid.layers.relu(self.block1(x))
x = self.block2(x) if self.num_block >= 2 else x
x = self.block3(x) if self.num_block >= 3 else x
x = self.block4(x) if self.num_block >= 4 else x
x = self.block5(x)
return x
def MultipleBasicBlock_4(input_feature, intermediate_feature=64):
model = MultipleBasicBlock(input_feature, BasicBlock, 4,
intermediate_feature)
return model
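# Shape sanity check, as a sketch (assumes dygraph mode and a toy input): the
# rectification head maps `input_feature` channels to a 3-channel image at the
# same spatial resolution.
if __name__ == '__main__':
    import numpy as np
    with fluid.dygraph.guard():
        block = MultipleBasicBlock_4(input_feature=16, intermediate_feature=64)
        x = fluid.dygraph.to_variable(np.zeros((1, 16, 32, 32), dtype='float32'))
        print(block(x).shape)  # expected: [1, 3, 32, 32]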
cd pwcnet/correlation_op
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python -c 'import paddle; print(paddle.sysconfig.get_lib())'`
export PYTHONPATH=$PYTHONPATH:`pwd`
cd ../../
VID_PATH=/paddle/work/github/DAIN/data/CBA.mp4
OUT_PATH=output
MODEL_PATH=DAIN_paddle_weight
CUDA_VISIBLE_DEVICES=2 python predict.py \
--time_step 0.125 \
--video_path=$VID_PATH \
--output_path=$OUT_PATH \
--saved_model=$MODEL_PATH
import os, sys
import glob
import shutil
import cv2
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
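# Tiny usage sketch: AverageMeter keeps a running mean, e.g. of per-frame
# processing time.
#   timer = AverageMeter()
#   timer.update(0.05)        # one frame took 0.05 s
#   timer.update(0.07, n=2)   # two frames took 0.07 s each
#   print(timer.avg)          # (0.05 + 0.07 * 2) / 3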
def combine_frames(input, interpolated, combined, num_frames):
frames1 = sorted(glob.glob(os.path.join(input, '*.png')))
frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png')))
num1 = len(frames1)
num2 = len(frames2)
# assert (num1 - 1) * num_frames == num2
for i in range(num1):
src = frames1[i]
imgname = int(src.split('/')[-1].split('.')[-2])
assert i == imgname
dst = os.path.join(combined, '{:08d}.png'.format(i * (num_frames + 1)))
shutil.copy2(src, dst)
if i < num1 - 1:
try:
for k in range(num_frames):
src = frames2[i * num_frames + k]
dst = os.path.join(
combined,
'{:08d}.png'.format(i * (num_frames + 1) + k + 1))
shutil.copy2(src, dst)
except Exception as e:
print(e)
print(len(frames2), num_frames, i, k, i * num_frames + k)
def remove_duplicates(paths):
def dhash(image, hash_size=8):
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
resized = cv2.resize(gray, (hash_size + 1, hash_size))
diff = resized[:, 1:] > resized[:, :-1]
return sum([2**i for (i, v) in enumerate(diff.flatten()) if v])
hashes = {}
image_paths = sorted(glob.glob(os.path.join(paths, '*.png')))
for image_path in image_paths:
image = cv2.imread(image_path)
h = dhash(image)
p = hashes.get(h, [])
p.append(image_path)
hashes[h] = p
for (h, hashed_paths) in hashes.items():
if len(hashed_paths) > 1:
for p in hashed_paths[1:]:
os.remove(p)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
for fid, frame in enumerate(frames):
new_name = '{:08d}'.format(fid) + '.png'
new_name = os.path.join(paths, new_name)
os.rename(frame, new_name)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
return frames
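# Illustration of the dHash used above (a sketch relying only on cv2/numpy):
# visually identical frames map to the same integer, so duplicates land in the
# same bucket and only the first file in each bucket is kept.
if __name__ == '__main__':
    import numpy as np

    def _dhash(image, hash_size=8):
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(gray, (hash_size + 1, hash_size))
        diff = resized[:, 1:] > resized[:, :-1]
        return sum(2**i for (i, v) in enumerate(diff.flatten()) if v)

    frame = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
    assert _dhash(frame) == _dhash(frame.copy())  # exact copies hash equally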
import paddle
from skimage import color
import numpy as np
from PIL import Image
def convertLAB2RGB( lab ):
lab[:, :, 0:1] = lab[:, :, 0:1] * 100 # [0, 1] -> [0, 100]
lab[:, :, 1:3] = np.clip(lab[:, :, 1:3] * 255 - 128, -100, 100) # [0, 1] -> [-128, 128]
rgb = color.lab2rgb( lab.astype(np.float64) )
return rgb
def convertRGB2LABTensor( rgb ):
lab = color.rgb2lab( np.asarray( rgb ) ) # RGB -> LAB L[0, 100] a[-127, 128] b[-128, 127]
ab = np.clip(lab[:, :, 1:3] + 128, 0, 255) # AB --> [0, 255]
ab = paddle.to_tensor(ab.astype('float32')) / 255.
L = lab[:, :, 0] * 2.55 # L --> [0, 255]
L = Image.fromarray( np.uint8( L ) )
L = paddle.to_tensor(np.array(L).astype('float32')[..., np.newaxis] / 255.0)
return L, ab
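# Round-trip sketch (assumes the imports above): split an RGB image into the
# L / ab tensors the colorization model consumes, then rebuild RGB with
# convertLAB2RGB to confirm the shapes and value ranges line up.
if __name__ == '__main__':
    _rgb = Image.fromarray(np.full((16, 16, 3), 128, dtype=np.uint8))
    _L, _ab = convertRGB2LABTensor(_rgb)  # L: (16,16,1), ab: (16,16,2), both in [0,1]
    _lab = np.concatenate([_L.numpy(), _ab.numpy()], axis=-1)
    print(convertLAB2RGB(_lab).shape)  # (16, 16, 3), float RGB in [0, 1]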
def addMergin(img, target_w, target_h, background_color=(0,0,0)):
width, height = img.size
if width==target_w and height==target_h:
return img
scale = max(target_w,target_h)/max(width, height)
width = int(width*scale/16.)*16
height = int(height*scale/16.)*16
img = img.resize((width, height), Image.BICUBIC)
xp = (target_w-width)//2
yp = (target_h-height)//2
result = Image.new(img.mode, (target_w, target_h), background_color)
result.paste(img, (xp, yp))
return result
import cv2
import numpy as np
def read_img(path, size=None, is_gt=False):
"""read image by cv2
return: Numpy float32, HWC, BGR, [0,1]"""
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
img = img.astype(np.float32) / 255.
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
if img.shape[2] > 3:
img = img[:, :, :3]
return img
def get_test_neighbor_frames(crt_i, N, max_n, padding='new_info'):
    """Generate an index list for reading N frames from a sequence of images
    Args:
        crt_i (int): current center index
        N (int): number of frames to read
        max_n (int): total number of frames in the sequence (counted from 1)
        padding (str): padding mode, one of replicate | reflection | new_info | circle
        Example: crt_i = 0, N = 5
        replicate: [0, 0, 0, 1, 2]
        reflection: [2, 1, 0, 1, 2]
        new_info: [4, 3, 0, 1, 2]
        circle: [3, 4, 0, 1, 2]
    Returns:
        return_l (list [int]): a list of indexes
    """
max_n = max_n - 1
n_pad = N // 2
return_l = []
for i in range(crt_i - n_pad, crt_i + n_pad + 1):
if i < 0:
if padding == 'replicate':
add_idx = 0
elif padding == 'reflection':
add_idx = -i
elif padding == 'new_info':
add_idx = (crt_i + n_pad) + (-i)
elif padding == 'circle':
add_idx = N + i
else:
raise ValueError('Wrong padding mode')
elif i > max_n:
if padding == 'replicate':
add_idx = max_n
elif padding == 'reflection':
add_idx = max_n * 2 - i
elif padding == 'new_info':
add_idx = (crt_i - n_pad) - (i - max_n)
elif padding == 'circle':
add_idx = i - N
else:
raise ValueError('Wrong padding mode')
else:
add_idx = i
return_l.append(add_idx)
# name_b = '{:08d}'.format(crt_i)
return return_l
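# Quick check of the padding behaviour documented above, following the
# docstring example (crt_i = 0, N = 5, a 5-frame sequence):
if __name__ == '__main__':
    assert get_test_neighbor_frames(0, 5, 5, padding='replicate') == [0, 0, 0, 1, 2]
    assert get_test_neighbor_frames(0, 5, 5, padding='reflection') == [2, 1, 0, 1, 2]
    assert get_test_neighbor_frames(0, 5, 5, padding='new_info') == [4, 3, 0, 1, 2]
    assert get_test_neighbor_frames(0, 5, 5, padding='circle') == [3, 4, 0, 1, 2]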
class EDVRDataset:
def __init__(self, frame_paths):
self.frames = frame_paths
def __getitem__(self, index):
indexs = get_test_neighbor_frames(index, 5, len(self.frames))
frame_list = []
for i in indexs:
img = read_img(self.frames[i])
frame_list.append(img)
img_LQs = np.stack(frame_list, axis=0)
# BGR to RGB, HWC to CHW, numpy to tensor
img_LQs = img_LQs[:, :, :, [2, 1, 0]]
img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
return img_LQs, self.frames[index]
def __len__(self):
return len(self.frames)
# examples of running programs:
# bash ./run.sh inference EDVR ./configs/edvr_L.yaml
# bash ./run.sh predict EDVR ./configs/edvr_L.yaml
# configs should be ./configs/xxx.yaml
mode=$1
name=$2
configs=$3
save_inference_dir="./data/inference_model"
use_gpu=True
fix_random_seed=False
log_interval=1
valid_interval=1
weights="./weights/paddle_state_dict_L.npz"
export CUDA_VISIBLE_DEVICES=6 #0,1,5,6 fast, 2,3,4,7 slow
# export FLAGS_fast_eager_deletion_mode=1
# export FLAGS_eager_delete_tensor_gb=0.0
# export FLAGS_fraction_of_gpu_memory_to_use=0.98
if [ "$mode"x == "predict"x ]; then
echo $mode $name $configs $weights
if [ "$weights"x != ""x ]; then
python predict.py --model_name=$name \
--config=$configs \
--log_interval=$log_interval \
--video_path='' \
--use_gpu=$use_gpu
else
python predict.py --model_name=$name \
--config=$configs \
--log_interval=$log_interval \
--use_gpu=$use_gpu \
--video_path=''
fi
fi
dataset_params:
root_dir: data/vox-png
frame_shape: [256, 256, 3]
id_sampling: True
pairs_list: data/vox256.csv
augmentation_params:
flip_param:
horizontal_flip: True
time_flip: True
jitter_param:
brightness: 0.1
contrast: 0.1
saturation: 0.1
hue: 0.1
model_params:
common_params:
num_kp: 10
num_channels: 3
estimate_jacobian: True
kp_detector_params:
temperature: 0.1
block_expansion: 32
max_features: 1024
scale_factor: 0.25
num_blocks: 5
generator_params:
block_expansion: 64
max_features: 512
num_down_blocks: 2
num_bottleneck_blocks: 6
estimate_occlusion_map: True
dense_motion_params:
block_expansion: 64
max_features: 1024
num_blocks: 5
scale_factor: 0.25
discriminator_params:
scales: [1]
block_expansion: 32
max_features: 512
num_blocks: 4
sn: True
train_params:
num_epochs: 100
num_repeats: 75
epoch_milestones: [60, 90]
lr_generator: 2.0e-4
lr_discriminator: 2.0e-4
lr_kp_detector: 2.0e-4
batch_size: 40
scales: [1, 0.5, 0.25, 0.125]
checkpoint_freq: 50
transform_params:
sigma_affine: 0.05
sigma_tps: 0.005
points_tps: 5
loss_weights:
generator_gan: 0
discriminator_gan: 1
feature_matching: [10, 10, 10, 10]
perceptual: [10, 10, 10, 10, 10]
equivariance_value: 10
equivariance_jacobian: 10
reconstruction_params:
num_videos: 1000
format: '.mp4'
animate_params:
num_pairs: 50
format: '.mp4'
normalization_params:
adapt_movement_scale: False
use_relative_movement: True
use_relative_jacobian: True
visualizer_params:
kp_size: 5
draw_border: True
colormap: 'gist_rainbow'
# Model notes
# Currently includes DAIN (frame interpolation), DeOldify (colorization), DeepRemaster (denoising + colorization), EDVR (multi-frame video super-resolution) and RealSR (single-image super-resolution)
# Argument notes
# input           path to the input video
# output          directory where the output video is saved
# proccess_order  models to apply, and their order
python tools/video-enhance.py \
--input input.mp4 --output output --proccess_order DeOldify RealSR
import matplotlib # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
matplotlib.use('Agg') #
import os #Licensed under the Apache License, Version 2.0 (the "License");
import sys #you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import argparse
import yaml
import pickle
from argparse import ArgumentParser
from tqdm import tqdm
import imageio
import numpy as np
from skimage.transform import resize
from skimage import img_as_ubyte
import paddle
from ppgan.apps.first_order_predictor import FirstOrderPredictor
from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator
from ppgan.modules.keypoint_detector import KPDetector parser = argparse.ArgumentParser()
from ppgan.utils.animate import normalize_kp parser.add_argument("--config", default=None, help="path to config")
from scipy.spatial import ConvexHull parser.add_argument("--weight_path",
default=None,
paddle.disable_static() help="path to checkpoint to restore")
parser.add_argument("--source_image", type=str, help="path to source image")
if sys.version_info[0] < 3: parser.add_argument("--driving_video", type=str, help="path to driving video")
raise Exception( parser.add_argument("--output", default='output', help="path to output")
"You must use Python 3 or higher. Recommended version is Python 3.7") parser.add_argument("--relative",
dest="relative",
action="store_true",
def load_checkpoints(config_path, checkpoint_path, cpu=False): help="use relative or absolute keypoint coordinates")
parser.add_argument(
with open(config_path) as f: "--adapt_scale",
config = yaml.load(f) dest="adapt_scale",
action="store_true",
generator = OcclusionAwareGenerator( help="adapt movement scale based on convex hull of keypoints")
**config['model_params']['generator_params'],
**config['model_params']['common_params']) parser.add_argument(
"--find_best_frame",
kp_detector = KPDetector(**config['model_params']['kp_detector_params'], dest="find_best_frame",
**config['model_params']['common_params']) action="store_true",
help=
checkpoint = pickle.load(open(checkpoint_path, 'rb')) "Generate from the frame that is the most alligned with source. (Only for faces, requires face_aligment lib)"
generator.set_state_dict(checkpoint['generator']) )
kp_detector.set_state_dict(checkpoint['kp_detector']) parser.add_argument("--best_frame",
dest="best_frame",
generator.eval() type=int,
kp_detector.eval() default=None,
help="Set frame to start from.")
return generator, kp_detector parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")
parser.set_defaults(relative=False)
def make_animation(source_image, parser.set_defaults(adapt_scale=False)
driving_video,
generator,
kp_detector,
relative=True,
adapt_movement_scale=True,
cpu=False):
with paddle.no_grad():
predictions = []
source = paddle.to_tensor(source_image[np.newaxis].astype(
np.float32)).transpose([0, 3, 1, 2])
# if not cpu:
# source = source.cuda()
driving = paddle.to_tensor(
np.array(driving_video)[np.newaxis].astype(np.float32)).transpose(
[0, 4, 1, 2, 3])
kp_source = kp_detector(source)
kp_driving_initial = kp_detector(driving[:, :, 0])
for frame_idx in tqdm(range(driving.shape[2])):
driving_frame = driving[:, :, frame_idx]
kp_driving = kp_detector(driving_frame)
kp_norm = normalize_kp(kp_source=kp_source,
kp_driving=kp_driving,
kp_driving_initial=kp_driving_initial,
use_relative_movement=relative,
use_relative_jacobian=relative,
adapt_movement_scale=adapt_movement_scale)
out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
predictions.append(
np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0])
return predictions
def find_best_frame(source, driving, cpu=False):
import face_alignment
def normalize_kp(kp):
kp = kp - kp.mean(axis=0, keepdims=True)
area = ConvexHull(kp[:, :2]).volume
area = np.sqrt(area)
kp[:, :2] = kp[:, :2] / area
return kp
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
flip_input=True,
device='cpu' if cpu else 'cuda')
kp_source = fa.get_landmarks(255 * source)[0]
kp_source = normalize_kp(kp_source)
norm = float('inf')
frame_num = 0
for i, image in tqdm(enumerate(driving)):
kp_driving = fa.get_landmarks(255 * image)[0]
kp_driving = normalize_kp(kp_driving)
new_norm = (np.abs(kp_source - kp_driving)**2).sum()
if new_norm < norm:
norm = new_norm
frame_num = i
return frame_num
if __name__ == "__main__": if __name__ == "__main__":
parser = ArgumentParser() args = parser.parse_args()
parser.add_argument("--config", required=True, help="path to config")
parser.add_argument("--checkpoint", if args.cpu:
default='vox-cpk.pth.tar', paddle.set_device('cpu')
help="path to checkpoint to restore")
predictor = FirstOrderPredictor(output=args.output,
parser.add_argument("--source_image", weight_path=args.weight_path,
default='sup-mat/source.png', config=args.config,
help="path to source image") relative=args.relative,
parser.add_argument("--driving_video", adapt_scale=args.adapt_scale,
default='sup-mat/source.png', find_best_frame=args.find_best_frame,
help="path to driving video") best_frame=args.best_frame)
parser.add_argument("--result_video", predictor.run(args.source_image, args.driving_video)
default='result.mp4',
help="path to output")
parser.add_argument("--relative",
dest="relative",
action="store_true",
help="use relative or absolute keypoint coordinates")
parser.add_argument(
"--adapt_scale",
dest="adapt_scale",
action="store_true",
help="adapt movement scale based on convex hull of keypoints")
parser.add_argument(
"--find_best_frame",
dest="find_best_frame",
action="store_true",
help=
"Generate from the frame that is the most alligned with source. (Only for faces, requires face_aligment lib)"
)
parser.add_argument("--best_frame",
dest="best_frame",
type=int,
default=None,
help="Set frame to start from.")
parser.add_argument("--cpu",
dest="cpu",
action="store_true",
help="cpu mode.")
parser.set_defaults(relative=False)
parser.set_defaults(adapt_scale=False)
opt = parser.parse_args()
source_image = imageio.imread(opt.source_image)
reader = imageio.get_reader(opt.driving_video)
fps = reader.get_meta_data()['fps']
driving_video = []
try:
for im in reader:
driving_video.append(im)
except RuntimeError:
pass
reader.close()
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [
resize(frame, (256, 256))[..., :3] for frame in driving_video
]
generator, kp_detector = load_checkpoints(config_path=opt.config,
checkpoint_path=opt.checkpoint,
cpu=opt.cpu)
if opt.find_best_frame or opt.best_frame is not None:
i = opt.best_frame if opt.best_frame is not None else find_best_frame(
source_image, driving_video, cpu=opt.cpu)
print("Best frame: " + str(i))
driving_forward = driving_video[i:]
driving_backward = driving_video[:(i + 1)][::-1]
predictions_forward = make_animation(
source_image,
driving_forward,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
predictions_backward = make_animation(
source_image,
driving_backward,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
predictions = predictions_backward[::-1] + predictions_forward[1:]
else:
predictions = make_animation(source_image,
driving_video,
generator,
kp_detector,
relative=opt.relative,
adapt_movement_scale=opt.adapt_scale,
cpu=opt.cpu)
imageio.mimsave(opt.result_video,
[img_as_ubyte(frame) for frame in predictions],
fps=fps)
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import argparse
from pathlib import Path
from PIL import Image
from fire import Fire
import numpy as np
import paddle
import paddle.vision.transforms as T
import ppgan.faceutils as futils
from ppgan.utils.options import parse_args
from ppgan.utils.config import get_config
from ppgan.utils.setup import setup
from ppgan.utils.filesystem import load
from ppgan.engine.trainer import Trainer
from ppgan.models.builder import build_model
from ppgan.utils.preprocess import *
def toImage(net_output):
img = net_output.squeeze(0).transpose(
(1, 2, 0)).numpy() # [1,c,h,w]->[h,w,c]
img = (img * 255.0).clip(0, 255)
img = np.uint8(img)
img = Image.fromarray(img, mode='RGB')
return img
def mask2image(mask: np.ndarray, format="HWC"):
H, W = mask.shape
canvas = np.zeros((H, W, 3), dtype=np.uint8)
for i in range(int(mask.max())):
color = np.random.rand(1, 1, 3) * 255
canvas += (mask == i)[:, :, None] * color.astype(np.uint8)
return canvas
class PreProcess:
def __init__(self, config, need_parser=True):
self.img_size = 256
self.transform = transform = T.Compose([
T.Resize(size=256),
T.Permute(to_rgb=False),
])
self.norm = T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])
if need_parser:
self.face_parser = futils.mask.FaceParser()
self.up_ratio = 0.6 / 0.85
self.down_ratio = 0.2 / 0.85
self.width_ratio = 0.2 / 0.85
def __call__(self, image):
face = futils.dlib.detect(image)
if not face:
return
face_on_image = face[0]
image, face, crop_face = futils.dlib.crop(image, face_on_image,
self.up_ratio,
self.down_ratio,
self.width_ratio)
np_image = np.array(image)
mask = self.face_parser.parse(
np.float32(cv2.resize(np_image, (512, 512))))
mask = cv2.resize(mask.numpy(), (self.img_size, self.img_size),
interpolation=cv2.INTER_NEAREST)
mask = mask.astype(np.uint8)
mask_color = mask2image(mask)
cv2.imwrite('mask_temp.png', mask_color)
mask_tensor = paddle.to_tensor(mask)
lms = futils.dlib.landmarks(image, face) * self.img_size / image.width
lms = lms.round()
P_np = generate_P_from_lmks(lms, self.img_size, self.img_size,
self.img_size)
mask_aug = generate_mask_aug(mask, lms)
image = self.transform(np_image)
return [
self.norm(image),
np.float32(mask_aug),
np.float32(P_np),
np.float32(mask)
], face_on_image, crop_face
class PostProcess:
def __init__(self, config):
self.denoise = True
self.img_size = 256
def __call__(self, source: Image, result: Image):
        # TODO: refactor -> name, resize
source = np.array(source)
result = np.array(result)
height, width = source.shape[:2]
small_source = cv2.resize(source, (self.img_size, self.img_size))
        laplacian_diff = source.astype(np.float64) - cv2.resize(
            small_source, (width, height)).astype(np.float64)
result = (cv2.resize(result,
(width, height)) + laplacian_diff).round().clip(
0, 255).astype(np.uint8)
if self.denoise:
result = cv2.fastNlMeansDenoisingColored(result)
result = Image.fromarray(result).convert('RGB')
return result
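# The PostProcess step above re-injects high-frequency detail: it upsamples the
# 256x256 network output back to the source resolution and adds the residual
# between the original photo and its blurred (down/up-sampled) copy. A compact
# numpy-only restatement of that idea (illustrative, not the class itself):
def _reinject_detail(source_bgr, result_256, size=256):
    h, w = source_bgr.shape[:2]
    small = cv2.resize(source_bgr, (size, size))
    residual = source_bgr.astype(np.float64) - cv2.resize(small, (w, h)).astype(np.float64)
    out = cv2.resize(result_256, (w, h)).astype(np.float64) + residual
    return np.clip(np.round(out), 0, 255).astype(np.uint8)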
class Inference:
def __init__(self, config, model_path=''):
self.model = build_model(config)
self.preprocess = PreProcess(config)
self.model_path = model_path
def transfer(self, source, reference, with_face=False):
source_input, face, crop_face = self.preprocess(source)
reference_input, face, crop_face = self.preprocess(reference)
consis_mask = np.float32(
calculate_consis_mask(source_input[1], reference_input[1]))
consis_mask = paddle.to_tensor(np.expand_dims(consis_mask, 0))
if not (source_input and reference_input):
if with_face:
return None, None
return
for i in range(len(source_input) - 1):
source_input[i] = paddle.to_tensor(
np.expand_dims(source_input[i], 0))
for i in range(len(reference_input) - 1):
reference_input[i] = paddle.to_tensor(
np.expand_dims(reference_input[i], 0))
input_data = {
'image_A': source_input[0],
'image_B': reference_input[0],
'mask_A_aug': source_input[1],
'mask_B_aug': reference_input[1],
'P_A': source_input[2],
'P_B': reference_input[2],
'consis_mask': consis_mask
}
state_dicts = load(self.model_path)
net = getattr(self.model, 'netG')
net.set_dict(state_dicts['netG'])
result, _ = self.model.test(input_data)
print('result shape: ', result.shape)
min_, max_ = result.min(), result.max()
result += -min_
result = paddle.divide(result, max_ - min_ + 1e-5)
img = toImage(result)
if with_face:
return img, crop_face
img.save('before.png')
return img
def main(args, cfg, save_path='transferred_image.png'):
setup(args, cfg)
inference = Inference(cfg, args.model_path)
postprocess = PostProcess(cfg)
source = Image.open(args.source_path).convert("RGB")
reference_paths = list(Path(args.reference_dir).glob("*"))
np.random.shuffle(reference_paths)
for reference_path in reference_paths:
if not reference_path.is_file():
print(reference_path, "is not a valid file.")
continue
reference = Image.open(reference_path).convert("RGB")
        # Transfer makeup from the reference image to the source image (PSGAN).
image, face = inference.transfer(source, reference, with_face=True)
image.save('before.png')
source_crop = source.crop(
(face.left(), face.top(), face.right(), face.bottom()))
image = postprocess(source_crop, image)
image.save(save_path)
if __name__ == '__main__':
args = parse_args()
cfg = get_config(args.config_file)
main(args, cfg)
import sys
sys.path.append('.')
import argparse
import paddle
from DAIN.predict import VideoFrameInterp
from DeepRemaster.predict import DeepReasterPredictor
from DeOldify.predict import DeOldifyPredictor
from RealSR.predict import RealSRPredictor
from EDVR.predict import EDVRPredictor
parser = argparse.ArgumentParser(description='Fix video')
parser.add_argument('--input', type=str, default=None, help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--DAIN_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--DeepRemaster_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--DeOldify_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--RealSR_weight',
type=str,
default=None,
help='Path to model weight')
parser.add_argument('--EDVR_weight',
type=str,
default=None,
help='Path to model weight')
# DAIN args
parser.add_argument('--time_step',
type=float,
default=0.5,
help='choose the time steps')
# DeepRemaster args
parser.add_argument('--reference_dir',
type=str,
default=None,
help='Path to the reference image directory')
parser.add_argument('--colorization',
action='store_true',
default=False,
help='Remaster with colorization')
parser.add_argument('--mindim',
type=int,
default=360,
help='Length of minimum image edges')
# DeOldify args
parser.add_argument('--render_factor',
type=int,
default=32,
help='model inputsize=render_factor*16')
# supported model names for --proccess_order: [DAIN, DeepRemaster, DeOldify, RealSR, EDVR]
parser.add_argument('--proccess_order',
type=str,
default='none',
nargs='+',
help='Process order')
if __name__ == "__main__":
args = parser.parse_args()
orders = args.proccess_order
temp_video_path = None
for order in orders:
        print('Model {} process start...'.format(order))
if temp_video_path is None:
temp_video_path = args.input
if order == 'DAIN':
predictor = VideoFrameInterp(args.time_step,
args.DAIN_weight,
temp_video_path,
output_path=args.output)
frames_path, temp_video_path = predictor.run()
elif order == 'DeepRemaster':
paddle.disable_static()
predictor = DeepReasterPredictor(
temp_video_path,
args.output,
weight_path=args.DeepRemaster_weight,
colorization=args.colorization,
reference_dir=args.reference_dir,
mindim=args.mindim)
frames_path, temp_video_path = predictor.run()
paddle.enable_static()
elif order == 'DeOldify':
paddle.disable_static()
predictor = DeOldifyPredictor(temp_video_path,
args.output,
weight_path=args.DeOldify_weight)
frames_path, temp_video_path = predictor.run()
paddle.enable_static()
elif order == 'RealSR':
paddle.disable_static()
predictor = RealSRPredictor(temp_video_path,
args.output,
weight_path=args.RealSR_weight)
frames_path, temp_video_path = predictor.run()
paddle.enable_static()
elif order == 'EDVR':
predictor = EDVRPredictor(temp_video_path,
args.output,
weight_path=args.EDVR_weight)
frames_path, temp_video_path = predictor.run()
print('Model {} output frames path:'.format(order), frames_path)
print('Model {} output video path:'.format(order), temp_video_path)
        print('Model {} process done!'.format(order))
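# Example invocation (paths are placeholders): interpolate frames first, then
# colorize, then upscale -- each stage consumes the video produced by the
# previous stage via temp_video_path.
#   python tools/video-enhance.py --input old_film.mp4 --output output \
#       --proccess_order DAIN DeOldify RealSR --time_step 0.25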
...@@ -36,16 +36,18 @@ dataset: ...@@ -36,16 +36,18 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: False serial_batches: False
pool_size: 50 pool_size: 50
transform: transforms:
load_size: 286 - name: Resize
crop_size: 256 size: [286, 286]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: False - name: RandomCrop
normalize: output_size: [256, 256]
mean: - name: RandomHorizontalFlip
(127.5, 127.5, 127.5) prob: 0.5
std: - name: Permute
(127.5, 127.5, 127.5) - name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
test: test:
name: SingleDataset name: SingleDataset
dataroot: data/cityscapes/testB dataroot: data/cityscapes/testB
...@@ -55,17 +57,14 @@ dataset: ...@@ -55,17 +57,14 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: False serial_batches: False
pool_size: 50 pool_size: 50
transform: transforms:
load_size: 256 - name: Resize
crop_size: 256 size: [256, 256]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: True - name: Permute
normalize: - name: Normalize
mean: mean: [127.5, 127.5, 127.5]
(127.5, 127.5, 127.5) std: [127.5, 127.5, 127.5]
std:
(127.5, 127.5, 127.5)
optimizer: optimizer:
name: Adam name: Adam
......
...@@ -35,16 +35,18 @@ dataset: ...@@ -35,16 +35,18 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: False serial_batches: False
pool_size: 50 pool_size: 50
transform: transforms:
load_size: 286 - name: Resize
crop_size: 256 size: [286, 286]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: False - name: RandomCrop
normalize: output_size: [256, 256]
mean: - name: RandomHorizontalFlip
(127.5, 127.5, 127.5) prob: 0.5
std: - name: Permute
(127.5, 127.5, 127.5) - name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
test: test:
name: SingleDataset name: SingleDataset
dataroot: data/horse2zebra/testA dataroot: data/horse2zebra/testA
...@@ -55,15 +57,14 @@ dataset: ...@@ -55,15 +57,14 @@ dataset:
serial_batches: False serial_batches: False
pool_size: 50 pool_size: 50
transform: transform:
load_size: 256 transform:
crop_size: 256 - name: Resize
preprocess: resize_and_crop size: [256, 256]
no_flip: True interpolation: 2 #cv2.INTER_CUBIC
normalize: - name: Permute
mean: - name: Normalize
(127.5, 127.5, 127.5) mean: [127.5, 127.5, 127.5]
std: std: [127.5, 127.5, 127.5]
(127.5, 127.5, 127.5)
optimizer: optimizer:
name: Adam name: Adam
......
epochs: 100
isTrain: True
output_dir: tmp
checkpoints_dir: checkpoints
lambda_A: 10.0
lambda_B: 10.0
lambda_identity: 0.5
model:
name: MakeupModel
generator:
name: GeneratorPSGANAttention
conv_dim: 64
repeat_num: 6
discriminator:
name: NLayerDiscriminator
ndf: 64
n_layers: 3
input_nc: 3
norm_type: spectral
gan_mode: lsgan
dataset:
train:
name: MakeupDataset
trans_size: 256
dataroot: MT-Dataset
cls_list: [non-makeup, makeup]
phase: train
pool_size: 16
test:
name: MakeupDataset
trans_size: 256
dataroot: MT-Dataset
cls_list: [non-makeup, makeup]
phase: test
pool_size: 16
optimizer:
name: Adam
beta1: 0.5
lr_scheduler:
name: linear
learning_rate: 0.0002
start_epoch: 100
decay_epochs: 100
log_config:
interval: 10
visiual_interval: 500
snapshot_config:
interval: 1
...@@ -33,16 +33,23 @@ dataset: ...@@ -33,16 +33,23 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: False serial_batches: False
pool_size: 0 pool_size: 0
transform: transforms:
load_size: 286 - name: Resize
crop_size: 256 size: [286, 286]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: False keys: [image, image]
normalize: - name: PairedRandomCrop
mean: output_size: [256, 256]
(127.5, 127.5, 127.5) keys: [image, image]
std: - name: PairedRandomHorizontalFlip
(127.5, 127.5, 127.5) prob: 0.5
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
test: test:
name: PairedDataset name: PairedDataset
dataroot: data/cityscapes/ dataroot: data/cityscapes/
...@@ -53,16 +60,18 @@ dataset: ...@@ -53,16 +60,18 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: True serial_batches: True
pool_size: 50 pool_size: 50
transform: transforms:
load_size: 256 - name: Resize
crop_size: 256 size: [256, 256]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: True keys: [image, image]
normalize: - name: Permute
mean: keys: [image, image]
(127.5, 127.5, 127.5) - name: Normalize
std: mean: [127.5, 127.5, 127.5]
(127.5, 127.5, 127.5) std: [127.5, 127.5, 127.5]
keys: [image, image]
optimizer: optimizer:
name: Adam name: Adam
......
...@@ -32,16 +32,23 @@ dataset: ...@@ -32,16 +32,23 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: False serial_batches: False
pool_size: 0 pool_size: 0
transform: transforms:
load_size: 286 - name: Resize
crop_size: 256 size: [286, 286]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: False keys: [image, image]
normalize: - name: PairedRandomCrop
mean: output_size: [256, 256]
(127.5, 127.5, 127.5) keys: [image, image]
std: - name: PairedRandomHorizontalFlip
(127.5, 127.5, 127.5) prob: 0.5
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
test: test:
name: PairedDataset name: PairedDataset
dataroot: data/cityscapes/ dataroot: data/cityscapes/
...@@ -52,16 +59,17 @@ dataset: ...@@ -52,16 +59,17 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: True serial_batches: True
pool_size: 50 pool_size: 50
transform: transforms:
load_size: 256 - name: Resize
crop_size: 256 size: [256, 256]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: True keys: [image, image]
normalize: - name: Permute
mean: keys: [image, image]
(127.5, 127.5, 127.5) - name: Normalize
std: mean: [127.5, 127.5, 127.5]
(127.5, 127.5, 127.5) std: [127.5, 127.5, 127.5]
keys: [image, image]
optimizer: optimizer:
name: Adam name: Adam
......
...@@ -32,16 +32,23 @@ dataset: ...@@ -32,16 +32,23 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: False serial_batches: False
pool_size: 0 pool_size: 0
transform: transforms:
load_size: 286 - name: Resize
crop_size: 256 size: [286, 286]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: False keys: [image, image]
normalize: - name: PairedRandomCrop
mean: output_size: [256, 256]
(127.5, 127.5, 127.5) keys: [image, image]
std: - name: PairedRandomHorizontalFlip
(127.5, 127.5, 127.5) prob: 0.5
keys: [image, image]
- name: Permute
keys: [image, image]
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
keys: [image, image]
test: test:
name: PairedDataset name: PairedDataset
dataroot: data/facades/ dataroot: data/facades/
...@@ -52,16 +59,17 @@ dataset: ...@@ -52,16 +59,17 @@ dataset:
output_nc: 3 output_nc: 3
serial_batches: True serial_batches: True
pool_size: 50 pool_size: 50
transform: transforms:
load_size: 256 - name: Resize
crop_size: 256 size: [256, 256]
preprocess: resize_and_crop interpolation: 2 #cv2.INTER_CUBIC
no_flip: True keys: [image, image]
normalize: - name: Permute
mean: keys: [image, image]
(127.5, 127.5, 127.5) - name: Normalize
std: mean: [127.5, 127.5, 127.5]
(127.5, 127.5, 127.5) std: [127.5, 127.5, 127.5]
keys: [image, image]
optimizer: optimizer:
name: Adam name: Adam
......
from .dain_predictor import DAINPredictor
from .deepremaster_predictor import DeepRemasterPredictor
from .deoldify_predictor import DeOldifyPredictor
from .realsr_predictor import RealSRPredictor
from .edvr_predictor import EDVRPredictor
from .first_order_predictor import FirstOrderPredictor
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import paddle
class BasePredictor(object):
def __init__(self):
pass
def build_inference_model(self):
if paddle.in_dynamic_mode():
# todo self.model = build_model(self.cfg)
pass
else:
place = paddle.fluid.framework._current_expected_place()
self.exe = paddle.fluid.Executor(place)
file_names = os.listdir(self.weight_path)
for file_name in file_names:
if file_name.find('model') > -1:
model_file = file_name
elif file_name.find('param') > -1:
param_file = file_name
self.program, self.feed_names, self.fetch_targets = paddle.static.load_inference_model(
dirname=self.weight_path,
executor=self.exe,
model_filename=model_file,
params_filename=param_file)
print(self.feed_names)
def base_forward(self, inputs):
if paddle.in_dynamic_mode():
out = self.model(inputs)
else:
feed_dict = {}
if isinstance(inputs, dict):
feed_dict = inputs
elif isinstance(inputs, (list, tuple)):
for i, feed_name in enumerate(self.feed_names):
feed_dict[feed_name] = inputs[i]
else:
feed_dict[self.feed_names[0]] = inputs
out = self.exe.run(self.program,
fetch_list=self.fetch_targets,
feed=feed_dict)
return out
def is_video(self, input):
try:
cv2.VideoCapture(input)
return True
except:
return False
def run(self):
raise NotImplementedError
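# Hypothetical subclass sketch: the concrete predictors below follow this
# pattern -- set self.weight_path, call build_inference_model(), then hand
# numpy inputs to base_forward() inside run().
class _ExamplePredictor(BasePredictor):
    def __init__(self, weight_path):
        super(_ExamplePredictor, self).__init__()
        self.weight_path = weight_path
        self.build_inference_model()

    def run(self, inputs):
        return self.base_forward(inputs)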
import os, sys # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
import math #
import random #Licensed under the Apache License, Version 2.0 (the "License");
import time #you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2
import glob import glob
import shutil import shutil
import numpy as np import numpy as np
from tqdm import tqdm
from imageio import imread, imsave from imageio import imread, imsave
import cv2
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import video2frames, frames2video
from .base_predictor import BasePredictor
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
class DAINPredictor(BasePredictor):
def __init__(self,
output_path='output',
weight_path=None,
time_step=None,
use_gpu=True,
key_frame_thread=0.,
remove_duplicates=False):
self.output_path = os.path.join(output_path, 'DAIN')
if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)
self.weight_path = weight_path
self.time_step = time_step
self.key_frame_thread = key_frame_thread
self.remove_duplicates = remove_duplicates
self.build_inference_model()
def run(self, video_path):
frame_path_input = os.path.join(self.output_path, 'frames-input')
frame_path_interpolated = os.path.join(self.output_path,
'frames-interpolated')
frame_path_combined = os.path.join(self.output_path, 'frames-combined')
video_path_output = os.path.join(self.output_path, 'videos-output')
if not os.path.exists(self.output_path):
os.makedirs(self.output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
timestep = self.time_step
num_frames = int(1.0 / timestep) - 1
import networks cap = cv2.VideoCapture(video_path)
from util import *
from my_args import args
if __name__ == '__main__':
DO_MiddleBurryOther = True
video_path = args.video_path
output_path = args.output_path
frame_path_input = os.path.join(output_path, 'frames-input')
frame_path_interpolated = os.path.join(output_path, 'frames-interpolated')
frame_path_combined = os.path.join(output_path, 'frames-combined')
video_path_input = os.path.join(output_path, 'videos-input')
video_path_output = os.path.join(output_path, 'videos-output')
if not os.path.exists(output_path):
os.makedirs(output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_input):
os.makedirs(video_path_input)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
args.KEY_FRAME_THREAD = 0.
saved_model = args.saved_model
timestep = args.time_step
num_frames = int(1.0 / timestep) - 1
image = fluid.data(name='image',
shape=[2, 1, args.channels, -1, -1],
dtype='float32')
DAIN = networks.__dict__["DAIN_slowmotion"](channel=args.channels,
filter_size=args.filter_size,
timestep=args.time_step,
training=False)
out = DAIN(image)
out = out[0][1]
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
fetch_list = [out.name]
inference_program = fluid.default_main_program().clone(for_test=True)
inference_program = fluid.io.load_persistables(exe, saved_model,
inference_program)
if not DO_MiddleBurryOther:
sys.exit()
if video_path.endswith('.mp4'):
videos = [video_path]
else:
videos = sorted(glob.glob(os.path.join(video_path, '*.mp4')))
for cnt, vid in enumerate(videos):
print("Interpolating video:", vid)
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS) fps = cap.get(cv2.CAP_PROP_FPS)
print("Old fps (frame rate): ", fps) print("Old fps (frame rate): ", fps)
timestep = args.time_step
times_interp = int(1.0 / timestep) times_interp = int(1.0 / timestep)
r2 = str(int(fps) * times_interp) r2 = str(int(fps) * times_interp)
print("New fps (frame rate): ", r2) print("New fps (frame rate): ", r2)
# set start and end of video out_path = video2frames(video_path, frame_path_input)
#ss = 0
#t = 10
#ss = time.strftime('%H:%M:%S', time.gmtime(ss))
#t = time.strftime('%H:%M:%S', time.gmtime(t))
#print(r, ss, t)
r = None
ss = None
t = None
out_path = dump_frames_ffmpeg(vid, frame_path_input, r, ss, t) vidname = video_path.split('/')[-1].split('.')[0]
vidname = vid.split('/')[-1].split('.')[0]
tot_timer = AverageMeter()
proc_timer = AverageMeter()
end = time.time()
frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
orig_frames = len(frames)
need_frames = orig_frames * times_interp
if self.remove_duplicates:
frames = self.remove_duplicate_frames(out_path)
left_frames = len(frames)
timestep = left_frames / need_frames
num_frames = int(1.0 / timestep) - 1
img = imread(frames[0]) img = imread(frames[0])
...@@ -110,7 +99,7 @@ if __name__ == '__main__': ...@@ -110,7 +99,7 @@ if __name__ == '__main__':
int_height = img.shape[0] int_height = img.shape[0]
channel = img.shape[2] channel = img.shape[2]
if not channel == 3: if not channel == 3:
continue return
if int_width != ((int_width >> 7) << 7): if int_width != ((int_width >> 7) << 7):
int_width_pad = (((int_width >> 7) + 1) << 7) # more than necessary int_width_pad = (((int_width >> 7) + 1) << 7) # more than necessary
...@@ -132,16 +121,13 @@ if __name__ == '__main__': ...@@ -132,16 +121,13 @@ if __name__ == '__main__':
padding_bottom = 32 padding_bottom = 32
frame_num = len(frames) frame_num = len(frames)
print(os.path.join(frame_path_input, vidname, '*.png'))
print('processing {} frames, from video: {}'.format(frame_num, vid))
if not os.path.exists(os.path.join(frame_path_interpolated, vidname)): if not os.path.exists(os.path.join(frame_path_interpolated, vidname)):
os.makedirs(os.path.join(frame_path_interpolated, vidname)) os.makedirs(os.path.join(frame_path_interpolated, vidname))
if not os.path.exists(os.path.join(frame_path_combined, vidname)): if not os.path.exists(os.path.join(frame_path_combined, vidname)):
os.makedirs(os.path.join(frame_path_combined, vidname)) os.makedirs(os.path.join(frame_path_combined, vidname))
for i in range(frame_num - 1): for i in tqdm(range(frame_num - 1)):
print(frames[i])
first = frames[i] first = frames[i]
second = frames[i + 1] second = frames[i + 1]
...@@ -155,79 +141,116 @@ if __name__ == '__main__': ...@@ -155,79 +141,116 @@ if __name__ == '__main__':
img_second_gray = img_second_gray.flatten(order='C') img_second_gray = img_second_gray.flatten(order='C')
corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1] corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1]
key_frame = False key_frame = False
if corr < args.KEY_FRAME_THREAD: if corr < self.key_frame_thread:
key_frame = True key_frame = True
'''-------------------------------------------------------''' '''-------------------------------------------------------'''
X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255 X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255 X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
if key_frame: assert (X0.shape[1] == X1.shape[1])
y_ = [ assert (X0.shape[2] == X1.shape[2])
np.transpose(255.0 * X0.clip(0, 1.0), (1, 2, 0))
for i in range(num_frames) X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
] (padding_left, padding_right)), mode='edge')
else: X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
assert (X0.shape[1] == X1.shape[1]) (padding_left, padding_right)), mode='edge')
assert (X0.shape[2] == X1.shape[2])
X0 = np.expand_dims(X0, axis=0)
print("size before padding ", X0.shape) X1 = np.expand_dims(X1, axis=0)
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge') X0 = np.expand_dims(X0, axis=0)
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \ X1 = np.expand_dims(X1, axis=0)
(padding_left, padding_right)), mode='edge')
print("size after padding ", X0.shape) X = np.concatenate((X0, X1), axis=0)
X0 = np.expand_dims(X0, axis=0) o = self.base_forward(X)
X1 = np.expand_dims(X1, axis=0)
y_ = o[0]
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0) y_ = [
np.transpose(
X = np.concatenate((X0, X1), axis=0) 255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
proc_end = time.time() padding_left:padding_left + int_width],
o = exe.run(inference_program, (1, 2, 0)) for item in y_
fetch_list=fetch_list, ]
feed={"image": X}) time_offsets = [kk * timestep for kk in range(1, 1 + num_frames, 1)]
y_ = o[0]
count = 1
proc_timer.update(time.time() - proc_end) for item, time_offset in zip(y_, time_offsets):
tot_timer.update(time.time() - end) out_dir = os.path.join(frame_path_interpolated, vidname,
end = time.time() "{:0>6d}_{:0>4d}.png".format(i, count))
print("*******current image process time \t " + count = count + 1
str(time.time() - proc_end) + "s ******") imsave(out_dir, np.round(item).astype(np.uint8))
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>4d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
timestep = args.time_step
num_frames = int(1.0 / timestep) - 1 num_frames = int(1.0 / timestep) - 1
input_dir = os.path.join(frame_path_input, vidname) input_dir = os.path.join(frame_path_input, vidname)
interpolated_dir = os.path.join(frame_path_interpolated, vidname) interpolated_dir = os.path.join(frame_path_interpolated, vidname)
combined_dir = os.path.join(frame_path_combined, vidname) combined_dir = os.path.join(frame_path_combined, vidname)
combine_frames(input_dir, interpolated_dir, combined_dir, num_frames) self.combine_frames(input_dir, interpolated_dir, combined_dir,
num_frames)
frame_pattern_combined = os.path.join(frame_path_combined, vidname, frame_pattern_combined = os.path.join(frame_path_combined, vidname,
'%08d.png') '%08d.png')
video_pattern_output = os.path.join(video_path_output, vidname + '.mp4') video_pattern_output = os.path.join(video_path_output, vidname + '.mp4')
if os.path.exists(video_pattern_output): if os.path.exists(video_pattern_output):
os.remove(video_pattern_output) os.remove(video_pattern_output)
frames_to_video_ffmpeg(frame_pattern_combined, video_pattern_output, r2) frames2video(frame_pattern_combined, video_pattern_output, r2)
return frame_pattern_combined, video_pattern_output
def combine_frames(self, input, interpolated, combined, num_frames):
frames1 = sorted(glob.glob(os.path.join(input, '*.png')))
frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png')))
num1 = len(frames1)
num2 = len(frames2)
for i in range(num1):
src = frames1[i]
imgname = int(src.split('/')[-1].split('.')[-2])
assert i == imgname
dst = os.path.join(combined,
'{:08d}.png'.format(i * (num_frames + 1)))
shutil.copy2(src, dst)
if i < num1 - 1:
try:
for k in range(num_frames):
src = frames2[i * num_frames + k]
dst = os.path.join(
combined,
'{:08d}.png'.format(i * (num_frames + 1) + k + 1))
shutil.copy2(src, dst)
except Exception as e:
print(e)
def remove_duplicate_frames(self, paths):
def dhash(image, hash_size=8):
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
resized = cv2.resize(gray, (hash_size + 1, hash_size))
diff = resized[:, 1:] > resized[:, :-1]
return sum([2**i for (i, v) in enumerate(diff.flatten()) if v])
hashes = {}
image_paths = sorted(glob.glob(os.path.join(paths, '*.png')))
for image_path in image_paths:
image = cv2.imread(image_path)
h = dhash(image)
p = hashes.get(h, [])
p.append(image_path)
hashes[h] = p
for (h, hashed_paths) in hashes.items():
if len(hashed_paths) > 1:
for p in hashed_paths[1:]:
os.remove(p)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
for fid, frame in enumerate(frames):
new_name = '{:08d}'.format(fid) + '.png'
new_name = os.path.join(paths, new_name)
os.rename(frame, new_name)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
return frames
import os # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
import sys #
#Licensed under the Apache License, Version 2.0 (the "License");
cur_path = os.path.abspath(os.path.dirname(__file__)) #you may not use this file except in compliance with the License.
sys.path.append(cur_path) #You may obtain a copy of the License at
#
import paddle # http://www.apache.org/licenses/LICENSE-2.0
import paddle.nn as nn #
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import cv2 import cv2
from PIL import Image import subprocess
import numpy as np import numpy as np
from tqdm import tqdm from tqdm import tqdm
import argparse from PIL import Image
import subprocess from skimage import color
import utils
import paddle
from ppgan.models.generators.remaster import NetworkR, NetworkC from ppgan.models.generators.remaster import NetworkR, NetworkC
from paddle.utils.download import get_path_from_url from paddle.utils.download import get_path_from_url
from .base_predictor import BasePredictor
DEEPREMASTER_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams' DEEPREMASTER_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/deep_remaster.pdparams'
parser = argparse.ArgumentParser(description='Remastering')
parser.add_argument('--input', type=str, default=None, help='Input video') def convertLAB2RGB(lab):
parser.add_argument('--output', type=str, default='output', help='output dir') lab[:, :, 0:1] = lab[:, :, 0:1] * 100 # [0, 1] -> [0, 100]
parser.add_argument('--reference_dir', lab[:, :, 1:3] = np.clip(lab[:, :, 1:3] * 255 - 128, -100,
type=str, 100) # [0, 1] -> [-128, 128]
default=None, rgb = color.lab2rgb(lab.astype(np.float64))
help='Path to the reference image directory') return rgb
parser.add_argument('--colorization',
action='store_true',
default=False, def convertRGB2LABTensor(rgb):
help='Remaster without colorization') lab = color.rgb2lab(
parser.add_argument('--mindim', np.asarray(rgb)) # RGB -> LAB L[0, 100] a[-127, 128] b[-128, 127]
type=int, ab = np.clip(lab[:, :, 1:3] + 128, 0, 255) # AB --> [0, 255]
default='360', ab = paddle.to_tensor(ab.astype('float32')) / 255.
help='Length of minimum image edges') L = lab[:, :, 0] * 2.55 # L --> [0, 255]
L = Image.fromarray(np.uint8(L))
class DeepReasterPredictor: L = paddle.to_tensor(np.array(L).astype('float32')[..., np.newaxis] / 255.0)
return L, ab
def addMergin(img, target_w, target_h, background_color=(0, 0, 0)):
width, height = img.size
if width == target_w and height == target_h:
return img
scale = max(target_w, target_h) / max(width, height)
width = int(width * scale / 16.) * 16
height = int(height * scale / 16.) * 16
img = img.resize((width, height), Image.BICUBIC)
xp = (target_w - width) // 2
yp = (target_h - height) // 2
result = Image.new(img.mode, (target_w, target_h), background_color)
result.paste(img, (xp, yp))
return result
class DeepRemasterPredictor(BasePredictor):
def __init__(self, def __init__(self,
input, output='output',
output,
weight_path=None, weight_path=None,
colorization=False, colorization=False,
reference_dir=None, reference_dir=None,
mindim=360): mindim=360):
self.input = input
self.output = os.path.join(output, 'DeepRemaster') self.output = os.path.join(output, 'DeepRemaster')
self.colorization = colorization self.colorization = colorization
self.reference_dir = reference_dir self.reference_dir = reference_dir
self.mindim = mindim self.mindim = mindim
if weight_path is None: if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(DEEPREMASTER_WEIGHT_URL, cur_path) weight_path = get_path_from_url(DEEPREMASTER_WEIGHT_URL, cur_path)
state_dict, _ = paddle.load(weight_path) self.weight_path = weight_path
state_dict = paddle.load(weight_path)
self.modelR = NetworkR() self.modelR = NetworkR()
self.modelR.load_dict(state_dict['modelR']) self.modelR.load_dict(state_dict['modelR'])
...@@ -63,7 +92,7 @@ class DeepReasterPredictor: ...@@ -63,7 +92,7 @@ class DeepReasterPredictor:
self.modelC.load_dict(state_dict['modelC']) self.modelC.load_dict(state_dict['modelC'])
self.modelC.eval() self.modelC.eval()
    def run(self, video_path):
        outputdir = self.output
        outputdir_in = os.path.join(outputdir, 'input/')
        os.makedirs(outputdir_in, exist_ok=True)
...@@ -94,9 +123,7 @@ class DeepRemasterPredictor:
            refimgs = []
            for i, v in enumerate(refs):
                refimg = addMergin(v, target_w=target_w, target_h=target_h)
                refimg = np.array(refimg).astype('float32').transpose(
                    2, 0, 1) / 255.0
                refimgs.append(refimg)
...@@ -105,7 +132,7 @@ class DeepRemasterPredictor:
            refimgs = paddle.unsqueeze(refimgs, 0)

        # Load video
        cap = cv2.VideoCapture(video_path)
        nframes = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        v_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        v_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
...@@ -156,7 +183,7 @@ class DeepRemasterPredictor:
                elif nchannels == 3:
                    cv2.imwrite(outputdir_in + '%07d.png' % index, frame)
                    frame = frame[:, :, ::-1]  ## BGR -> RGB
                    frame_l, frame_ab = convertRGB2LABTensor(frame)
                    frame_l = frame_l.transpose([2, 0, 1])
                    frame_ab = frame_ab.transpose([2, 0, 1])
                    frame_l = frame_l.reshape([
...@@ -193,7 +220,7 @@ class DeepRemasterPredictor:
                        (out_l, out_ab),
                        axis=0).detach().numpy().transpose((1, 2, 0))
                    out = Image.fromarray(
                        np.uint8(convertLAB2RGB(out) * 255))
                    out.save(outputdir_out + '%07d.png' % (index))
                else:
                    raise ValueError('channels of image must be 3!')
...@@ -214,7 +241,7 @@ class DeepRemasterPredictor:
                    output = paddle.concat(
                        (out_l, out_c), axis=0).numpy().transpose((1, 2, 0))
                    output = Image.fromarray(
                        np.uint8(convertLAB2RGB(output) * 255))
                    output.save(outputdir_out + '%07d.png' % index)
                    it = it + 1
...@@ -222,7 +249,7 @@ class DeepRemasterPredictor:
        # Save result videos
        outfile = os.path.join(outputdir,
                               video_path.split('/')[-1].split('.')[0])
        cmd = 'ffmpeg -y -r %d -i %s%%07d.png -vcodec libx264 -pix_fmt yuv420p -r %d %s_in.mp4' % (
            fps, outputdir_in, fps, outfile)
        subprocess.call(cmd, shell=True)
...@@ -236,14 +263,3 @@ class DeepRemasterPredictor:
        cap.release()
        pbar.close()

        return outputdir_out, '%s_out.mp4' % outfile
if __name__ == "__main__":
args = parser.parse_args()
paddle.disable_static()
predictor = DeepReasterPredictor(args.input,
args.output,
colorization=args.colorization,
reference_dir=args.reference_dir,
mindim=args.mindim)
predictor.run()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os

import cv2
import glob
import numpy as np
from PIL import Image
from tqdm import tqdm

import paddle
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import frames2video, video2frames
from ppgan.models.generators.deoldify import build_model

from .base_predictor import BasePredictor
DEOLDIFY_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DeOldify_stable.pdparams'


class DeOldifyPredictor(BasePredictor):
    def __init__(self, output='output', weight_path=None, render_factor=32):
        # self.input = input
        self.output = os.path.join(output, 'DeOldify')
        self.render_factor = render_factor
        self.model = build_model()
        if weight_path is None:
            cur_path = os.path.abspath(os.path.dirname(__file__))
            weight_path = get_path_from_url(DEOLDIFY_WEIGHT_URL, cur_path)

        state_dict = paddle.load(weight_path)
        self.model.load_dict(state_dict)
        self.model.eval()
...@@ -85,8 +77,14 @@ class DeOldifyPredictor():
        final = Image.fromarray(final)

        return final

    def run_image(self, img):
        if isinstance(img, str):
            ori_img = Image.open(img).convert('LA').convert('RGB')
        elif isinstance(img, np.ndarray):
            ori_img = Image.fromarray(img).convert('LA').convert('RGB')
        elif isinstance(img, Image.Image):
            ori_img = img

        img = self.norm(ori_img, self.render_factor)
        x = paddle.to_tensor(img[np.newaxis, ...])
        out = self.model(x)
...@@ -97,9 +95,8 @@ class DeOldifyPredictor():
        pred_img = self.post_process(pred_img, ori_img)
        return pred_img
    def run_video(self, video):
        base_name = os.path.basename(video).split('.')[0]
        output_path = os.path.join(self.output, base_name)
        pred_frame_path = os.path.join(output_path, 'frames_pred')
...@@ -109,15 +106,15 @@ class DeOldifyPredictor():
        if not os.path.exists(pred_frame_path):
            os.makedirs(pred_frame_path)

        cap = cv2.VideoCapture(video)
        fps = cap.get(cv2.CAP_PROP_FPS)

        out_path = video2frames(video, output_path)

        frames = sorted(glob.glob(os.path.join(out_path, '*.png')))

        for frame in tqdm(frames):
            pred_img = self.run_image(frame)
            frame_name = os.path.basename(frame)
            pred_img.save(os.path.join(pred_frame_path, frame_name))
...@@ -130,15 +127,14 @@ class DeOldifyPredictor():
        return frame_pattern_combined, vid_out_path
    def run(self, input):
        if self.is_video(input):
            return self.run_video(input)
        else:
            pred_img = self.run_image(input)

            if self.output:
                base_name = os.path.basename(input)
                pred_img.save(os.path.join(self.output, base_name + '.png'))

            return pred_img
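# Illustrative usage sketch (not part of the original diff); the input path is
# a placeholder. run() dispatches to run_video() or run_image() depending on
# whether the input is a video.
if __name__ == '__main__':
    predictor = DeOldifyPredictor(output='output', render_factor=32)
    result = predictor.run('old_photo.png')
    print(result)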
...@@ -13,44 +13,18 @@
#limitations under the License.
import os
import cv2

import time
import glob
import numpy as np
from tqdm import tqdm

from paddle.utils.download import get_path_from_url
from ppgan.utils.video import frames2video, video2frames

from .base_predictor import BasePredictor

EDVR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/edvr_infer_model.tar'
def get_img(pred):
...@@ -72,29 +46,107 @@ def save_img(img, framename):
    cv2.imwrite(framename, img)


def read_img(path, size=None, is_gt=False):
    """read image by cv2
    return: Numpy float32, HWC, BGR, [0,1]"""
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
img = img.astype(np.float32) / 255.
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
if img.shape[2] > 3:
img = img[:, :, :3]
return img
def get_test_neighbor_frames(crt_i, N, max_n, padding='new_info'):
"""Generate an index list for reading N frames from a sequence of images
Args:
crt_i (int): current center index
max_n (int): max number of the sequence of images (calculated from 1)
N (int): reading N frames
padding (str): padding mode, one of replicate | reflection | new_info | circle
Example: crt_i = 0, N = 5
replicate: [0, 0, 0, 1, 2]
reflection: [2, 1, 0, 1, 2]
new_info: [4, 3, 0, 1, 2]
circle: [3, 4, 0, 1, 2]
Returns:
return_l (list [int]): a list of indexes
"""
max_n = max_n - 1
n_pad = N // 2
return_l = []
for i in range(crt_i - n_pad, crt_i + n_pad + 1):
if i < 0:
if padding == 'replicate':
add_idx = 0
elif padding == 'reflection':
add_idx = -i
elif padding == 'new_info':
add_idx = (crt_i + n_pad) + (-i)
elif padding == 'circle':
add_idx = N + i
else:
raise ValueError('Wrong padding mode')
elif i > max_n:
if padding == 'replicate':
add_idx = max_n
elif padding == 'reflection':
add_idx = max_n * 2 - i
elif padding == 'new_info':
add_idx = (crt_i - n_pad) - (i - max_n)
elif padding == 'circle':
add_idx = i - N
else:
raise ValueError('Wrong padding mode')
else:
add_idx = i
return_l.append(add_idx)
return return_l
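# Quick sanity check of the padding behaviour documented in the docstring above
# (illustrative, not part of the original file); with a 10-frame clip and
# center index 0, each padding mode fills the missing left neighbours differently.
if __name__ == '__main__':
    assert get_test_neighbor_frames(0, 5, 10, padding='replicate') == [0, 0, 0, 1, 2]
    assert get_test_neighbor_frames(0, 5, 10, padding='reflection') == [2, 1, 0, 1, 2]
    assert get_test_neighbor_frames(0, 5, 10, padding='new_info') == [4, 3, 0, 1, 2]
    assert get_test_neighbor_frames(0, 5, 10, padding='circle') == [3, 4, 0, 1, 2]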
class EDVRDataset:
def __init__(self, frame_paths):
self.frames = frame_paths
def __getitem__(self, index):
indexs = get_test_neighbor_frames(index, 5, len(self.frames))
frame_list = []
for i in indexs:
img = read_img(self.frames[i])
frame_list.append(img)
img_LQs = np.stack(frame_list, axis=0)
# BGR to RGB, HWC to CHW, numpy to tensor
img_LQs = img_LQs[:, :, :, [2, 1, 0]]
img_LQs = np.transpose(img_LQs, (0, 3, 1, 2)).astype('float32')
return img_LQs, self.frames[index]
def __len__(self):
return len(self.frames)
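# Illustrative sketch (not in the original file): EDVRDataset wraps a sorted
# list of frame paths and yields (5-frame stack, center-frame path) pairs; the
# glob pattern below is hypothetical.
if __name__ == '__main__':
    import glob
    frame_paths = sorted(glob.glob('frames/*.png'))
    dataset = EDVRDataset(frame_paths)
    if len(dataset) >= 5:
        lqs, center_path = dataset[0]
        print(lqs.shape, center_path)  # e.g. (5, 3, H, W)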
class EDVRPredictor(BasePredictor):
    def __init__(self, output='output', weight_path=None):
        self.input = input
        self.output = os.path.join(output, 'EDVR')

        if weight_path is None:
            cur_path = os.path.abspath(os.path.dirname(__file__))
            weight_path = get_path_from_url(EDVR_WEIGHT_URL, cur_path)

        self.weight_path = weight_path

        self.build_inference_model()

    def run(self, video_path):
        vid = video_path
        base_name = os.path.basename(vid).split('.')[0]
        output_path = os.path.join(self.output, base_name)
        pred_frame_path = os.path.join(output_path, 'frames_pred')
...@@ -119,11 +171,9 @@ class EDVRPredictor:
        for infer_iter, data in enumerate(tqdm(dataset)):
            data_feed_in = [data[0]]

            outs = self.base_forward(np.array(data_feed_in))

            infer_result_list = [item for item in outs]

            frame_path = data[1]
...@@ -144,9 +194,3 @@ class EDVRPredictor:
        frames2video(frame_pattern_combined, vid_out_path, str(int(fps)))

        return frame_pattern_combined, vid_out_path
if __name__ == "__main__":
args = parse_args()
predictor = EDVRPredictor(args.input, args.output, args.weight_path)
predictor.run()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import yaml
import pickle
import imageio
import numpy as np
from tqdm import tqdm
from skimage import img_as_ubyte
from skimage.transform import resize
from scipy.spatial import ConvexHull
import paddle
from paddle.utils.download import get_path_from_url
from ppgan.utils.animate import normalize_kp
from ppgan.modules.keypoint_detector import KPDetector
from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator
from .base_predictor import BasePredictor
class FirstOrderPredictor(BasePredictor):
def __init__(self,
output='output',
weight_path=None,
config=None,
relative=False,
adapt_scale=False,
find_best_frame=False,
best_frame=None):
if config is not None and isinstance(config, str):
self.cfg = yaml.load(config)
elif isinstance(config, dict):
self.cfg = config
elif config is None:
self.cfg = {
'model_params': {
'common_params': {
'num_kp': 10,
'num_channels': 3,
'estimate_jacobian': True
},
'kp_detector_params': {
'temperature': 0.1,
'block_expansion': 32,
'max_features': 1024,
'scale_factor': 0.25,
'num_blocks': 5
},
'generator_params': {
'block_expansion': 64,
'max_features': 512,
'num_down_blocks': 2,
'num_bottleneck_blocks': 6,
'estimate_occlusion_map': True,
'dense_motion_params': {
'block_expansion': 64,
'max_features': 1024,
'num_blocks': 5,
'scale_factor': 0.25
}
}
}
}
if weight_path is None:
vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-cpk.pdparams'
cur_path = os.path.abspath(os.path.dirname(__file__))
weight_path = get_path_from_url(vox_cpk_weight_url, cur_path)
self.weight_path = weight_path
self.output = output
self.relative = relative
self.adapt_scale = adapt_scale
self.find_best_frame = find_best_frame
self.best_frame = best_frame
self.generator, self.kp_detector = self.load_checkpoints(
self.cfg, self.weight_path)
def run(self, source_image, driving_video):
source_image = imageio.imread(source_image)
reader = imageio.get_reader(driving_video)
fps = reader.get_meta_data()['fps']
driving_video = []
try:
for im in reader:
driving_video.append(im)
except RuntimeError:
pass
reader.close()
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [
resize(frame, (256, 256))[..., :3] for frame in driving_video
]
if self.find_best_frame or self.best_frame is not None:
i = self.best_frame if self.best_frame is not None else self.find_best_frame_func(
source_image, driving_video)
print("Best frame: " + str(i))
driving_forward = driving_video[i:]
driving_backward = driving_video[:(i + 1)][::-1]
predictions_forward = self.make_animation(
source_image,
driving_forward,
self.generator,
self.kp_detector,
relative=self.relative,
adapt_movement_scale=self.adapt_scale)
predictions_backward = self.make_animation(
source_image,
driving_backward,
self.generator,
self.kp_detector,
relative=self.relative,
adapt_movement_scale=self.adapt_scale)
predictions = predictions_backward[::-1] + predictions_forward[1:]
else:
predictions = self.make_animation(
source_image,
driving_video,
self.generator,
self.kp_detector,
relative=self.relative,
adapt_movement_scale=self.adapt_scale)
imageio.mimsave(os.path.join(self.output, 'result.mp4'),
[img_as_ubyte(frame) for frame in predictions],
fps=fps)
def load_checkpoints(self, config, checkpoint_path):
generator = OcclusionAwareGenerator(
**config['model_params']['generator_params'],
**config['model_params']['common_params'])
kp_detector = KPDetector(**config['model_params']['kp_detector_params'],
**config['model_params']['common_params'])
checkpoint = paddle.load(self.weight_path)
generator.set_state_dict(checkpoint['generator'])
kp_detector.set_state_dict(checkpoint['kp_detector'])
generator.eval()
kp_detector.eval()
return generator, kp_detector
def make_animation(self,
source_image,
driving_video,
generator,
kp_detector,
relative=True,
adapt_movement_scale=True):
with paddle.no_grad():
predictions = []
source = paddle.to_tensor(source_image[np.newaxis].astype(
np.float32)).transpose([0, 3, 1, 2])
driving = paddle.to_tensor(
np.array(driving_video)[np.newaxis].astype(
np.float32)).transpose([0, 4, 1, 2, 3])
kp_source = kp_detector(source)
kp_driving_initial = kp_detector(driving[:, :, 0])
for frame_idx in tqdm(range(driving.shape[2])):
driving_frame = driving[:, :, frame_idx]
kp_driving = kp_detector(driving_frame)
kp_norm = normalize_kp(
kp_source=kp_source,
kp_driving=kp_driving,
kp_driving_initial=kp_driving_initial,
use_relative_movement=relative,
use_relative_jacobian=relative,
adapt_movement_scale=adapt_movement_scale)
out = generator(source, kp_source=kp_source, kp_driving=kp_norm)
predictions.append(
np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0])
return predictions
def find_best_frame_func(self, source, driving):
import face_alignment
def normalize_kp(kp):
kp = kp - kp.mean(axis=0, keepdims=True)
area = ConvexHull(kp[:, :2]).volume
area = np.sqrt(area)
kp[:, :2] = kp[:, :2] / area
return kp
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
flip_input=True)
kp_source = fa.get_landmarks(255 * source)[0]
kp_source = normalize_kp(kp_source)
norm = float('inf')
frame_num = 0
for i, image in tqdm(enumerate(driving)):
kp_driving = fa.get_landmarks(255 * image)[0]
kp_driving = normalize_kp(kp_driving)
new_norm = (np.abs(kp_source - kp_driving)**2).sum()
if new_norm < norm:
norm = new_norm
frame_num = i
return frame_num
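# Illustrative usage sketch (not part of the original file); the image and
# video paths are placeholders. The result is written to <output>/result.mp4.
if __name__ == '__main__':
    os.makedirs('output', exist_ok=True)
    predictor = FirstOrderPredictor(output='output',
                                    relative=True,
                                    adapt_scale=True)
    predictor.run('source_face.png', 'driving_video.mp4')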
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os

import cv2
import glob
import numpy as np
from PIL import Image
from tqdm import tqdm

import paddle

from ppgan.models.generators import RRDBNet
from ppgan.utils.video import frames2video, video2frames
from paddle.utils.download import get_path_from_url

from .base_predictor import BasePredictor
REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DF2K_JPEG.pdparams'


class RealSRPredictor(BasePredictor):
    def __init__(self, output='output', weight_path=None):
        self.input = input
        self.output = os.path.join(output, 'RealSR')
        self.model = RRDBNet(3, 3, 64, 23)
        if weight_path is None:
            cur_path = os.path.abspath(os.path.dirname(__file__))
            weight_path = get_path_from_url(REALSR_WEIGHT_URL, cur_path)

        state_dict = paddle.load(weight_path)
        self.model.load_dict(state_dict)
        self.model.eval()
...@@ -49,8 +49,14 @@ class RealSRPredictor():
        img = img.transpose((1, 2, 0))
        return (img * 255).clip(0, 255).astype('uint8')

    def run_image(self, img):
        if isinstance(img, str):
            ori_img = Image.open(img).convert('RGB')
        elif isinstance(img, np.ndarray):
            ori_img = Image.fromarray(img).convert('RGB')
        elif isinstance(img, Image.Image):
            ori_img = img

        img = self.norm(ori_img)
        x = paddle.to_tensor(img[np.newaxis, ...])
        out = self.model(x)
...@@ -59,9 +65,8 @@ class RealSRPredictor():
        pred_img = Image.fromarray(pred_img)
        return pred_img
    def run_video(self, video):
        base_name = os.path.basename(video).split('.')[0]
        output_path = os.path.join(self.output, base_name)
        pred_frame_path = os.path.join(output_path, 'frames_pred')
...@@ -71,15 +76,15 @@ class RealSRPredictor():
        if not os.path.exists(pred_frame_path):
            os.makedirs(pred_frame_path)

        cap = cv2.VideoCapture(video)
        fps = cap.get(cv2.CAP_PROP_FPS)

        out_path = video2frames(video, output_path)

        frames = sorted(glob.glob(os.path.join(out_path, '*.png')))

        for frame in tqdm(frames):
            pred_img = self.run_image(frame)
            frame_name = os.path.basename(frame)
            pred_img.save(os.path.join(pred_frame_path, frame_name))
...@@ -92,14 +97,14 @@ class RealSRPredictor():
        return frame_pattern_combined, vid_out_path
    def run(self, input):
        if self.is_video(input):
            return self.run_video(input)
        else:
            pred_img = self.run_image(input)

            if self.output:
                base_name = os.path.basename(input)
                pred_img.save(os.path.join(self.output, base_name + '.png'))

            return pred_img
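# Illustrative usage sketch (not part of the original diff); the input path is
# a placeholder. As with DeOldify, run() handles either a single image or a
# video.
if __name__ == '__main__':
    predictor = RealSRPredictor(output='output')
    result = predictor.run('low_res.png')
    print(result)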
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .unpaired_dataset import UnpairedDataset
from .single_dataset import SingleDataset
from .paired_dataset import PairedDataset
from .sr_image_dataset import SRImageDataset
from .makeup_dataset import MakeupDataset
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import os.path
from .base_dataset import BaseDataset, get_transform
from .transforms.makeup_transforms import get_makeup_transform
import paddle.vision.transforms as T
from PIL import Image
import random
import numpy as np
from ..utils.preprocess import *
from .builder import DATASETS
@DATASETS.register()
class MakeupDataset(BaseDataset):
def __init__(self, cfg):
"""Initialize this dataset class.
Parameters:
cfg (dict) -- stores all the experiment flags
"""
BaseDataset.__init__(self, cfg)
self.image_path = cfg.dataroot
self.mode = cfg.phase
self.transform = get_makeup_transform(cfg)
self.norm = T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])
self.transform_mask = get_makeup_transform(cfg, pic="mask")
self.trans_size = cfg.trans_size
self.cls_list = cfg.cls_list
self.cls_A = self.cls_list[0]
self.cls_B = self.cls_list[1]
for cls in self.cls_list:
setattr(
self, cls + "_list_path",
os.path.join(self.image_path, self.mode + '_' + cls + ".txt"))
setattr(self, cls + "_lines",
open(getattr(self, cls + "_list_path"), 'r').readlines())
setattr(self, "num_of_" + cls + "_data",
len(getattr(self, cls + "_lines")))
print('Start preprocessing dataset..!')
self.preprocess()
print('Finished preprocessing dataset..!')
def preprocess(self):
"""preprocess image"""
for cls in self.cls_list:
setattr(self, cls + "_filenames", [])
setattr(self, cls + "_mask_filenames", [])
setattr(self, cls + "_lmks_filenames", [])
lines = getattr(self, cls + "_lines")
random.shuffle(lines)
for i, line in enumerate(lines):
splits = line.split()
getattr(self, cls + "_filenames").append(splits[0])
getattr(self, cls + "_mask_filenames").append(splits[1])
getattr(self, cls + "_lmks_filenames").append(splits[2])
def __getitem__(self, index):
"""Return MANet and MDNet needed params.
Parameters:
index (int) -- a random integer for data indexing
Returns a dictionary that contains needed params.
"""
try:
index_A = random.randint(
0, getattr(self, "num_of_" + self.cls_A + "_data"))
index_B = random.randint(
0, getattr(self, "num_of_" + self.cls_B + "_data"))
if self.mode == 'test':
num_b = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
index_A = int(index / num_b)
index_B = int(index % num_b)
image_A = Image.open(
os.path.join(self.image_path,
getattr(self, self.cls_A +
"_filenames")[index_A])).convert("RGB")
image_B = Image.open(
os.path.join(self.image_path,
getattr(self, self.cls_B +
"_filenames")[index_B])).convert("RGB")
mask_A = np.array(
Image.open(
os.path.join(
self.image_path,
getattr(self,
self.cls_A + "_mask_filenames")[index_A])))
mask_B = np.array(
Image.open(
os.path.join(
self.image_path,
getattr(self, self.cls_B +
"_mask_filenames")[index_B])).convert('L'))
image_A = np.array(image_A)
image_B = np.array(image_B)
image_A = self.transform(image_A)
image_B = self.transform(image_B)
mask_A = cv2.resize(mask_A, (256, 256),
interpolation=cv2.INTER_NEAREST)
mask_B = cv2.resize(mask_B, (256, 256),
interpolation=cv2.INTER_NEAREST)
lmks_A = np.loadtxt(
os.path.join(
self.image_path,
getattr(self, self.cls_A + "_lmks_filenames")[index_A]))
lmks_B = np.loadtxt(
os.path.join(
self.image_path,
getattr(self, self.cls_B + "_lmks_filenames")[index_B]))
lmks_A = lmks_A / image_A.shape[:2] * self.trans_size
lmks_B = lmks_B / image_B.shape[:2] * self.trans_size
P_A = generate_P_from_lmks(lmks_A, self.trans_size,
image_A.shape[0], image_A.shape[1])
P_B = generate_P_from_lmks(lmks_B, self.trans_size,
image_B.shape[0], image_B.shape[1])
mask_A_aug = generate_mask_aug(mask_A, lmks_A)
mask_B_aug = generate_mask_aug(mask_B, lmks_B)
consis_mask = calculate_consis_mask(mask_A_aug, mask_B_aug)
consis_mask_idt_A = calculate_consis_mask(mask_A_aug, mask_A_aug)
consis_mask_idt_B = calculate_consis_mask(mask_A_aug, mask_B_aug)
except Exception as e:
print(e)
return self.__getitem__(index + 1)
return {
'image_A': self.norm(image_A),
'image_B': self.norm(image_B),
'mask_A': np.float32(mask_A),
'mask_B': np.float32(mask_B),
'consis_mask': np.float32(consis_mask),
'P_A': np.float32(P_A),
'P_B': np.float32(P_B),
'consis_mask_idt_A': np.float32(consis_mask_idt_A),
'consis_mask_idt_B': np.float32(consis_mask_idt_B),
'mask_A_aug': np.float32(mask_A_aug),
'mask_B_aug': np.float32(mask_B_aug)
}
def __len__(self):
"""Return the total number of images in the dataset.
As we have two datasets with potentially different numbers of images,
we take the maximum of the two in train mode.
"""
if self.mode == 'train':
num_A = getattr(self, 'num_of_' + self.cls_list[0] + '_data')
num_B = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
return max(num_A, num_B)
elif self.mode == "test":
num_A = getattr(self, 'num_of_' + self.cls_list[0] + '_data')
num_B = getattr(self, 'num_of_' + self.cls_list[1] + '_data')
return num_A * num_B
return max(self.A_size, self.B_size)
...@@ -5,13 +5,13 @@ from .base_dataset import BaseDataset, get_params, get_transform
from .image_folder import make_dataset

from .builder import DATASETS
from .transforms.builder import build_transforms


@DATASETS.register()
class PairedDataset(BaseDataset):
    """A dataset class for paired image dataset.
    """
    def __init__(self, cfg):
        """Initialize this dataset class.
...@@ -19,11 +19,14 @@ class PairedDataset(BaseDataset):
            cfg (dict) -- stores all the experiment flags
        """
        BaseDataset.__init__(self, cfg)
        self.dir_AB = os.path.join(cfg.dataroot,
                                   cfg.phase)  # get the image directory
        self.AB_paths = sorted(make_dataset(
            self.dir_AB, cfg.max_dataset_size))  # get image paths

        self.input_nc = self.cfg.output_nc if self.cfg.direction == 'BtoA' else self.cfg.input_nc
        self.output_nc = self.cfg.input_nc if self.cfg.direction == 'BtoA' else self.cfg.output_nc

        self.transforms = build_transforms(cfg.transforms)

    def __getitem__(self, index):
        """Return a data point and its metadata information.
...@@ -49,27 +52,11 @@ class PairedDataset(BaseDataset):
        A = AB[:h, :w2, :]
        B = AB[:h, w2:, :]

        # apply the same transform to both A and B
        A, B = self.transforms((A, B))

        return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}
    def __len__(self):
        """Return the total number of images in the dataset."""
        return len(self.AB_paths)


from .transforms import RandomCrop, Resize, RandomHorizontalFlip, PairedRandomCrop, PairedRandomHorizontalFlip, Normalize, Permute
import copy
import traceback
import paddle
from ...utils.registry import Registry
TRANSFORMS = Registry("TRANSFORMS")
class Compose(object):
"""
Composes several transforms together use for composing list of transforms
together for a dataset transform.
Args:
transforms (list): List of transforms to compose.
Returns:
A compose object which is callable, __call__ for this Compose
object will call each given :attr:`transforms` sequencely.
"""
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, data):
for f in self.transforms:
try:
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
print("fail to perform transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
return data
def build_transforms(cfg):
transforms = []
for trans_cfg in cfg:
temp_trans_cfg = copy.deepcopy(trans_cfg)
name = temp_trans_cfg.pop('name')
transforms.append(TRANSFORMS.get(name)(**temp_trans_cfg))
transforms = Compose(transforms)
return transforms
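# Illustrative sketch (not from the original file): build_transforms consumes a
# list of dicts, popping 'name' to look up the registered transform and passing
# the remaining keys as constructor kwargs. It assumes the transform classes
# defined in the transforms module below have already been registered.
if __name__ == '__main__':
    import numpy as np
    pipeline = build_transforms([
        {'name': 'Resize', 'size': 286},
        {'name': 'RandomCrop', 'output_size': 256},
        {'name': 'Permute'},
        {'name': 'Normalize', 'mean': 127.5, 'std': 127.5},
    ])
    fake_img = np.random.randint(0, 255, (300, 300, 3)).astype('float32')
    print(pipeline(fake_img).shape)  # (3, 256, 256)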
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...@@ -11,36 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.vision.transforms as T
import cv2


def get_makeup_transform(cfg, pic="image"):
    if pic == "image":
        transform = T.Compose([
            T.Resize(size=cfg.trans_size),
            T.Permute(to_rgb=False),
        ])
    else:
        transform = T.Resize(size=cfg.trans_size,
                             interpolation=cv2.INTER_NEAREST)

    return transform
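# Illustrative sketch (not from the original file): the config object only
# needs a trans_size attribute here, so a SimpleNamespace stands in for the
# real experiment config.
if __name__ == '__main__':
    from types import SimpleNamespace
    cfg = SimpleNamespace(trans_size=256)
    image_tf = get_makeup_transform(cfg)             # resize + HWC -> CHW
    mask_tf = get_makeup_transform(cfg, pic="mask")  # nearest-neighbour resize only
    print(image_tf, mask_tf)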
import sys
import random
import numbers
import collections

import numpy as np

from paddle.utils import try_import
import paddle.vision.transforms.functional as F

from .builder import TRANSFORMS

if sys.version_info < (3, 3):
    Sequence = collections.Sequence
    Iterable = collections.Iterable
else:
    Sequence = collections.abc.Sequence
    Iterable = collections.abc.Iterable
class Transform():
def _set_attributes(self, args):
"""
Set attributes from the input list of parameters.
Args:
args (list): list of parameters.
"""
if args:
for k, v in args.items():
if k != "self" and not k.startswith("_"):
setattr(self, k, v)
def apply_image(self, input):
raise NotImplementedError
def __call__(self, inputs):
if isinstance(inputs, tuple):
inputs = list(inputs)
if self.keys is not None:
for i, key in enumerate(self.keys):
if isinstance(inputs, dict):
inputs[key] = getattr(self, 'apply_' + key)(inputs[key])
elif isinstance(inputs, (list, tuple)):
inputs[i] = getattr(self, 'apply_' + key)(inputs[i])
else:
inputs = self.apply_image(inputs)
if isinstance(inputs, list):
inputs = tuple(inputs)
return inputs
@TRANSFORMS.register()
class Resize(Transform):
"""Resize the input Image to the given size.
Args:
size (int|list|tuple): Desired output size. If size is a sequence like
(h, w), output size will be matched to this. If size is an int,
smaller edge of the image will be matched to this number.
i.e, if height > width, then image will be rescaled to
(size * height / width, size)
interpolation (int, optional): Interpolation mode of resize. Default: 1.
0 : cv2.INTER_NEAREST
1 : cv2.INTER_LINEAR
2 : cv2.INTER_CUBIC
3 : cv2.INTER_AREA
4 : cv2.INTER_LANCZOS4
5 : cv2.INTER_LINEAR_EXACT
7 : cv2.INTER_MAX
8 : cv2.WARP_FILL_OUTLIERS
16: cv2.WARP_INVERSE_MAP
"""
def __init__(self, size, interpolation=1, keys=None):
super().__init__()
assert isinstance(size, int) or (isinstance(size, Iterable)
and len(size) == 2)
self._set_attributes(locals())
if isinstance(self.size, Iterable):
self.size = tuple(size)
def apply_image(self, img):
return F.resize(img, self.size, self.interpolation)
@TRANSFORMS.register()
class RandomCrop(Transform):
def __init__(self, output_size, keys=None):
super().__init__()
self._set_attributes(locals())
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
...@@ -19,12 +105,162 @@ class RandomCrop(object):
        j = random.randint(0, w - tw)
        return i, j, th, tw

    def apply_image(self, img):
        i, j, h, w = self._get_params(img)
        cropped_img = img[i:i + h, j:j + w]
        return cropped_img
@TRANSFORMS.register()
class PairedRandomCrop(RandomCrop):
def __init__(self, output_size, keys=None):
super().__init__(output_size, keys)
if isinstance(output_size, int):
self.output_size = (output_size, output_size)
else:
self.output_size = output_size
def apply_image(self, img, crop_params=None):
if crop_params is not None:
i, j, h, w = crop_params
else:
i, j, h, w = self._get_params(img)
cropped_img = img[i:i + h, j:j + w]
return cropped_img
def __call__(self, inputs):
if isinstance(inputs, tuple):
inputs = list(inputs)
if self.keys is not None:
if isinstance(inputs, dict):
crop_params = self._get_params(inputs[self.keys[0]])
elif isinstance(inputs, (list, tuple)):
crop_params = self._get_params(inputs[0])
for i, key in enumerate(self.keys):
if isinstance(inputs, dict):
inputs[key] = getattr(self, 'apply_' + key)(inputs[key],
crop_params)
elif isinstance(inputs, (list, tuple)):
inputs[i] = getattr(self, 'apply_' + key)(inputs[i],
crop_params)
else:
crop_params = self._get_params(inputs)
inputs = self.apply_image(inputs, crop_params)
if isinstance(inputs, list):
inputs = tuple(inputs)
return inputs
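# Illustrative sketch (not from the original file): with keys set, the same
# random crop window is applied to every element of the input pair.
if __name__ == '__main__':
    import numpy as np
    lq = np.arange(32 * 32 * 3).reshape(32, 32, 3)
    gt = lq.copy()
    paired_crop = PairedRandomCrop(16, keys=['image', 'image'])
    lq_c, gt_c = paired_crop((lq, gt))
    assert (lq_c == gt_c).all() and lq_c.shape == (16, 16, 3)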
@TRANSFORMS.register()
class RandomHorizontalFlip(Transform):
"""Horizontally flip the input data randomly with a given probability.
Args:
prob (float): Probability of the input data being flipped. Default: 0.5
"""
def __init__(self, prob=0.5, keys=None):
super().__init__()
self._set_attributes(locals())
def apply_image(self, img):
if np.random.random() < self.prob:
return F.flip(img, code=1)
return img
@TRANSFORMS.register()
class PairedRandomHorizontalFlip(RandomHorizontalFlip):
def __init__(self, prob=0.5, keys=None):
super().__init__()
self._set_attributes(locals())
def apply_image(self, img, flip):
if flip:
return F.flip(img, code=1)
return img
def __call__(self, inputs):
if isinstance(inputs, tuple):
inputs = list(inputs)
flip = np.random.random() < self.prob
if self.keys is not None:
for i, key in enumerate(self.keys):
if isinstance(inputs, dict):
inputs[key] = getattr(self, 'apply_' + key)(inputs[key],
flip)
elif isinstance(inputs, (list, tuple)):
inputs[i] = getattr(self, 'apply_' + key)(inputs[i], flip)
else:
inputs = self.apply_image(inputs, flip)
if isinstance(inputs, list):
inputs = tuple(inputs)
return inputs
@TRANSFORMS.register()
class Normalize(Transform):
"""Normalize the input data with mean and standard deviation.
Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels,
this transform will normalize each channel of the input data.
``output[channel] = (input[channel] - mean[channel]) / std[channel]``
Args:
mean (int|float|list): Sequence of means for each channel.
std (int|float|list): Sequence of standard deviations for each channel.
"""
def __init__(self, mean=0.0, std=1.0, keys=None):
super().__init__()
self._set_attributes(locals())
if isinstance(mean, numbers.Number):
mean = [mean, mean, mean]
if isinstance(std, numbers.Number):
std = [std, std, std]
self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1)
self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1)
def apply_image(self, img):
return (img - self.mean) / self.std
@TRANSFORMS.register()
class Permute(Transform):
"""Change input data to a target mode.
For example, most transforms use HWC mode image,
while the Neural Network might use CHW mode input tensor.
Input image should be HWC mode and an instance of numpy.ndarray.
Args:
mode (str): Output mode of input. Default: "CHW".
to_rgb (bool): Convert 'bgr' image to 'rgb'. Default: True.
"""
def __init__(self, mode="CHW", to_rgb=True, keys=None):
super().__init__()
self._set_attributes(locals())
assert mode in [
"CHW"
], "Only support 'CHW' mode, but received mode: {}".format(mode)
self.mode = mode
self.to_rgb = to_rgb
def apply_image(self, img):
if self.to_rgb:
img = img[..., ::-1]
if self.mode == "CHW":
return img.transpose((2, 0, 1))
return img
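# Small numeric check of the Normalize/Permute behaviour described in the
# docstrings above (illustrative, not part of the original file).
if __name__ == '__main__':
    import numpy as np
    bgr = np.full((4, 4, 3), 255.0, dtype='float32')  # HWC, BGR
    chw = Permute()(bgr)                              # RGB, CHW
    normed = Normalize(mean=127.5, std=127.5)(chw)    # (255 - 127.5) / 127.5
    assert chw.shape == (3, 4, 4) and np.allclose(normed, 1.0)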
class Crop():
    def __init__(self, pos, size):
        self.pos = pos
...@@ -35,6 +271,6 @@ class Crop():
        x, y = self.pos
        th = tw = self.size
        if (ow > tw or oh > th):
            return img[y:y + th, x:x + tw]
        return img
...@@ -5,13 +5,13 @@ from .base_dataset import BaseDataset, get_transform
from .image_folder import make_dataset

from .builder import DATASETS
from .transforms.builder import build_transforms


@DATASETS.register()
class UnpairedDataset(BaseDataset):
    """
    """
    def __init__(self, cfg):
        """Initialize this dataset class.
...@@ -19,18 +19,25 @@ class UnpairedDataset(BaseDataset):
            cfg (dict) -- stores all the experiment flags
        """
        BaseDataset.__init__(self, cfg)
        self.dir_A = os.path.join(cfg.dataroot, cfg.phase +
                                  'A')  # create a path '/path/to/data/trainA'
        self.dir_B = os.path.join(cfg.dataroot, cfg.phase +
                                  'B')  # create a path '/path/to/data/trainB'

        self.A_paths = sorted(make_dataset(
            self.dir_A,
            cfg.max_dataset_size))  # load images from '/path/to/data/trainA'
        self.B_paths = sorted(make_dataset(
            self.dir_B,
            cfg.max_dataset_size))  # load images from '/path/to/data/trainB'
        self.A_size = len(self.A_paths)  # get the size of dataset A
        self.B_size = len(self.B_paths)  # get the size of dataset B
        btoA = self.cfg.direction == 'BtoA'
        input_nc = self.cfg.output_nc if btoA else self.cfg.input_nc  # get the number of channels of input image
        output_nc = self.cfg.input_nc if btoA else self.cfg.output_nc  # get the number of channels of output image

        self.transform_A = build_transforms(self.cfg.transforms)
        self.transform_B = build_transforms(self.cfg.transforms)

        self.reset_paths()
...@@ -49,10 +56,11 @@ class UnpairedDataset(BaseDataset):
            A_paths (str) -- image paths
            B_paths (str) -- image paths
        """
        A_path = self.A_paths[
            index % self.A_size]  # make sure index is within the range
        if self.cfg.serial_batches:  # make sure index is within the range
            index_B = index % self.B_size
        else:  # randomize the index for domain B to avoid fixed pairs.
            index_B = random.randint(0, self.B_size - 1)
        B_path = self.B_paths[index_B]
...
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import dlibutils as dlib
from . import mask
from . import image
from .dlib_utils import detect, crop, landmarks, crop_from_array
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os.path as osp
import numpy as np
from PIL import Image
import dlib
import cv2
from ..image import resize_by_max
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(
osp.split(osp.realpath(__file__))[0] + '/lms.dat')
def detect(image: Image):
image = np.asarray(image)
h, w = image.shape[:2]
image = resize_by_max(image, 361)
actual_h, actual_w = image.shape[:2]
faces_on_small = detector(image, 1)
faces = dlib.rectangles()
for face in faces_on_small:
faces.append(
dlib.rectangle(int(face.left() / actual_w * w + 0.5),
int(face.top() / actual_h * h + 0.5),
int(face.right() / actual_w * w + 0.5),
int(face.bottom() / actual_h * h + 0.5)))
return faces
def crop(image: Image, face, up_ratio, down_ratio, width_ratio):
width, height = image.size
face_height = face.height()
face_width = face.width()
delta_up = up_ratio * face_height
delta_down = down_ratio * face_height
delta_width = width_ratio * width
img_left = int(max(0, face.left() - delta_width))
img_top = int(max(0, face.top() - delta_up))
img_right = int(min(width, face.right() + delta_width))
img_bottom = int(min(height, face.bottom() + delta_down))
image = image.crop((img_left, img_top, img_right, img_bottom))
face = dlib.rectangle(face.left() - img_left,
face.top() - img_top,
face.right() - img_left,
face.bottom() - img_top)
face_expand = dlib.rectangle(img_left, img_top, img_right, img_bottom)
center = face_expand.center()
width, height = image.size
crop_left = img_left
crop_top = img_top
crop_right = img_right
crop_bottom = img_bottom
if width > height:
left = int(center.x - height / 2)
right = int(center.x + height / 2)
if left < 0:
left, right = 0, height
elif right > width:
left, right = width - height, width
image = image.crop((left, 0, right, height))
face = dlib.rectangle(face.left() - left, face.top(),
face.right() - left, face.bottom())
crop_left += left
crop_right = crop_left + height
elif width < height:
top = int(center.y - width / 2)
bottom = int(center.y + width / 2)
if top < 0:
top, bottom = 0, width
elif bottom > height:
top, bottom = height - width, height
image = image.crop((0, top, width, bottom))
face = dlib.rectangle(face.left(),
face.top() - top, face.right(),
face.bottom() - top)
crop_top += top
crop_bottom = crop_top + width
crop_face = dlib.rectangle(crop_left, crop_top, crop_right, crop_bottom)
return image, face, crop_face
def crop_by_image_size(image: Image, face):
center = face.center()
width, height = image.size
if width > height:
left = int(center.x - height / 2)
right = int(center.x + height / 2)
if left < 0:
left, right = 0, height
elif right > width:
left, right = width - height, width
image = image.crop((left, 0, right, height))
face = dlib.rectangle(face.left() - left, face.top(),
face.right() - left, face.bottom())
elif width < height:
top = int(center.y - width / 2)
bottom = int(center.y + width / 2)
if top < 0:
top, bottom = 0, width
elif bottom > height:
top, bottom = height - width, height
image = image.crop((0, top, width, bottom))
face = dlib.rectangle(face.left(),
face.top() - top, face.right(),
face.bottom() - top)
return image, face
def landmarks(image: Image, face):
shape = predictor(np.asarray(image), face).parts()
return np.array([[p.y, p.x] for p in shape])
def crop_from_array(image: np.array, face):
ratio = 0.20 / 0.85 # delta_size / face_size
height, width = image.shape[:2]
face_height = face.height()
face_width = face.width()
delta_height = ratio * face_height
delta_width = ratio * width
img_left = int(max(0, face.left() - delta_width))
img_top = int(max(0, face.top() - delta_height))
img_right = int(min(width, face.right() + delta_width))
img_bottom = int(min(height, face.bottom() + delta_height))
image = image[img_top:img_bottom, img_left:img_right]
face = dlib.rectangle(face.left() - img_left,
face.top() - img_top,
face.right() - img_left,
face.bottom() - img_top)
center = face.center()
height, width = image.shape[:2]
if width > height:
left = int(center.x - height / 2)
right = int(center.x + height / 2)
if left < 0:
left, right = 0, height
elif right > width:
left, right = width - height, width
image = image[0:height, left:right]
face = dlib.rectangle(face.left() - left, face.top(),
face.right() - left, face.bottom())
elif width < height:
top = int(center.y - width / 2)
bottom = int(center.y + width / 2)
if top < 0:
top, bottom = 0, width
elif bottom > height:
top, bottom = height - width, height
image = image[top:bottom, 0:width]
face = dlib.rectangle(face.left(),
face.top() - top, face.right(),
face.bottom() - top)
return image, face
import numpy as np
import cv2
from io import BytesIO
def resize_by_max(image, max_side=512, force=False):
h, w = image.shape[:2]
if max(h, w) < max_side and not force:
return image
ratio = max(h, w) / max_side
w = int(w / ratio + 0.5)
h = int(h / ratio + 0.5)
return cv2.resize(image, (w, h))
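# Small illustrative check (not from the original file): resize_by_max only
# shrinks an image unless force=True.
if __name__ == '__main__':
    big = np.zeros((1024, 768, 3), dtype=np.uint8)
    small = np.zeros((100, 80, 3), dtype=np.uint8)
    print(resize_by_max(big, 512).shape)    # longest side becomes 512
    print(resize_by_max(small, 512).shape)  # unchanged: already <= 512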
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .face_parser import FaceParser
import os.path as osp
import numpy as np
import cv2
from PIL import Image
import paddle
import paddle.vision.transforms as T
import pickle
from .model import BiSeNet
class FaceParser:
def __init__(self, device="cpu"):
self.mapper = {
0: 0,
1: 1,
2: 2,
3: 3,
4: 4,
5: 5,
6: 0,
7: 11,
8: 12,
9: 0,
10: 6,
11: 8,
12: 7,
13: 9,
14: 13,
15: 0,
16: 0,
17: 10,
18: 0
}
#self.dict = paddle.to_tensor(mapper)
self.save_pth = osp.split(
osp.realpath(__file__))[0] + '/resnet.pdparams'
self.net = BiSeNet(n_classes=19)
self.transforms = T.Compose([
T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
def parse(self, image):
assert image.shape[:2] == (512, 512)
image = image / 255.0
image = image.transpose((2, 0, 1))
image = self.transforms(image)
state_dict, _ = paddle.load(self.save_pth)
self.net.set_dict(state_dict)
self.net.eval()
with paddle.no_grad():
image = paddle.to_tensor(image)
image = image.unsqueeze(0)
out = self.net(image)[0]
parsing = out.squeeze(0).argmax(0) #argmax(0).astype('float32')
#parsing = paddle.nn.functional.embedding(x=self.dict, weight=parsing)
parse_np = parsing.numpy()
h, w = parse_np.shape
result = np.zeros((h, w))
for i in range(h):
for j in range(w):
result[i][j] = self.mapper[parse_np[i][j]]
result = paddle.to_tensor(result).astype('float32')
return result
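# A hedged sketch (not part of the original file) of how the per-pixel mapper loop in
# FaceParser.parse could be vectorized with a NumPy lookup table; `mapper` is assumed
# to cover all label ids 0..18 as defined in FaceParser.__init__.
def _remap_labels(parse_np, mapper):
    lut = np.zeros(max(mapper) + 1, dtype=np.float32)
    for src, dst in mapper.items():
        lut[src] = dst
    # fancy indexing gives the same result as the nested loops in a single step
    return lut[parse_np]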
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle.utils.download import get_weights_path_from_url
import numpy as np
from .resnet import resnet18
class ConvBNReLU(paddle.nn.Layer):
def __init__(self,
in_chan,
out_chan,
ks=3,
stride=1,
padding=1,
*args,
**kwargs):
super(ConvBNReLU, self).__init__()
self.conv = nn.Conv2d(in_chan,
out_chan,
kernel_size=ks,
stride=stride,
padding=padding,
bias_attr=False)
self.bn = nn.BatchNorm2d(out_chan)
self.relu = nn.ReLU()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class BiSeNetOutput(paddle.nn.Layer):
def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
super(BiSeNetOutput, self).__init__()
self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
self.conv_out = nn.Conv2d(mid_chan,
n_classes,
kernel_size=1,
bias_attr=False)
def forward(self, x):
x = self.conv(x)
x = self.conv_out(x)
return x
class AttentionRefinementModule(paddle.nn.Layer):
def __init__(self, in_chan, out_chan, *args, **kwargs):
super(AttentionRefinementModule, self).__init__()
self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
self.conv_atten = nn.Conv2d(out_chan,
out_chan,
kernel_size=1,
bias_attr=False)
self.bn_atten = nn.BatchNorm(out_chan)
self.sigmoid_atten = nn.Sigmoid()
def forward(self, x):
feat = self.conv(x)
atten = F.avg_pool2d(feat, feat.shape[2:])
atten = self.conv_atten(atten)
atten = self.bn_atten(atten)
atten = self.sigmoid_atten(atten)
out = feat * atten
return out
class ContextPath(paddle.nn.Layer):
def __init__(self, *args, **kwargs):
super(ContextPath, self).__init__()
self.resnet = resnet18()
self.arm16 = AttentionRefinementModule(256, 128)
self.arm32 = AttentionRefinementModule(512, 128)
self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)
def forward(self, x):
H0, W0 = x.shape[2:]
feat8, feat16, feat32 = self.resnet(x)
H8, W8 = feat8.shape[2:]
H16, W16 = feat16.shape[2:]
H32, W32 = feat32.shape[2:]
avg = F.avg_pool2d(feat32, feat32.shape[2:])
avg = self.conv_avg(avg)
avg_up = F.interpolate(avg, size=(H32, W32), mode='nearest')
feat32_arm = self.arm32(feat32)
feat32_sum = feat32_arm + avg_up
feat32_up = F.interpolate(feat32_sum, size=(H16, W16), mode='nearest')
feat32_up = self.conv_head32(feat32_up)
feat16_arm = self.arm16(feat16)
feat16_sum = feat16_arm + feat32_up
feat16_up = F.interpolate(feat16_sum, size=(H8, W8), mode='nearest')
feat16_up = self.conv_head16(feat16_up)
return feat8, feat16_up, feat32_up # x8, x8, x16
class SpatialPath(paddle.nn.Layer):
def __init__(self, *args, **kwargs):
super(SpatialPath, self).__init__()
self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
def forward(self, x):
feat = self.conv1(x)
feat = self.conv2(feat)
feat = self.conv3(feat)
feat = self.conv_out(feat)
return feat
class FeatureFusionModule(paddle.nn.Layer):
def __init__(self, in_chan, out_chan, *args, **kwargs):
super(FeatureFusionModule, self).__init__()
self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
self.conv1 = nn.Conv2d(out_chan,
out_chan // 4,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
self.conv2 = nn.Conv2d(out_chan // 4,
out_chan,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
self.relu = nn.ReLU()
self.sigmoid = nn.Sigmoid()
def forward(self, fsp, fcp):
fcat = paddle.concat([fsp, fcp], axis=1)
feat = self.convblk(fcat)
atten = F.avg_pool2d(feat, feat.shape[2:])
atten = self.conv1(atten)
atten = self.relu(atten)
atten = self.conv2(atten)
atten = self.sigmoid(atten)
feat_atten = feat * atten
feat_out = feat_atten + feat
return feat_out
class BiSeNet(paddle.nn.Layer):
def __init__(self, n_classes, *args, **kwargs):
super(BiSeNet, self).__init__()
self.cp = ContextPath()
self.ffm = FeatureFusionModule(256, 256)
self.conv_out = BiSeNetOutput(256, 256, n_classes)
self.conv_out16 = BiSeNetOutput(128, 64, n_classes)
self.conv_out32 = BiSeNetOutput(128, 64, n_classes)
def forward(self, x):
H, W = x.shape[2:]
feat_res8, feat_cp8, feat_cp16 = self.cp(
x) # here return res3b1 feature
feat_sp = feat_res8 # use res3b1 feature to replace spatial path feature
feat_fuse = self.ffm(feat_sp, feat_cp8)
feat_out = self.conv_out(feat_fuse)
feat_out16 = self.conv_out16(feat_cp8)
feat_out32 = self.conv_out32(feat_cp16)
feat_out = F.interpolate(feat_out, size=(H, W))
feat_out16 = F.interpolate(feat_out16, size=(H, W))
feat_out32 = F.interpolate(feat_out32, size=(H, W))
return feat_out, feat_out16, feat_out32
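# Shape sketch (illustrative only, the helper name is hypothetical): BiSeNet returns
# three prediction maps, all interpolated back to the input resolution; FaceParser
# above only consumes the first one.
def _demo_bisenet_shapes():
    net = BiSeNet(n_classes=19)
    net.eval()
    x = paddle.randn([1, 3, 512, 512])
    out, out16, out32 = net(x)
    # each of out / out16 / out32 has shape [1, 19, 512, 512]
    return out.shape, out16.shape, out32.shape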
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle.utils.download import get_weights_path_from_url
import numpy as np
import math
model_urls = {
'resnet18': ('https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams',
'0ba53eea9bc970962d0ef96f7b94057e'),
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias_attr=False)
class BasicBlock(paddle.nn.Layer):
def __init__(self, in_chan, out_chan, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(in_chan, out_chan, stride)
self.bn1 = nn.BatchNorm(out_chan)
self.conv2 = conv3x3(out_chan, out_chan)
self.bn2 = nn.BatchNorm(out_chan)
self.relu = nn.ReLU()
self.downsample = None
if in_chan != out_chan or stride != 1:
self.downsample = nn.Sequential(
nn.Conv2d(in_chan,
out_chan,
kernel_size=1,
stride=stride,
bias_attr=False),
nn.BatchNorm(out_chan),
)
def forward(self, x):
residual = self.conv1(x)
residual = self.relu(self.bn1(residual))
residual = self.conv2(residual)
residual = self.bn2(residual)
shortcut = x
if self.downsample is not None:
shortcut = self.downsample(x)
out = shortcut + residual
out = self.relu(out)
return out
def create_layer_basic(in_chan, out_chan, bnum, stride=1):
layers = [BasicBlock(in_chan, out_chan, stride=stride)]
for i in range(bnum - 1):
layers.append(BasicBlock(out_chan, out_chan, stride=1))
return nn.Sequential(*layers)
class Resnet18(paddle.nn.Layer):
def __init__(self):
super(Resnet18, self).__init__()
self.conv1 = nn.Conv2d(3,
64,
kernel_size=7,
stride=2,
padding=3,
bias_attr=False)
self.bn1 = nn.BatchNorm(64)
self.relu = nn.ReLU()
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)
def forward(self, x):
x = self.conv1(x)
x = self.relu(self.bn1(x))
x = self.maxpool(x)
x = self.layer1(x)
feat8 = self.layer2(x) # 1/8
feat16 = self.layer3(feat8) # 1/16
feat32 = self.layer4(feat16) # 1/32
return feat8, feat16, feat32
def resnet18(pretrained=False, **kwargs):
model = Resnet18()
arch = 'resnet18'
if pretrained:
weight_path = './resnet.pdparams'
param, _ = paddle.load(weight_path)
model.set_dict(param)
return model
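# Feature-pyramid sketch (illustrative, not part of the original file): for a 512x512
# input, Resnet18 yields feature maps at 1/8, 1/16 and 1/32 resolution with 128, 256
# and 512 channels respectively, which is what ContextPath above relies on.
def _demo_resnet18_features():
    net = resnet18()
    feat8, feat16, feat32 = net(paddle.randn([1, 3, 512, 512]))
    # [1, 128, 64, 64], [1, 256, 32, 32], [1, 512, 16, 16]
    return feat8.shape, feat16.shape, feat32.shape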
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .base_model import BaseModel
from .cycle_gan_model import CycleGANModel
from .pix2pix_model import Pix2PixModel
from .srgan_model import SRGANModel
from .sr_model import SRModel
from .makeup_model import MakeupModel
from .vgg import vgg16
@@ -26,7 +26,7 @@ class BaseModel(ABC):
     When creating your custom class, you need to implement your own initialization.
     In this function, you should first call <BaseModel.__init__(self, opt)>
     Then, you need to define four lists:
-        -- self.loss_names (str list): specify the training losses that you want to plot and save.
+        -- self.losses (str list): specify the training losses that you want to plot and save.
         -- self.model_names (str list): define networks used in our training.
         -- self.visual_names (str list): specify the images that you want to display and save.
         -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example.
@@ -37,7 +37,7 @@
                                      opt.output_dir,
                                      opt.model.name)  # save all the checkpoints to save_dir
-        self.loss_names = []
+        self.losses = OrderedDict()
         self.model_names = []
         self.visual_names = []
         self.optimizers = []
@@ -115,13 +115,7 @@
     def get_current_losses(self):
         """Return training losses / errors. train.py will print out these errors on console, and save them to a file"""
-        errors_ret = OrderedDict()
-        for name in self.loss_names:
-            if isinstance(name, str):
-                errors_ret[name] = float(
-                    getattr(self, 'loss_' + name)
-                )  # float(...) works for both scalar tensor and float number
-        return errors_ret
+        return self.losses

     def set_requires_grad(self, nets, requires_grad=False):
         """Set requires_grad=False for all the networks to avoid unnecessary computations
......
@@ -2,18 +2,9 @@ import paddle
 from ..utils.registry import Registry

 MODELS = Registry("MODEL")


 def build_model(cfg):
-    # dataset = MODELS.get(cfg.MODEL.name)(cfg.MODEL)
-    # place = paddle.CUDAPlace(0)
-    # dataloader = paddle.io.DataLoader(dataset,
-    #                                   batch_size=1, #opt.batch_size,
-    #                                   places=place,
-    #                                   shuffle=True, #not opt.serial_batches,
-    #                                   num_workers=0)#int(opt.num_threads))
     model = MODELS.get(cfg.model.name)(cfg)
     return model
-    # pass
@@ -31,10 +31,6 @@ class CycleGANModel(BaseModel):
             opt (config)-- stores all the experiment flags; needs to be a subclass of Dict
         """
         BaseModel.__init__(self, opt)
-        # specify the training losses you want to print out. The training/test scripts will call <BaseModel.get_current_losses>
-        self.loss_names = [
-            'D_A', 'G_A', 'cycle_A', 'idt_A', 'D_B', 'G_B', 'cycle_B', 'idt_B'
-        ]
         # specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals>
         visual_names_A = ['real_A', 'fake_B', 'rec_A']
         visual_names_B = ['real_B', 'fake_A', 'rec_B']
@@ -165,11 +161,13 @@
         """Calculate GAN loss for discriminator D_A"""
         fake_B = self.fake_B_pool.query(self.fake_B)
         self.loss_D_A = self.backward_D_basic(self.netD_A, self.real_B, fake_B)
+        self.losses['D_A_loss'] = self.loss_D_A

     def backward_D_B(self):
         """Calculate GAN loss for discriminator D_B"""
         fake_A = self.fake_A_pool.query(self.fake_A)
         self.loss_D_B = self.backward_D_basic(self.netD_B, self.real_A, fake_A)
+        self.losses['D_B_loss'] = self.loss_D_B

     def backward_G(self):
         """Calculate the loss for generators G_A and G_B"""
@@ -200,6 +198,13 @@
         # Backward cycle loss || G_A(G_B(B)) - B||
         self.loss_cycle_B = self.criterionCycle(self.rec_B,
                                                 self.real_B) * lambda_B
+        self.losses['G_idt_A_loss'] = self.loss_idt_A
+        self.losses['G_idt_B_loss'] = self.loss_idt_B
+        self.losses['G_A_adv_loss'] = self.loss_G_A
+        self.losses['G_B_adv_loss'] = self.loss_G_B
+        self.losses['G_A_cycle_loss'] = self.loss_cycle_A
+        self.losses['G_B_cycle_loss'] = self.loss_cycle_B
+
         # combined loss and calculate gradients
         self.loss_G = self.loss_G_A + self.loss_G_B + self.loss_cycle_A + self.loss_cycle_B + self.loss_idt_A + self.loss_idt_B
......
-import functools
-import numpy as np
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import paddle
+import functools
+import numpy as np
 import paddle.nn as nn
+import paddle.nn.functional as F
+
+from ...modules.nn import Spectralnorm
 from ...modules.norm import build_norm_layer
 from .builder import DISCRIMINATORS
@@ -14,7 +30,7 @@ class NLayerDiscriminator(nn.Layer):
     def __init__(self, input_nc, ndf=64, n_layers=3, norm_type='instance'):
         """Construct a PatchGAN discriminator
-        Args:
+        Parameters:
             input_nc (int) -- the number of channels in input images
             ndf (int)      -- the number of filters in the last conv layer
             n_layers (int) -- the number of conv layers in the discriminator
@@ -22,49 +38,107 @@ class NLayerDiscriminator(nn.Layer):
         """
         super(NLayerDiscriminator, self).__init__()
         norm_layer = build_norm_layer(norm_type)
-        if type(norm_layer) == functools.partial:
-            use_bias = norm_layer.func == nn.InstanceNorm
+        if type(
+                norm_layer
+        ) == functools.partial:  # no need to use bias as BatchNorm2d has affine parameters
+            use_bias = norm_layer.func == nn.InstanceNorm2d
         else:
-            use_bias = norm_layer == nn.InstanceNorm
+            use_bias = norm_layer == nn.InstanceNorm2d

         kw = 4
         padw = 1
-        sequence = [
-            nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw),
-            nn.LeakyReLU(0.2)
-        ]
+
+        if norm_type == 'spectral':
+            sequence = [
+                Spectralnorm(
+                    nn.Conv2d(input_nc,
+                              ndf,
+                              kernel_size=kw,
+                              stride=2,
+                              padding=padw)),
+                nn.LeakyReLU(0.01)
+            ]
+        else:
+            sequence = [
+                nn.Conv2d(input_nc,
+                          ndf,
+                          kernel_size=kw,
+                          stride=2,
+                          padding=padw,
+                          bias_attr=use_bias),
+                nn.LeakyReLU(0.2)
+            ]
+
         nf_mult = 1
         nf_mult_prev = 1
-        for n in range(1, n_layers):
+        for n in range(1, n_layers):  # gradually increase the number of filters
             nf_mult_prev = nf_mult
             nf_mult = min(2**n, 8)
-            sequence += [
-                nn.Conv2d(ndf * nf_mult_prev,
-                          ndf * nf_mult,
-                          kernel_size=kw,
-                          stride=2,
-                          padding=padw,
-                          bias_attr=use_bias),
-                norm_layer(ndf * nf_mult),
-                nn.LeakyReLU(0.2)
-            ]
+            if norm_type == 'spectral':
+                sequence += [
+                    Spectralnorm(
+                        nn.Conv2d(ndf * nf_mult_prev,
+                                  ndf * nf_mult,
+                                  kernel_size=kw,
+                                  stride=2,
+                                  padding=padw)),
+                    nn.LeakyReLU(0.01)
+                ]
+            else:
+                sequence += [
+                    nn.Conv2d(ndf * nf_mult_prev,
+                              ndf * nf_mult,
+                              kernel_size=kw,
+                              stride=2,
+                              padding=padw,
+                              bias_attr=use_bias),
+                    norm_layer(ndf * nf_mult),
+                    nn.LeakyReLU(0.2)
+                ]

         nf_mult_prev = nf_mult
         nf_mult = min(2**n_layers, 8)
-        sequence += [
-            nn.Conv2d(ndf * nf_mult_prev,
-                      ndf * nf_mult,
-                      kernel_size=kw,
-                      stride=1,
-                      padding=padw,
-                      bias_attr=use_bias),
-            norm_layer(ndf * nf_mult),
-            nn.LeakyReLU(0.2)
-        ]
-        sequence += [
-            nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)
-        ]
+        if norm_type == 'spectral':
+            sequence += [
+                Spectralnorm(
+                    nn.Conv2d(ndf * nf_mult_prev,
+                              ndf * nf_mult,
+                              kernel_size=kw,
+                              stride=1,
+                              padding=padw)),
+                nn.LeakyReLU(0.01)
+            ]
+        else:
+            sequence += [
+                nn.Conv2d(ndf * nf_mult_prev,
+                          ndf * nf_mult,
+                          kernel_size=kw,
+                          stride=1,
+                          padding=padw,
+                          bias_attr=use_bias),
+                norm_layer(ndf * nf_mult),
+                nn.LeakyReLU(0.2)
+            ]
+
+        if norm_type == 'spectral':
+            sequence += [
+                Spectralnorm(
+                    nn.Conv2d(ndf * nf_mult,
+                              1,
+                              kernel_size=kw,
+                              stride=1,
+                              padding=padw,
+                              bias_attr=False))
+            ]  # output 1 channel prediction map
+        else:
+            sequence += [
+                nn.Conv2d(ndf * nf_mult,
+                          1,
+                          kernel_size=kw,
+                          stride=1,
+                          padding=padw,
+                          bias_attr=False)
+            ]  # output 1 channel prediction map
         self.model = nn.Sequential(*sequence)

     def forward(self, input):
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .resnet import ResnetGenerator
from .unet import UnetGenerator
from .rrdb_net import RRDBNet
from .makeup import GeneratorPSGANAttention
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import functools
import numpy as np
from ...modules.norm import build_norm_layer
from .builder import GENERATORS
class PONO(paddle.nn.Layer):
def __init__(self, eps=1e-5):
super(PONO, self).__init__()
self.eps = eps
def forward(self, x):
mean = paddle.mean(x, axis=1, keepdim=True)
var = paddle.mean(paddle.square(x - mean), axis=1, keepdim=True)
tmp = (x - mean) / paddle.sqrt(var + self.eps)
return tmp
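# Quick property check for PONO (illustrative helper, assumes the class above): at
# every spatial position the output is normalized across the channel axis, so the
# per-position channel mean is ~0 and the variance is ~1 (up to the eps term).
def _check_pono():
    x = paddle.randn([2, 8, 4, 4])
    y = PONO()(x)
    mean = paddle.mean(y, axis=1)
    var = paddle.mean(paddle.square(y - paddle.mean(y, axis=1, keepdim=True)), axis=1)
    return mean, var  # mean ~ 0, var ~ 1 everywhere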
class ResidualBlock(paddle.nn.Layer):
"""Residual Block with instance normalization."""
def __init__(self, dim_in, dim_out, mode=None):
super(ResidualBlock, self).__init__()
if mode == 't':
weight_attr = False
bias_attr = False
elif mode == 'p' or (mode is None):
weight_attr = None
bias_attr = None
self.main = nn.Sequential(
nn.Conv2d(dim_in,
dim_out,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False),
nn.InstanceNorm2d(dim_out,
weight_attr=weight_attr,
bias_attr=bias_attr), nn.ReLU(),
nn.Conv2d(dim_out,
dim_out,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False),
nn.InstanceNorm2d(dim_out,
weight_attr=weight_attr,
bias_attr=bias_attr))
def forward(self, x):
"""forward"""
return x + self.main(x)
class StyleResidualBlock(paddle.nn.Layer):
"""Residual Block with instance normalization."""
def __init__(self, dim_in, dim_out):
super(StyleResidualBlock, self).__init__()
self.block1 = nn.Sequential(
nn.Conv2d(dim_in,
dim_out,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False), PONO())
ks = 3
pw = ks // 2
self.beta1 = nn.Conv2d(dim_in, dim_out, kernel_size=ks, padding=pw)
self.gamma1 = nn.Conv2d(dim_in, dim_out, kernel_size=ks, padding=pw)
self.block2 = nn.Sequential(
nn.ReLU(),
nn.Conv2d(dim_out,
dim_out,
kernel_size=3,
stride=1,
padding=1,
bias_attr=False), PONO())
self.beta2 = nn.Conv2d(dim_in, dim_out, kernel_size=ks, padding=pw)
self.gamma2 = nn.Conv2d(dim_in, dim_out, kernel_size=ks, padding=pw)
def forward(self, x, y):
"""forward"""
x_ = self.block1(x)
b = self.beta1(y)
g = self.gamma1(y)
x_ = (g + 1) * x_ + b
x_ = self.block2(x_)
b = self.beta2(y)
g = self.gamma2(y)
x_ = (g + 1) * x_ + b
return x + x_
class MDNet(paddle.nn.Layer):
"""MDNet in PSGAN"""
def __init__(self, conv_dim=64, repeat_num=3):
super(MDNet, self).__init__()
layers = []
layers.append(
nn.Conv2d(3,
conv_dim,
kernel_size=7,
stride=1,
padding=3,
bias_attr=False))
layers.append(
nn.InstanceNorm2d(conv_dim, weight_attr=None, bias_attr=None))
layers.append(nn.ReLU())
# Down-Sampling
curr_dim = conv_dim
for i in range(2):
layers.append(
nn.Conv2d(curr_dim,
curr_dim * 2,
kernel_size=4,
stride=2,
padding=1,
bias_attr=False))
layers.append(
nn.InstanceNorm2d(curr_dim * 2,
weight_attr=None,
bias_attr=None))
layers.append(nn.ReLU())
curr_dim = curr_dim * 2
# Bottleneck
for i in range(repeat_num):
layers.append(ResidualBlock(dim_in=curr_dim, dim_out=curr_dim))
self.main = nn.Sequential(*layers)
def forward(self, x):
"""forward"""
out = self.main(x)
return out
class TNetDown(paddle.nn.Layer):
"""MDNet in PSGAN"""
def __init__(self, conv_dim=64, repeat_num=3):
super(TNetDown, self).__init__()
layers = []
layers.append(
nn.Conv2d(3,
conv_dim,
kernel_size=7,
stride=1,
padding=3,
bias_attr=False))
layers.append(
nn.InstanceNorm2d(conv_dim, weight_attr=False, bias_attr=False))
layers.append(nn.ReLU())
# Down-Sampling
curr_dim = conv_dim
for i in range(2):
layers.append(
nn.Conv2d(curr_dim,
curr_dim * 2,
kernel_size=4,
stride=2,
padding=1,
bias_attr=False))
layers.append(
nn.InstanceNorm2d(curr_dim * 2,
weight_attr=False,
bias_attr=False))
layers.append(nn.ReLU())
curr_dim = curr_dim * 2
# Bottleneck
for i in range(repeat_num):
layers.append(
ResidualBlock(dim_in=curr_dim, dim_out=curr_dim, mode='t'))
self.main = nn.Sequential(*layers)
def forward(self, x):
"""forward"""
out = self.main(x)
return out
class GetMatrix(paddle.fluid.dygraph.Layer):
def __init__(self, dim_in, dim_out):
super(GetMatrix, self).__init__()
self.get_gamma = nn.Conv2d(dim_in,
dim_out,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
self.get_beta = nn.Conv2d(dim_in,
dim_out,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
def forward(self, x):
gamma = self.get_gamma(x)
beta = self.get_beta(x)
return gamma, beta
class MANet(paddle.nn.Layer):
"""MANet in PSGAN"""
def __init__(self, conv_dim=64, repeat_num=3, w=0.01):
super(MANet, self).__init__()
self.encoder = TNetDown(conv_dim=conv_dim, repeat_num=repeat_num)
curr_dim = conv_dim * 4
self.w = w
self.beta = nn.Conv2d(curr_dim, curr_dim, kernel_size=3, padding=1)
self.gamma = nn.Conv2d(curr_dim, curr_dim, kernel_size=3, padding=1)
self.simple_spade = GetMatrix(curr_dim, 1) # get the makeup matrix
self.repeat_num = repeat_num
for i in range(repeat_num):
setattr(self, "bottlenecks_" + str(i),
ResidualBlock(dim_in=curr_dim, dim_out=curr_dim, mode='t'))
# Up-Sampling
self.upsamplers = []
self.up_betas = []
self.up_gammas = []
self.up_acts = []
y_dim = curr_dim
for i in range(2):
layers = []
layers.append(
nn.ConvTranspose2d(curr_dim,
curr_dim // 2,
kernel_size=4,
stride=2,
padding=1,
bias_attr=False))
layers.append(
nn.InstanceNorm2d(curr_dim // 2,
weight_attr=False,
bias_attr=False))
setattr(self, "up_acts_" + str(i), nn.ReLU())
setattr(
self, "up_betas_" + str(i),
nn.ConvTranspose2d(y_dim,
curr_dim // 2,
kernel_size=4,
stride=2,
padding=1))
setattr(
self, "up_gammas_" + str(i),
nn.ConvTranspose2d(y_dim,
curr_dim // 2,
kernel_size=4,
stride=2,
padding=1))
setattr(self, "up_samplers_" + str(i), nn.Sequential(*layers))
curr_dim = curr_dim // 2
self.img_reg = [
nn.Conv2d(curr_dim,
3,
kernel_size=7,
stride=1,
padding=3,
bias_attr=False)
]
self.img_reg = nn.Sequential(*self.img_reg)
def forward(self, x, y, x_p, y_p, consistency_mask, mask_x, mask_y):
"""forward"""
# y -> ref feature
# x -> src img
x = self.encoder(x)
_, c, h, w = x.shape
x_flat = x.reshape([-1, c, h * w])
x_flat = self.w * x_flat
if x_p is not None:
x_flat = paddle.concat([x_flat, x_p], axis=1)
_, c2, h2, w2 = y.shape
y_flat = y.reshape([-1, c2, h2 * w2])
y_flat = self.w * y_flat
if y_p is not None:
y_flat = paddle.concat([y_flat, y_p], axis=1)
a_ = paddle.matmul(x_flat, y_flat, transpose_x=True) * 200.0
# mask softmax
if consistency_mask is not None:
a_ = a_ - 100.0 * (1 - consistency_mask)
a = F.softmax(a_, axis=-1)
gamma, beta = self.simple_spade(y)
beta = beta.reshape([-1, h2 * w2, 1])
beta = paddle.matmul(a, beta)
beta = beta.reshape([-1, 1, h2, w2])
gamma = gamma.reshape([-1, h2 * w2, 1])
gamma = paddle.matmul(a, gamma)
gamma = gamma.reshape([-1, 1, h2, w2])
x = x * (1 + gamma) + beta
for i in range(self.repeat_num):
layer = getattr(self, "bottlenecks_" + str(i))
x = layer(x)
for idx in range(2):
layer = getattr(self, "up_samplers_" + str(idx))
x = layer(x)
layer = getattr(self, "up_acts_" + str(idx))
x = layer(x)
x = self.img_reg(x)
x = paddle.tanh(x)
return x, a
@GENERATORS.register()
class GeneratorPSGANAttention(paddle.nn.Layer):
def __init__(self, conv_dim=64, repeat_num=3):
super(GeneratorPSGANAttention, self).__init__()
self.ma_net = MANet(conv_dim=conv_dim, repeat_num=repeat_num)
self.md_net = MDNet(conv_dim=conv_dim, repeat_num=repeat_num)
def forward(self, x, y, x_p, y_p, consistency_mask, mask_x, mask_y):
"""forward"""
y = self.md_net(y)
out, a = self.ma_net(x, y, x_p, y_p, consistency_mask, mask_x, mask_y)
return out, a
@@ -45,19 +45,18 @@ class GANLoss(nn.Layer):
         Returns:
             A label tensor filled with ground truth label, and with the size of the input
         """
         if target_is_real:
             if not hasattr(self, 'target_real_tensor'):
-                self.target_real_tensor = paddle.fill_constant(
+                self.target_real_tensor = paddle.full(
                     shape=paddle.shape(prediction),
-                    value=self.target_real_label,
+                    fill_value=self.target_real_label,
                     dtype='float32')
             target_tensor = self.target_real_tensor
         else:
             if not hasattr(self, 'target_fake_tensor'):
-                self.target_fake_tensor = paddle.fill_constant(
+                self.target_fake_tensor = paddle.full(
                     shape=paddle.shape(prediction),
-                    value=self.target_fake_label,
+                    fill_value=self.target_fake_label,
                     dtype='float32')
             target_tensor = self.target_fake_tensor
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from .base_model import BaseModel
from .builder import MODELS
from .generators.builder import build_generator
from .discriminators.builder import build_discriminator
from .losses import GANLoss
from ..modules.init import init_weights
from ..solver import build_optimizer
from ..utils.image_pool import ImagePool
from ..utils.preprocess import *
from ..datasets.makeup_dataset import MakeupDataset
import numpy as np
from .vgg import vgg16
@MODELS.register()
class MakeupModel(BaseModel):
"""
This class implements the CycleGAN model, for learning image-to-image translation without paired data.
The model training requires '--dataset_mode unaligned' dataset.
By default, it uses a '--netG resnet_9blocks' ResNet generator,
a '--netD basic' discriminator (PatchGAN introduced by pix2pix),
and a least-square GANs objective ('--gan_mode lsgan').
CycleGAN paper: https://arxiv.org/pdf/1703.10593.pdf
"""
def __init__(self, opt):
"""Initialize the CycleGAN class.
Parameters:
opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions
"""
BaseModel.__init__(self, opt)
# specify the training losses you want to print out. The training/test scripts will call <BaseModel.get_current_losses>
# specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals>
visual_names_A = ['real_A', 'fake_A', 'rec_A']
visual_names_B = ['real_B', 'fake_B', 'rec_B']
if self.isTrain and self.opt.lambda_identity > 0.0:  # if identity loss is used, we also visualize idt_B=G_A(B) and idt_A=G_A(B)
visual_names_A.append('idt_B')
visual_names_B.append('idt_A')
self.visual_names = visual_names_A + visual_names_B # combine visualizations for A and B
self.vgg = vgg16(pretrained=True)
# specify the models you want to save to the disk. The training/test scripts will call <BaseModel.save_networks> and <BaseModel.load_networks>.
if self.isTrain:
self.model_names = ['G', 'D_A', 'D_B']
else: # during test time, only load Gs
self.model_names = ['G']
# define networks (both Generators and discriminators)
# The naming is different from those used in the paper.
# Code (vs. paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X)
self.netG = build_generator(opt.model.generator)
init_weights(self.netG, init_type='xavier', init_gain=1.0)
if self.isTrain: # define discriminators
self.netD_A = build_discriminator(opt.model.discriminator)
self.netD_B = build_discriminator(opt.model.discriminator)
init_weights(self.netD_A, init_type='xavier', init_gain=1.0)
init_weights(self.netD_B, init_type='xavier', init_gain=1.0)
if self.isTrain:
self.fake_A_pool = ImagePool(
opt.dataset.train.pool_size
) # create image buffer to store previously generated images
self.fake_B_pool = ImagePool(
opt.dataset.train.pool_size
) # create image buffer to store previously generated images
# define loss functions
self.criterionGAN = GANLoss(
opt.model.gan_mode) #.to(self.device) # define GAN loss.
self.criterionCycle = paddle.nn.L1Loss()
self.criterionIdt = paddle.nn.L1Loss()
self.criterionL1 = paddle.nn.L1Loss()
self.criterionL2 = paddle.nn.MSELoss()
self.build_lr_scheduler()
self.optimizer_G = build_optimizer(
opt.optimizer,
self.lr_scheduler,
parameter_list=self.netG.parameters())
# self.optimizer_D = paddle.optimizer.Adam(learning_rate=lr_scheduler_d, parameter_list=self.netD_A.parameters() + self.netD_B.parameters(), beta1=opt.beta1)
self.optimizer_DA = build_optimizer(
opt.optimizer,
self.lr_scheduler,
parameter_list=self.netD_A.parameters())
self.optimizer_DB = build_optimizer(
opt.optimizer,
self.lr_scheduler,
parameter_list=self.netD_B.parameters())
self.optimizers.append(self.optimizer_G)
# self.optimizers.append(self.optimizer_D)
self.optimizers.append(self.optimizer_DA)
self.optimizers.append(self.optimizer_DB)
self.optimizer_names.extend(
['optimizer_G', 'optimizer_DA', 'optimizer_DB'])
def set_input(self, input):
"""Unpack input data from the dataloader and perform necessary pre-processing steps.
Parameters:
input (dict): include the data itself and its metadata information.
The option 'direction' can be used to swap domain A and domain B.
"""
self.real_A = paddle.to_tensor(input['image_A'])
self.real_B = paddle.to_tensor(input['image_B'])
self.c_m = paddle.to_tensor(input['consis_mask'])
self.P_A = paddle.to_tensor(input['P_A'])
self.P_B = paddle.to_tensor(input['P_B'])
self.mask_A_aug = paddle.to_tensor(input['mask_A_aug'])
self.mask_B_aug = paddle.to_tensor(input['mask_B_aug'])
self.c_m_t = paddle.transpose(self.c_m, perm=[0, 2, 1])
if self.isTrain:
self.mask_A = paddle.to_tensor(input['mask_A'])
self.mask_B = paddle.to_tensor(input['mask_B'])
self.c_m_idt_a = paddle.to_tensor(input['consis_mask_idt_A'])
self.c_m_idt_b = paddle.to_tensor(input['consis_mask_idt_B'])
#self.hm_gt_A = self.hm_gt_A_lip + self.hm_gt_A_skin + self.hm_gt_A_eye
#self.hm_gt_B = self.hm_gt_B_lip + self.hm_gt_B_skin + self.hm_gt_B_eye
def forward(self):
"""Run forward pass; called by both functions <optimize_parameters> and <test>."""
self.fake_A, amm = self.netG(self.real_A, self.real_B, self.P_A,
self.P_B, self.c_m, self.mask_A_aug,
self.mask_B_aug) # G_A(A)
self.fake_B, _ = self.netG(self.real_B, self.real_A, self.P_B, self.P_A,
self.c_m_t, self.mask_A_aug,
self.mask_B_aug) # G_A(A)
self.rec_A, _ = self.netG(self.fake_A, self.real_A, self.P_A, self.P_A,
self.c_m_idt_a, self.mask_A_aug,
self.mask_B_aug) # G_A(A)
self.rec_B, _ = self.netG(self.fake_B, self.real_B, self.P_B, self.P_B,
self.c_m_idt_b, self.mask_A_aug,
self.mask_B_aug) # G_A(A)
def forward_test(self, input):
'''
not implement now
'''
return self.netG(input['image_A'], input['image_B'], input['P_A'],
input['P_B'], input['consis_mask'],
input['mask_A_aug'], input['mask_B_aug'])
def test(self, input):
"""Forward function used in test time.
This function wraps <forward> function in no_grad() so we don't save intermediate steps for backprop
It also calls <compute_visuals> to produce additional visualization results
"""
with paddle.no_grad():
return self.forward_test(input)
def backward_D_basic(self, netD, real, fake):
"""Calculate GAN loss for the discriminator
Parameters:
netD (network) -- the discriminator D
real (tensor array) -- real images
fake (tensor array) -- images generated by a generator
Return the discriminator loss.
We also call loss_D.backward() to calculate the gradients.
"""
# Real
pred_real = netD(real)
loss_D_real = self.criterionGAN(pred_real, True)
# Fake
pred_fake = netD(fake.detach())
loss_D_fake = self.criterionGAN(pred_fake, False)
# Combined loss and calculate gradients
loss_D = (loss_D_real + loss_D_fake) * 0.5
loss_D.backward()
return loss_D
def backward_D_A(self):
"""Calculate GAN loss for discriminator D_A"""
fake_B = self.fake_B_pool.query(self.fake_B)
self.loss_D_A = self.backward_D_basic(self.netD_A, self.real_B, fake_B)
self.losses['D_A_loss'] = self.loss_D_A
def backward_D_B(self):
"""Calculate GAN loss for discriminator D_B"""
fake_A = self.fake_A_pool.query(self.fake_A)
self.loss_D_B = self.backward_D_basic(self.netD_B, self.real_A, fake_A)
self.losses['D_B_loss'] = self.loss_D_B
def backward_G(self):
"""Calculate the loss for generators G_A and G_B"""
'''
self.loss_names = [
'G_A_vgg',
'G_B_vgg',
'G_bg_consis'
]
# specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals>
visual_names_A = ['real_A', 'fake_B', 'rec_A', 'amm_a']
visual_names_B = ['real_B', 'fake_A', 'rec_B', 'amm_b']
'''
lambda_idt = self.opt.lambda_identity
lambda_A = self.opt.lambda_A
lambda_B = self.opt.lambda_B
lambda_vgg = 5e-3
# Identity loss
if lambda_idt > 0:
self.idt_A, _ = self.netG(self.real_A, self.real_A, self.P_A,
self.P_A, self.c_m_idt_a, self.mask_A_aug,
self.mask_B_aug) # G_A(A)
self.loss_idt_A = self.criterionIdt(
self.idt_A, self.real_A) * lambda_A * lambda_idt
self.idt_B, _ = self.netG(self.real_B, self.real_B, self.P_B,
self.P_B, self.c_m_idt_b, self.mask_A_aug,
self.mask_B_aug) # G_A(A)
self.loss_idt_B = self.criterionIdt(
self.idt_B, self.real_B) * lambda_B * lambda_idt
else:
self.loss_idt_A = 0
self.loss_idt_B = 0
# GAN loss D_A(G_A(A))
self.loss_G_A = self.criterionGAN(self.netD_A(self.fake_A), True)
# GAN loss D_B(G_B(B))
self.loss_G_B = self.criterionGAN(self.netD_B(self.fake_B), True)
# Forward cycle loss || G_B(G_A(A)) - A||
self.loss_cycle_A = self.criterionCycle(self.rec_A,
self.real_A) * lambda_A
# Backward cycle loss || G_A(G_B(B)) - B||
self.loss_cycle_B = self.criterionCycle(self.rec_B,
self.real_B) * lambda_B
self.losses['G_A_adv_loss'] = self.loss_G_A
self.losses['G_B_adv_loss'] = self.loss_G_B
mask_A_lip = self.mask_A_aug[:, 0].unsqueeze(1)
mask_B_lip = self.mask_B_aug[:, 0].unsqueeze(1)
mask_A_lip_np = mask_A_lip.numpy().squeeze()
mask_B_lip_np = mask_B_lip.numpy().squeeze()
mask_A_lip_np, mask_B_lip_np, index_A_lip, index_B_lip = mask_preprocess(
mask_A_lip_np, mask_B_lip_np)
real_A = paddle.nn.clip((self.real_A + 1.0) / 2.0, 0.0, 1.0) * 255.0
real_A_np = real_A.numpy().squeeze()
real_B = paddle.nn.clip((self.real_B + 1.0) / 2.0, 0.0, 1.0) * 255.0
real_B_np = real_B.numpy().squeeze()
fake_A = paddle.nn.clip((self.fake_A + 1.0) / 2.0, 0.0, 1.0) * 255.0
fake_A_np = fake_A.numpy().squeeze()
fake_B = paddle.nn.clip((self.fake_B + 1.0) / 2.0, 0.0, 1.0) * 255.0
fake_B_np = fake_B.numpy().squeeze()
fake_match_lip_A = hisMatch(fake_A_np, real_B_np, mask_A_lip_np,
mask_B_lip_np, index_A_lip)
fake_match_lip_B = hisMatch(fake_B_np, real_A_np, mask_B_lip_np,
mask_A_lip_np, index_B_lip)
fake_match_lip_A = paddle.to_tensor(fake_match_lip_A)
fake_match_lip_A.stop_gradient = True
fake_match_lip_A = fake_match_lip_A.unsqueeze(0)
fake_match_lip_B = paddle.to_tensor(fake_match_lip_B)
fake_match_lip_B.stop_gradient = True
fake_match_lip_B = fake_match_lip_B.unsqueeze(0)
fake_A_lip_masked = fake_A * mask_A_lip
fake_B_lip_masked = fake_B * mask_B_lip
g_A_lip_loss_his = self.criterionL1(fake_A_lip_masked, fake_match_lip_A)
g_B_lip_loss_his = self.criterionL1(fake_B_lip_masked, fake_match_lip_B)
#skin
mask_A_skin = self.mask_A_aug[:, 1].unsqueeze(1)
mask_B_skin = self.mask_B_aug[:, 1].unsqueeze(1)
mask_A_skin_np = mask_A_skin.numpy().squeeze()
mask_B_skin_np = mask_B_skin.numpy().squeeze()
mask_A_skin_np, mask_B_skin_np, index_A_skin, index_B_skin = mask_preprocess(
mask_A_skin_np, mask_B_skin_np)
fake_match_skin_A = hisMatch(fake_A_np, real_B_np, mask_A_skin_np,
mask_B_skin_np, index_A_skin)
fake_match_skin_B = hisMatch(fake_B_np, real_A_np, mask_B_skin_np,
mask_A_skin_np, index_B_skin)
fake_match_skin_A = paddle.to_tensor(fake_match_skin_A)
fake_match_skin_A.stop_gradient = True
fake_match_skin_A = fake_match_skin_A.unsqueeze(0)
fake_match_skin_B = paddle.to_tensor(fake_match_skin_B)
fake_match_skin_B.stop_gradient = True
fake_match_skin_B = fake_match_skin_B.unsqueeze(0)
fake_A_skin_masked = fake_A * mask_A_skin
fake_B_skin_masked = fake_B * mask_B_skin
g_A_skin_loss_his = self.criterionL1(fake_A_skin_masked,
fake_match_skin_A)
g_B_skin_loss_his = self.criterionL1(fake_B_skin_masked,
fake_match_skin_B)
#eye
mask_A_eye = self.mask_A_aug[:, 2].unsqueeze(1)
mask_B_eye = self.mask_B_aug[:, 2].unsqueeze(1)
mask_A_eye_np = mask_A_eye.numpy().squeeze()
mask_B_eye_np = mask_B_eye.numpy().squeeze()
mask_A_eye_np, mask_B_eye_np, index_A_eye, index_B_eye = mask_preprocess(
mask_A_eye_np, mask_B_eye_np)
fake_match_eye_A = hisMatch(fake_A_np, real_B_np, mask_A_eye_np,
mask_B_eye_np, index_A_eye)
fake_match_eye_B = hisMatch(fake_B_np, real_A_np, mask_B_eye_np,
mask_A_eye_np, index_B_eye)
fake_match_eye_A = paddle.to_tensor(fake_match_eye_A)
fake_match_eye_A.stop_gradient = True
fake_match_eye_A = fake_match_eye_A.unsqueeze(0)
fake_match_eye_B = paddle.to_tensor(fake_match_eye_B)
fake_match_eye_B.stop_gradient = True
fake_match_eye_B = fake_match_eye_B.unsqueeze(0)
fake_A_eye_masked = fake_A * mask_A_eye
fake_B_eye_masked = fake_B * mask_B_eye
g_A_eye_loss_his = self.criterionL1(fake_A_eye_masked, fake_match_eye_A)
g_B_eye_loss_his = self.criterionL1(fake_B_eye_masked, fake_match_eye_B)
self.loss_G_A_his = (g_A_eye_loss_his + g_A_lip_loss_his +
g_A_skin_loss_his * 0.1) * 0.01
self.loss_G_B_his = (g_B_eye_loss_his + g_B_lip_loss_his +
g_B_skin_loss_his * 0.1) * 0.01
self.losses['G_A_his_loss'] = self.loss_G_A_his
self.losses['G_B_his_loss'] = self.loss_G_B_his
#vgg loss
vgg_s = self.vgg(self.real_A)
vgg_s.stop_gradient = True
vgg_fake_A = self.vgg(self.fake_A)
self.loss_A_vgg = self.criterionL2(vgg_fake_A,
vgg_s) * lambda_A * lambda_vgg
vgg_r = self.vgg(self.real_B)
vgg_r.stop_gradient = True
vgg_fake_B = self.vgg(self.fake_B)
self.loss_B_vgg = self.criterionL2(vgg_fake_B,
vgg_r) * lambda_B * lambda_vgg
self.loss_rec = (self.loss_cycle_A + self.loss_cycle_B +
self.loss_A_vgg + self.loss_B_vgg) * 0.2
self.loss_idt = (self.loss_idt_A + self.loss_idt_B) * 0.2
self.losses['G_A_vgg_loss'] = self.loss_A_vgg
self.losses['G_B_vgg_loss'] = self.loss_B_vgg
self.losses['G_rec_loss'] = self.loss_rec
self.losses['G_idt_loss'] = self.loss_idt
# bg consistency loss
mask_A_consis = paddle.cast(
(self.mask_A == 0), dtype='float32') + paddle.cast(
(self.mask_A == 10), dtype='float32') + paddle.cast(
(self.mask_A == 8), dtype='float32')
mask_A_consis = paddle.unsqueeze(paddle.clip(mask_A_consis, 0, 1), 1)
self.loss_G_bg_consis = self.criterionL1(
self.real_A * mask_A_consis, self.fake_A * mask_A_consis) * 0.1
# combined loss and calculate gradients
self.loss_G = self.loss_G_A + self.loss_G_B + self.loss_rec + self.loss_idt + self.loss_G_A_his + self.loss_G_B_his + self.loss_G_bg_consis
self.loss_G.backward()
def optimize_parameters(self):
"""Calculate losses, gradients, and update network weights; called in every training iteration"""
# forward
self.forward() # compute fake images and reconstruction images.
# G_A and G_B
self.set_requires_grad(
[self.netD_A, self.netD_B],
False) # Ds require no gradients when optimizing Gs
# self.optimizer_G.clear_gradients() #zero_grad() # set G_A and G_B's gradients to zero
self.backward_G() # calculate gradients for G_A and G_B
self.optimizer_G.minimize(
self.loss_G) #step() # update G_A and G_B's weights
self.optimizer_G.clear_gradients()
# self.optimizer_G.clear_gradients()
# D_A and D_B
# self.set_requires_grad([self.netD_A, self.netD_B], True)
self.set_requires_grad(self.netD_A, True)
# self.optimizer_D.clear_gradients() #zero_grad() # set D_A and D_B's gradients to zero
self.backward_D_A() # calculate gradients for D_A
self.optimizer_DA.minimize(
self.loss_D_A) #step() # update D_A and D_B's weights
self.optimizer_DA.clear_gradients() #zero_g
self.set_requires_grad(self.netD_B, True)
# self.optimizer_DB.clear_gradients() #zero_grad() # set D_A and D_B's gradients to zero
self.backward_D_B() # calculate graidents for D_B
self.optimizer_DB.minimize(
self.loss_D_B) #step() # update D_A and D_B's weights
self.optimizer_DB.clear_gradients(
) #zero_grad() # set D_A and D_B's gradients to zero
@@ -31,7 +31,6 @@ class Pix2PixModel(BaseModel):
         """
         BaseModel.__init__(self, opt)
         # specify the training losses you want to print out. The training/test scripts will call <BaseModel.get_current_losses>
-        self.loss_names = ['G_GAN', 'G_L1', 'D_real', 'D_fake']
         # specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals>
         self.visual_names = ['real_A', 'fake_B', 'real_B']
         # specify the models you want to save to the disk.
@@ -81,8 +80,8 @@
         """
         AtoB = self.opt.dataset.train.direction == 'AtoB'
-        self.real_A = paddle.to_variable(input['A' if AtoB else 'B'])
-        self.real_B = paddle.to_variable(input['B' if AtoB else 'A'])
+        self.real_A = paddle.to_tensor(input['A' if AtoB else 'B'])
+        self.real_B = paddle.to_tensor(input['B' if AtoB else 'A'])
         self.image_paths = input['A_paths' if AtoB else 'B_paths']
@@ -114,6 +113,9 @@
         else:
             self.loss_D.backward()

+        self.losses['D_fake_loss'] = self.loss_D_fake
+        self.losses['D_real_loss'] = self.loss_D_real
+
     def backward_G(self):
         """Calculate GAN and L1 loss for the generator"""
         # First, G(A) should fake the discriminator
@@ -134,6 +136,9 @@
         else:
             self.loss_G.backward()

+        self.losses['G_adv_loss'] = self.loss_G_GAN
+        self.losses['G_L1_loss'] = self.loss_G_L1
+
     def optimize_parameters(self):
         # compute fake images: G(A)
         self.forward()
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
from paddle.utils.download import get_weights_path_from_url
from paddle.vision.models.vgg import make_layers
cfg = [
64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512,
512, 512, 'M'
]
model_urls = {
'vgg16': ('https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams',
'89bbffc0f87d260be9b8cdc169c991c4')
}
class VGG(nn.Layer):
def __init__(self, features):
super(VGG, self).__init__()
self.features = features
def forward(self, x):
x = self.features(x)
return x
def vgg16(pretrained=False):
features = make_layers(cfg)
model = VGG(features)
if pretrained:
weight_path = get_weights_path_from_url(model_urls['vgg16'][0],
model_urls['vgg16'][1])
param = paddle.load(weight_path)
model.load_dict(param)
return model
@@ -80,7 +80,7 @@ def calculate_gain(nonlinearity, param=None):
 @paddle.no_grad()
 def constant_(x, value):
-    temp_value = paddle.fill_constant(x.shape, x.dtype, value)
+    temp_value = paddle.full(x.shape, value, x.dtype)
     x.set_value(temp_value)
     return x
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import math


class _SpectralNorm(nn.SpectralNorm):
......
import paddle
import functools
import paddle.nn as nn

from .nn import Spectralnorm


class Identity(nn.Layer):
@@ -35,6 +36,8 @@ def build_norm_layer(norm_type='instance'):
            bias_attr=paddle.ParamAttr(initializer=nn.initializer.Constant(0.0),
                                       learning_rate=0.0,
                                       trainable=False))
    elif norm_type == 'spectral':
        norm_layer = functools.partial(Spectralnorm)
    elif norm_type == 'none':

        def norm_layer(x):
......
@@ -12,25 +12,8 @@ def build_lr_scheduler(cfg):
                 0, epoch + 1 - cfg.start_epoch) / float(cfg.decay_epochs + 1)
             return lr_l

-        scheduler = paddle.optimizer.lr_scheduler.LambdaLR(
-            cfg.learning_rate, lr_lambda=lambda_rule)
+        scheduler = paddle.optimizer.lr.LambdaDecay(cfg.learning_rate,
+                                                    lr_lambda=lambda_rule)
         return scheduler
     else:
         raise NotImplementedError
-
-
-# paddle.optimizer.lr_scheduler
-class LinearDecay(paddle.optimizer.lr_scheduler._LRScheduler):
-    def __init__(self, learning_rate, step_per_epoch, start_epoch,
-                 decay_epochs):
-        super(LinearDecay, self).__init__()
-        self.learning_rate = learning_rate
-        self.start_epoch = start_epoch
-        self.decay_epochs = decay_epochs
-        self.step_per_epoch = step_per_epoch
-
-    def step(self):
-        cur_epoch = int(self.step_num // self.step_per_epoch)
-        decay_rate = 1.0 - max(
-            0, cur_epoch + 1 - self.start_epoch) / float(self.decay_epochs + 1)
-        return self.create_lr_var(decay_rate * self.learning_rate)
-import os
-from tqdm import tqdm
+import numpy as np
+from scipy.spatial import ConvexHull
 import paddle
-import imageio
-from scipy.spatial import ConvexHull
-import numpy as np


 def normalize_kp(kp_source,
                  kp_driving,
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description='Segmentron')
    parser.add_argument('--config-file',
                        metavar="FILE",
                        help='config file path')
    # cuda setting
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    # checkpoint and log
    parser.add_argument('--resume',
                        type=str,
                        default=None,
                        help='put the path to resuming file if needed')
    parser.add_argument('--load',
                        type=str,
                        default=None,
                        help='put the path to resuming file if needed')
    # for evaluation
    parser.add_argument('--val-interval',
                        type=int,
                        default=1,
                        help='run validation every interval')
    parser.add_argument('--evaluate-only',
                        action='store_true',
                        default=False,
                        help='skip validation during training')
    # config options
    parser.add_argument('opts',
                        help='See config for all options',
                        default=None,
                        nargs=argparse.REMAINDER)
    # for inference
    parser.add_argument("--source_path",
                        default="",
                        metavar="FILE",
                        help="path to source image")
    parser.add_argument("--reference_dir",
                        default="",
                        help="path to reference images")
    parser.add_argument("--model_path", default="", help="model for loading")

    args = parser.parse_args()
    return args
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
def generate_P_from_lmks(lmks, resize, w, h):
    """Build the pixel-to-landmark relative-position matrix P from 68 facial landmarks."""
diff_size = (64, 64)
xs, ys = np.meshgrid(np.linspace(0, resize - 1, resize),
np.linspace(0, resize - 1, resize))
xs = xs[None].repeat(68, axis=0)
ys = ys[None].repeat(68, axis=0)
fix = np.concatenate([ys, xs], axis=0)
lmks = lmks.transpose(1, 0).reshape(-1, 1, 1)
diff = fix - lmks
diff = diff.transpose(1, 2, 0)
diff = cv2.resize(diff, diff_size, interpolation=cv2.INTER_NEAREST)
diff = diff.transpose(2, 0, 1).reshape(136, -1)
norm = np.linalg.norm(diff, axis=0)
P_np = diff / norm
return P_np
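# Illustrative sketch (not in the original file): how generate_P_from_lmks might
# be called, assuming lmks is a (68, 2) array of (y, x) pixel coordinates on a
# resize x resize image; the random landmarks below are placeholders.
def _demo_generate_P():
    lmks = np.random.uniform(16.0, 240.0, size=(68, 2)).astype(np.float32)
    P = generate_P_from_lmks(lmks, resize=256, w=256, h=256)
    print(P.shape)  # (136, 4096): 2 * 68 normalized offsets per 64x64 down-sampled pixel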
def copy_area(tar, src, lms):
    """Copy the landmark-bounded region (with a 16-pixel margin) from src into tar, then zero it in src."""
rect = [
int(min(lms[:, 1])) - 16,
int(min(lms[:, 0])) - 16,
int(max(lms[:, 1])) + 16 + 1,
int(max(lms[:, 0])) + 16 + 1
]
tar[rect[1]:rect[3], rect[0]:rect[2]] = \
src[rect[1]:rect[3], rect[0]:rect[2]]
src[rect[1]:rect[3], rect[0]:rect[2]] = 0
def rebound_box(mask, mask_B, mask_face):
    """Expand both region masks by a 16-pixel margin copied from the face mask."""
index_tmp = mask.nonzero()
x_index = index_tmp[0]
y_index = index_tmp[1]
index_tmp = mask_B.nonzero()
x_B_index = index_tmp[0]
y_B_index = index_tmp[1]
mask_temp = np.copy(mask)
mask_B_temp = np.copy(mask_B)
mask_temp[min(x_index) - 16:max(x_index) + 17, min(y_index) - 16:max(y_index) + 17] =\
mask_face[min(x_index) -
16:max(x_index) +
17, min(y_index) -
16:max(y_index) +
17]
mask_B_temp[min(x_B_index) - 16:max(x_B_index) + 17, min(y_B_index) - 16:max(y_B_index) + 17] =\
mask_face[min(x_B_index) -
16:max(x_B_index) +
17, min(y_B_index) -
16:max(y_B_index) +
17]
return mask_temp, mask_B_temp
def calculate_consis_mask(mask, mask_B):
    """Calculate the consistency mask between two (3, H, W) stacks of region masks."""
    h_a, w_a = mask.shape[1:]
    h_b, w_b = mask_B.shape[1:]
    mask_transpose = np.transpose(mask, (1, 2, 0))
    mask_B_transpose = np.transpose(mask_B, (1, 2, 0))
    mask = cv2.resize(mask_transpose,
                      dsize=(w_a // 4, h_a // 4),
                      interpolation=cv2.INTER_NEAREST)
    mask = np.transpose(mask, (2, 0, 1))
    mask_B = cv2.resize(mask_B_transpose,
                        dsize=(w_b // 4, h_b // 4),
                        interpolation=cv2.INTER_NEAREST)
    mask_B = np.transpose(mask_B, (2, 0, 1))
    h_a, w_a = mask.shape[1:]
    h_b, w_b = mask_B.shape[1:]
mask_lip = mask[0]
mask_skin = mask[1]
mask_eye = mask[2]
mask_B_lip = mask_B[0]
mask_B_skin = mask_B[1]
mask_B_eye = mask_B[2]
maskA_one_hot = np.zeros((h_a * w_a, 3))
maskA_one_hot[:, 0] = mask_skin.flatten()
maskA_one_hot[:, 1] = mask_eye.flatten()
maskA_one_hot[:, 2] = mask_lip.flatten()
maskB_one_hot = np.zeros((h_b * w_b, 3))
maskB_one_hot[:, 0] = mask_B_skin.flatten()
maskB_one_hot[:, 1] = mask_B_eye.flatten()
maskB_one_hot[:, 2] = mask_B_lip.flatten()
con_mask = np.matmul(maskA_one_hot.reshape((h_a * w_a, 3)),
np.transpose(maskB_one_hot.reshape((h_b * w_b, 3))))
con_mask = np.clip(con_mask, 0, 1)
return con_mask
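# Illustrative sketch (not in the original file): expected shapes for
# calculate_consis_mask, using random binary masks of a placeholder size.
def _demo_consis_mask():
    # each input is a (3, H, W) stack of lip / skin / eye masks
    mask_A = np.random.randint(0, 2, size=(3, 64, 64)).astype(np.float32)
    mask_B = np.random.randint(0, 2, size=(3, 64, 64)).astype(np.float32)
    con = calculate_consis_mask(mask_A, mask_B)
    # one row per down-sampled (H//4 * W//4) pixel of A, one column per pixel of B
    print(con.shape)  # (256, 256)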
def cal_hist(image):
    """
    Calculate the cumulative histogram (CDF) for each of the three channels.
    """
    hists = []
    for i in range(0, 3):
        channel = image[i]
        hist, _ = np.histogram(channel, bins=256, range=(0, 255))
        total = hist.sum()
        pdf = [v / total for v in hist]
        for j in range(1, 256):
            pdf[j] = pdf[j - 1] + pdf[j]
        hists.append(pdf)
    return hists
def cal_trans(ref, adj):
    """
    Calculate the transfer function between two cumulative histograms,
    following the histogram-matching algorithm (see the Wikipedia entry "Histogram matching").
    """
table = list(range(0, 256))
for i in list(range(1, 256)):
for j in list(range(1, 256)):
if ref[i] >= adj[j - 1] and ref[i] <= adj[j]:
table[i] = j
break
table[255] = 255
return table
def histogram_matching(dstImg, refImg, index):
    """
    Perform histogram matching: dstImg is transformed so that its histogram matches refImg's.
    index[0], index[1]: indices of the pixels to transform in dstImg
    index[2], index[3]: indices of the pixels used to compute the reference histogram in refImg
    """
dst_align = [dstImg[i, index[0], index[1]] for i in range(0, 3)]
ref_align = [refImg[i, index[2], index[3]] for i in range(0, 3)]
hist_ref = cal_hist(ref_align)
hist_dst = cal_hist(dst_align)
tables = [cal_trans(hist_dst[i], hist_ref[i]) for i in range(0, 3)]
mid = dst_align.copy()
for i in range(0, 3):
for k in range(0, len(index[0])):
dst_align[i][k] = tables[i][int(mid[i][k])]
for i in range(0, 3):
dstImg[i, index[0], index[1]] = dst_align[i]
return dstImg
def hisMatch(input_data, target_data, mask_src, mask_tar, index):
    """Histogram-match the masked region of input_data to the masked region of target_data."""
mask_src = np.float32(np.clip(mask_src, 0, 1))
mask_tar = np.float32(np.clip(mask_tar, 0, 1))
input_masked = np.float32(input_data) * mask_src
target_masked = np.float32(target_data) * mask_tar
input_match = histogram_matching(input_masked, target_masked, index)
return input_match
def mask_preprocess(mask, mask_B):
    """Collect the nonzero-pixel indices of both masks for histogram matching."""
index_tmp = mask.nonzero()
x_index = index_tmp[0]
y_index = index_tmp[1]
index_tmp = mask_B.nonzero()
x_B_index = index_tmp[0]
y_B_index = index_tmp[1]
index = [x_index, y_index, x_B_index, y_B_index]
index_2 = [x_B_index, y_B_index, x_index, y_index]
return [mask, mask_B, index, index_2]
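# Illustrative sketch (not in the original file): wiring mask_preprocess and
# hisMatch together; the images, rectangular "lip" masks and sizes below are
# synthetic placeholders, not values used by the repository.
def _demo_hist_match():
    src_img = np.random.randint(0, 256, size=(3, 256, 256)).astype(np.float32)
    ref_img = np.random.randint(0, 256, size=(3, 256, 256)).astype(np.float32)
    lip_src = np.zeros((256, 256), dtype=np.float32)
    lip_ref = np.zeros((256, 256), dtype=np.float32)
    lip_src[150:180, 90:170] = 1.0   # dummy lip region in the source face
    lip_ref[160:190, 100:180] = 1.0  # dummy lip region in the reference face
    mask_src, mask_ref, index, _ = mask_preprocess(lip_src, lip_ref)
    # the source lip pixels are remapped to follow the reference lip histogram
    matched = hisMatch(src_img, ref_img, mask_src, mask_ref, index)
    print(matched.shape)  # (3, 256, 256)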
def generate_mask_aug(mask, lmks):
    """Build lip / skin / eye region masks from a face-parsing map and 68 facial landmarks."""
lms_eye_left = lmks[42:48]
lms_eye_right = lmks[36:42]
mask_eye_left = np.zeros_like(mask)
mask_eye_right = np.zeros_like(mask)
mask_face = np.float32(mask == 1) + np.float32(mask == 6)
copy_area(mask_eye_left, mask_face, lms_eye_left)
copy_area(mask_eye_right, mask_face, lms_eye_right)
mask_skin = mask_face
mask_lip = np.float32(mask == 7) + np.float32(mask == 9)
mask_eye = mask_eye_left + mask_eye_right
mask_aug = np.concatenate(
(np.expand_dims(mask_lip, 0), np.expand_dims(
mask_skin, 0), np.expand_dims(mask_eye, 0)), 0)
return mask_aug
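# Illustrative sketch (not in the original file): generate_mask_aug with a
# synthetic face-parsing map, using the label convention implied by the code
# above (1/6 = face or skin, 7/9 = lips); the landmark values are placeholders.
def _demo_mask_aug():
    parsing = np.zeros((256, 256), dtype=np.float32)
    parsing[60:220, 60:200] = 1      # dummy face region (label 1)
    parsing[170:190, 100:160] = 7    # dummy lip region (label 7)
    lmks = np.random.uniform(90.0, 180.0, size=(68, 2))  # dummy 68-point landmarks
    mask_aug = generate_mask_aug(parsing, lmks)
    print(mask_aug.shape)  # (3, 256, 256): lip, skin and eye channels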
...@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
...@@ -30,7 +31,7 @@ def main(args, cfg):
    # build trainer
    trainer = Trainer(cfg)
    # continue train or evaluate, checkpoint need contain epoch and optimizer info
    if args.resume:
        trainer.resume(args.resume)
...@@ -50,4 +51,3 @@ if __name__ == '__main__':
    cfg = get_config(args.config_file)
    main(args, cfg)