Commit 0e429acc authored by LielinJiang

fix conflict

import os
import datetime
import argparse
import numpy
import networks
modelnames = networks.__all__
# import datasets
datasetNames = ('Vimeo_90K_interp', )  # datasets.__all__; trailing comma keeps this a tuple so `choices` matches whole names
parser = argparse.ArgumentParser(description='DAIN')
parser.add_argument('--debug', action='store_true', help='Enable debug mode')
parser.add_argument('--netName',
type=str,
default='DAIN',
choices=modelnames,
help='model architecture: ' + ' | '.join(modelnames) +
' (default: DAIN)')
parser.add_argument('--datasetName',
default='Vimeo_90K_interp',
choices=datasetNames,
nargs='+',
help='dataset type : ' + ' | '.join(datasetNames) +
' (default: Vimeo_90K_interp)')
parser.add_argument('--video_path',
default='',
help='the path of selected videos')
parser.add_argument('--output_path', default='', help='the output root path')
parser.add_argument('--seed',
type=int,
default=1,
help='random seed (default: 1)')
parser.add_argument('--batch_size',
'-b',
type=int,
default=1,
help='batch size (default:1)')
parser.add_argument('--channels',
'-c',
type=int,
default=3,
choices=[1, 3],
help='channels of images (default:3)')
parser.add_argument('--filter_size',
'-f',
type=int,
default=4,
help='the size of filters used (default: 4)',
choices=[2, 4, 6, 5, 51])
parser.add_argument('--time_step',
type=float,
default=0.5,
                    help='time step of the interpolation; e.g. 0.5 doubles the frame rate')
parser.add_argument(
'--alpha',
type=float,
nargs='+',
default=[0.0, 1.0],
help=
    'the ratio of loss for interpolated and rectified result (default: [0.0, 1.0])'
)
parser.add_argument('--frame_rate',
type=int,
default=None,
help='frame rate of the input video')
parser.add_argument('--patience',
type=int,
default=5,
                    help='patience for the reduce-on-plateau scheduler')
parser.add_argument('--factor',
type=float,
default=0.2,
                    help='factor for the reduce-on-plateau scheduler')
parser.add_argument('--saved_model',
type=str,
default='',
help='path to the model weights')
parser.add_argument('--no-date',
action='store_true',
help='don\'t append date timestamp to folder')
parser.add_argument('--use_cuda',
default=True,
                    type=bool,  # argparse quirk: type=bool treats any non-empty string as True
help='use cuda or not')
parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not')
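# Example invocation (a sketch with hypothetical paths, not prescribed by this
# commit):
#     python predict.py --video_path data/input.mp4 --output_path output \
#         --time_step 0.25 --use_cuda True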
import os
import sys
cur_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(cur_path)
import time
import glob
import numpy as np
from imageio import imread, imsave
from tqdm import tqdm
import cv2
import paddle.fluid as fluid
from paddle.utils.download import get_path_from_url
from ppgan.utils.video import video2frames, frames2video
from util import *
from my_args import parser
DAIN_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/applications/DAIN_weight.tar'
def infer_engine(model_dir,
run_mode='fluid',
batch_size=1,
use_gpu=False,
min_subgraph_size=3):
    if not use_gpu and not run_mode == 'fluid':
        raise ValueError(
            "TensorRT prediction mode {} requires use_gpu=True, but use_gpu is {}"
            .format(run_mode, use_gpu))
precision_map = {
'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
}
config = fluid.core.AnalysisConfig(os.path.join(model_dir, 'model'),
os.path.join(model_dir, 'params'))
if use_gpu:
        # initial GPU memory (MB) and device ID
config.enable_use_gpu(100, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
else:
config.disable_gpu()
if run_mode in precision_map.keys():
config.enable_tensorrt_engine(workspace_size=1 << 10,
max_batch_size=batch_size,
min_subgraph_size=min_subgraph_size,
precision_mode=precision_map[run_mode],
use_static=False,
use_calib_mode=False)
    # disable glog output during prediction
    config.disable_glog_info()
    # enable memory optimization
    config.enable_memory_optim()
    # disable feed/fetch ops, required for zero_copy_run
    config.switch_use_feed_fetch_ops(False)
predictor = fluid.core.create_paddle_predictor(config)
return predictor
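# A minimal zero-copy usage sketch for the predictor returned above, based on
# the AnalysisPredictor API of this Paddle version; `_zero_copy_infer` and its
# arguments are illustrative, not part of this commit.
def _zero_copy_infer(model_dir, batch, use_gpu=True):
    predictor = infer_engine(model_dir, use_gpu=use_gpu)
    input_name = predictor.get_input_names()[0]
    input_tensor = predictor.get_input_tensor(input_name)
    input_tensor.copy_from_cpu(batch)  # numpy array -> input tensor
    predictor.zero_copy_run()  # possible because feed/fetch ops are disabled
    output_name = predictor.get_output_names()[0]
    return predictor.get_output_tensor(output_name).copy_to_cpu()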
def executor(model_dir, use_gpu=False):
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
program, feed_names, fetch_targets = fluid.io.load_inference_model(
dirname=model_dir,
executor=exe,
model_filename='model',
params_filename='params')
return exe, program, fetch_targets
class VideoFrameInterp(object):
def __init__(self,
time_step,
model_path,
video_path,
use_gpu=True,
key_frame_thread=0.,
output_path='output'):
self.video_path = video_path
self.output_path = os.path.join(output_path, 'DAIN')
if model_path is None:
model_path = get_path_from_url(DAIN_WEIGHT_URL, cur_path)
self.model_path = model_path
self.time_step = time_step
self.key_frame_thread = key_frame_thread
self.exe, self.program, self.fetch_targets = executor(model_path,
use_gpu=use_gpu)
def run(self):
frame_path_input = os.path.join(self.output_path, 'frames-input')
frame_path_interpolated = os.path.join(self.output_path,
'frames-interpolated')
frame_path_combined = os.path.join(self.output_path, 'frames-combined')
video_path_output = os.path.join(self.output_path, 'videos-output')
if not os.path.exists(self.output_path):
os.makedirs(self.output_path)
if not os.path.exists(frame_path_input):
os.makedirs(frame_path_input)
if not os.path.exists(frame_path_interpolated):
os.makedirs(frame_path_interpolated)
if not os.path.exists(frame_path_combined):
os.makedirs(frame_path_combined)
if not os.path.exists(video_path_output):
os.makedirs(video_path_output)
timestep = self.time_step
num_frames = int(1.0 / timestep) - 1
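        # e.g. time_step=0.25 gives int(1.0 / 0.25) - 1 = 3 intermediate
        # frames synthesized between every pair of input frames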
if self.video_path.endswith('.mp4'):
videos = [self.video_path]
else:
videos = sorted(glob.glob(os.path.join(self.video_path, '*.mp4')))
for cnt, vid in enumerate(videos):
print("Interpolating video:", vid)
cap = cv2.VideoCapture(vid)
fps = cap.get(cv2.CAP_PROP_FPS)
print("Old fps (frame rate): ", fps)
times_interp = int(1.0 / timestep)
r2 = str(int(fps) * times_interp)
print("New fps (frame rate): ", r2)
out_path = video2frames(vid, frame_path_input)
vidname = vid.split('/')[-1].split('.')[0]
tot_timer = AverageMeter()
proc_timer = AverageMeter()
end = time.time()
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
img = imread(frames[0])
int_width = img.shape[1]
int_height = img.shape[0]
channel = img.shape[2]
            if channel != 3:  # skip non-RGB videos
                continue
if int_width != ((int_width >> 7) << 7):
int_width_pad = (
((int_width >> 7) + 1) << 7) # more than necessary
padding_left = int((int_width_pad - int_width) / 2)
padding_right = int_width_pad - int_width - padding_left
else:
int_width_pad = int_width
padding_left = 32
padding_right = 32
if int_height != ((int_height >> 7) << 7):
int_height_pad = (
((int_height >> 7) + 1) << 7) # more than necessary
padding_top = int((int_height_pad - int_height) / 2)
padding_bottom = int_height_pad - int_height - padding_top
else:
int_height_pad = int_height
padding_top = 32
padding_bottom = 32
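            # Worked example (illustrative): a 1280x720 frame keeps its width
            # (1280 is a multiple of 128, so the fixed 32px pad per side is
            # used), while the height pads up to 768: 24px top, 24px bottom.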
frame_num = len(frames)
print('processing {} frames, from video: {}'.format(frame_num, vid))
if not os.path.exists(os.path.join(frame_path_interpolated,
vidname)):
os.makedirs(os.path.join(frame_path_interpolated, vidname))
if not os.path.exists(os.path.join(frame_path_combined, vidname)):
os.makedirs(os.path.join(frame_path_combined, vidname))
for i in tqdm(range(frame_num - 1)):
first = frames[i]
second = frames[i + 1]
img_first = imread(first)
img_second = imread(second)
'''--------------Frame change test------------------------'''
img_first_gray = np.dot(img_first[..., :3],
[0.299, 0.587, 0.114])
img_second_gray = np.dot(img_second[..., :3],
[0.299, 0.587, 0.114])
img_first_gray = img_first_gray.flatten(order='C')
img_second_gray = img_second_gray.flatten(order='C')
corr = np.corrcoef(img_first_gray, img_second_gray)[0, 1]
key_frame = False
if corr < self.key_frame_thread:
key_frame = True
'''-------------------------------------------------------'''
X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
if key_frame:
y_ = [
np.transpose(255.0 * X0.clip(0, 1.0), (1, 2, 0))
for i in range(num_frames)
]
else:
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
proc_end = time.time()
o = self.exe.run(self.program,
fetch_list=self.fetch_targets,
feed={"image": X})
y_ = o[0]
proc_timer.update(time.time() - proc_end)
tot_timer.update(time.time() - end)
end = time.time()
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :,
padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>6d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
num_frames = int(1.0 / timestep) - 1
input_dir = os.path.join(frame_path_input, vidname)
interpolated_dir = os.path.join(frame_path_interpolated, vidname)
combined_dir = os.path.join(frame_path_combined, vidname)
combine_frames(input_dir, interpolated_dir, combined_dir,
num_frames)
frame_pattern_combined = os.path.join(frame_path_combined, vidname,
'%08d.png')
video_pattern_output = os.path.join(video_path_output,
vidname + '.mp4')
if os.path.exists(video_pattern_output):
os.remove(video_pattern_output)
frames2video(frame_pattern_combined, video_pattern_output, r2)
return frame_pattern_combined, video_pattern_output
if __name__ == '__main__':
args = parser.parse_args()
    predictor = VideoFrameInterp(args.time_step,
                                 args.saved_model,
                                 args.video_path,
                                 output_path=args.output_path)
predictor.run()
import os, sys
import glob
import shutil
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
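# Usage sketch (illustrative values): meter = AverageMeter();
# meter.update(0.05, n=4) accumulates a per-item value over 4 items, after
# which meter.avg holds the running mean.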
def combine_frames(input, interpolated, combined, num_frames):
frames1 = sorted(glob.glob(os.path.join(input, '*.png')))
frames2 = sorted(glob.glob(os.path.join(interpolated, '*.png')))
num1 = len(frames1)
num2 = len(frames2)
# assert (num1 - 1) * num_frames == num2
for i in range(num1):
src = frames1[i]
imgname = int(src.split('/')[-1].split('.')[-2])
assert i == imgname
dst = os.path.join(combined, '{:08d}.png'.format(i * (num_frames + 1)))
shutil.copy2(src, dst)
if i < num1 - 1:
try:
for k in range(num_frames):
src = frames2[i * num_frames + k]
dst = os.path.join(
combined,
'{:08d}.png'.format(i * (num_frames + 1) + k + 1))
shutil.copy2(src, dst)
except Exception as e:
print(e)
print(len(frames2), num_frames, i, k, i * num_frames + k)
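# Layout sketch for num_frames=1 (an illustrative case, not part of this
# commit): input frame i is copied to combined/{2i:08d}.png and the single
# interpolated frame between i and i+1 lands at combined/{2i+1:08d}.png,
# doubling the frame count.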
@@ -49,6 +49,10 @@ parser.add_argument('--time_step',
type=float,
default=0.5,
help='choose the time steps')
parser.add_argument('--remove_duplicates',
default=False,
type=bool,
help='remove duplicate frames or not')
# DeepRemaster args
parser.add_argument('--reference_dir',
type=str,
@@ -88,7 +92,8 @@ if __name__ == "__main__":
paddle.enable_static()
predictor = DAINPredictor(args.output,
weight_path=args.DAIN_weight,
time_step=args.time_step)
time_step=args.time_step,
remove_duplicates=args.remove_duplicates)
frames_path, temp_video_path = predictor.run(temp_video_path)
paddle.disable_static()
elif order == 'DeepRemaster':
......
@@ -25,7 +25,7 @@ dataset:
train:
name: PairedDataset
dataroot: data/cityscapes
num_workers: 0
num_workers: 4
phase: train
max_dataset_size: inf
direction: BtoA
......
@@ -36,7 +36,8 @@ class DAINPredictor(BasePredictor):
weight_path=None,
time_step=None,
use_gpu=True,
key_frame_thread=0.):
key_frame_thread=0.,
remove_duplicates=False):
self.output_path = os.path.join(output_path, 'DAIN')
if weight_path is None:
cur_path = os.path.abspath(os.path.dirname(__file__))
@@ -45,6 +46,7 @@ class DAINPredictor(BasePredictor):
self.weight_path = weight_path
self.time_step = time_step
self.key_frame_thread = key_frame_thread
self.remove_duplicates = remove_duplicates
self.build_inference_model()
@@ -83,6 +85,9 @@ class DAINPredictor(BasePredictor):
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
if self.remove_duplicates:
frames = self.remove_duplicate_frames(out_path)
img = imread(frames[0])
int_width = img.shape[1]
@@ -111,8 +116,6 @@ class DAINPredictor(BasePredictor):
padding_bottom = 32
frame_num = len(frames)
print('processing {} frames, from video: {}'.format(
frame_num, video_path))
if not os.path.exists(os.path.join(frame_path_interpolated, vidname)):
os.makedirs(os.path.join(frame_path_interpolated, vidname))
@@ -140,50 +143,41 @@ class DAINPredictor(BasePredictor):
X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
if key_frame:
y_ = [
np.transpose(255.0 * X0.clip(0, 1.0), (1, 2, 0))
for i in range(num_frames)
]
else:
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
o = self.base_forward(X)
y_ = o[0]
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>6d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
o = self.base_forward(X)
y_ = o[0]
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [kk * timestep for kk in range(1, 1 + num_frames, 1)]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(frame_path_interpolated, vidname,
"{:0>6d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
num_frames = int(1.0 / timestep) - 1
@@ -225,3 +219,33 @@ class DAINPredictor(BasePredictor):
shutil.copy2(src, dst)
except Exception as e:
print(e)
def remove_duplicate_frames(self, paths):
def dhash(image, hash_size=8):
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
resized = cv2.resize(gray, (hash_size + 1, hash_size))
diff = resized[:, 1:] > resized[:, :-1]
return sum([2**i for (i, v) in enumerate(diff.flatten()) if v])
hashes = {}
image_paths = sorted(glob.glob(os.path.join(paths, '*.png')))
for image_path in image_paths:
image = cv2.imread(image_path)
h = dhash(image)
p = hashes.get(h, [])
p.append(image_path)
hashes[h] = p
for (h, hashed_paths) in hashes.items():
if len(hashed_paths) > 1:
for p in hashed_paths[1:]:
os.remove(p)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
for fid, frame in enumerate(frames):
new_name = '{:08d}'.format(fid) + '.png'
new_name = os.path.join(paths, new_name)
os.rename(frame, new_name)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
return frames
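# Illustration (a sketch, not part of this commit): two frames count as
# duplicates only when their 8x8 difference hashes collide exactly. A softer
# comparison would use the Hamming distance between hashes:
#     a, b = dhash(img1), dhash(img2)
#     hamming = bin(a ^ b).count('1')  # 0 means identical under dhash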
@@ -8,6 +8,7 @@ from .discriminators.builder import build_discriminator
from .losses import GANLoss
from ..solver import build_optimizer
from ..modules.init import init_weights
from ..utils.image_pool import ImagePool
@@ -56,10 +57,14 @@ class CycleGANModel(BaseModel):
# Code (vs. paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X)
self.netG_A = build_generator(opt.model.generator)
self.netG_B = build_generator(opt.model.generator)
init_weights(self.netG_A)
init_weights(self.netG_B)
if self.isTrain: # define discriminators
self.netD_A = build_discriminator(opt.model.discriminator)
self.netD_B = build_discriminator(opt.model.discriminator)
init_weights(self.netD_A)
init_weights(self.netD_B)
if self.isTrain:
if opt.lambda_identity > 0.0: # only works when input and output images have the same number of channels
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import functools
from ...modules.norm import build_norm_layer
# InstanceNorm is referenced by the use_bias checks below; assumed to be
# importable from paddle.nn in this Paddle version.
from paddle.nn import InstanceNorm
from .builder import GENERATORS
@GENERATORS.register()
class MobileResnetGenerator(nn.Layer):
def __init__(self,
input_channel,
output_nc,
ngf=64,
norm_type='instance',
use_dropout=False,
n_blocks=9,
padding_type='reflect'):
super(MobileResnetGenerator, self).__init__()
norm_layer = build_norm_layer(norm_type)
if type(norm_layer) == functools.partial:
use_bias = norm_layer.func == InstanceNorm
else:
use_bias = norm_layer == InstanceNorm
self.model = nn.LayerList([
nn.ReflectionPad2d([3, 3, 3, 3]),
nn.Conv2d(
input_channel,
int(ngf),
kernel_size=7,
padding=0,
bias_attr=use_bias), norm_layer(ngf), nn.ReLU()
])
n_downsampling = 2
for i in range(n_downsampling):
mult = 2**i
self.model.extend([
nn.Conv2d(
ngf * mult,
ngf * mult * 2,
kernel_size=3,
stride=2,
padding=1,
bias_attr=use_bias), norm_layer(ngf * mult * 2), nn.ReLU()
])
mult = 2**n_downsampling
for i in range(n_blocks):
self.model.extend([
MobileResnetBlock(
ngf * mult,
ngf * mult,
padding_type=padding_type,
norm_layer=norm_layer,
use_dropout=use_dropout,
use_bias=use_bias)
])
for i in range(n_downsampling):
mult = 2**(n_downsampling - i)
output_size = (i + 1) * 128
self.model.extend([
nn.ConvTranspose2d(
ngf * mult,
int(ngf * mult / 2),
kernel_size=3,
stride=2,
padding=1,
output_padding=1,
bias_attr=use_bias), norm_layer(int(ngf * mult / 2)),
nn.ReLU()
])
self.model.extend([nn.ReflectionPad2d([3, 3, 3, 3])])
self.model.extend([nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)])
self.model.extend([nn.Tanh()])
def forward(self, inputs):
y = inputs
for sublayer in self.model:
y = sublayer(y)
return y
class MobileResnetBlock(nn.Layer):
def __init__(self, in_c, out_c, padding_type, norm_layer, use_dropout,
use_bias):
super(MobileResnetBlock, self).__init__()
self.padding_type = padding_type
self.use_dropout = use_dropout
self.conv_block = nn.LayerList([])
p = 0
if self.padding_type == 'reflect':
self.conv_block.extend([nn.ReflectionPad2d([1, 1, 1, 1])])
elif self.padding_type == 'replicate':
self.conv_block.extend([nn.ReplicationPad2d([1, 1, 1, 1])])
elif self.padding_type == 'zero':
p = 1
else:
raise NotImplementedError('padding [%s] is not implemented' %
self.padding_type)
self.conv_block.extend([
SeparableConv2D(
num_channels=in_c,
num_filters=out_c,
filter_size=3,
padding=p,
stride=1), norm_layer(out_c), nn.ReLU()
])
        if self.use_dropout:
            self.conv_block.extend([nn.Dropout(0.5)])
if self.padding_type == 'reflect':
self.conv_block.extend([nn.ReflectionPad2d([1, 1, 1, 1])])
elif self.padding_type == 'replicate':
self.conv_block.extend([nn.ReplicationPad2d([1, 1, 1, 1])])
elif self.padding_type == 'zero':
p = 1
else:
raise NotImplementedError('padding [%s] is not implemented' %
self.padding_type)
self.conv_block.extend([
SeparableConv2D(
num_channels=out_c,
num_filters=in_c,
filter_size=3,
padding=p,
stride=1), norm_layer(in_c)
])
def forward(self, inputs):
y = inputs
for sublayer in self.conv_block:
y = sublayer(y)
out = inputs + y
return out
class SeparableConv2D(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
padding=0,
norm_layer=InstanceNorm,
use_bias=True,
scale_factor=1,
stddev=0.02):
super(SeparableConv2D, self).__init__()
self.conv = nn.LayerList([
nn.Conv2d(
in_channels=num_channels,
out_channels=num_channels * scale_factor,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=num_channels,
weight_attr=paddle.ParamAttr(
initializer=nn.initializer.Normal(
loc=0.0, scale=stddev)),
bias_attr=use_bias)
])
self.conv.extend([norm_layer(num_channels * scale_factor)])
self.conv.extend([
nn.Conv2d(
in_channels=num_channels * scale_factor,
out_channels=num_filters,
kernel_size=1,
stride=1,
weight_attr=paddle.ParamAttr(
initializer=nn.initializer.Normal(
loc=0.0, scale=stddev)),
bias_attr=use_bias)
])
def forward(self, inputs):
for sublayer in self.conv:
inputs = sublayer(inputs)
return inputs
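# Smoke-test sketch (assumes dynamic-graph mode and that paddle.to_tensor is
# available in this Paddle version; shapes are illustrative):
#     import numpy as np
#     gen = MobileResnetGenerator(input_channel=3, output_nc=3, ngf=64)
#     x = paddle.to_tensor(np.random.rand(1, 3, 256, 256).astype('float32'))
#     y = gen(x)  # expected output shape: [1, 3, 256, 256]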
@@ -8,6 +8,7 @@ from .discriminators.builder import build_discriminator
from .losses import GANLoss
from ..solver import build_optimizer
from ..modules.init import init_weights
from ..utils.image_pool import ImagePool
@@ -42,10 +43,12 @@ class Pix2PixModel(BaseModel):
# define networks (both generator and discriminator)
self.netG = build_generator(opt.model.generator)
init_weights(self.netG)
# define a discriminator; conditional GANs need to take both input and output images; Therefore, #channels for D is input_nc + output_nc
if self.isTrain:
self.netD = build_discriminator(opt.model.discriminator)
init_weights(self.netD)
if self.isTrain:
self.losses = {}
......
import math
import numpy as np
import paddle
def _calculate_fan_in_and_fan_out(tensor):
dimensions = len(tensor.shape)
if dimensions < 2:
raise ValueError(
"Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
)
num_input_fmaps = tensor.shape[1]
num_output_fmaps = tensor.shape[0]
receptive_field_size = 1
if len(tensor.shape) > 2:
receptive_field_size = paddle.numel(tensor[0][0])
fan_in = num_input_fmaps * receptive_field_size
fan_out = num_output_fmaps * receptive_field_size
return fan_in, fan_out
def _calculate_correct_fan(tensor, mode):
mode = mode.lower()
valid_modes = ['fan_in', 'fan_out']
if mode not in valid_modes:
raise ValueError("Mode {} not supported, please use one of {}".format(
mode, valid_modes))
fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
return fan_in if mode == 'fan_in' else fan_out
def calculate_gain(nonlinearity, param=None):
"""Return the recommended gain value for the given nonlinearity function.
The values are as follows:
================= ====================================================
nonlinearity gain
================= ====================================================
Linear / Identity :math:`1`
Conv{1,2,3}D :math:`1`
Sigmoid :math:`1`
Tanh :math:`\frac{5}{3}`
ReLU :math:`\sqrt{2}`
Leaky Relu :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
================= ====================================================
Args:
nonlinearity: the non-linear function (`nn.functional` name)
param: optional parameter for the non-linear function
"""
linear_fns = [
'linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d',
'conv_transpose2d', 'conv_transpose3d'
]
if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
return 1
elif nonlinearity == 'tanh':
return 5.0 / 3
elif nonlinearity == 'relu':
return math.sqrt(2.0)
elif nonlinearity == 'leaky_relu':
if param is None:
negative_slope = 0.01
elif not isinstance(param, bool) and isinstance(
param, int) or isinstance(param, float):
# True/False are instances of int, hence check above
negative_slope = param
else:
raise ValueError(
"negative_slope {} not a valid number".format(param))
return math.sqrt(2.0 / (1 + negative_slope**2))
else:
raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
@paddle.no_grad()
def constant_(x, value):
temp_value = paddle.fill_constant(x.shape, x.dtype, value)
x.set_value(temp_value)
return x
@paddle.no_grad()
def normal_(x, mean=0., std=1.):
temp_value = paddle.normal(mean, std, shape=x.shape)
x.set_value(temp_value)
return x
@paddle.no_grad()
def uniform_(x, a=-1., b=1.):
temp_value = paddle.uniform(min=a, max=b, shape=x.shape)
x.set_value(temp_value)
return x
@paddle.no_grad()
def xavier_uniform_(x, gain=1.):
"""Fills the input `Tensor` with values according to the method
described in `Understanding the difficulty of training deep feedforward
neural networks` - Glorot, X. & Bengio, Y. (2010), using a uniform
distribution. The resulting tensor will have values sampled from
:math:`\mathcal{U}(-a, a)` where
.. math::
a = \text{gain} \times \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}
Also known as Glorot initialization.
Args:
x: an n-dimensional `paddle.Tensor`
gain: an optional scaling factor
"""
fan_in, fan_out = _calculate_fan_in_and_fan_out(x)
std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
a = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation
return uniform_(x, -a, a)
@paddle.no_grad()
def xavier_normal_(x, gain=1.):
"""Fills the input `Tensor` with values according to the method
described in `Understanding the difficulty of training deep feedforward
neural networks` - Glorot, X. & Bengio, Y. (2010), using a normal
distribution. The resulting tensor will have values sampled from
:math:`\mathcal{N}(0, \text{std}^2)` where
.. math::
\text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}
Also known as Glorot initialization.
Args:
        x: an n-dimensional `paddle.Tensor`
gain: an optional scaling factor
"""
fan_in, fan_out = _calculate_fan_in_and_fan_out(x)
std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
return normal_(x, 0., std)
@paddle.no_grad()
def kaiming_uniform_(x, a=0, mode='fan_in', nonlinearity='leaky_relu'):
"""Fills the input `Tensor` with values according to the method
described in `Delving deep into rectifiers: Surpassing human-level
performance on ImageNet classification` - He, K. et al. (2015), using a
uniform distribution. The resulting tensor will have values sampled from
:math:`\mathcal{U}(-\text{bound}, \text{bound})` where
.. math::
\text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}
Also known as He initialization.
Args:
x: an n-dimensional `paddle.Tensor`
a: the negative slope of the rectifier used after this layer (only
used with ``'leaky_relu'``)
mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
preserves the magnitude of the variance of the weights in the
forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
backwards pass.
nonlinearity: the non-linear function (`nn.functional` name),
recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
"""
fan = _calculate_correct_fan(x, mode)
gain = calculate_gain(nonlinearity, a)
std = gain / math.sqrt(fan)
bound = math.sqrt(
3.0) * std # Calculate uniform bounds from standard deviation
temp_value = paddle.uniform(x.shape, min=-bound, max=bound)
x.set_value(temp_value)
return x
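# Worked example (illustrative Conv2d weight of shape [64, 32, 3, 3] with
# mode='fan_in'): fan = 32 * 3 * 3 = 288, gain = sqrt(2) for the default a=0,
# so std = sqrt(2 / 288) ≈ 0.0833 and bound = sqrt(3) * std ≈ 0.1443.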
@paddle.no_grad()
def kaiming_normal_(x, a=0, mode='fan_in', nonlinearity='leaky_relu'):
"""Fills the input `Tensor` with values according to the method
described in `Delving deep into rectifiers: Surpassing human-level
performance on ImageNet classification` - He, K. et al. (2015), using a
normal distribution. The resulting tensor will have values sampled from
:math:`\mathcal{N}(0, \text{std}^2)` where
.. math::
\text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}
Also known as He initialization.
Args:
x: an n-dimensional `paddle.Tensor`
a: the negative slope of the rectifier used after this layer (only
used with ``'leaky_relu'``)
mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
preserves the magnitude of the variance of the weights in the
forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
backwards pass.
nonlinearity: the non-linear function (`nn.functional` name),
recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
"""
fan = _calculate_correct_fan(x, mode)
gain = calculate_gain(nonlinearity, a)
std = gain / math.sqrt(fan)
temp_value = paddle.normal(0, std, shape=x.shape)
x.set_value(temp_value)
return x
def constant_init(layer, val, bias=0):
if hasattr(layer, 'weight') and layer.weight is not None:
constant_(layer.weight, val)
if hasattr(layer, 'bias') and layer.bias is not None:
constant_(layer.bias, bias)
def xavier_init(layer, gain=1, bias=0, distribution='normal'):
assert distribution in ['uniform', 'normal']
if distribution == 'uniform':
xavier_uniform_(layer.weight, gain=gain)
else:
xavier_normal_(layer.weight, gain=gain)
if hasattr(layer, 'bias') and layer.bias is not None:
constant_(layer.bias, bias)
def normal_init(layer, mean=0, std=1, bias=0):
normal_(layer.weight, mean, std)
if hasattr(layer, 'bias') and layer.bias is not None:
constant_(layer.bias, bias)
def uniform_init(layer, a=0, b=1, bias=0):
uniform_(layer.weight, a, b)
if hasattr(layer, 'bias') and layer.bias is not None:
constant_(layer.bias, bias)
def kaiming_init(layer,
a=0,
mode='fan_out',
nonlinearity='relu',
bias=0,
distribution='normal'):
assert distribution in ['uniform', 'normal']
if distribution == 'uniform':
kaiming_uniform_(layer.weight,
a=a,
mode=mode,
nonlinearity=nonlinearity)
else:
kaiming_normal_(layer.weight, a=a, mode=mode, nonlinearity=nonlinearity)
if hasattr(layer, 'bias') and layer.bias is not None:
constant_(layer.bias, bias)
def init_weights(net, init_type='normal', init_gain=0.02):
"""Initialize network weights.
Args:
net (nn.Layer): network to be initialized
init_type (str): the name of an initialization method: normal | xavier | kaiming | orthogonal
init_gain (float): scaling factor for normal, xavier and orthogonal.
We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and kaiming might
work better for some applications. Feel free to try yourself.
"""
def init_func(m): # define the initialization function
classname = m.__class__.__name__
if hasattr(m, 'weight') and (classname.find('Conv') != -1
or classname.find('Linear') != -1):
if init_type == 'normal':
normal_(m.weight, 0.0, init_gain)
elif init_type == 'xavier':
xavier_normal_(m.weight, gain=init_gain)
elif init_type == 'kaiming':
kaiming_normal_(m.weight, a=0, mode='fan_in')
else:
raise NotImplementedError(
'initialization method [%s] is not implemented' % init_type)
if hasattr(m, 'bias') and m.bias is not None:
constant_(m.bias, 0.0)
elif classname.find(
'BatchNorm'
) != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies.
normal_(m.weight, 1.0, init_gain)
constant_(m.bias, 0.0)
print('initialize network with %s' % init_type)
net.apply(init_func) # apply the initialization function <init_func>
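# Usage sketch (hypothetical network `net`):
#     init_weights(net)                       # normal_(weight, mean=0.0, std=0.02)
#     init_weights(net, init_type='xavier')   # xavier_normal_ with gain=init_gain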