PaddlePaddle / PaddleGAN
Commit bff7092f
Authored on Oct 13, 2020 by LielinJiang
fix conflict
Parents: 52cc814f, 9ce257c6
Showing 14 changed files with 443 additions and 342 deletions (+443 −342)
applications/tools/first-order-demo.py      +0   −69
applications/tools/video-enhance.py         +0   −120
configs/cyclegan_cityscapes.yaml            +20  −21
configs/cyclegan_horse2zebra.yaml           +20  −19
configs/pix2pix_cityscapes.yaml             +29  −20
configs/pix2pix_cityscapes_2gpus.yaml       +28  −20
configs/pix2pix_facades.yaml                +28  −20
ppgan/datasets/paired_dataset.py            +8   −21
ppgan/datasets/transforms/__init__.py       +1   −0
ppgan/datasets/transforms/builder.py        +46  −0
ppgan/datasets/transforms/transforms.py     +241 −5
ppgan/datasets/unpaired_dataset.py          +20  −12
ppgan/models/builder.py                     +0   −9
ppgan/utils/animate.py                      +2   −6
applications/tools/first-order-demo.py  (+0 −69)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

import paddle

from ppgan.apps.first_order_predictor import FirstOrderPredictor

parser = argparse.ArgumentParser()
parser.add_argument("--config", default=None, help="path to config")
parser.add_argument("--weight_path",
                    default=None,
                    help="path to checkpoint to restore")
parser.add_argument("--source_image", type=str, help="path to source image")
parser.add_argument("--driving_video", type=str, help="path to driving video")
parser.add_argument("--output", default='output', help="path to output")
parser.add_argument("--relative",
                    dest="relative",
                    action="store_true",
                    help="use relative or absolute keypoint coordinates")
parser.add_argument("--adapt_scale",
                    dest="adapt_scale",
                    action="store_true",
                    help="adapt movement scale based on convex hull of keypoints")
parser.add_argument("--find_best_frame",
                    dest="find_best_frame",
                    action="store_true",
                    help="Generate from the frame that is the most aligned with source. "
                    "(Only for faces, requires face_alignment lib)")
parser.add_argument("--best_frame",
                    dest="best_frame",
                    type=int,
                    default=None,
                    help="Set frame to start from.")
parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")
parser.set_defaults(relative=False)
parser.set_defaults(adapt_scale=False)

if __name__ == "__main__":
    args = parser.parse_args()

    if args.cpu:
        paddle.set_device('cpu')

    predictor = FirstOrderPredictor(output=args.output,
                                    weight_path=args.weight_path,
                                    config=args.config,
                                    relative=args.relative,
                                    adapt_scale=args.adapt_scale,
                                    find_best_frame=args.find_best_frame,
                                    best_frame=args.best_frame)
    predictor.run(args.source_image, args.driving_video)
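The script above is a thin argparse wrapper around FirstOrderPredictor. A minimal programmatic sketch of the same call sequence (the input paths below are placeholders, and the keyword values simply mirror the script's own defaults):

# Hypothetical usage sketch; 'source.png' and 'driving.mp4' are placeholders.
import paddle
from ppgan.apps.first_order_predictor import FirstOrderPredictor

paddle.set_device('cpu')  # optional, same effect as the --cpu flag

predictor = FirstOrderPredictor(output='output',
                                weight_path=None,   # script default
                                config=None,        # script default
                                relative=False,
                                adapt_scale=False,
                                find_best_frame=False,
                                best_frame=None)
predictor.run('source.png', 'driving.mp4')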
applications/tools/video-enhance.py  (+0 −120)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

import paddle

from ppgan.apps import DAINPredictor, DeepRemasterPredictor, DeOldifyPredictor, RealSRPredictor, EDVRPredictor

parser = argparse.ArgumentParser(description='Fix video')
parser.add_argument('--input', type=str, default=None, help='Input video')
parser.add_argument('--output', type=str, default='output', help='output dir')
parser.add_argument('--DAIN_weight', type=str, default=None, help='Path to model weight')
parser.add_argument('--DeepRemaster_weight', type=str, default=None, help='Path to model weight')
parser.add_argument('--DeOldify_weight', type=str, default=None, help='Path to model weight')
parser.add_argument('--RealSR_weight', type=str, default=None, help='Path to model weight')
parser.add_argument('--EDVR_weight', type=str, default=None, help='Path to model weight')
# DAIN args
parser.add_argument('--time_step', type=float, default=0.5, help='choose the time steps')
parser.add_argument('--remove_duplicates', action='store_true', default=False,
                    help='remove duplicate frames or not')
# DeepRemaster args
parser.add_argument('--reference_dir', type=str, default=None,
                    help='Path to the reference image directory')
parser.add_argument('--colorization', action='store_true', default=False,
                    help='Remaster with colorization')
parser.add_argument('--mindim', type=int, default=360, help='Length of minimum image edges')
# DeOldify args
parser.add_argument('--render_factor', type=int, default=32,
                    help='model inputsize=render_factor*16')
# process order support model name: [DAIN, DeepRemaster, DeOldify, RealSR, EDVR]
parser.add_argument('--proccess_order', type=str, default='none', nargs='+',
                    help='Process order')

if __name__ == "__main__":
    args = parser.parse_args()

    orders = args.proccess_order
    temp_video_path = None

    for order in orders:
        print('Model {} process start..'.format(order))
        if temp_video_path is None:
            temp_video_path = args.input
        if order == 'DAIN':
            paddle.enable_static()
            predictor = DAINPredictor(args.output,
                                      weight_path=args.DAIN_weight,
                                      time_step=args.time_step,
                                      remove_duplicates=args.remove_duplicates)
            frames_path, temp_video_path = predictor.run(temp_video_path)
            paddle.disable_static()
        elif order == 'DeepRemaster':
            predictor = DeepRemasterPredictor(
                args.output,
                weight_path=args.DeepRemaster_weight,
                colorization=args.colorization,
                reference_dir=args.reference_dir,
                mindim=args.mindim)
            frames_path, temp_video_path = predictor.run(temp_video_path)
        elif order == 'DeOldify':
            predictor = DeOldifyPredictor(args.output,
                                          weight_path=args.DeOldify_weight,
                                          render_factor=args.render_factor)
            frames_path, temp_video_path = predictor.run(temp_video_path)
        elif order == 'RealSR':
            predictor = RealSRPredictor(args.output,
                                        weight_path=args.RealSR_weight)
            frames_path, temp_video_path = predictor.run(temp_video_path)
        elif order == 'EDVR':
            paddle.enable_static()
            predictor = EDVRPredictor(args.output, weight_path=args.EDVR_weight)
            frames_path, temp_video_path = predictor.run(temp_video_path)
            paddle.disable_static()

        print('Model {} output frames path:'.format(order), frames_path)
        print('Model {} output video path:'.format(order), temp_video_path)
        print('Model {} process done!'.format(order))
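Each stage in the loop above feeds its output video into the next predictor, so the same chain can also be driven directly from Python. A minimal sketch for a DeOldify-then-RealSR pass (the input filename is a placeholder; weight_path=None mirrors the script's defaults):

# Hypothetical two-stage sketch mirroring --proccess_order DeOldify RealSR;
# 'old_movie.mp4' is a placeholder input.
from ppgan.apps import DeOldifyPredictor, RealSRPredictor

video = 'old_movie.mp4'

colorizer = DeOldifyPredictor('output', weight_path=None, render_factor=32)
frames_path, video = colorizer.run(video)   # colorized frames + video

sr = RealSRPredictor('output', weight_path=None)
frames_path, video = sr.run(video)          # super-resolved result

print('final video:', video)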
configs/cyclegan_cityscapes.yaml  (+20 −21)
@@ -36,16 +36,18 @@ dataset:
     output_nc: 3
     serial_batches: False
     pool_size: 50
-    transform:
-        load_size: 286
-        crop_size: 256
-        preprocess: resize_and_crop
-        no_flip: False
-        normalize:
-            mean:
-                (127.5, 127.5, 127.5)
-            std:
-                (127.5, 127.5, 127.5)
+    transforms:
+      - name: Resize
+        size: [286, 286]
+        interpolation: 2 #cv2.INTER_CUBIC
+      - name: RandomCrop
+        output_size: [256, 256]
+      - name: RandomHorizontalFlip
+        prob: 0.5
+      - name: Permute
+      - name: Normalize
+        mean: [127.5, 127.5, 127.5]
+        std: [127.5, 127.5, 127.5]
   test:
     name: SingleDataset
     dataroot: data/cityscapes/testB
@@ -55,17 +57,14 @@ dataset:
     output_nc: 3
     serial_batches: False
     pool_size: 50
-    transform:
-        load_size: 256
-        crop_size: 256
-        preprocess: resize_and_crop
-        no_flip: True
-        normalize:
-            mean:
-                (127.5, 127.5, 127.5)
-            std:
-                (127.5, 127.5, 127.5)
+    transforms:
+      - name: Resize
+        size: [256, 256]
+        interpolation: 2 #cv2.INTER_CUBIC
+      - name: Permute
+      - name: Normalize
+        mean: [127.5, 127.5, 127.5]
+        std: [127.5, 127.5, 127.5]
 optimizer:
   name: Adam
...
configs/cyclegan_horse2zebra.yaml  (+20 −19)
@@ -35,16 +35,18 @@ dataset:
     output_nc: 3
     serial_batches: False
     pool_size: 50
-    transform:
-        load_size: 286
-        crop_size: 256
-        preprocess: resize_and_crop
-        no_flip: False
-        normalize:
-            mean:
-                (127.5, 127.5, 127.5)
-            std:
-                (127.5, 127.5, 127.5)
+    transforms:
+      - name: Resize
+        size: [286, 286]
+        interpolation: 2 #cv2.INTER_CUBIC
+      - name: RandomCrop
+        output_size: [256, 256]
+      - name: RandomHorizontalFlip
+        prob: 0.5
+      - name: Permute
+      - name: Normalize
+        mean: [127.5, 127.5, 127.5]
+        std: [127.5, 127.5, 127.5]
   test:
     name: SingleDataset
     dataroot: data/horse2zebra/testA
@@ -55,15 +57,14 @@ dataset:
     serial_batches: False
     pool_size: 50
-    transform:
-        load_size: 256
-        crop_size: 256
-        preprocess: resize_and_crop
-        no_flip: True
-        normalize:
-            mean:
-                (127.5, 127.5, 127.5)
-            std:
-                (127.5, 127.5, 127.5)
+    transform:
+      transform:
+        - name: Resize
+          size: [256, 256]
+          interpolation: 2 #cv2.INTER_CUBIC
+        - name: Permute
+        - name: Normalize
+          mean: [127.5, 127.5, 127.5]
+          std: [127.5, 127.5, 127.5]
 optimizer:
   name: Adam
...
configs/pix2pix_cityscapes.yaml  (+29 −20)
@@ -33,16 +33,23 @@ dataset:
     output_nc: 3
     serial_batches: False
     pool_size: 0
-    transform:
-        load_size: 286
-        crop_size: 256
-        preprocess: resize_and_crop
-        no_flip: False
-        normalize:
-            mean:
-                (127.5, 127.5, 127.5)
-            std:
-                (127.5, 127.5, 127.5)
+    transforms:
+      - name: Resize
+        size: [286, 286]
+        interpolation: 2 #cv2.INTER_CUBIC
+        keys: [image, image]
+      - name: PairedRandomCrop
+        output_size: [256, 256]
+        keys: [image, image]
+      - name: PairedRandomHorizontalFlip
+        prob: 0.5
+        keys: [image, image]
+      - name: Permute
+        keys: [image, image]
+      - name: Normalize
+        mean: [127.5, 127.5, 127.5]
+        std: [127.5, 127.5, 127.5]
+        keys: [image, image]
   test:
     name: PairedDataset
     dataroot: data/cityscapes/
@@ -53,16 +60,18 @@ dataset:
     output_nc: 3
     serial_batches: True
     pool_size: 50
-    transform:
-        load_size: 256
-        crop_size: 256
-        preprocess: resize_and_crop
-        no_flip: True
-        normalize:
-            mean:
-                (127.5, 127.5, 127.5)
-            std:
-                (127.5, 127.5, 127.5)
+    transforms:
+      - name: Resize
+        size: [256, 256]
+        interpolation: 2 #cv2.INTER_CUBIC
+        keys: [image, image]
+      - name: Permute
+        keys: [image, image]
+      - name: Normalize
+        mean: [127.5, 127.5, 127.5]
+        std: [127.5, 127.5, 127.5]
+        keys: [image, image]
 optimizer:
   name: Adam
...
configs/pix2pix_cityscapes_2gpus.yaml  (+28 −20)
@@ -32,16 +32,23 @@ dataset:
     output_nc: 3
     serial_batches: False
     pool_size: 0
-    transform:
-        load_size: 286
-        crop_size: 256
-        preprocess: resize_and_crop
-        no_flip: False
-        normalize:
-            mean:
-                (127.5, 127.5, 127.5)
-            std:
-                (127.5, 127.5, 127.5)
+    transforms:
+      - name: Resize
+        size: [286, 286]
+        interpolation: 2 #cv2.INTER_CUBIC
+        keys: [image, image]
+      - name: PairedRandomCrop
+        output_size: [256, 256]
+        keys: [image, image]
+      - name: PairedRandomHorizontalFlip
+        prob: 0.5
+        keys: [image, image]
+      - name: Permute
+        keys: [image, image]
+      - name: Normalize
+        mean: [127.5, 127.5, 127.5]
+        std: [127.5, 127.5, 127.5]
+        keys: [image, image]
   test:
     name: PairedDataset
     dataroot: data/cityscapes/
@@ -52,16 +59,17 @@ dataset:
     output_nc: 3
     serial_batches: True
     pool_size: 50
-    transform:
-        load_size: 256
-        crop_size: 256
-        preprocess: resize_and_crop
-        no_flip: True
-        normalize:
-            mean:
-                (127.5, 127.5, 127.5)
-            std:
-                (127.5, 127.5, 127.5)
+    transforms:
+      - name: Resize
+        size: [256, 256]
+        interpolation: 2 #cv2.INTER_CUBIC
+        keys: [image, image]
+      - name: Permute
+        keys: [image, image]
+      - name: Normalize
+        mean: [127.5, 127.5, 127.5]
+        std: [127.5, 127.5, 127.5]
+        keys: [image, image]
 optimizer:
   name: Adam
...
configs/pix2pix_facades.yaml  (+28 −20)
@@ -32,16 +32,23 @@ dataset:
     output_nc: 3
     serial_batches: False
     pool_size: 0
-    transform:
-        load_size: 286
-        crop_size: 256
-        preprocess: resize_and_crop
-        no_flip: False
-        normalize:
-            mean:
-                (127.5, 127.5, 127.5)
-            std:
-                (127.5, 127.5, 127.5)
+    transforms:
+      - name: Resize
+        size: [286, 286]
+        interpolation: 2 #cv2.INTER_CUBIC
+        keys: [image, image]
+      - name: PairedRandomCrop
+        output_size: [256, 256]
+        keys: [image, image]
+      - name: PairedRandomHorizontalFlip
+        prob: 0.5
+        keys: [image, image]
+      - name: Permute
+        keys: [image, image]
+      - name: Normalize
+        mean: [127.5, 127.5, 127.5]
+        std: [127.5, 127.5, 127.5]
+        keys: [image, image]
   test:
     name: PairedDataset
     dataroot: data/facades/
@@ -52,16 +59,17 @@ dataset:
     output_nc: 3
     serial_batches: True
     pool_size: 50
-    transform:
-        load_size: 256
-        crop_size: 256
-        preprocess: resize_and_crop
-        no_flip: True
-        normalize:
-            mean:
-                (127.5, 127.5, 127.5)
-            std:
-                (127.5, 127.5, 127.5)
+    transforms:
+      - name: Resize
+        size: [256, 256]
+        interpolation: 2 #cv2.INTER_CUBIC
+        keys: [image, image]
+      - name: Permute
+        keys: [image, image]
+      - name: Normalize
+        mean: [127.5, 127.5, 127.5]
+        std: [127.5, 127.5, 127.5]
+        keys: [image, image]
 optimizer:
   name: Adam
...
ppgan/datasets/paired_dataset.py  (+8 −21)
@@ -5,13 +5,13 @@ from .base_dataset import BaseDataset, get_params, get_transform
 from .image_folder import make_dataset
 from .builder import DATASETS
+from .transforms.builder import build_transforms


 @DATASETS.register()
 class PairedDataset(BaseDataset):
     """A dataset class for paired image dataset.
     """
     def __init__(self, cfg):
         """Initialize this dataset class.
@@ -19,11 +19,14 @@ class PairedDataset(BaseDataset):
             cfg (dict) -- stores all the experiment flags
         """
         BaseDataset.__init__(self, cfg)
-        self.dir_AB = os.path.join(cfg.dataroot, cfg.phase)  # get the image directory
-        self.AB_paths = sorted(make_dataset(self.dir_AB, cfg.max_dataset_size))  # get image paths
-        assert (self.cfg.transform.load_size >= self.cfg.transform.crop_size)  # crop_size should be smaller than the size of loaded image
+        self.dir_AB = os.path.join(cfg.dataroot,
+                                   cfg.phase)  # get the image directory
+        self.AB_paths = sorted(make_dataset(self.dir_AB, cfg.max_dataset_size))  # get image paths

         self.input_nc = self.cfg.output_nc if self.cfg.direction == 'BtoA' else self.cfg.input_nc
         self.output_nc = self.cfg.input_nc if self.cfg.direction == 'BtoA' else self.cfg.output_nc
+        self.transforms = build_transforms(cfg.transforms)

     def __getitem__(self, index):
         """Return a data point and its metadata information.
@@ -49,27 +52,11 @@ class PairedDataset(BaseDataset):
         A = AB[:h, :w2, :]
         B = AB[:h, w2:, :]

         # apply the same transform to both A and B
-        transform_params = get_params(self.cfg.transform, (w2, h))
-        A_transform = get_transform(self.cfg.transform,
-                                    transform_params,
-                                    grayscale=(self.input_nc == 1))
-        B_transform = get_transform(self.cfg.transform,
-                                    transform_params,
-                                    grayscale=(self.output_nc == 1))
-        A = A_transform(A)
-        B = B_transform(B)
+        # transform_params = get_params(self.opt, A.size)
+        A, B = self.transforms((A, B))

         return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}

     def __len__(self):
         """Return the total number of images in the dataset."""
         return len(self.AB_paths)
-
-    def get_path_by_indexs(self, indexs):
-        if isinstance(indexs, paddle.Variable):
-            indexs = indexs.numpy()
-        current_paths = []
-        for index in indexs:
-            current_paths.append(self.AB_paths[index])
-        return current_paths
ppgan/datasets/transforms/__init__.py  (new file mode 100644, +1 −0)
from .transforms import RandomCrop, Resize, RandomHorizontalFlip, PairedRandomCrop, PairedRandomHorizontalFlip, Normalize, Permute
ppgan/datasets/transforms/builder.py  (new file mode 100644, +46 −0)
import copy
import traceback

import paddle

from ...utils.registry import Registry

TRANSFORMS = Registry("TRANSFORMS")


class Compose(object):
    """
    Composes several transforms together, for composing a list of transforms
    into a single dataset transform.

    Args:
        transforms (list): List of transforms to compose.

    Returns:
        A Compose object which is callable; calling it applies each of the
        given :attr:`transforms` sequentially.
    """
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, data):
        for f in self.transforms:
            try:
                data = f(data)
            except Exception as e:
                stack_info = traceback.format_exc()
                print("fail to perform transform [{}] with error: "
                      "{} and stack:\n{}".format(f, e, str(stack_info)))
                raise e
        return data


def build_transforms(cfg):
    transforms = []

    for trans_cfg in cfg:
        temp_trans_cfg = copy.deepcopy(trans_cfg)
        name = temp_trans_cfg.pop('name')
        transforms.append(TRANSFORMS.get(name)(**temp_trans_cfg))

    transforms = Compose(transforms)

    return transforms
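This new builder is what turns the transforms: lists in the updated YAML configs into a callable pipeline. A minimal sketch of that mapping; plain dicts stand in here for the YAML-backed config items, which is an assumption (the builder itself only needs iteration, copy.deepcopy and .pop('name')):

# Hypothetical sketch of build_transforms on a cyclegan-style transform list.
# Importing the builder executes the package __init__, which registers the
# transform classes with TRANSFORMS.
import numpy as np

from ppgan.datasets.transforms.builder import build_transforms

transforms_cfg = [
    {'name': 'Resize', 'size': [286, 286], 'interpolation': 2},  # cv2.INTER_CUBIC
    {'name': 'RandomCrop', 'output_size': [256, 256]},
    {'name': 'RandomHorizontalFlip', 'prob': 0.5},
    {'name': 'Permute'},
    {'name': 'Normalize',
     'mean': [127.5, 127.5, 127.5],
     'std': [127.5, 127.5, 127.5]},
]

pipeline = build_transforms(transforms_cfg)  # a Compose over five transforms

img = np.random.randint(0, 256, (300, 400, 3)).astype('float32')  # dummy HWC image
out = pipeline(img)
print(out.shape)  # (3, 256, 256), values roughly in [-1, 1]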
ppgan/datasets/transforms/transforms.py  (+241 −5)
+import sys
 import random
+import numbers
+import collections
+
+import numpy as np
+
+from paddle.utils import try_import
+import paddle.vision.transforms.functional as F
+
+from .builder import TRANSFORMS
+
+if sys.version_info < (3, 3):
+    Sequence = collections.Sequence
+    Iterable = collections.Iterable
+else:
+    Sequence = collections.abc.Sequence
+    Iterable = collections.abc.Iterable
+
+
+class Transform():
+    def _set_attributes(self, args):
+        """
+        Set attributes from the input list of parameters.
+
+        Args:
+            args (list): list of parameters.
+        """
+        if args:
+            for k, v in args.items():
+                if k != "self" and not k.startswith("_"):
+                    setattr(self, k, v)
+
+    def apply_image(self, input):
+        raise NotImplementedError
+
+    def __call__(self, inputs):
+        if isinstance(inputs, tuple):
+            inputs = list(inputs)
+        if self.keys is not None:
+            for i, key in enumerate(self.keys):
+                if isinstance(inputs, dict):
+                    inputs[key] = getattr(self, 'apply_' + key)(inputs[key])
+                elif isinstance(inputs, (list, tuple)):
+                    inputs[i] = getattr(self, 'apply_' + key)(inputs[i])
+        else:
+            inputs = self.apply_image(inputs)
+        if isinstance(inputs, list):
+            inputs = tuple(inputs)
+        return inputs
+
+
+@TRANSFORMS.register()
+class Resize(Transform):
+    """Resize the input Image to the given size.
+
+    Args:
+        size (int|list|tuple): Desired output size. If size is a sequence like
+            (h, w), output size will be matched to this. If size is an int,
+            smaller edge of the image will be matched to this number.
+            i.e, if height > width, then image will be rescaled to
+            (size * height / width, size)
+        interpolation (int, optional): Interpolation mode of resize. Default: 1.
+            0 : cv2.INTER_NEAREST
+            1 : cv2.INTER_LINEAR
+            2 : cv2.INTER_CUBIC
+            3 : cv2.INTER_AREA
+            4 : cv2.INTER_LANCZOS4
+            5 : cv2.INTER_LINEAR_EXACT
+            7 : cv2.INTER_MAX
+            8 : cv2.WARP_FILL_OUTLIERS
+            16: cv2.WARP_INVERSE_MAP
+    """
+    def __init__(self, size, interpolation=1, keys=None):
+        super().__init__()
+        assert isinstance(size, int) or (isinstance(size, Iterable)
+                                         and len(size) == 2)
+        self._set_attributes(locals())
+        if isinstance(self.size, Iterable):
+            self.size = tuple(size)
+
+    def apply_image(self, img):
+        return F.resize(img, self.size, self.interpolation)
+
+
-class RandomCrop(object):
-    def __init__(self, output_size):
+@TRANSFORMS.register()
+class RandomCrop(Transform):
+    def __init__(self, output_size, keys=None):
+        super().__init__()
+        self._set_attributes(locals())
         if isinstance(output_size, int):
             self.output_size = (output_size, output_size)
         else:
...
@@ -19,12 +105,162 @@ class RandomCrop(object):
         j = random.randint(0, w - tw)
         return i, j, th, tw

-    def __call__(self, img):
+    def apply_image(self, img):
         i, j, h, w = self._get_params(img)
         cropped_img = img[i:i + h, j:j + w]
         return cropped_img
+
+
+@TRANSFORMS.register()
+class PairedRandomCrop(RandomCrop):
+    def __init__(self, output_size, keys=None):
+        super().__init__(output_size, keys)
+
+        if isinstance(output_size, int):
+            self.output_size = (output_size, output_size)
+        else:
+            self.output_size = output_size
+
+    def apply_image(self, img, crop_prams=None):
+        if crop_prams is not None:
+            i, j, h, w = crop_prams
+        else:
+            i, j, h, w = self._get_params(img)
+        cropped_img = img[i:i + h, j:j + w]
+        return cropped_img
+
+    def __call__(self, inputs):
+        if isinstance(inputs, tuple):
+            inputs = list(inputs)
+        if self.keys is not None:
+            if isinstance(inputs, dict):
+                crop_params = self._get_params(inputs[self.keys[0]])
+            elif isinstance(inputs, (list, tuple)):
+                crop_params = self._get_params(inputs[0])
+
+            for i, key in enumerate(self.keys):
+                if isinstance(inputs, dict):
+                    inputs[key] = getattr(self, 'apply_' + key)(inputs[key],
+                                                                crop_params)
+                elif isinstance(inputs, (list, tuple)):
+                    inputs[i] = getattr(self, 'apply_' + key)(inputs[i],
+                                                              crop_params)
+        else:
+            crop_params = self._get_params(inputs)
+            inputs = self.apply_image(inputs, crop_params)
+
+        if isinstance(inputs, list):
+            inputs = tuple(inputs)
+        return inputs
+
+
+@TRANSFORMS.register()
+class RandomHorizontalFlip(Transform):
+    """Horizontally flip the input data randomly with a given probability.
+
+    Args:
+        prob (float): Probability of the input data being flipped. Default: 0.5
+    """
+    def __init__(self, prob=0.5, keys=None):
+        super().__init__()
+        self._set_attributes(locals())
+
+    def apply_image(self, img):
+        if np.random.random() < self.prob:
+            return F.flip(img, code=1)
+        return img
+
+
+@TRANSFORMS.register()
+class PairedRandomHorizontalFlip(RandomHorizontalFlip):
+    def __init__(self, prob=0.5, keys=None):
+        super().__init__()
+        self._set_attributes(locals())
+
+    def apply_image(self, img, flip):
+        if flip:
+            return F.flip(img, code=1)
+        return img
+
+    def __call__(self, inputs):
+        if isinstance(inputs, tuple):
+            inputs = list(inputs)
+        flip = np.random.random() < self.prob
+        if self.keys is not None:
+            for i, key in enumerate(self.keys):
+                if isinstance(inputs, dict):
+                    inputs[key] = getattr(self, 'apply_' + key)(inputs[key],
+                                                                flip)
+                elif isinstance(inputs, (list, tuple)):
+                    inputs[i] = getattr(self, 'apply_' + key)(inputs[i], flip)
+        else:
+            inputs = self.apply_image(inputs, flip)
+        if isinstance(inputs, list):
+            inputs = tuple(inputs)
+        return inputs
+
+
+@TRANSFORMS.register()
+class Normalize(Transform):
+    """Normalize the input data with mean and standard deviation.
+    Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels,
+    this transform will normalize each channel of the input data.
+    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
+
+    Args:
+        mean (int|float|list): Sequence of means for each channel.
+        std (int|float|list): Sequence of standard deviations for each channel.
+    """
+    def __init__(self, mean=0.0, std=1.0, keys=None):
+        super().__init__()
+        self._set_attributes(locals())
+
+        if isinstance(mean, numbers.Number):
+            mean = [mean, mean, mean]
+
+        if isinstance(std, numbers.Number):
+            std = [std, std, std]
+
+        self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1)
+        self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1)
+
+    def apply_image(self, img):
+        return (img - self.mean) / self.std
+
+
+@TRANSFORMS.register()
+class Permute(Transform):
+    """Change input data to a target mode.
+    For example, most transforms use HWC mode image,
+    while the Neural Network might use CHW mode input tensor.
+    Input image should be HWC mode and an instance of numpy.ndarray.
+
+    Args:
+        mode (str): Output mode of input. Default: "CHW".
+        to_rgb (bool): Convert 'bgr' image to 'rgb'. Default: True.
+    """
+    def __init__(self, mode="CHW", to_rgb=True, keys=None):
+        super().__init__()
+        self._set_attributes(locals())
+        assert mode in [
+            "CHW"
+        ], "Only support 'CHW' mode, but received mode: {}".format(mode)
+        self.mode = mode
+        self.to_rgb = to_rgb
+
+    def apply_image(self, img):
+        if self.to_rgb:
+            img = img[..., ::-1]
+        if self.mode == "CHW":
+            return img.transpose((2, 0, 1))
+        return img
+
+
 class Crop():
     def __init__(self, pos, size):
         self.pos = pos
@@ -35,6 +271,6 @@ class Crop():
         x, y = self.pos
         th = tw = self.size
         if (ow > tw or oh > th):
             return img[y: y + th, x: x + tw]
-        return img
+        return img
\ No newline at end of file
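To see why the new Paired* transforms and the keys argument exist, here is a small usage sketch that is not part of the commit: it builds the pipeline the pix2pix configs describe and applies one shared crop/flip decision to both halves of an (A, B) pair (import paths assume the package layout introduced above).

# Hypothetical sketch of the paired pipeline on dummy numpy images.
import numpy as np

from ppgan.datasets.transforms import (PairedRandomCrop,
                                       PairedRandomHorizontalFlip, Permute,
                                       Normalize)
from ppgan.datasets.transforms.builder import Compose

pipeline = Compose([
    PairedRandomCrop(output_size=[256, 256], keys=['image', 'image']),
    PairedRandomHorizontalFlip(prob=0.5, keys=['image', 'image']),
    Permute(keys=['image', 'image']),
    Normalize(mean=[127.5, 127.5, 127.5],
              std=[127.5, 127.5, 127.5],
              keys=['image', 'image']),
])

A = np.random.randint(0, 256, (286, 286, 3)).astype('float32')  # HWC
B = np.random.randint(0, 256, (286, 286, 3)).astype('float32')

# keys=['image', 'image'] routes both tuple elements through apply_image,
# with the crop window and flip decision computed once and shared.
A_t, B_t = pipeline((A, B))
print(A_t.shape, B_t.shape)  # (3, 256, 256) each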
ppgan/datasets/unpaired_dataset.py  (+20 −12)
@@ -5,13 +5,13 @@ from .base_dataset import BaseDataset, get_transform
 from .image_folder import make_dataset
 from .builder import DATASETS
+from .transforms.builder import build_transforms


 @DATASETS.register()
 class UnpairedDataset(BaseDataset):
     """
     """
     def __init__(self, cfg):
         """Initialize this dataset class.
@@ -19,18 +19,25 @@ class UnpairedDataset(BaseDataset):
             cfg (dict) -- stores all the experiment flags
         """
         BaseDataset.__init__(self, cfg)
-        self.dir_A = os.path.join(cfg.dataroot, cfg.phase + 'A')  # create a path '/path/to/data/trainA'
-        self.dir_B = os.path.join(cfg.dataroot, cfg.phase + 'B')  # create a path '/path/to/data/trainB'
-        self.A_paths = sorted(make_dataset(self.dir_A, cfg.max_dataset_size))  # load images from '/path/to/data/trainA'
-        self.B_paths = sorted(make_dataset(self.dir_B, cfg.max_dataset_size))  # load images from '/path/to/data/trainB'
+        self.dir_A = os.path.join(cfg.dataroot, cfg.phase +
+                                  'A')  # create a path '/path/to/data/trainA'
+        self.dir_B = os.path.join(cfg.dataroot, cfg.phase + 'B')  # create a path '/path/to/data/trainB'
+        self.A_paths = sorted(make_dataset(
+            self.dir_A, cfg.max_dataset_size))  # load images from '/path/to/data/trainA'
+        self.B_paths = sorted(make_dataset(self.dir_B, cfg.max_dataset_size))  # load images from '/path/to/data/trainB'

         self.A_size = len(self.A_paths)  # get the size of dataset A
         self.B_size = len(self.B_paths)  # get the size of dataset B
         btoA = self.cfg.direction == 'BtoA'
         input_nc = self.cfg.output_nc if btoA else self.cfg.input_nc  # get the number of channels of input image
         output_nc = self.cfg.input_nc if btoA else self.cfg.output_nc  # get the number of channels of output image
-        self.transform_A = get_transform(self.cfg.transform, grayscale=(input_nc == 1))
-        self.transform_B = get_transform(self.cfg.transform, grayscale=(output_nc == 1))
+        self.transform_A = build_transforms(self.cfg.transforms)
+        self.transform_B = build_transforms(self.cfg.transforms)

         self.reset_paths()
@@ -49,10 +56,11 @@ class UnpairedDataset(BaseDataset):
             A_paths (str) -- image paths
             B_paths (str) -- image paths
         """
-        A_path = self.A_paths[index % self.A_size]  # make sure index is within then range
+        A_path = self.A_paths[
+            index % self.A_size]  # make sure index is within then range
         if self.cfg.serial_batches:  # make sure index is within then range
             index_B = index % self.B_size
         else:  # randomize the index for domain B to avoid fixed pairs.
             index_B = random.randint(0, self.B_size - 1)
         B_path = self.B_paths[index_B]
...
ppgan/models/builder.py  (+0 −9)
@@ -2,18 +2,9 @@ import paddle
 from ..utils.registry import Registry

 MODELS = Registry("MODEL")


 def build_model(cfg):
-    # dataset = MODELS.get(cfg.MODEL.name)(cfg.MODEL)
-    # place = paddle.CUDAPlace(0)
-    # dataloader = paddle.io.DataLoader(dataset,
-    #                                   batch_size=1, #opt.batch_size,
-    #                                   places=place,
-    #                                   shuffle=True, #not opt.serial_batches,
-    #                                   num_workers=0)#int(opt.num_threads))
     model = MODELS.get(cfg.model.name)(cfg)
     return model
-    # pass
\ No newline at end of file
ppgan/utils/animate.py  (+2 −6)
-import os
-from tqdm import tqdm
+import numpy as np
+from scipy.spatial import ConvexHull

 import paddle
-import imageio
-from scipy.spatial import ConvexHull
-import numpy as np


 def normalize_kp(kp_source,
                  kp_driving,
...