Unverified commit 581da7e9 authored by BrilliantYuKaimin, committed by GitHub

add SinGAN model (#576)

* Create singan.md

* Create gradient_penalty.py

* add GradientPenalty

* add images for singan document

* add SinGANModel

* add SinGANGenerator

* add SinGANDiscriminator

* Create discriminator_singan.py

* Create generator_singan.py

* Create singan_model.py

* Create empty_dataset.py

* add EmptyDataset

* Create singan_predictor.py

* add SinGANPredictor

* Create singan.py

* create configs for singan

* add inference for singan

* create tipc config for singan

* Create python_singan_results_fp32.txt

* add tipc prepare for singan

* Update test_train_inference_python.md

* Update readme.md

* Update singan.md

* Update singan_model.py

* Update prepare.sh

* Update train_infer_python.txt

* Update prepare.sh

* Revert "add images for singan document"

This reverts commit f45fe5e55a2588611d951ae84d776c90693788df.

* Update singan.md

* update path format in configs

* update year of copyright

* modify the order of import

* Update singan_predictor.py

* update configs for singan

* Update singan_model.py

* modify urls for singan in prepare.sh

* add pretrained weight for singan

* Update singan_model.py

* Update singan.md

* Create English tutorial for SinGAN
Parent 71377845
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import paddle
from ppgan.apps import SinGANPredictor
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--output_path",
type=str,
default='output_dir',
help="path to output image dir")
parser.add_argument("--weight_path",
type=str,
default=None,
help="path to model checkpoint path")
parser.add_argument("--pretrained_model",
type=str,
default=None,
help="a pretianed model, only trees, stone, mountains, birds, and lightning are implemented.")
parser.add_argument("--mode",
type=str,
default="random_sample",
help="type of model for loading pretrained model")
parser.add_argument("--generate_start_scale",
type=int,
default=0,
help="sample random seed for model's image generation")
parser.add_argument("--seed",
type=int,
default=None,
help="sample random seed for model's image generation")
parser.add_argument("--scale_h",
type=float,
default=1.0,
help="horizontal scale")
parser.add_argument("--scale_v",
type=float,
default=1.0,
help="vertical scale")
parser.add_argument("--ref_image",
type=str,
default=None,
help="reference image for harmonization, editing and paint2image")
parser.add_argument("--mask_image",
type=str,
default=None,
help="mask image for harmonization and editing")
parser.add_argument("--sr_factor",
type=float,
default=4.0,
help="scale for super resolution")
parser.add_argument("--animation_alpha",
type=float,
default=0.9,
help="a parameter determines how close the frames of the sequence remain to the training image")
parser.add_argument("--animation_beta",
type=float,
default=0.9,
help="a parameter controls the smoothness and rate of change in the generated clip")
parser.add_argument("--animation_frames",
type=int,
default=20,
help="frame number of output animation when mode is animation")
parser.add_argument("--animation_duration",
type=float,
default=0.1,
help="duration of each frame in animation")
parser.add_argument("--n_row",
type=int,
default=5,
help="row number of output image grid")
parser.add_argument("--n_col",
type=int,
default=3,
help="column number of output image grid")
parser.add_argument("--cpu",
dest="cpu",
action="store_true",
help="cpu mode.")
args = parser.parse_args()
if args.cpu:
paddle.set_device('cpu')
predictor = SinGANPredictor(args.output_path,
args.weight_path,
args.pretrained_model,
args.seed)
predictor.run(args.mode,
args.generate_start_scale,
args.scale_h,
args.scale_v,
args.ref_image,
args.mask_image,
args.sr_factor,
args.animation_alpha,
args.animation_beta,
args.animation_frames,
args.animation_duration,
args.n_row,
args.n_col)
total_iters: 100000
output_dir: output_dir
export_model: null
model:
name: SinGANModel
generator:
name: SinGANGenerator
nfc_init: 32
min_nfc_init: 32
noise_zero_pad: False
discriminator:
name: SinGANDiscriminator
nfc_init: 32
min_nfc_init: 32
gan_criterion:
name: GANLoss
gan_mode: wgangp
loss_weight: 1.0
recon_criterion:
name: MSELoss
loss_weight: 10.0
gp_criterion:
name: GradientPenalty
loss_weight: 0.1
train_image: data/singan/stone.png
scale_factor: 0.75
min_size: 25
is_finetune: False
dataset:
train:
name: EmptyDataset
test:
name: SingleDataset
dataroot: data/singan
num_workers: 0
batch_size: 1
preprocess:
- name: LoadImageFromFile
key: A
- name: Transforms
input_keys: [A]
pipeline:
- name: Transpose
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
lr_scheduler:
name: MultiStepDecay
learning_rate: 0.0005
milestones: [9600]
gamma: 0.1
optimizer:
optimizer_G:
name: Adam
beta1: 0.5
beta2: 0.999
optimizer_D:
name: Adam
beta1: 0.5
beta2: 0.999
log_config:
interval: 100
visiual_interval: 2000
snapshot_config:
interval: 10000
validate:
interval: -1
save_img: True
metrics:
fid: # metric name, can be arbitrary
name: FID
batch_size: 1
total_iters: 12000
output_dir: output_dir
model:
name: SinGANModel
generator:
name: SinGANGenerator
nfc_init: 32
min_nfc_init: 32
noise_zero_pad: True
discriminator:
name: SinGANDiscriminator
nfc_init: 32
min_nfc_init: 32
gan_criterion:
name: GANLoss
gan_mode: wgangp
loss_weight: 1.0
recon_criterion:
name: MSELoss
loss_weight: 10.0
gp_criterion:
name: GradientPenalty
loss_weight: 0.1
train_image: data/singan/stone.png
scale_factor: 0.75
min_size: 25
is_finetune: True
finetune_scale: 1
color_num: 5
dataset:
train:
name: EmptyDataset
test:
name: SingleDataset
dataroot: data/singan
num_workers: 0
batch_size: 1
preprocess:
- name: LoadImageFromFile
key: A
- name: Transforms
input_keys: [A]
pipeline:
- name: Transpose
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
lr_scheduler:
name: MultiStepDecay
learning_rate: 0.0005
milestones: [9600]
gamma: 0.1
optimizer:
optimizer_G:
name: Adam
beta1: 0.5
beta2: 0.999
optimizer_D:
name: Adam
beta1: 0.5
beta2: 0.999
log_config:
interval: 100
visiual_interval: 2000
snapshot_config:
interval: 4000
total_iters: 100000
output_dir: output_dir
export_model: null
model:
name: SinGANModel
generator:
name: SinGANGenerator
nfc_init: 32
min_nfc_init: 32
noise_zero_pad: True
discriminator:
name: SinGANDiscriminator
nfc_init: 32
min_nfc_init: 32
gan_criterion:
name: GANLoss
gan_mode: wgangp
loss_weight: 1.0
recon_criterion:
name: MSELoss
loss_weight: 100.0
gp_criterion:
name: GradientPenalty
loss_weight: 0.1
train_image: data/singan/stone.png
scale_factor: 0.793701 # (1/2)^(1/3)
min_size: 18
is_finetune: False
dataset:
train:
name: EmptyDataset
test:
name: SingleDataset
dataroot: data/singan
num_workers: 0
batch_size: 1
preprocess:
- name: LoadImageFromFile
key: A
- name: Transforms
input_keys: [A]
pipeline:
- name: Transpose
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
lr_scheduler:
name: MultiStepDecay
learning_rate: 0.0005
milestones: [9600]
gamma: 0.1
optimizer:
optimizer_G:
name: Adam
beta1: 0.5
beta2: 0.999
optimizer_D:
name: Adam
beta1: 0.5
beta2: 0.999
log_config:
interval: 100
visiual_interval: 2000
snapshot_config:
interval: 10000
validate:
interval: -1
save_img: True
metrics:
fid: # metric name, can be arbitrary
name: FID
batch_size: 1
total_iters: 100000
output_dir: output_dir
export_model: null
model:
name: SinGANModel
generator:
name: SinGANGenerator
nfc_init: 32
min_nfc_init: 32
noise_zero_pad: True
discriminator:
name: SinGANDiscriminator
nfc_init: 32
min_nfc_init: 32
gan_criterion:
name: GANLoss
gan_mode: wgangp
loss_weight: 1.0
recon_criterion:
name: MSELoss
loss_weight: 10.0
gp_criterion:
name: GradientPenalty
loss_weight: 0.1
train_image: data/singan/stone.png
scale_factor: 0.75
min_size: 25
is_finetune: False
dataset:
train:
name: EmptyDataset
test:
name: SingleDataset
dataroot: data/singan
num_workers: 0
batch_size: 1
preprocess:
- name: LoadImageFromFile
key: A
- name: Transforms
input_keys: [A]
pipeline:
- name: Transpose
- name: Normalize
mean: [127.5, 127.5, 127.5]
std: [127.5, 127.5, 127.5]
lr_scheduler:
name: MultiStepDecay
learning_rate: 0.0005
milestones: [9600]
gamma: 0.1
optimizer:
optimizer_G:
name: Adam
beta1: 0.5
beta2: 0.999
optimizer_D:
name: Adam
beta1: 0.5
beta2: 0.999
log_config:
interval: 100
visiual_interval: 2000
snapshot_config:
interval: 10000
validate:
interval: -1
save_img: True
metrics:
fid: # metric name, can be arbitrary
name: FID
batch_size: 1
# SinGAN
## Introduction
SinGAN is a novel unconditional* generative model that is trained on a single image. Traditionally, GANs have been trained on class-specific datasets and capture common features among images of the same class. SinGAN, on the other hand, learns from the overlapping patches at multiple scales of a particular image, capturing its internal statistics. Once trained, SinGAN can produce assorted high-quality images of arbitrary sizes and aspect ratios that semantically resemble the training image but contain new object configurations and structures.
*\* An unconditional GAN creates samples purely from randomized input, while a conditional GAN generates samples based on a "class label" that controls the type of image generated.*
## Usage
### About Config Files
We provide 4 config files for the SinGAN model:
- `singan_universal.yaml`
- `singan_sr.yaml`
- `singan_animation.yaml`
- `singan_finetune.yaml`
Among them, `singan_universal.yaml` is a config file suited to all tasks, `singan_sr.yaml` is the config recommended by the authors for super resolution, and `singan_animation.yaml` is the config recommended by the authors for animation. Results shown in this document were trained with `singan_universal.yaml`. For *Paint to Image*, better results are obtained by finetuning with `singan_finetune.yaml` after training with `singan_universal.yaml`.
### Train
Start training:
```bash
python tools/main.py -c configs/singan_universal.yaml \
-o model.train_image=train_image.png
```
Finetune for "Paint2Image":
```bash
python tools/main.py -c configs/singan_finetune.yaml \
-o model.train_image=train_image.png \
--load weight_saved_in_training.pdparams
```
### Evaluation
Run the following command to generate a random image. Note that `train_image.png` must be located in the `data/singan` directory, or you can manually modify the value of `dataset.test.dataroot` in the config file. Moreover, this directory must contain only one image, namely `train_image.png`.
```bash
python tools/main.py -c configs/singan_universal.yaml \
-o model.train_image=train_image.png \
--load weight_saved_in_training.pdparams \
--evaluate-only
```
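If the training image is stored somewhere other than `data/singan`, it should also be possible to override `dataset.test.dataroot` from the command line, reusing the `-o` mechanism shown above for `model.train_image` (a sketch assuming `-o` accepts several `key=value` pairs, as in other Paddle tooling; `path/to/your_dir` is a placeholder):
```bash
python tools/main.py -c configs/singan_universal.yaml \
    -o model.train_image=train_image.png dataset.test.dataroot=path/to/your_dir \
    --load weight_saved_in_training.pdparams \
    --evaluate-only
```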
### Extract Generator Weights
After training, use `tools/extract_weight.py` to extract the generator's weights from the training checkpoint, which contains both the generator and the discriminator. Then `applications/tools/singan.py` can use them to realize the diverse applications of SinGAN.
```bash
python tools/extract_weight.py weight_saved_in_training.pdparams --net-name netG --output weight_of_generator.pdparams
```
### Inference and Result
*Attention: to use a pretrained model, replace `--weight_path weight_of_generator.pdparams` in the following commands with `--pretrained_model <model>`, where `<model>` can be `trees`, `stone`, `mountains`, `birds` or `lightning`.*
#### Random Sample
```bash
# --scale_v: vertical scale; --scale_h: horizontal scale
python applications/tools/singan.py \
    --weight_path weight_of_generator.pdparams \
    --mode random_sample \
    --scale_v 1 \
    --scale_h 1 \
    --n_row 2 \
    --n_col 2
```
|training image|result|
| ---- | ---- |
|![birds](https://user-images.githubusercontent.com/91609464/153211448-2614407b-a30b-467c-b1e5-7db88ff2ca74.png)|![birds-random_sample](https://user-images.githubusercontent.com/91609464/153211573-1af108ba-ad42-438a-94a9-e8f8f3e091eb.png)|
#### Editing & Harmonization
```bash
# use --mode harmonization for the harmonization task
python applications/tools/singan.py \
    --weight_path weight_of_generator.pdparams \
    --mode editing \
    --ref_image editing_image.png \
    --mask_image mask_of_editing.png \
    --generate_start_scale 2
```
|training image|editing image|mask of editing|result|
|----|----|----|----|
|![stone](https://user-images.githubusercontent.com/91609464/153211778-bb94d29d-a2b4-4d04-9900-89b20ae90b90.png)|![stone-edit](https://user-images.githubusercontent.com/91609464/153211867-df3d9035-d320-45ec-8043-488e9da49bff.png)|![stone-edit-mask](https://user-images.githubusercontent.com/91609464/153212047-9620f73c-58d9-48ed-9af7-a11470ad49c8.png)|![stone-edit-mask-result](https://user-images.githubusercontent.com/91609464/153211942-e0e639c2-3ea6-4ade-852b-73757b0bbab0.png)|
#### Super Resolution
```bash
python applications/tools/singan.py \
--weight_path weight_of_generator.pdparams \
--mode sr \
--ref_image image_to_sr.png \
--sr_factor 4
```
|training image|result|
| ---- | ---- |
|![mountains](https://user-images.githubusercontent.com/91609464/153212146-efbbbbd6-e045-477a-87ae-10f121341060.png)|![sr](https://user-images.githubusercontent.com/91609464/153212176-530b7075-e72b-4c05-ad3e-2f2cdfc76dea.png)|
#### Animation
```bash
# --animation_alpha: how close the frames remain to the training image
# --animation_beta: smoothness and rate of change of the generated clip
# --animation_frames: number of frames; --animation_duration: seconds per frame
python applications/tools/singan.py \
    --weight_path weight_of_generator.pdparams \
    --mode animation \
    --animation_alpha 0.6 \
    --animation_beta 0.7 \
    --animation_frames 20 \
    --animation_duration 0.1
```
|training image|animation|
| ---- | ---- |
|![lightning](https://user-images.githubusercontent.com/91609464/153212291-6f8976bd-e873-423e-ab62-77997df2df7a.png)|![animation](https://user-images.githubusercontent.com/91609464/153212372-0543e6d6-5842-472b-af50-8b22670270ae.gif)|
#### Paint to Image
```bash
python applications/tools/singan.py \
--weight_path weight_of_generator.pdparams \
--mode paint2image \
--ref_image paint.png \
--generate_start_scale 2
```
|training image|paint|result|result after finetune|
|----|----|----|----|
|![trees](https://user-images.githubusercontent.com/91609464/153212536-0bb6489d-d488-49e0-a6b5-90ef578c9e4f.png)|![trees-paint](https://user-images.githubusercontent.com/91609464/153212511-ef2c6bea-1f8c-4685-951b-8db589414dfe.png)|![trees-paint2image](https://user-images.githubusercontent.com/91609464/153212531-c080c705-fd58-4ade-aac6-e2134838a75f.png)|![trees-paint2image-finetuned](https://user-images.githubusercontent.com/91609464/153212529-51d8d29b-6b58-4f29-8792-4b2b04f9266e.png)|
## Reference
```
@misc{shaham2019singan,
title={SinGAN: Learning a Generative Model from a Single Natural Image},
author={Tamar Rott Shaham and Tali Dekel and Tomer Michaeli},
year={2019},
eprint={1905.01164},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
# SinGAN
## Introduction
SinGAN is a new unconditional generative model that can be learned from a single natural image. The model contains a pyramid of fully convolutional GANs, each responsible for learning the patch distribution at a different scale of the image. This allows generating new samples of arbitrary size and aspect ratio that have significant variability yet preserve both the global structure and the fine textures of the training image. In contrast to previous single-image generation schemes, this approach is not limited to texture images and is unconditional (i.e. it generates samples from noise).
## Usage
### About Config Files
We provide 4 config files for SinGAN:
- `singan_universal.yaml`
- `singan_sr.yaml`
- `singan_animation.yaml`
- `singan_finetune.yaml`
Among them, `singan_universal.yaml` is a configuration suitable for all tasks, `singan_sr.yaml` is the authors' recommended configuration for super resolution, and `singan_animation.yaml` is the authors' recommended configuration for animation. All results shown in this document were trained with `singan_universal.yaml`. For the paint-to-image task, training with `singan_universal.yaml` and then finetuning with `singan_finetune.yaml` yields better results.
### Train
Start training:
```bash
python tools/main.py -c configs/singan_universal.yaml \
    -o model.train_image=train_image.png
```
Finetune for the paint-to-image task:
```bash
python tools/main.py -c configs/singan_finetune.yaml \
    -o model.train_image=train_image.png \
    --load weight_saved_in_training.pdparams
```
### Evaluation
Run the following command to generate a random image. Note that `train_image.png` must be located in the `data/singan` directory, or you can manually adjust the value of `dataset.test.dataroot` in the config file. Moreover, this directory must contain only this one image, `train_image.png`.
```bash
python tools/main.py -c configs/singan_universal.yaml \
    -o model.train_image=train_image.png \
    --load weight_saved_in_training.pdparams \
--evaluate-only
```
### Extract Generator Weights
After training, use `tools/extract_weight.py` to extract the generator's weights from the training checkpoint (which contains both the generator and the discriminator); `applications/tools/singan.py` then uses them for inference to realize the various applications of SinGAN.
```bash
python tools/extract_weight.py weight_saved_in_training.pdparams --net-name netG --output weight_of_generator.pdparams
```
### Inference and Results
*Note: in the commands below, `--weight_path weight_of_generator.pdparams` can be replaced with `--pretrained_model <model>` to try the released models, where `<model>` can be `trees`, `stone`, `mountains`, `birds` or `lightning`.*
#### Random Sample
```bash
# --scale_v: vertical scale; --scale_h: horizontal scale
python applications/tools/singan.py \
    --weight_path weight_of_generator.pdparams \
    --mode random_sample \
    --scale_v 1 \
    --scale_h 1 \
    --n_row 2 \
    --n_col 2
```
|training image|random sample result|
| ---- | ---- |
|![birds](https://user-images.githubusercontent.com/91609464/153211448-2614407b-a30b-467c-b1e5-7db88ff2ca74.png)|![birds-random_sample](https://user-images.githubusercontent.com/91609464/153211573-1af108ba-ad42-438a-94a9-e8f8f3e091eb.png)|
#### Editing & Harmonization
```bash
# use --mode harmonization for the harmonization task
python applications/tools/singan.py \
    --weight_path weight_of_generator.pdparams \
    --mode editing \
    --ref_image editing_image.png \
    --mask_image mask_of_editing.png \
    --generate_start_scale 2
```
|training image|editing image|editing mask|SinGAN output|
|----|----|----|----|
|![stone](https://user-images.githubusercontent.com/91609464/153211778-bb94d29d-a2b4-4d04-9900-89b20ae90b90.png)|![stone-edit](https://user-images.githubusercontent.com/91609464/153211867-df3d9035-d320-45ec-8043-488e9da49bff.png)|![stone-edit-mask](https://user-images.githubusercontent.com/91609464/153212047-9620f73c-58d9-48ed-9af7-a11470ad49c8.png)|![stone-edit-mask-result](https://user-images.githubusercontent.com/91609464/153211942-e0e639c2-3ea6-4ade-852b-73757b0bbab0.png)|
#### Super Resolution
```bash
# the image to super-resolve is the training image itself
python applications/tools/singan.py \
    --weight_path weight_of_generator.pdparams \
    --mode sr \
    --ref_image image_to_sr.png \
    --sr_factor 4
```
|training image|super resolution result|
| ---- | ---- |
|![mountains](https://user-images.githubusercontent.com/91609464/153212146-efbbbbd6-e045-477a-87ae-10f121341060.png)|![sr](https://user-images.githubusercontent.com/91609464/153212176-530b7075-e72b-4c05-ad3e-2f2cdfc76dea.png)|
#### Animation
```bash
# --animation_alpha: how close the frames remain to the training image
# --animation_beta: smoothness and rate of change of the generated clip
# --animation_frames: number of frames; --animation_duration: seconds per frame
python applications/tools/singan.py \
    --weight_path weight_of_generator.pdparams \
    --mode animation \
    --animation_alpha 0.6 \
    --animation_beta 0.7 \
    --animation_frames 20 \
    --animation_duration 0.1
```
|training image|animation|
| ---- | ---- |
|![lightning](https://user-images.githubusercontent.com/91609464/153212291-6f8976bd-e873-423e-ab62-77997df2df7a.png)|![animation](https://user-images.githubusercontent.com/91609464/153212372-0543e6d6-5842-472b-af50-8b22670270ae.gif)|
#### Paint to Image
```bash
python applications/tools/singan.py \
    --weight_path weight_of_generator.pdparams \
    --mode paint2image \
    --ref_image paint.png \
    --generate_start_scale 2
```
|training image|paint|SinGAN output|output after finetuning|
|----|----|----|----|
|![trees](https://user-images.githubusercontent.com/91609464/153212536-0bb6489d-d488-49e0-a6b5-90ef578c9e4f.png)|![trees-paint](https://user-images.githubusercontent.com/91609464/153212511-ef2c6bea-1f8c-4685-951b-8db589414dfe.png)|![trees-paint2image](https://user-images.githubusercontent.com/91609464/153212531-c080c705-fd58-4ade-aac6-e2134838a75f.png)|![trees-paint2image-finetuned](https://user-images.githubusercontent.com/91609464/153212529-51d8d29b-6b58-4f29-8792-4b2b04f9266e.png)|
## Reference
```
@misc{shaham2019singan,
title={SinGAN: Learning a Generative Model from a Single Natural Image},
author={Tamar Rott Shaham and Tali Dekel and Tomer Michaeli},
year={2019},
eprint={1905.01164},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
......@@ -34,3 +34,4 @@ from .photopen_predictor import PhotoPenPredictor
from .recurrent_vsr_predictor import (PPMSVSRPredictor, BasicVSRPredictor, \
BasiVSRPlusPlusPredictor, IconVSRPredictor, \
PPMSVSRLargePredictor)
from .singan_predictor import SinGANPredictor
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
import math
import skimage
import imageio
import paddle
import paddle.nn.functional as F
import paddle.vision.transforms as T
from .base_predictor import BasePredictor
from ..models.singan_model import pad_shape
from ppgan.models.generators import SinGANGenerator
from ppgan.utils.download import get_path_from_url
from ppgan.utils.visual import tensor2img, save_image, make_grid
pretrained_weights_url = {
'trees': 'https://paddlegan.bj.bcebos.com/models/singan_universal_trees.pdparams',
'stone': 'https://paddlegan.bj.bcebos.com/models/singan_universal_stone.pdparams',
'mountains': 'https://paddlegan.bj.bcebos.com/models/singan_universal_mountains.pdparams',
'birds': 'https://paddlegan.bj.bcebos.com/models/singan_universal_birds.pdparams',
'lightning': 'https://paddlegan.bj.bcebos.com/models/singan_universal_lightning.pdparams'
}
def imread(path):
return cv2.cvtColor(
cv2.imread(
path, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
def imgpath2tensor(path):
return paddle.to_tensor(T.Compose([
T.Transpose(),
T.Normalize(127.5, 127.5)
])(imread(path))).unsqueeze(0)
def dilate_mask(mask, mode):
if mode == "harmonization":
element = skimage.morphology.disk(radius=7)
elif mode == "editing":
element = skimage.morphology.disk(radius=20)
else:
raise NotImplementedError('mode %s is not implemented' % mode)
mask = skimage.morphology.binary_dilation(mask, selem=element)
mask = skimage.filters.gaussian(mask, sigma=5)
return mask
class SinGANPredictor(BasePredictor):
def __init__(self,
output_path='output_dir',
weight_path=None,
pretrained_model=None,
seed=None):
self.output_path = output_path
if weight_path is None:
if pretrained_model in pretrained_weights_url.keys():
weight_path = get_path_from_url(
pretrained_weights_url[pretrained_model])
else:
                raise ValueError(
                    'Predictor needs a weight path or a pretrained model.')
checkpoint = paddle.load(weight_path)
self.scale_num = checkpoint['scale_num'].item()
self.coarsest_shape = checkpoint['coarsest_shape'].tolist()
self.nfc_init = checkpoint['nfc_init'].item()
self.min_nfc_init = checkpoint['min_nfc_init'].item()
self.num_layers = checkpoint['num_layers'].item()
self.ker_size = checkpoint['ker_size'].item()
self.noise_zero_pad = checkpoint['noise_zero_pad'].item()
self.generator = SinGANGenerator(self.scale_num,
self.coarsest_shape,
self.nfc_init,
self.min_nfc_init,
3,
self.num_layers,
self.ker_size,
self.noise_zero_pad)
self.generator.set_state_dict(checkpoint)
self.generator.eval()
self.scale_factor = self.generator.scale_factor.item()
self.niose_pad_size = 0 if self.noise_zero_pad \
else self.generator._pad_size
if seed is not None:
paddle.seed(seed)
def noise_like(self, x):
return paddle.randn(pad_shape(x.shape, self.niose_pad_size))
def run(self,
mode='random_sample',
generate_start_scale=0,
scale_h=1.0,
scale_v=1.0,
ref_image=None,
mask_image=None,
sr_factor=4,
animation_alpha=0.9,
animation_beta=0.9,
animation_frames=20,
animation_duration=0.1,
n_row=5,
n_col=3):
# check config
if mode not in ['random_sample',
'sr', 'animation',
'harmonization',
'editing', 'paint2image']:
            raise ValueError(
                'Only random_sample, sr, animation, harmonization, '
                'editing and paint2image are implemented.')
if mode in ['sr', 'harmonization', 'editing', 'paint2image'] and \
ref_image is None:
            raise ValueError(
                'When mode is sr, harmonization, editing, or '
                'paint2image, a reference image must be provided.')
if mode in ['harmonization', 'editing'] and mask_image is None:
            raise ValueError(
                'When mode is harmonization or editing, '
                'a mask image must be provided.')
if mode == 'animation':
batch_size = animation_frames
elif mode == 'random_sample':
batch_size = n_row * n_col
else:
batch_size = 1
# prepare input
if mode == 'harmonization' or mode == 'editing' or mode == 'paint2image':
ref = imgpath2tensor(ref_image)
x_init = F.interpolate(
ref, None,
self.scale_factor ** (self.scale_num - generate_start_scale),
'bicubic')
x_init = F.interpolate(
x_init, None, 1 / self.scale_factor, 'bicubic')
elif mode == 'sr':
ref = imgpath2tensor(ref_image)
sr_iters = math.ceil(math.log(sr_factor, 1 / self.scale_factor))
            # each pass upscales by sr_factor ** (1 / sr_iters), so sr_iters
            # passes yield a total upscale of roughly sr_factor
            sr_scale_factor = sr_factor ** (1 / sr_iters)
x_init = F.interpolate(ref, None, sr_scale_factor, 'bicubic')
else:
x_init = paddle.zeros([
batch_size,
self.coarsest_shape[1],
int(self.coarsest_shape[2] * scale_v),
int(self.coarsest_shape[3] * scale_h)])
# forward
if mode == 'sr':
for _ in range(sr_iters):
out = self.generator([self.noise_like(x_init)], x_init, -1, -1)
x_init = F.interpolate(out, None, sr_scale_factor, 'bicubic')
else:
z_pyramid = [
self.noise_like(
F.interpolate(
x_init, None, 1 / self.scale_factor ** i))
for i in range(self.scale_num - generate_start_scale)]
if mode == 'animation':
a = animation_alpha
b = animation_beta
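                # The noise maps form an autoregressive random walk across frames:
                # z_t = a * z_0 + (1 - a) * (z_{t-1} + b * (z_{t-1} - z_{t-2}) + (1 - b) * n_t),
                # where alpha pulls each frame back toward the training image's
                # fixed noise and beta adds momentum, controlling smoothness.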
for i in range(len(z_pyramid)):
z = paddle.chunk(z_pyramid[i], batch_size)
if i == 0 and generate_start_scale == 0:
z_0 = F.interpolate(
self.generator.z_fixed,
pad_shape(x_init.shape[-2:], self.niose_pad_size),
None, 'bicubic')
else:
z_0 = 0
z_1 = z_0
z_2 = 0.95 * z_1 + 0.05 * z[0]
for j in range(len(z)):
z[j] = a * z_0 + (1 - a) * (z_2 + b * (z_2 - z_1) + (1 - b) * z[j])
z_1 = z_2
z_2 = z[j]
z = paddle.concat(z)
z_pyramid[i] = z
out = self.generator(z_pyramid, x_init, self.scale_num - 1, generate_start_scale)
# postprocess and save
os.makedirs(self.output_path, exist_ok=True)
if mode == 'animation':
frames = [tensor2img(x) for x in out.chunk(animation_frames)]
imageio.mimsave(
os.path.join(self.output_path, 'animation.gif'),
frames, 'GIF', duration=animation_duration)
else:
if mode == 'harmonization' or mode == 'editing':
mask = cv2.imread(mask_image, cv2.IMREAD_GRAYSCALE)
mask = paddle.to_tensor(dilate_mask(mask, mode), 'float32')
out = F.interpolate(out, mask.shape, None, 'bicubic')
out = (1 - mask) * ref + mask * out
elif mode == 'sr':
out = F.interpolate(
out,
[ref.shape[-2] * sr_factor, ref.shape[-1] * sr_factor],
None, 'bicubic')
elif mode == 'paint2image':
out = F.interpolate(out, ref.shape[-2:], None, 'bicubic')
elif mode == 'random_sample':
out = make_grid(out, n_row)
save_image(tensor2img(out), os.path.join(self.output_path, mode + '.png'))
......@@ -29,3 +29,4 @@ from .vsr_reds_multiple_gt_dataset import VSRREDSMultipleGTDataset
from .vsr_vimeo90k_dataset import VSRVimeo90KDataset
from .vsr_folder_dataset import VSRFolderDataset
from .photopen_dataset import PhotoPenDataset
from .empty_dataset import EmptyDataset
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .base_dataset import BaseDataset
from .builder import DATASETS
@DATASETS.register()
class EmptyDataset(BaseDataset):
'''
    Dataset for models that don't need a dataset.
'''
def __init__(self, size=1):
super().__init__()
self.size = size
self.data_infos = self.prepare_data_infos()
def prepare_data_infos(self):
return [{i: 0} for i in range(self.size)]
......@@ -34,3 +34,4 @@ from .basicvsr_model import BasicVSRModel
from .mpr_model import MPRModel
from .photopen_model import PhotoPenModel
from .msvsr_model import MultiStageVSRModel
from .singan_model import SinGANModel
......@@ -4,5 +4,6 @@ from .pixel_loss import L1Loss, MSELoss, CharbonnierLoss, \
CalcStyleEmdLoss, CalcContentReltLoss, \
CalcContentLoss, CalcStyleLoss, EdgeLoss
from .photopen_perceptual_loss import PhotoPenPerceptualLoss
from .gradient_penalty import GradientPenalty
from .builder import build_criterion
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from .builder import CRITERIONS
@CRITERIONS.register()
class GradientPenalty():
def __init__(self, loss_weight=1.0):
self.loss_weight = loss_weight
def __call__(self, net, real, fake):
batch_size = real.shape[0]
alpha = paddle.rand([batch_size])
for _ in range(real.ndim - 1):
alpha = paddle.unsqueeze(alpha, -1)
interpolate = alpha * real + (1 - alpha) * fake
interpolate.stop_gradient = False
interpolate_pred = net(interpolate)
gradient = paddle.grad(outputs=interpolate_pred,
inputs=interpolate,
grad_outputs=paddle.ones_like(interpolate_pred),
create_graph=True,
retain_graph=True,
only_inputs=True)[0]
gradient_penalty = ((gradient.norm(2, 1) - 1) ** 2).mean()
return gradient_penalty * self.loss_weight
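# Illustrative usage (a sketch, not part of the original file; the toy
# discriminator is hypothetical):
#
#     import paddle.nn as nn
#     disc = nn.Sequential(nn.Conv2D(3, 8, 3), nn.LeakyReLU(0.2), nn.Conv2D(8, 1, 3))
#     gp = GradientPenalty(loss_weight=0.1)
#     real, fake = paddle.randn([4, 3, 32, 32]), paddle.randn([4, 3, 32, 32])
#     loss = gp(disc, real, fake)  # penalizes ||grad D(x_hat)||_2 straying from 1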
......@@ -24,3 +24,4 @@ from .discriminator_starganv2 import StarGANv2Discriminator
from .discriminator_firstorder import FirstOrderDiscriminator
from .discriminator_lapstyle import LapStyleDiscriminator
from .discriminator_photopen import MultiscaleDiscriminator
from .discriminator_singan import SinGANDiscriminator
# code was based on https://github.com/tamarott/SinGAN
import paddle.nn as nn
from ..generators.generator_singan import ConvBlock
from .builder import DISCRIMINATORS
@DISCRIMINATORS.register()
class SinGANDiscriminator(nn.Layer):
def __init__(self,
nfc=32,
min_nfc=32,
input_nc=3,
num_layers=5,
ker_size=3,
padd_size=0):
super(SinGANDiscriminator, self).__init__()
self.head = ConvBlock(input_nc, nfc, ker_size, padd_size, 1)
self.body = nn.Sequential()
for i in range(num_layers - 2):
N = int(nfc / pow(2, (i + 1)))
block = ConvBlock(max(2 * N, min_nfc), max(N, min_nfc), ker_size, padd_size, 1)
self.body.add_sublayer('block%d' % (i + 1), block)
self.tail = nn.Conv2D(max(N, min_nfc), 1, ker_size, 1, padd_size)
def forward(self, x):
x = self.head(x)
x = self.body(x)
x = self.tail(x)
return x
......@@ -38,3 +38,4 @@ from .pan import PAN
from .generater_photopen import SPADEGenerator
from .basicvsr_plus_plus import BasicVSRPlusPlus
from .msvsr import MSVSR
from .generator_singan import SinGANGenerator
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# code was based on https://github.com/tamarott/SinGAN
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from .builder import GENERATORS
class ConvBlock(nn.Sequential):
def __init__(self, in_channel, out_channel, ker_size, padd, stride):
        super(ConvBlock, self).__init__()
        self.add_sublayer('conv', nn.Conv2D(in_channel, out_channel, ker_size, stride, padd))
        self.add_sublayer('norm', nn.BatchNorm2D(out_channel))
        self.add_sublayer('LeakyRelu', nn.LeakyReLU(0.2))
class GeneratorConcatSkip2CleanAdd(nn.Layer):
def __init__(self, nfc=32, min_nfc=32, input_nc=3, num_layers=5, ker_size=3, padd_size=0):
super(GeneratorConcatSkip2CleanAdd, self).__init__()
self.head = ConvBlock(input_nc, nfc, ker_size, padd_size, 1)
self.body = nn.Sequential()
for i in range(num_layers - 2):
N = int(nfc / pow(2, i + 1))
block = ConvBlock(max(2 * N, min_nfc), max(N, min_nfc), ker_size, padd_size, 1)
self.body.add_sublayer('block%d' % (i + 1), block)
self.tail = nn.Sequential(
nn.Conv2D(max(N, min_nfc), input_nc, ker_size, 1, padd_size),
nn.Tanh())
def forward(self, x, y):
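        # x is the padded noise-plus-image input and y the previous-scale image;
        # y is center-cropped to the conv output's size and added back as a
        # residual skip connection.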
x = self.head(x)
x = self.body(x)
x = self.tail(x)
ind = int((y.shape[2] - x.shape[2]) / 2)
y = y[:, :, ind: (y.shape[2] - ind), ind: (y.shape[3] - ind)]
return x + y
@GENERATORS.register()
class SinGANGenerator(nn.Layer):
def __init__(self,
scale_num,
coarsest_shape,
nfc_init=32,
min_nfc_init=32,
input_nc=3,
num_layers=5,
ker_size=3,
noise_zero_pad=True):
super().__init__()
nfc_list = [min(nfc_init * pow(2, math.floor(i / 4)), 128) for i in range(scale_num)]
min_nfc_list = [min(min_nfc_init * pow(2, math.floor(i / 4)), 128) for i in range(scale_num)]
self.generators = nn.LayerList([
GeneratorConcatSkip2CleanAdd(
nfc, min_nfc, input_nc, num_layers,
ker_size, 0
) for nfc, min_nfc in zip(nfc_list, min_nfc_list)])
self._scale_num = scale_num
self._pad_size = int((ker_size - 1) / 2 * num_layers)
self.noise_pad = nn.Pad2D(self._pad_size if noise_zero_pad else 0)
self.image_pad = nn.Pad2D(self._pad_size)
self._noise_zero_pad = noise_zero_pad
self._coarsest_shape = coarsest_shape
self.register_buffer('scale_num', paddle.to_tensor(scale_num, 'int32'), True)
self.register_buffer('coarsest_shape', paddle.to_tensor(coarsest_shape, 'int32'), True)
self.register_buffer('nfc_init', paddle.to_tensor(nfc_init, 'int32'), True)
self.register_buffer('min_nfc_init', paddle.to_tensor(min_nfc_init, 'int32'), True)
self.register_buffer('num_layers', paddle.to_tensor(num_layers, 'int32'), True)
self.register_buffer('ker_size', paddle.to_tensor(ker_size, 'int32'), True)
self.register_buffer('noise_zero_pad', paddle.to_tensor(noise_zero_pad, 'bool'), True)
self.register_buffer('sigma', paddle.ones([scale_num]), True)
self.register_buffer('scale_factor', paddle.ones([1]), True)
self.register_buffer(
'z_fixed',
paddle.randn(
F.pad(
paddle.zeros(coarsest_shape),
[0 if noise_zero_pad else self._pad_size] * 4).shape), True)
def forward(self, z_pyramid, x_prev, stop_scale, start_scale=0):
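        # z_pyramid holds one noise map per scale in [start_scale, stop_scale].
        # At each scale the padded noise (scaled by that scale's sigma) is added
        # to the padded previous output, refined by that scale's generator, and
        # then upsampled to the next scale's resolution.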
stop_scale %= self._scale_num
start_scale %= self._scale_num
for i, scale in enumerate(range(start_scale, stop_scale + 1)):
x_prev = self.image_pad(x_prev)
z = self.noise_pad(z_pyramid[i] * self.sigma[scale]) + x_prev
x_prev = self.generators[scale](
z.detach(),
x_prev.detach()
)
if scale < stop_scale:
x_prev = F.interpolate(x_prev,
F.pad(z_pyramid[i + 1], [0 if self._noise_zero_pad else -self._pad_size] * 4).shape[-2:],
None, 'bicubic')
return x_prev
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
import math
import warnings
from collections import OrderedDict
from sklearn.cluster import KMeans
import paddle
import paddle.nn.functional as F
import paddle.vision.transforms as T
from .base_model import BaseModel
from .builder import MODELS
from .generators.builder import build_generator
from .criterions.builder import build_criterion
from .discriminators.builder import build_discriminator
from ..solver import build_lr_scheduler, build_optimizer
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=UserWarning)
def pad_shape(shape, pad_size):
    # Grow the two spatial dims of `shape` by pad_size on each side
    # (note: modifies the list in place).
    shape[-2] += 2 * pad_size
    shape[-1] += 2 * pad_size
    return shape
def quant(x, num):
n, c, h, w = x.shape
kmeans = KMeans(num, random_state=0).fit(x.transpose([0, 2, 3, 1]).reshape([-1, c]))
centers = kmeans.cluster_centers_
x = centers[kmeans.labels_].reshape([n, h, w, c]).transpose([0, 3, 1, 2])
return paddle.to_tensor(x, 'float32'), centers
def quant_to_centers(x, centers):
n, c, h, w = x.shape
num = centers.shape[0]
kmeans = KMeans(num, init=centers, n_init=1).fit(x.transpose([0, 2, 3, 1]).reshape([-1, c]))
x = centers[kmeans.labels_].reshape([n, h, w, c]).transpose([0, 3, 1, 2])
return paddle.to_tensor(x, 'float32')
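# Illustrative sketch of the palette workflow used during finetuning (shapes
# are assumptions, not part of the original file): learn a small color
# palette from the real image, then snap generated images onto that palette.
#
#     real = paddle.rand([1, 3, 25, 25])
#     quant_real, centers = quant(real, 5)   # 5-color KMeans palette
#     fake_q = quant_to_centers(paddle.rand([1, 3, 25, 25]), centers)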
@MODELS.register()
class SinGANModel(BaseModel):
def __init__(self,
generator,
discriminator,
gan_criterion=None,
recon_criterion=None,
gp_criterion=None,
train_image=None,
scale_factor=0.75,
min_size=25,
is_finetune=False,
finetune_scale=1,
color_num=5,
gen_iters=3,
disc_iters=3,
noise_amp_init=0.1):
super(SinGANModel, self).__init__()
# setup config
self.gen_iters = gen_iters
self.disc_iters = disc_iters
self.min_size = min_size
self.is_finetune = is_finetune
self.noise_amp_init = noise_amp_init
self.train_image = T.Compose([
T.Transpose(),
T.Normalize(127.5, 127.5)
])(cv2.cvtColor(cv2.imread(train_image, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB))
self.train_image = paddle.to_tensor(self.train_image).unsqueeze(0)
self.scale_num = math.ceil(math.log(
self.min_size / min(self.train_image.shape[-2:]),
scale_factor)) + 1
self.scale_factor = math.pow(
self.min_size / min(self.train_image.shape[-2:]),
1 / (self.scale_num - 1))
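        # scale_num is the number of pyramid levels needed for the shorter side
        # to shrink to min_size; the effective scale_factor is then recomputed
        # so that the coarsest level lands exactly on min_size.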
self.reals = [
F.interpolate(self.train_image, None, self.scale_factor ** i, 'bicubic')
for i in range(self.scale_num - 1, -1, -1)]
# build generator
generator['scale_num'] = self.scale_num
        generator['coarsest_shape'] = self.reals[0].shape
self.nets['netG'] = build_generator(generator)
self.niose_pad_size = 0 if generator.get('noise_zero_pad', True) \
else self.nets['netG']._pad_size
self.nets['netG'].scale_factor = paddle.to_tensor(self.scale_factor, 'float32')
# build discriminator
nfc_init = discriminator.pop('nfc_init', 32)
min_nfc_init = discriminator.pop('min_nfc_init', 32)
for i in range(self.scale_num):
discriminator['nfc'] = min(nfc_init * pow(2, math.floor(i / 4)), 128)
discriminator['min_nfc'] = min(min_nfc_init * pow(2, math.floor(i / 4)), 128)
self.nets[f'netD{i}'] = build_discriminator(discriminator)
# build criterion
self.gan_criterion = build_criterion(gan_criterion)
self.recon_criterion = build_criterion(recon_criterion)
self.gp_criterion = build_criterion(gp_criterion)
if self.is_finetune:
self.finetune_scale = finetune_scale
self.quant_real, self.quant_centers = quant(self.reals[finetune_scale], color_num)
# setup training config
self.lr_schedulers = OrderedDict()
self.current_scale = (finetune_scale if self.is_finetune else 0) - 1
self.current_iter = 0
def set_total_iter(self, total_iter):
super().set_total_iter(total_iter)
if self.is_finetune:
self.scale_iters = total_iter
else:
self.scale_iters = math.ceil(total_iter / self.scale_num)
def setup_lr_schedulers(self, cfg):
for i in range(self.scale_num):
self.lr_schedulers[f"lr{i}"] = build_lr_scheduler(cfg)
return self.lr_schedulers
def setup_optimizers(self, lr_schedulers, cfg):
for i in range(self.scale_num):
            self.optimizers[f'optim_netG{i}'] = build_optimizer(
                cfg['optimizer_G'], lr_schedulers[f"lr{i}"], self.nets['netG'].generators[i].parameters())
self.optimizers[f'optim_netD{i}'] = build_optimizer(
cfg['optimizer_D'], lr_schedulers[f"lr{i}"], self.nets[f'netD{i}'].parameters())
return self.optimizers
def setup_input(self, input):
pass
def backward_D(self):
self.loss_D_real = self.gan_criterion(self.pred_real, True, True)
self.loss_D_fake = self.gan_criterion(self.pred_fake, False, True)
self.loss_D_gp = self.gp_criterion(self.nets[f'netD{self.current_scale}'],
self.real_img,
self.fake_img)
self.loss_D = self.loss_D_real + self.loss_D_fake + self.loss_D_gp
self.loss_D.backward()
self.losses[f'scale{self.current_scale}/D_total_loss'] = self.loss_D
self.losses[f'scale{self.current_scale}/D_real_loss'] = self.loss_D_real
self.losses[f'scale{self.current_scale}/D_fake_loss'] = self.loss_D_fake
self.losses[f'scale{self.current_scale}/D_gradient_penalty'] = self.loss_D_gp
def backward_G(self):
self.loss_G_gan = self.gan_criterion(self.pred_fake, True, False)
self.loss_G_recon = self.recon_criterion(self.recon_img, self.real_img)
self.loss_G = self.loss_G_gan + self.loss_G_recon
self.loss_G.backward()
self.losses[f'scale{self.current_scale}/G_adv_loss'] = self.loss_G_gan
self.losses[f'scale{self.current_scale}/G_recon_loss'] = self.loss_G_recon
def scale_prepare(self):
self.real_img = self.reals[self.current_scale]
self.lr_scheduler = self.lr_schedulers[f"lr{self.current_scale}"]
for i in range(self.current_scale):
self.optimizers.pop(f'optim_netG{i}', None)
self.optimizers.pop(f'optim_netD{i}', None)
self.losses.clear()
self.visual_items.clear()
self.visual_items[f'real_img_scale{self.current_scale}'] = self.real_img
if self.is_finetune:
self.visual_items['quant_real'] = self.quant_real
self.recon_prev = paddle.zeros_like(self.reals[0])
if self.current_scale > 0:
z_pyramid = []
for i in range(self.current_scale):
if i == 0:
z = self.nets['netG'].z_fixed
else:
z = paddle.zeros(
pad_shape(
self.reals[i].shape, self.niose_pad_size))
z_pyramid.append(z)
self.recon_prev = self.nets['netG'](
z_pyramid, self.recon_prev,
self.current_scale - 1, 0).detach()
self.recon_prev = F.interpolate(
self.recon_prev, self.real_img.shape[-2:], None, 'bicubic')
if self.is_finetune:
self.recon_prev = quant_to_centers(self.recon_prev, self.quant_centers)
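        # Per-scale noise amplitude as in the paper: the RMSE between the real
        # image and its reconstruction from coarser scales, times noise_amp_init.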
self.nets['netG'].sigma[self.current_scale] = F.mse_loss(
self.real_img, self.recon_prev
).sqrt() * self.noise_amp_init
for i in range(self.scale_num):
self.set_requires_grad(self.nets['netG'].generators[i], i == self.current_scale)
def forward(self):
if not self.is_finetune:
self.fake_img = self.nets['netG'](
self.z_pyramid,
paddle.zeros(
pad_shape(self.z_pyramid[0].shape, -self.niose_pad_size)),
self.current_scale, 0)
else:
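            # Finetune path: synthesize up to finetune_scale, snap the
            # intermediate image onto the learned color palette (mimicking a
            # paint-like input), then continue through the remaining scales.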
x_prev = self.nets['netG'](
self.z_pyramid[:self.finetune_scale],
paddle.zeros(
pad_shape(self.z_pyramid[0].shape, -self.niose_pad_size)),
self.finetune_scale - 1, 0)
x_prev = F.interpolate(x_prev, self.z_pyramid[self.finetune_scale].shape[-2:], None, 'bicubic')
x_prev_quant = quant_to_centers(x_prev, self.quant_centers)
self.fake_img = self.nets['netG'](
self.z_pyramid[self.finetune_scale:],
x_prev_quant,
self.current_scale, self.finetune_scale)
self.recon_img = self.nets['netG'](
[(paddle.randn if self.current_scale == 0 else paddle.zeros)(
pad_shape(self.real_img.shape, self.niose_pad_size))],
self.recon_prev,
self.current_scale,
self.current_scale)
self.pred_real = self.nets[f'netD{self.current_scale}'](self.real_img)
self.pred_fake = self.nets[f'netD{self.current_scale}'](
self.fake_img.detach() if self.update_D else self.fake_img)
self.visual_items[f'fake_img_scale{self.current_scale}'] = self.fake_img
self.visual_items[f'recon_img_scale{self.current_scale}'] = self.recon_img
if self.is_finetune:
self.visual_items[f'prev_img_scale{self.current_scale}'] = x_prev
self.visual_items[f'quant_prev_img_scale{self.current_scale}'] = x_prev_quant
def train_iter(self, optimizers=None):
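        # SinGAN trains one scale at a time: every scale_iters iterations the
        # model advances to the next (finer) scale; within a scale it alternates
        # disc_iters discriminator steps with gen_iters generator steps.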
if self.current_iter % self.scale_iters == 0:
self.current_scale += 1
self.scale_prepare()
self.z_pyramid = [paddle.randn(
pad_shape(self.reals[i].shape, self.niose_pad_size))
for i in range(self.current_scale + 1)]
self.update_D = (self.current_iter % (self.disc_iters + self.gen_iters) < self.disc_iters)
self.set_requires_grad(self.nets[f'netD{self.current_scale}'], self.update_D)
self.forward()
if self.update_D:
optimizers[f'optim_netD{self.current_scale}'].clear_grad()
self.backward_D()
optimizers[f'optim_netD{self.current_scale}'].step()
else:
optimizers[f'optim_netG{self.current_scale}'].clear_grad()
self.backward_G()
optimizers[f'optim_netG{self.current_scale}'].step()
self.current_iter += 1
def test_iter(self, metrics=None):
z_pyramid = [paddle.randn(
pad_shape(self.reals[i].shape, self.niose_pad_size))
for i in range(self.scale_num)]
self.nets['netG'].eval()
fake_img = self.nets['netG'](
z_pyramid,
paddle.zeros(pad_shape(z_pyramid[0].shape, -self.niose_pad_size)),
self.scale_num - 1, 0)
self.visual_items['fake_img_test'] = fake_img
with paddle.no_grad():
if metrics is not None:
for metric in metrics.values():
metric.update(fake_img, self.train_image)
self.nets['netG'].train()
class InferGenerator(paddle.nn.Layer):
def set_config(self, generator, noise_shapes, scale_num):
self.generator = generator
self.noise_shapes = noise_shapes
self.scale_num = scale_num
def forward(self, x):
coarsest_shape = self.generator._coarsest_shape
z_pyramid = [paddle.randn(shp) for shp in self.noise_shapes]
x_init = paddle.zeros(coarsest_shape)
out = self.generator(z_pyramid, x_init, self.scale_num - 1, 0)
return out
def export_model(self,
export_model=None,
output_dir=None,
inputs_size=None,
export_serving_model=False):
noise_shapes = [pad_shape(x.shape, self.niose_pad_size) for x in self.reals]
infer_generator = self.InferGenerator()
infer_generator.set_config(self.nets['netG'], noise_shapes, self.scale_num)
paddle.jit.save(infer_generator,
os.path.join(output_dir, "singan_random_sample"),
input_spec=[1])
===========================train_params===========================
model_name:singan
python:python3.7
gpu_list:0
##
auto_cast:null
total_iters:lite_train_lite_infer=100|whole_train_whole_infer=100000
output_dir:./output/
snapshot_config.interval:lite_train_lite_infer=25|whole_train_whole_infer=10000
pretrained_model:null
train_model_name:singan*/*checkpoint.pdparams
train_infer_img_dir:./data/stone
null:null
##
trainer:norm_train
norm_train:tools/main.py -c configs/singan_universal.yaml --seed 123 -o log_config.interval=50
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:null
null:null
##
===========================infer_params===========================
--output_dir:./output/
load:null
norm_export:tools/export_model.py -c configs/singan_universal.yaml --inputs_size=1 --load
quant_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
inference_dir:singan_random_sample
train_model:./inference/singan/singan_random_sample
infer_export:null
infer_quant:False
inference:tools/inference.py --model_type singan --seed 123 -c configs/singan_universal.yaml --output_path test_tipc/output/
--device:cpu
null:null
null:null
null:null
null:null
null:null
--model_path:
null:null
null:null
--benchmark:True
null:null
\ No newline at end of file
......@@ -15,6 +15,7 @@ The main program for the basic Linux training and inference tests is `test_train_inference_pytho
| FOMM |FOMM | generation | supported | multi-machine multi-GPU | | |
| BasicVSR |BasicVSR | super resolution | supported | multi-machine multi-GPU | | |
|PP-MSVSR|PP-MSVSR | super resolution|
|SinGAN|SinGAN | generation|supported|
- Inference: the supported inference features are summarized as follows,
......@@ -55,25 +56,25 @@ The main program for the basic Linux training and inference tests is `test_train_inference_pytho
```shell
bash test_tipc/prepare.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'lite_train_lite_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'lite_train_lite_infer'
```
- Mode 2: lite_train_whole_infer, which trains on a small amount of data and runs inference on a moderate amount, to verify that the trained model can run inference at a reasonable speed;
```shell
bash test_tipc/prepare.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'lite_train_whole_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'lite_train_whole_infer'
```
- Mode 3: whole_infer, which skips training and runs inference on the full data, walking through open-source model evaluation and dynamic-to-static export to check the inference model's latency and accuracy;
```shell
bash test_tipc/prepare.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'whole_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'whole_infer'
```
- Mode 4: whole_train_whole_infer (CE), which trains and runs inference on the full data, to verify training accuracy, inference accuracy, and inference speed;
```shell
bash test_tipc/prepare.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'whole_train_whole_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'whole_train_whole_infer'
```
After running the corresponding command, the run logs are saved automatically under the `test_tipc/output` folder. For example, in 'lite_train_lite_infer' mode the training-plus-inference chain is run, so the `test_tipc/output` folder contains the following files:
```
......
......@@ -66,7 +66,13 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
rm -rf ./data/DIV2K*
wget -nc -P ./data/ https://paddlegan.bj.bcebos.com/datasets/DIV2KandSet14.tar --no-check-certificate
cd ./data/ && tar xf DIV2KandSet14.tar && cd ../ ;;
singan)
    rm -rf ./data/SinGAN*
    wget -nc -P ./data/ https://paddlegan.bj.bcebos.com/datasets/singan-official_images.zip --no-check-certificate
    cd ./data/ && unzip -q singan-official_images.zip && cd ../
    mkdir -p ./data/singan
    mv ./data/SinGAN-official_images/Images/stone.png ./data/singan ;;
esac
elif [ ${MODE} = "whole_train_whole_infer" ];then
if [ ${model_name} == "pix2pix" ]; then
rm -rf ./data/facades*
......@@ -76,6 +82,12 @@ elif [ ${MODE} = "whole_train_whole_infer" ];then
rm -rf ./data/horse2zebra*
wget -nc -P ./data/ https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/horse2zebra.zip --no-check-certificate
cd ./data/ && unzip horse2zebra.zip && cd ../
elif [ ${model_name} == "singan" ]; then
rm -rf ./data/SinGAN*
wget -nc -P ./data/ https://paddlegan.bj.bcebos.com/datasets/singan-official_images.zip --no-check-certificate
cd ./data/ && unzip -q singan-official_images.zip && cd ../
mkdir -p ./data/singan
mv ./data/SinGAN-official_images/Images/stone.png ./data/singan
fi
elif [ ${MODE} = "lite_train_whole_infer" ];then
if [ ${model_name} == "pix2pix" ]; then
......@@ -102,6 +114,12 @@ elif [ ${MODE} = "lite_train_whole_infer" ];then
rm -rf ./data/reds*
wget -nc -P ./data/ https://paddlegan.bj.bcebos.com/datasets/reds_lite.tar --no-check-certificate
cd ./data/ && tar xf reds_lite.tar && cd ../
elif [ ${model_name} == "singan" ]; then
rm -rf ./data/SinGAN*
wget -nc -P ./data/ https://paddlegan.bj.bcebos.com/datasets/singan-official_images.zip --no-check-certificate
cd ./data/ && unzip -q singan-official_images.zip && cd ../
mkdir -p ./data/singan
mv ./data/SinGAN-official_images/Images/stone.png ./data/singan
fi
elif [ ${MODE} = "whole_infer" ];then
if [ ${model_name} = "pix2pix" ]; then
......@@ -145,6 +163,14 @@ elif [ ${MODE} = "whole_infer" ];then
wget -nc -P ./inference https://paddlegan.bj.bcebos.com/static_model/msvsr.tar --no-check-certificate
cd ./inference && tar xf msvsr.tar && cd ../
cd ./data/ && tar xf reds_lite.tar && cd ../
elif [ ${model_name} == "singan" ]; then
rm -rf ./data/SinGAN*
wget -nc -P ./data/ https://paddlegan.bj.bcebos.com/datasets/singan-official_images.zip --no-check-certificate
wget -nc -P ./inference https://paddlegan.bj.bcebos.com/datasets/singan.zip --no-check-certificate
cd ./data/ && unzip -q singan-official_images.zip && cd ../
cd ./inference/ && unzip -q singan.zip && cd ../
mkdir -p ./data/singan
mv ./data/SinGAN-official_images/Images/stone.png ./data/singan
fi
fi
......@@ -25,6 +25,7 @@
| FOMM |FOMM | generation | supported | multi-machine multi-GPU | | |
| BasicVSR |BasicVSR | super resolution | supported | multi-machine multi-GPU | | |
|PP-MSVSR|PP-MSVSR | super resolution|
|SinGAN|SinGAN | generation| supported |
......
......@@ -15,7 +15,7 @@ from ppgan.metrics import build_metric
MODEL_CLASSES = ["pix2pix", "cyclegan", "wav2lip", "esrgan", \
"edvr", "fom", "stylegan2", "basicvsr", "msvsr"]
"edvr", "fom", "stylegan2", "basicvsr", "msvsr", "singan"]
def parse_args():
......@@ -304,6 +304,15 @@ def main():
metric_file = os.path.join(args.output_path, model_type, "metric.txt")
for metric in metrics.values():
metric.update(out_img, gt_img, is_seq=True)
elif model_type == "singan":
predictor.run()
prediction = output_handle.copy_to_cpu()
prediction = paddle.to_tensor(prediction)
image_numpy = tensor2img(prediction, min_max)
save_image(image_numpy, os.path.join(args.output_path, "singan/{}.png".format(i)))
metric_file = os.path.join(args.output_path, "singan/metric.txt")
for metric in metrics.values():
metric.update(prediction, data['A'])
if metrics:
log_file = open(metric_file, 'a')
......