diff --git a/applications/tools/singan.py b/applications/tools/singan.py new file mode 100755 index 0000000000000000000000000000000000000000..c668047bd31063e7c892ad41f105c195effd8333 --- /dev/null +++ b/applications/tools/singan.py @@ -0,0 +1,133 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +import paddle +from ppgan.apps import SinGANPredictor + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--output_path", + type=str, + default='output_dir', + help="path to output image dir") + + parser.add_argument("--weight_path", + type=str, + default=None, + help="path to model checkpoint path") + + parser.add_argument("--pretrained_model", + type=str, + default=None, + help="a pretianed model, only trees, stone, mountains, birds, and lightning are implemented.") + + parser.add_argument("--mode", + type=str, + default="random_sample", + help="type of model for loading pretrained model") + + parser.add_argument("--generate_start_scale", + type=int, + default=0, + help="sample random seed for model's image generation") + + parser.add_argument("--seed", + type=int, + default=None, + help="sample random seed for model's image generation") + + parser.add_argument("--scale_h", + type=float, + default=1.0, + help="horizontal scale") + + parser.add_argument("--scale_v", + type=float, + default=1.0, + help="vertical scale") + + parser.add_argument("--ref_image", + type=str, + default=None, + help="reference image for harmonization, editing and paint2image") + + parser.add_argument("--mask_image", + type=str, + default=None, + help="mask image for harmonization and editing") + + parser.add_argument("--sr_factor", + type=float, + default=4.0, + help="scale for super resolution") + + parser.add_argument("--animation_alpha", + type=float, + default=0.9, + help="a parameter determines how close the frames of the sequence remain to the training image") + + parser.add_argument("--animation_beta", + type=float, + default=0.9, + help="a parameter controls the smoothness and rate of change in the generated clip") + + parser.add_argument("--animation_frames", + type=int, + default=20, + help="frame number of output animation when mode is animation") + + parser.add_argument("--animation_duration", + type=float, + default=0.1, + help="duration of each frame in animation") + + parser.add_argument("--n_row", + type=int, + default=5, + help="row number of output image grid") + + parser.add_argument("--n_col", + type=int, + default=3, + help="column number of output image grid") + + parser.add_argument("--cpu", + dest="cpu", + action="store_true", + help="cpu mode.") + + args = parser.parse_args() + + if args.cpu: + paddle.set_device('cpu') + + predictor = SinGANPredictor(args.output_path, + args.weight_path, + args.pretrained_model, + args.seed) + predictor.run(args.mode, + args.generate_start_scale, + args.scale_h, + args.scale_v, + args.ref_image, + args.mask_image, + args.sr_factor, + args.animation_alpha, + args.animation_beta, + args.animation_frames, + args.animation_duration, + args.n_row, + args.n_col) diff --git a/configs/singan_animation.yaml b/configs/singan_animation.yaml new file mode 100755 index 0000000000000000000000000000000000000000..e12b69f297751acc1835257658e1a560735e3730 --- /dev/null +++ b/configs/singan_animation.yaml @@ -0,0 +1,79 @@ +total_iters: 100000 +output_dir: output_dir +export_model: null + +model: + name: SinGANModel + generator: + name: SinGANGenerator + nfc_init: 32 + min_nfc_init: 32 + noise_zero_pad: False + discriminator: + name: SinGANDiscriminator + nfc_init: 32 + min_nfc_init: 32 + gan_criterion: + name: GANLoss + gan_mode: wgangp + loss_weight: 1.0 + recon_criterion: + name: MSELoss + loss_weight: 10.0 + gp_criterion: + name: GradientPenalty + loss_weight: 0.1 + train_image: data/singan/stone.png + scale_factor: 0.75 + min_size: 25 + is_finetune: False + +dataset: + train: + name: EmptyDataset + test: + name: SingleDataset + dataroot: data/singan + num_workers: 0 + batch_size: 1 + preprocess: + - name: LoadImageFromFile + key: A + - name: Transforms + input_keys: [A] + pipeline: + - name: Transpose + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] + +lr_scheduler: + name: MultiStepDecay + learning_rate: 0.0005 + milestones: [9600] + gamma: 0.1 + +optimizer: + optimizer_G: + name: Adam + beta1: 0.5 + beta2: 0.999 + optimizer_D: + name: Adam + beta1: 0.5 + beta2: 0.999 + +log_config: + interval: 100 + visiual_interval: 2000 + +snapshot_config: + interval: 10000 + +validate: + interval: -1 + save_img: True + metrics: + fid: # metric name, can be arbitrary + name: FID + batch_size: 1 diff --git a/configs/singan_finetune.yaml b/configs/singan_finetune.yaml new file mode 100755 index 0000000000000000000000000000000000000000..0a86559719c81d5704bbe70b379481245b127850 --- /dev/null +++ b/configs/singan_finetune.yaml @@ -0,0 +1,72 @@ +total_iters: 12000 +output_dir: output_dir + +model: + name: SinGANModel + generator: + name: SinGANGenerator + nfc_init: 32 + min_nfc_init: 32 + noise_zero_pad: True + discriminator: + name: SinGANDiscriminator + nfc_init: 32 + min_nfc_init: 32 + gan_criterion: + name: GANLoss + gan_mode: wgangp + loss_weight: 1.0 + recon_criterion: + name: MSELoss + loss_weight: 10.0 + gp_criterion: + name: GradientPenalty + loss_weight: 0.1 + train_image: data/singan/stone.png + scale_factor: 0.75 + min_size: 25 + is_finetune: True + finetune_scale: 1 + color_num: 5 + +dataset: + train: + name: EmptyDataset + test: + name: SingleDataset + dataroot: data/singan + num_workers: 0 + batch_size: 1 + preprocess: + - name: LoadImageFromFile + key: A + - name: Transforms + input_keys: [A] + pipeline: + - name: Transpose + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] + +lr_scheduler: + name: MultiStepDecay + learning_rate: 0.0005 + milestones: [9600] + gamma: 0.1 + +optimizer: + optimizer_G: + name: Adam + beta1: 0.5 + beta2: 0.999 + optimizer_D: + name: Adam + beta1: 0.5 + beta2: 0.999 + +log_config: + interval: 100 + visiual_interval: 2000 + +snapshot_config: + interval: 4000 diff --git a/configs/singan_sr.yaml b/configs/singan_sr.yaml new file mode 100755 index 0000000000000000000000000000000000000000..00751ef06548bf46337af0da1fbbfabc6484a003 --- /dev/null +++ b/configs/singan_sr.yaml @@ -0,0 +1,79 @@ +total_iters: 100000 +output_dir: output_dir +export_model: null + +model: + name: SinGANModel + generator: + name: SinGANGenerator + nfc_init: 32 + min_nfc_init: 32 + noise_zero_pad: True + discriminator: + name: SinGANDiscriminator + nfc_init: 32 + min_nfc_init: 32 + gan_criterion: + name: GANLoss + gan_mode: wgangp + loss_weight: 1.0 + recon_criterion: + name: MSELoss + loss_weight: 100.0 + gp_criterion: + name: GradientPenalty + loss_weight: 0.1 + train_image: data/singan/stone.png + scale_factor: 0.793701 # (1/2)^(1/3) + min_size: 18 + is_finetune: False + +dataset: + train: + name: EmptyDataset + test: + name: SingleDataset + dataroot: data/singan + num_workers: 0 + batch_size: 1 + preprocess: + - name: LoadImageFromFile + key: A + - name: Transforms + input_keys: [A] + pipeline: + - name: Transpose + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] + +lr_scheduler: + name: MultiStepDecay + learning_rate: 0.0005 + milestones: [9600] + gamma: 0.1 + +optimizer: + optimizer_G: + name: Adam + beta1: 0.5 + beta2: 0.999 + optimizer_D: + name: Adam + beta1: 0.5 + beta2: 0.999 + +log_config: + interval: 100 + visiual_interval: 2000 + +snapshot_config: + interval: 10000 + +validate: + interval: -1 + save_img: True + metrics: + fid: # metric name, can be arbitrary + name: FID + batch_size: 1 diff --git a/configs/singan_universal.yaml b/configs/singan_universal.yaml new file mode 100755 index 0000000000000000000000000000000000000000..6e50b7582e988a78cc78830599cbdbef90fa448f --- /dev/null +++ b/configs/singan_universal.yaml @@ -0,0 +1,79 @@ +total_iters: 100000 +output_dir: output_dir +export_model: null + +model: + name: SinGANModel + generator: + name: SinGANGenerator + nfc_init: 32 + min_nfc_init: 32 + noise_zero_pad: True + discriminator: + name: SinGANDiscriminator + nfc_init: 32 + min_nfc_init: 32 + gan_criterion: + name: GANLoss + gan_mode: wgangp + loss_weight: 1.0 + recon_criterion: + name: MSELoss + loss_weight: 10.0 + gp_criterion: + name: GradientPenalty + loss_weight: 0.1 + train_image: data/singan/stone.png + scale_factor: 0.75 + min_size: 25 + is_finetune: False + +dataset: + train: + name: EmptyDataset + test: + name: SingleDataset + dataroot: data/singan + num_workers: 0 + batch_size: 1 + preprocess: + - name: LoadImageFromFile + key: A + - name: Transforms + input_keys: [A] + pipeline: + - name: Transpose + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] + +lr_scheduler: + name: MultiStepDecay + learning_rate: 0.0005 + milestones: [9600] + gamma: 0.1 + +optimizer: + optimizer_G: + name: Adam + beta1: 0.5 + beta2: 0.999 + optimizer_D: + name: Adam + beta1: 0.5 + beta2: 0.999 + +log_config: + interval: 100 + visiual_interval: 2000 + +snapshot_config: + interval: 10000 + +validate: + interval: -1 + save_img: True + metrics: + fid: # metric name, can be arbitrary + name: FID + batch_size: 1 diff --git a/docs/en_US/tutorials/singan.md b/docs/en_US/tutorials/singan.md new file mode 100755 index 0000000000000000000000000000000000000000..0fbe4b30aa428f522d4476c4c2bed0c745bb3a20 --- /dev/null +++ b/docs/en_US/tutorials/singan.md @@ -0,0 +1,147 @@ +# SinGAN + +## Introduction + +SinGAN is a novel unconditional* generative model that is trained using a single image. Traditionally, GANs have been trained on class-specific datasets and capture common features among images of the same class. SinGAN, on the other hand, learns from the overlapping patches at multiple scales of a particular image and learns its internal statistics. Once trained, SinGAN can produce assorted high-quality images of arbitrary sizes and aspect ratios that semantically resemble the training image but contain new object configurations and structures. + +** An unconditional GAN creates samples purely from randomized input, while a conditional GAN generates samples based on a "class label" that controls the type of image generated.* + +## Usage + +### About Config Files + +We provide 4 config files for SinGAN model: + +- `singan_universal.yaml` +- `singan_sr.yaml` +- `singan_animation.yaml` +- `singan_finetune.yaml` + +Among them, `singan_universal.yaml` is a config file suit for all tasks, `singan_sr.yaml` is a config file for super resolution recommended by the author, `singan_animation.yaml` is a config file for animation recommended by the author. Results showed in this document were trained with `singan_universal.yaml`. For *Paint to Image*, we will get better results by finetuning with `singan_finetune.yaml` after training with `singan_universal.yaml`. + +### Train + +Start training: + +```bash +python tools/main.py -c configs/singan_universal.yaml \ + -o model.train_image=train_image.png +``` + +Finetune for "Paint2Image": + +```bash +python tools/main.py -c configs/singan_finetune.yaml \ + -o model.train_image=train_image.png \ + --load weight_saved_in_training.pdparams +``` + +### Evaluation +Running following command, a random image will be generated. It should be noted that `train_image.png` ought to be in directory `data/singan`, or you can modify the value of `dataset.test.dataroot` in config file manually. Besides, this directory must contain only one image, which is `train_image.png`. +```bash +python tools/main.py -c configs/singan_universal.yaml \ + -o model.train_image=train_image.png \ + --load weight_saved_in_training.pdparams \ + --evaluate-only +``` + +### Extract Weight for Generator + +After training, we need use ``tools/extract_weight.py`` to extract weight of generator from training model which includes both generator and discriminator. Then we can use `applications/tools/singan.py` to achieve diverse application of SinGAN. + +```bash +python tools/extract_weight.py weight_saved_in_training.pdparams --net-name netG --output weight_of_generator.pdparams +``` + +### Inference and Result + +*Attention: to use pretrained model, you can replace `--weight_path weight_of_generator.pdparams` in the following commands by `--pretrained_model `, where `` can be `trees`, `stone`, `mountains`, `birds` or `lightning`.* + +#### Random Sample + +```bash +python applications/tools/singan.py \ + --weight_path weight_of_generator.pdparams \ + --mode random_sample \ + --scale_v 1 \ # vertical scale + --scale_h 1 \ # horizontal scale + --n_row 2 \ + --n_col 2 +``` + +|training image|result| +| ---- | ---- | +|![birds](https://user-images.githubusercontent.com/91609464/153211448-2614407b-a30b-467c-b1e5-7db88ff2ca74.png)|![birds-random_sample](https://user-images.githubusercontent.com/91609464/153211573-1af108ba-ad42-438a-94a9-e8f8f3e091eb.png)| + +#### Editing & Harmonization + +```bash +python applications/tools/singan.py \ + --weight_path weight_of_generator.pdparams \ + --mode editing \ # or harmonization + --ref_image editing_image.png \ + --mask_image mask_of_editing.png \ + --generate_start_scale 2 +``` + + +|training image|editing image|mask of editing|result| +|----|----|----|----| +|![stone](https://user-images.githubusercontent.com/91609464/153211778-bb94d29d-a2b4-4d04-9900-89b20ae90b90.png)|![stone-edit](https://user-images.githubusercontent.com/91609464/153211867-df3d9035-d320-45ec-8043-488e9da49bff.png)|![stone-edit-mask](https://user-images.githubusercontent.com/91609464/153212047-9620f73c-58d9-48ed-9af7-a11470ad49c8.png)|![stone-edit-mask-result](https://user-images.githubusercontent.com/91609464/153211942-e0e639c2-3ea6-4ade-852b-73757b0bbab0.png)| + +#### Super Resolution + +```bash +python applications/tools/singan.py \ + --weight_path weight_of_generator.pdparams \ + --mode sr \ + --ref_image image_to_sr.png \ + --sr_factor 4 +``` +|training image|result| +| ---- | ---- | +|![mountains](https://user-images.githubusercontent.com/91609464/153212146-efbbbbd6-e045-477a-87ae-10f121341060.png)|![sr](https://user-images.githubusercontent.com/91609464/153212176-530b7075-e72b-4c05-ad3e-2f2cdfc76dea.png)| + + +#### Animation + +```bash +python applications/tools/singan.py \ + --weight_path weight_of_generator.pdparams \ + --mode animation \ + --animation_alpha 0.6 \ # this parameter determines how close the frames of the sequence remain to the training image + --animation_beta 0.7 \ # this parameter controls the smoothness and rate of change in the generated clip + --animation_frames 20 \ # frames of animation + --animation_duration 0.1 # duration of each frame +``` + +|training image|animation| +| ---- | ---- | +|![lightning](https://user-images.githubusercontent.com/91609464/153212291-6f8976bd-e873-423e-ab62-77997df2df7a.png)|![animation](https://user-images.githubusercontent.com/91609464/153212372-0543e6d6-5842-472b-af50-8b22670270ae.gif)| + + +#### Paint to Image +```bash +python applications/tools/singan.py \ + --weight_path weight_of_generator.pdparams \ + --mode paint2image \ + --ref_image paint.png \ + --generate_start_scale 2 +``` +|training image|paint|result|result after finetune| +|----|----|----|----| +|![trees](https://user-images.githubusercontent.com/91609464/153212536-0bb6489d-d488-49e0-a6b5-90ef578c9e4f.png)|![trees-paint](https://user-images.githubusercontent.com/91609464/153212511-ef2c6bea-1f8c-4685-951b-8db589414dfe.png)|![trees-paint2image](https://user-images.githubusercontent.com/91609464/153212531-c080c705-fd58-4ade-aac6-e2134838a75f.png)|![trees-paint2image-finetuned](https://user-images.githubusercontent.com/91609464/153212529-51d8d29b-6b58-4f29-8792-4b2b04f9266e.png)| + +## Reference + +``` +@misc{shaham2019singan, + title={SinGAN: Learning a Generative Model from a Single Natural Image}, + author={Tamar Rott Shaham and Tali Dekel and Tomer Michaeli}, + year={2019}, + eprint={1905.01164}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + diff --git a/docs/zh_CN/tutorials/singan.md b/docs/zh_CN/tutorials/singan.md new file mode 100755 index 0000000000000000000000000000000000000000..1656a55a3362253db5036850be3f2efe253ec25a --- /dev/null +++ b/docs/zh_CN/tutorials/singan.md @@ -0,0 +1,147 @@ +# SinGAN + +## 简介 + +SinGAN是一种新的可以从单个自然图像中学习的无条件生成模型。该模型包含一个全卷积生成对抗网络的金字塔结构,每个生成对抗网络负责学习不同在不同比例的图像上的块分布。这允许生成任意大小和纵横比的新样本,具有显著的可变性,但同时保持训练图像的全局结构和精细纹理。与以往单一图像生成方案相比,该方法不局限于纹理图像,也没有条件(即从噪声中生成样本)。 + +## 使用方法 + +### 配置说明 + +我们为SinGAN提供了4个配置文件: + +- `singan_universal.yaml` +- `singan_sr.yaml` +- `singan_animation.yaml` +- `singan_finetune.yaml` + +其中`singan_universal.yaml`对所有任务都适用配置,`singan_sr.yaml`是官方建议的用于超分任务的配置,`singan_animation.yaml`是官方建议的用于“静图转动”任务的配置。本文档展示的结果均由`singan_universal.yaml`训练而来。对于手绘转照片任务,使用`singan_universal.yaml`训练后再用`singan_finetune.yaml`微调会得到更好的结果。 + +### 训练 + +启动训练: + +```bash +python tools/main.py -c configs/singan_universal.yaml \ + -o model.train_image=训练图片.png +``` + +为“手绘转照片”任务微调: + +```bash +python tools/main.py -c configs/singan_finetune.yaml \ + -o model.train_image=训练图片.png \ + --load 已经训练好的模型.pdparams +``` + +### 测试 +运行下面的命令,可以随机生成一张图片。需要注意的是,`训练图片.png`应当位于`data/singan`目录下,或者手动调整配置文件中`dataset.test.dataroot`的值。此外,这个目录中只能包含`训练图片.png`这一张图片。 +```bash +python tools/main.py -c configs/singan_universal.yaml \ + -o model.train_image=训练图片.png \ + --load 已经训练好的模型.pdparams \ + --evaluate-only +``` + +### 导出生成器权重 + +训练结束后,需要使用 ``tools/extract_weight.py`` 来从训练模型(包含了生成器和判别器)中提取生成器的权重来给`applications/tools/singan.py`进行推理,以实现SinGAN的各种应用。 + +```bash +python tools/extract_weight.py 训练过程中保存的权重文件.pdparams --net-name netG --output 生成器权重文件.pdparams +``` + +### 推理及结果展示 + +*注意:您可以下面的命令中的`--weight_path 生成器权重文件.pdparams`可以换成`--pretrained_model `来体验训练好的模型,其中``可以是`trees`、`stone`、`mountains`、`birds`和`lightning`。* + +#### 随机采样 + +```bash +python applications/tools/singan.py \ + --weight_path 生成器权重文件.pdparams \ + --mode random_sample \ + --scale_v 1 \ # vertical scale + --scale_h 1 \ # horizontal scale + --n_row 2 \ + --n_col 2 +``` + +|训练图片|随机采样结果| +| ---- | ---- | +|![birds](https://user-images.githubusercontent.com/91609464/153211448-2614407b-a30b-467c-b1e5-7db88ff2ca74.png)|![birds-random_sample](https://user-images.githubusercontent.com/91609464/153211573-1af108ba-ad42-438a-94a9-e8f8f3e091eb.png)| + +#### 图像编辑&风格和谐化 + +```bash +python applications/tools/singan.py \ + --weight_path 生成器权重文件.pdparams \ + --mode editing \ # or harmonization + --ref_image 编辑后的图片.png \ + --mask_image 编辑区域标注图片.png \ + --generate_start_scale 2 +``` + + +|训练图片|编辑图片|编辑区域标注|SinGAN生成| +|----|----|----|----| +|![stone](https://user-images.githubusercontent.com/91609464/153211778-bb94d29d-a2b4-4d04-9900-89b20ae90b90.png)|![stone-edit](https://user-images.githubusercontent.com/91609464/153211867-df3d9035-d320-45ec-8043-488e9da49bff.png)|![stone-edit-mask](https://user-images.githubusercontent.com/91609464/153212047-9620f73c-58d9-48ed-9af7-a11470ad49c8.png)|![stone-edit-mask-result](https://user-images.githubusercontent.com/91609464/153211942-e0e639c2-3ea6-4ade-852b-73757b0bbab0.png)| + +#### 超分 + +```bash +python applications/tools/singan.py \ + --weight_path 生成器权重文件.pdparams \ + --mode sr \ + --ref_image 待超分的图片亦即用于训练的图片.png \ + --sr_factor 4 +``` +|训练图片|超分结果| +| ---- | ---- | +|![mountains](https://user-images.githubusercontent.com/91609464/153212146-efbbbbd6-e045-477a-87ae-10f121341060.png)|![sr](https://user-images.githubusercontent.com/91609464/153212176-530b7075-e72b-4c05-ad3e-2f2cdfc76dea.png)| + + +#### 静图转动 + +```bash +python applications/tools/singan.py \ + --weight_path 生成器权重文件.pdparams \ + --mode animation \ + --animation_alpha 0.6 \ # this parameter determines how close the frames of the sequence remain to the training image + --animation_beta 0.7 \ # this parameter controls the smoothness and rate of change in the generated clip + --animation_frames 20 \ # frames of animation + --animation_duration 0.1 # duration of each frame +``` + +|训练图片|动画效果| +| ---- | ---- | +|![lightning](https://user-images.githubusercontent.com/91609464/153212291-6f8976bd-e873-423e-ab62-77997df2df7a.png)|![animation](https://user-images.githubusercontent.com/91609464/153212372-0543e6d6-5842-472b-af50-8b22670270ae.gif)| + + +#### 手绘转照片 +```bash +python applications/tools/singan.py \ + --weight_path 生成器权重文件.pdparams \ + --mode paint2image \ + --ref_image 手绘图片.png \ + --generate_start_scale 2 +``` +|训练图片|手绘图片|SinGAN生成|SinGAN微调后生成| +|----|----|----|----| +|![trees](https://user-images.githubusercontent.com/91609464/153212536-0bb6489d-d488-49e0-a6b5-90ef578c9e4f.png)|![trees-paint](https://user-images.githubusercontent.com/91609464/153212511-ef2c6bea-1f8c-4685-951b-8db589414dfe.png)|![trees-paint2image](https://user-images.githubusercontent.com/91609464/153212531-c080c705-fd58-4ade-aac6-e2134838a75f.png)|![trees-paint2image-finetuned](https://user-images.githubusercontent.com/91609464/153212529-51d8d29b-6b58-4f29-8792-4b2b04f9266e.png)| + + + +## 参考文献 + +``` +@misc{shaham2019singan, + title={SinGAN: Learning a Generative Model from a Single Natural Image}, + author={Tamar Rott Shaham and Tali Dekel and Tomer Michaeli}, + year={2019}, + eprint={1905.01164}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + diff --git a/ppgan/apps/__init__.py b/ppgan/apps/__init__.py index 32ee395c25139616045542c58fd3a4820a6fc9b1..15d7d250ac7629551a86a1d4bcd6563a9992c811 100644 --- a/ppgan/apps/__init__.py +++ b/ppgan/apps/__init__.py @@ -34,3 +34,4 @@ from .photopen_predictor import PhotoPenPredictor from .recurrent_vsr_predictor import (PPMSVSRPredictor, BasicVSRPredictor, \ BasiVSRPlusPlusPredictor, IconVSRPredictor, \ PPMSVSRLargePredictor) +from .singan_predictor import SinGANPredictor diff --git a/ppgan/apps/singan_predictor.py b/ppgan/apps/singan_predictor.py new file mode 100755 index 0000000000000000000000000000000000000000..a75f08c3b9dce80b1f9a48252ed4352bbddee040 --- /dev/null +++ b/ppgan/apps/singan_predictor.py @@ -0,0 +1,223 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import os +import cv2 +import math +import skimage +import imageio + +import paddle +import paddle.nn.functional as F +import paddle.vision.transforms as T + +from .base_predictor import BasePredictor +from ..models.singan_model import pad_shape +from ppgan.models.generators import SinGANGenerator +from ppgan.utils.download import get_path_from_url +from ppgan.utils.visual import tensor2img, save_image, make_grid + +pretrained_weights_url = { + 'trees': 'https://paddlegan.bj.bcebos.com/models/singan_universal_trees.pdparams', + 'stone': 'https://paddlegan.bj.bcebos.com/models/singan_universal_stone.pdparams', + 'mountains': 'https://paddlegan.bj.bcebos.com/models/singan_universal_mountains.pdparams', + 'birds': 'https://paddlegan.bj.bcebos.com/models/singan_universal_birds.pdparams', + 'lightning': 'https://paddlegan.bj.bcebos.com/models/singan_universal_lightning.pdparams' +} + + +def imread(path): + return cv2.cvtColor( + cv2.imread( + path, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB) + +def imgpath2tensor(path): + return paddle.to_tensor(T.Compose([ + T.Transpose(), + T.Normalize(127.5, 127.5) + ])(imread(path))).unsqueeze(0) + +def dilate_mask(mask, mode): + if mode == "harmonization": + element = skimage.morphology.disk(radius=7) + elif mode == "editing": + element = skimage.morphology.disk(radius=20) + else: + raise NotImplementedError('mode %s is not implemented' % mode) + mask = skimage.morphology.binary_dilation(mask, selem=element) + mask = skimage.filters.gaussian(mask, sigma=5) + return mask + +class SinGANPredictor(BasePredictor): + def __init__(self, + output_path='output_dir', + weight_path=None, + pretrained_model=None, + seed=None): + self.output_path = output_path + if weight_path is None: + if pretrained_model in pretrained_weights_url.keys(): + weight_path = get_path_from_url( + pretrained_weights_url[pretrained_model]) + else: + raise ValueError( + 'Predictor need a weight path or a pretrained model.') + checkpoint = paddle.load(weight_path) + + self.scale_num = checkpoint['scale_num'].item() + self.coarsest_shape = checkpoint['coarsest_shape'].tolist() + self.nfc_init = checkpoint['nfc_init'].item() + self.min_nfc_init = checkpoint['min_nfc_init'].item() + self.num_layers = checkpoint['num_layers'].item() + self.ker_size = checkpoint['ker_size'].item() + self.noise_zero_pad = checkpoint['noise_zero_pad'].item() + self.generator = SinGANGenerator(self.scale_num, + self.coarsest_shape, + self.nfc_init, + self.min_nfc_init, + 3, + self.num_layers, + self.ker_size, + self.noise_zero_pad) + self.generator.set_state_dict(checkpoint) + self.generator.eval() + self.scale_factor = self.generator.scale_factor.item() + self.niose_pad_size = 0 if self.noise_zero_pad \ + else self.generator._pad_size + if seed is not None: + paddle.seed(seed) + + def noise_like(self, x): + return paddle.randn(pad_shape(x.shape, self.niose_pad_size)) + + def run(self, + mode='random_sample', + generate_start_scale=0, + scale_h=1.0, + scale_v=1.0, + ref_image=None, + mask_image=None, + sr_factor=4, + animation_alpha=0.9, + animation_beta=0.9, + animation_frames=20, + animation_duration=0.1, + n_row=5, + n_col=3): + + # check config + if mode not in ['random_sample', + 'sr', 'animation', + 'harmonization', + 'editing', 'paint2image']: + raise ValueError( + 'Only random_sample, sr, animation, harmonization, \ + editing and paint2image is implemented.') + if mode in ['sr', 'harmonization', 'editing', 'paint2image'] and \ + ref_image is None: + raise ValueError( + 'When mode is sr, harmonization, editing, or \ + paint2image, a reference image must be privided.') + if mode in ['harmonization', 'editing'] and mask_image is None: + raise ValueError( + 'When mode is harmonization or editing, \ + a mask image must be privided.') + + if mode == 'animation': + batch_size = animation_frames + elif mode == 'random_sample': + batch_size = n_row * n_col + else: + batch_size = 1 + + # prepare input + if mode == 'harmonization' or mode == 'editing' or mode == 'paint2image': + ref = imgpath2tensor(ref_image) + x_init = F.interpolate( + ref, None, + self.scale_factor ** (self.scale_num - generate_start_scale), + 'bicubic') + x_init = F.interpolate( + x_init, None, 1 / self.scale_factor, 'bicubic') + elif mode == 'sr': + ref = imgpath2tensor(ref_image) + sr_iters = math.ceil(math.log(sr_factor, 1 / self.scale_factor)) + sr_scale_factor = sr_factor ** (1 / sr_factor) + x_init = F.interpolate(ref, None, sr_scale_factor, 'bicubic') + else: + x_init = paddle.zeros([ + batch_size, + self.coarsest_shape[1], + int(self.coarsest_shape[2] * scale_v), + int(self.coarsest_shape[3] * scale_h)]) + + # forward + if mode == 'sr': + for _ in range(sr_iters): + out = self.generator([self.noise_like(x_init)], x_init, -1, -1) + x_init = F.interpolate(out, None, sr_scale_factor, 'bicubic') + else: + z_pyramid = [ + self.noise_like( + F.interpolate( + x_init, None, 1 / self.scale_factor ** i)) + for i in range(self.scale_num - generate_start_scale)] + + if mode == 'animation': + a = animation_alpha + b = animation_beta + for i in range(len(z_pyramid)): + z = paddle.chunk(z_pyramid[i], batch_size) + if i == 0 and generate_start_scale == 0: + z_0 = F.interpolate( + self.generator.z_fixed, + pad_shape(x_init.shape[-2:], self.niose_pad_size), + None, 'bicubic') + else: + z_0 = 0 + z_1 = z_0 + z_2 = 0.95 * z_1 + 0.05 * z[0] + for j in range(len(z)): + z[j] = a * z_0 + (1 - a) * (z_2 + b * (z_2 - z_1) + (1 - b) * z[j]) + z_1 = z_2 + z_2 = z[j] + z = paddle.concat(z) + z_pyramid[i] = z + + out = self.generator(z_pyramid, x_init, self.scale_num - 1, generate_start_scale) + + # postprocess and save + os.makedirs(self.output_path, exist_ok=True) + if mode == 'animation': + frames = [tensor2img(x) for x in out.chunk(animation_frames)] + imageio.mimsave( + os.path.join(self.output_path, 'animation.gif'), + frames, 'GIF', duration=animation_duration) + else: + if mode == 'harmonization' or mode == 'editing': + mask = cv2.imread(mask_image, cv2.IMREAD_GRAYSCALE) + mask = paddle.to_tensor(dilate_mask(mask, mode), 'float32') + out = F.interpolate(out, mask.shape, None, 'bicubic') + out = (1 - mask) * ref + mask * out + elif mode == 'sr': + out = F.interpolate( + out, + [ref.shape[-2] * sr_factor, ref.shape[-1] * sr_factor], + None, 'bicubic') + elif mode == 'paint2image': + out = F.interpolate(out, ref.shape[-2:], None, 'bicubic') + elif mode == 'random_sample': + out = make_grid(out, n_row) + + save_image(tensor2img(out), os.path.join(self.output_path, mode + '.png')) diff --git a/ppgan/datasets/__init__.py b/ppgan/datasets/__init__.py index 731eb7cd2bca4ca78587d516de943efaeace0170..21371935c9a35fc5aef165f4713e2f06d6248c72 100755 --- a/ppgan/datasets/__init__.py +++ b/ppgan/datasets/__init__.py @@ -29,3 +29,4 @@ from .vsr_reds_multiple_gt_dataset import VSRREDSMultipleGTDataset from .vsr_vimeo90k_dataset import VSRVimeo90KDataset from .vsr_folder_dataset import VSRFolderDataset from .photopen_dataset import PhotoPenDataset +from .empty_dataset import EmptyDataset diff --git a/ppgan/datasets/empty_dataset.py b/ppgan/datasets/empty_dataset.py new file mode 100755 index 0000000000000000000000000000000000000000..970341763245e91e5e307ae0d38d1ed7c42279ba --- /dev/null +++ b/ppgan/datasets/empty_dataset.py @@ -0,0 +1,30 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .base_dataset import BaseDataset +from .builder import DATASETS + + +@DATASETS.register() +class EmptyDataset(BaseDataset): + ''' + Dataset for models who don't need a dataset. + ''' + def __init__(self, size=1): + super().__init__() + self.size = size + self.data_infos = self.prepare_data_infos() + + def prepare_data_infos(self): + return [{i: 0} for i in range(self.size)] diff --git a/ppgan/models/__init__.py b/ppgan/models/__init__.py index c1df8bc6d08078dc86a43f72cfb17b4626f23cba..c77a665c140b68bdeeaa0f3855c4276d51210966 100644 --- a/ppgan/models/__init__.py +++ b/ppgan/models/__init__.py @@ -34,3 +34,4 @@ from .basicvsr_model import BasicVSRModel from .mpr_model import MPRModel from .photopen_model import PhotoPenModel from .msvsr_model import MultiStageVSRModel +from .singan_model import SinGANModel diff --git a/ppgan/models/criterions/__init__.py b/ppgan/models/criterions/__init__.py index f661bc5fbbce5f22b72bd4949e039cb4dccebbd7..b84b2f505af0306b569f3feda1dbac8f942a08c5 100644 --- a/ppgan/models/criterions/__init__.py +++ b/ppgan/models/criterions/__init__.py @@ -4,5 +4,6 @@ from .pixel_loss import L1Loss, MSELoss, CharbonnierLoss, \ CalcStyleEmdLoss, CalcContentReltLoss, \ CalcContentLoss, CalcStyleLoss, EdgeLoss from .photopen_perceptual_loss import PhotoPenPerceptualLoss +from .gradient_penalty import GradientPenalty from .builder import build_criterion diff --git a/ppgan/models/criterions/gradient_penalty.py b/ppgan/models/criterions/gradient_penalty.py new file mode 100755 index 0000000000000000000000000000000000000000..22500ec6fb9a88780a5de2e88b224327e2781074 --- /dev/null +++ b/ppgan/models/criterions/gradient_penalty.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +from .builder import CRITERIONS + + +@CRITERIONS.register() +class GradientPenalty(): + def __init__(self, loss_weight=1.0): + self.loss_weight = loss_weight + + def __call__(self, net, real, fake): + batch_size = real.shape[0] + alpha = paddle.rand([batch_size]) + for _ in range(real.ndim - 1): + alpha = paddle.unsqueeze(alpha, -1) + interpolate = alpha * real + (1 - alpha) * fake + interpolate.stop_gradient = False + interpolate_pred = net(interpolate) + gradient = paddle.grad(outputs=interpolate_pred, + inputs=interpolate, + grad_outputs=paddle.ones_like(interpolate_pred), + create_graph=True, + retain_graph=True, + only_inputs=True)[0] + gradient_penalty = ((gradient.norm(2, 1) - 1) ** 2).mean() + return gradient_penalty * self.loss_weight diff --git a/ppgan/models/discriminators/__init__.py b/ppgan/models/discriminators/__init__.py index 3ab7a28878df0d00d73e81489d8fb484145147c1..f371047d490fb255af6c9852a680ab413d5fa7df 100644 --- a/ppgan/models/discriminators/__init__.py +++ b/ppgan/models/discriminators/__init__.py @@ -24,3 +24,4 @@ from .discriminator_starganv2 import StarGANv2Discriminator from .discriminator_firstorder import FirstOrderDiscriminator from .discriminator_lapstyle import LapStyleDiscriminator from .discriminator_photopen import MultiscaleDiscriminator +from .discriminator_singan import SinGANDiscriminator diff --git a/ppgan/models/discriminators/discriminator_singan.py b/ppgan/models/discriminators/discriminator_singan.py new file mode 100755 index 0000000000000000000000000000000000000000..6a2a78bd59fc2bab6014fbeac5f4befe0baea38a --- /dev/null +++ b/ppgan/models/discriminators/discriminator_singan.py @@ -0,0 +1,31 @@ +# code was based on https://github.com/tamarott/SinGAN + +import paddle.nn as nn + +from ..generators.generator_singan import ConvBlock +from .builder import DISCRIMINATORS + + +@DISCRIMINATORS.register() +class SinGANDiscriminator(nn.Layer): + def __init__(self, + nfc=32, + min_nfc=32, + input_nc=3, + num_layers=5, + ker_size=3, + padd_size=0): + super(SinGANDiscriminator, self).__init__() + self.head = ConvBlock(input_nc, nfc, ker_size, padd_size, 1) + self.body = nn.Sequential() + for i in range(num_layers - 2): + N = int(nfc / pow(2, (i + 1))) + block = ConvBlock(max(2 * N, min_nfc), max(N, min_nfc), ker_size, padd_size, 1) + self.body.add_sublayer('block%d' % (i + 1), block) + self.tail = nn.Conv2D(max(N, min_nfc), 1, ker_size, 1, padd_size) + + def forward(self, x): + x = self.head(x) + x = self.body(x) + x = self.tail(x) + return x diff --git a/ppgan/models/generators/__init__.py b/ppgan/models/generators/__init__.py index 24926b0ef59a56c0a002a3400f165765b9369b21..85a82b1e81a067491b728366f89ecf626607d9af 100755 --- a/ppgan/models/generators/__init__.py +++ b/ppgan/models/generators/__init__.py @@ -38,3 +38,4 @@ from .pan import PAN from .generater_photopen import SPADEGenerator from .basicvsr_plus_plus import BasicVSRPlusPlus from .msvsr import MSVSR +from .generator_singan import SinGANGenerator diff --git a/ppgan/models/generators/generator_singan.py b/ppgan/models/generators/generator_singan.py new file mode 100755 index 0000000000000000000000000000000000000000..596384653f6611eb7df2f44934027eda633d04c3 --- /dev/null +++ b/ppgan/models/generators/generator_singan.py @@ -0,0 +1,105 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# code was based on https://github.com/tamarott/SinGAN + +import math +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from .builder import GENERATORS + + +class ConvBlock(nn.Sequential): + def __init__(self, in_channel, out_channel, ker_size, padd, stride): + super(ConvBlock,self).__init__() + self.add_sublayer('conv', nn.Conv2D(in_channel ,out_channel, ker_size, stride, padd)), + self.add_sublayer('norm', nn.BatchNorm2D(out_channel)), + self.add_sublayer('LeakyRelu', nn.LeakyReLU(0.2)) + +class GeneratorConcatSkip2CleanAdd(nn.Layer): + def __init__(self, nfc=32, min_nfc=32, input_nc=3, num_layers=5, ker_size=3, padd_size=0): + super(GeneratorConcatSkip2CleanAdd, self).__init__() + self.head = ConvBlock(input_nc, nfc, ker_size, padd_size, 1) + self.body = nn.Sequential() + for i in range(num_layers - 2): + N = int(nfc / pow(2, i + 1)) + block = ConvBlock(max(2 * N, min_nfc), max(N, min_nfc), ker_size, padd_size, 1) + self.body.add_sublayer('block%d' % (i + 1), block) + self.tail = nn.Sequential( + nn.Conv2D(max(N, min_nfc), input_nc, ker_size, 1, padd_size), + nn.Tanh()) + def forward(self, x, y): + x = self.head(x) + x = self.body(x) + x = self.tail(x) + ind = int((y.shape[2] - x.shape[2]) / 2) + y = y[:, :, ind: (y.shape[2] - ind), ind: (y.shape[3] - ind)] + return x + y + +@GENERATORS.register() +class SinGANGenerator(nn.Layer): + def __init__(self, + scale_num, + coarsest_shape, + nfc_init=32, + min_nfc_init=32, + input_nc=3, + num_layers=5, + ker_size=3, + noise_zero_pad=True): + super().__init__() + nfc_list = [min(nfc_init * pow(2, math.floor(i / 4)), 128) for i in range(scale_num)] + min_nfc_list = [min(min_nfc_init * pow(2, math.floor(i / 4)), 128) for i in range(scale_num)] + self.generators = nn.LayerList([ + GeneratorConcatSkip2CleanAdd( + nfc, min_nfc, input_nc, num_layers, + ker_size, 0 + ) for nfc, min_nfc in zip(nfc_list, min_nfc_list)]) + self._scale_num = scale_num + self._pad_size = int((ker_size - 1) / 2 * num_layers) + self.noise_pad = nn.Pad2D(self._pad_size if noise_zero_pad else 0) + self.image_pad = nn.Pad2D(self._pad_size) + self._noise_zero_pad = noise_zero_pad + self._coarsest_shape = coarsest_shape + self.register_buffer('scale_num', paddle.to_tensor(scale_num, 'int32'), True) + self.register_buffer('coarsest_shape', paddle.to_tensor(coarsest_shape, 'int32'), True) + self.register_buffer('nfc_init', paddle.to_tensor(nfc_init, 'int32'), True) + self.register_buffer('min_nfc_init', paddle.to_tensor(min_nfc_init, 'int32'), True) + self.register_buffer('num_layers', paddle.to_tensor(num_layers, 'int32'), True) + self.register_buffer('ker_size', paddle.to_tensor(ker_size, 'int32'), True) + self.register_buffer('noise_zero_pad', paddle.to_tensor(noise_zero_pad, 'bool'), True) + self.register_buffer('sigma', paddle.ones([scale_num]), True) + self.register_buffer('scale_factor', paddle.ones([1]), True) + self.register_buffer( + 'z_fixed', + paddle.randn( + F.pad( + paddle.zeros(coarsest_shape), + [0 if noise_zero_pad else self._pad_size] * 4).shape), True) + + def forward(self, z_pyramid, x_prev, stop_scale, start_scale=0): + stop_scale %= self._scale_num + start_scale %= self._scale_num + for i, scale in enumerate(range(start_scale, stop_scale + 1)): + x_prev = self.image_pad(x_prev) + z = self.noise_pad(z_pyramid[i] * self.sigma[scale]) + x_prev + x_prev = self.generators[scale]( + z.detach(), + x_prev.detach() + ) + if scale < stop_scale: + x_prev = F.interpolate(x_prev, + F.pad(z_pyramid[i + 1], [0 if self._noise_zero_pad else -self._pad_size] * 4).shape[-2:], + None, 'bicubic') + return x_prev diff --git a/ppgan/models/singan_model.py b/ppgan/models/singan_model.py new file mode 100755 index 0000000000000000000000000000000000000000..612187522b944323087bdef0844109be65f0c430 --- /dev/null +++ b/ppgan/models/singan_model.py @@ -0,0 +1,308 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import cv2 +import math +import warnings +from collections import OrderedDict +from sklearn.cluster import KMeans + +import paddle +import paddle.nn.functional as F +import paddle.vision.transforms as T + +from .base_model import BaseModel +from .builder import MODELS +from .generators.builder import build_generator +from .criterions.builder import build_criterion +from .discriminators.builder import build_discriminator +from ..solver import build_lr_scheduler, build_optimizer +warnings.filterwarnings('ignore', category=DeprecationWarning) +warnings.filterwarnings('ignore', category=UserWarning) + + +def pad_shape(shape, pad_size): + shape[-2] += 2 * pad_size + shape[-1] += 2 * pad_size + return shape + +def quant(x, num): + n, c, h, w = x.shape + kmeans = KMeans(num, random_state=0).fit(x.transpose([0, 2, 3, 1]).reshape([-1, c])) + centers = kmeans.cluster_centers_ + x = centers[kmeans.labels_].reshape([n, h, w, c]).transpose([0, 3, 1, 2]) + return paddle.to_tensor(x, 'float32'), centers + +def quant_to_centers(x, centers): + n, c, h, w = x.shape + num = centers.shape[0] + kmeans = KMeans(num, init=centers, n_init=1).fit(x.transpose([0, 2, 3, 1]).reshape([-1, c])) + x = centers[kmeans.labels_].reshape([n, h, w, c]).transpose([0, 3, 1, 2]) + return paddle.to_tensor(x, 'float32') + +@MODELS.register() +class SinGANModel(BaseModel): + def __init__(self, + generator, + discriminator, + gan_criterion=None, + recon_criterion=None, + gp_criterion=None, + train_image=None, + scale_factor=0.75, + min_size=25, + is_finetune=False, + finetune_scale=1, + color_num=5, + gen_iters=3, + disc_iters=3, + noise_amp_init=0.1): + super(SinGANModel, self).__init__() + + # setup config + self.gen_iters = gen_iters + self.disc_iters = disc_iters + self.min_size = min_size + self.is_finetune = is_finetune + self.noise_amp_init = noise_amp_init + self.train_image = T.Compose([ + T.Transpose(), + T.Normalize(127.5, 127.5) + ])(cv2.cvtColor(cv2.imread(train_image, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)) + self.train_image = paddle.to_tensor(self.train_image).unsqueeze(0) + self.scale_num = math.ceil(math.log( + self.min_size / min(self.train_image.shape[-2:]), + scale_factor)) + 1 + self.scale_factor = math.pow( + self.min_size / min(self.train_image.shape[-2:]), + 1 / (self.scale_num - 1)) + self.reals = [ + F.interpolate(self.train_image, None, self.scale_factor ** i, 'bicubic') + for i in range(self.scale_num - 1, -1, -1)] + + # build generator + generator['scale_num'] = self.scale_num + generator['coarsest_shape'] =self.reals[0].shape + self.nets['netG'] = build_generator(generator) + self.niose_pad_size = 0 if generator.get('noise_zero_pad', True) \ + else self.nets['netG']._pad_size + self.nets['netG'].scale_factor = paddle.to_tensor(self.scale_factor, 'float32') + + # build discriminator + nfc_init = discriminator.pop('nfc_init', 32) + min_nfc_init = discriminator.pop('min_nfc_init', 32) + for i in range(self.scale_num): + discriminator['nfc'] = min(nfc_init * pow(2, math.floor(i / 4)), 128) + discriminator['min_nfc'] = min(min_nfc_init * pow(2, math.floor(i / 4)), 128) + self.nets[f'netD{i}'] = build_discriminator(discriminator) + + # build criterion + self.gan_criterion = build_criterion(gan_criterion) + self.recon_criterion = build_criterion(recon_criterion) + self.gp_criterion = build_criterion(gp_criterion) + + if self.is_finetune: + self.finetune_scale = finetune_scale + self.quant_real, self.quant_centers = quant(self.reals[finetune_scale], color_num) + + + # setup training config + self.lr_schedulers = OrderedDict() + self.current_scale = (finetune_scale if self.is_finetune else 0) - 1 + self.current_iter = 0 + + def set_total_iter(self, total_iter): + super().set_total_iter(total_iter) + if self.is_finetune: + self.scale_iters = total_iter + else: + self.scale_iters = math.ceil(total_iter / self.scale_num) + + def setup_lr_schedulers(self, cfg): + for i in range(self.scale_num): + self.lr_schedulers[f"lr{i}"] = build_lr_scheduler(cfg) + return self.lr_schedulers + + def setup_optimizers(self, lr_schedulers, cfg): + for i in range(self.scale_num): + self.optimizers[f'optim_netG{i}'] = build_optimizer( + cfg['optimizer_G'], lr_schedulers[f"lr{i}"], self.nets[f'netG'].generators[i].parameters()) + self.optimizers[f'optim_netD{i}'] = build_optimizer( + cfg['optimizer_D'], lr_schedulers[f"lr{i}"], self.nets[f'netD{i}'].parameters()) + return self.optimizers + + def setup_input(self, input): + pass + + def backward_D(self): + self.loss_D_real = self.gan_criterion(self.pred_real, True, True) + self.loss_D_fake = self.gan_criterion(self.pred_fake, False, True) + self.loss_D_gp = self.gp_criterion(self.nets[f'netD{self.current_scale}'], + self.real_img, + self.fake_img) + self.loss_D = self.loss_D_real + self.loss_D_fake + self.loss_D_gp + self.loss_D.backward() + + self.losses[f'scale{self.current_scale}/D_total_loss'] = self.loss_D + self.losses[f'scale{self.current_scale}/D_real_loss'] = self.loss_D_real + self.losses[f'scale{self.current_scale}/D_fake_loss'] = self.loss_D_fake + self.losses[f'scale{self.current_scale}/D_gradient_penalty'] = self.loss_D_gp + + def backward_G(self): + self.loss_G_gan = self.gan_criterion(self.pred_fake, True, False) + self.loss_G_recon = self.recon_criterion(self.recon_img, self.real_img) + self.loss_G = self.loss_G_gan + self.loss_G_recon + self.loss_G.backward() + + self.losses[f'scale{self.current_scale}/G_adv_loss'] = self.loss_G_gan + self.losses[f'scale{self.current_scale}/G_recon_loss'] = self.loss_G_recon + + def scale_prepare(self): + self.real_img = self.reals[self.current_scale] + self.lr_scheduler = self.lr_schedulers[f"lr{self.current_scale}"] + for i in range(self.current_scale): + self.optimizers.pop(f'optim_netG{i}', None) + self.optimizers.pop(f'optim_netD{i}', None) + self.losses.clear() + self.visual_items.clear() + self.visual_items[f'real_img_scale{self.current_scale}'] = self.real_img + if self.is_finetune: + self.visual_items['quant_real'] = self.quant_real + + self.recon_prev = paddle.zeros_like(self.reals[0]) + if self.current_scale > 0: + z_pyramid = [] + for i in range(self.current_scale): + if i == 0: + z = self.nets['netG'].z_fixed + else: + z = paddle.zeros( + pad_shape( + self.reals[i].shape, self.niose_pad_size)) + z_pyramid.append(z) + self.recon_prev = self.nets['netG']( + z_pyramid, self.recon_prev, + self.current_scale - 1, 0).detach() + self.recon_prev = F.interpolate( + self.recon_prev, self.real_img.shape[-2:], None, 'bicubic') + if self.is_finetune: + self.recon_prev = quant_to_centers(self.recon_prev, self.quant_centers) + self.nets['netG'].sigma[self.current_scale] = F.mse_loss( + self.real_img, self.recon_prev + ).sqrt() * self.noise_amp_init + + for i in range(self.scale_num): + self.set_requires_grad(self.nets['netG'].generators[i], i == self.current_scale) + + def forward(self): + if not self.is_finetune: + self.fake_img = self.nets['netG']( + self.z_pyramid, + paddle.zeros( + pad_shape(self.z_pyramid[0].shape, -self.niose_pad_size)), + self.current_scale, 0) + else: + x_prev = self.nets['netG']( + self.z_pyramid[:self.finetune_scale], + paddle.zeros( + pad_shape(self.z_pyramid[0].shape, -self.niose_pad_size)), + self.finetune_scale - 1, 0) + x_prev = F.interpolate(x_prev, self.z_pyramid[self.finetune_scale].shape[-2:], None, 'bicubic') + x_prev_quant = quant_to_centers(x_prev, self.quant_centers) + self.fake_img = self.nets['netG']( + self.z_pyramid[self.finetune_scale:], + x_prev_quant, + self.current_scale, self.finetune_scale) + + self.recon_img = self.nets['netG']( + [(paddle.randn if self.current_scale == 0 else paddle.zeros)( + pad_shape(self.real_img.shape, self.niose_pad_size))], + self.recon_prev, + self.current_scale, + self.current_scale) + + self.pred_real = self.nets[f'netD{self.current_scale}'](self.real_img) + self.pred_fake = self.nets[f'netD{self.current_scale}']( + self.fake_img.detach() if self.update_D else self.fake_img) + + self.visual_items[f'fake_img_scale{self.current_scale}'] = self.fake_img + self.visual_items[f'recon_img_scale{self.current_scale}'] = self.recon_img + if self.is_finetune: + self.visual_items[f'prev_img_scale{self.current_scale}'] = x_prev + self.visual_items[f'quant_prev_img_scale{self.current_scale}'] = x_prev_quant + + def train_iter(self, optimizers=None): + if self.current_iter % self.scale_iters == 0: + self.current_scale += 1 + self.scale_prepare() + + self.z_pyramid = [paddle.randn( + pad_shape(self.reals[i].shape, self.niose_pad_size)) + for i in range(self.current_scale + 1)] + + self.update_D = (self.current_iter % (self.disc_iters + self.gen_iters) < self.disc_iters) + self.set_requires_grad(self.nets[f'netD{self.current_scale}'], self.update_D) + self.forward() + if self.update_D: + optimizers[f'optim_netD{self.current_scale}'].clear_grad() + self.backward_D() + optimizers[f'optim_netD{self.current_scale}'].step() + else: + optimizers[f'optim_netG{self.current_scale}'].clear_grad() + self.backward_G() + optimizers[f'optim_netG{self.current_scale}'].step() + + self.current_iter += 1 + + def test_iter(self, metrics=None): + z_pyramid = [paddle.randn( + pad_shape(self.reals[i].shape, self.niose_pad_size)) + for i in range(self.scale_num)] + self.nets['netG'].eval() + fake_img = self.nets['netG']( + z_pyramid, + paddle.zeros(pad_shape(z_pyramid[0].shape, -self.niose_pad_size)), + self.scale_num - 1, 0) + self.visual_items['fake_img_test'] = fake_img + with paddle.no_grad(): + if metrics is not None: + for metric in metrics.values(): + metric.update(fake_img, self.train_image) + self.nets['netG'].train() + + class InferGenerator(paddle.nn.Layer): + def set_config(self, generator, noise_shapes, scale_num): + self.generator = generator + self.noise_shapes = noise_shapes + self.scale_num = scale_num + + def forward(self, x): + coarsest_shape = self.generator._coarsest_shape + z_pyramid = [paddle.randn(shp) for shp in self.noise_shapes] + x_init = paddle.zeros(coarsest_shape) + out = self.generator(z_pyramid, x_init, self.scale_num - 1, 0) + return out + + def export_model(self, + export_model=None, + output_dir=None, + inputs_size=None, + export_serving_model=False): + noise_shapes = [pad_shape(x.shape, self.niose_pad_size) for x in self.reals] + infer_generator = self.InferGenerator() + infer_generator.set_config(self.nets['netG'], noise_shapes, self.scale_num) + paddle.jit.save(infer_generator, + os.path.join(output_dir, "singan_random_sample"), + input_spec=[1]) diff --git a/test_tipc/configs/singan/train_infer_python.txt b/test_tipc/configs/singan/train_infer_python.txt new file mode 100755 index 0000000000000000000000000000000000000000..1d7f13da0921f51d6e194021294d38012c99ae19 --- /dev/null +++ b/test_tipc/configs/singan/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:singan +python:python3.7 +gpu_list:0 +## +auto_cast:null +total_iters:lite_train_lite_infer=100|whole_train_whole_infer=100000 +output_dir:./output/ +snapshot_config.interval:lite_train_lite_infer=25|whole_train_whole_infer=10000 +pretrained_model:null +train_model_name:singan*/*checkpoint.pdparams +train_infer_img_dir:./data/stone +null:null +## +trainer:norm_train +norm_train:tools/main.py -c configs/singan_universal.yaml --seed 123 -o log_config.interval=50 +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +--output_dir:./output/ +load:null +norm_export:tools/export_model.py -c configs/singan_universal.yaml --inputs_size=1 --load +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:singan_random_sample +train_model:./inference/singan/singan_random_sample +infer_export:null +infer_quant:False +inference:tools/inference.py --model_type singan --seed 123 -c configs/singan_universal.yaml --output_path test_tipc/output/ +--device:cpu +null:null +null:null +null:null +null:null +null:null +--model_path: +null:null +null:null +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/docs/test_train_inference_python.md b/test_tipc/docs/test_train_inference_python.md index 768ae88044c9c7ac3b08954a72b41b8270700701..f7228e937b4ed1ba29a1ca3d948af00f4aaa5edd 100644 --- a/test_tipc/docs/test_train_inference_python.md +++ b/test_tipc/docs/test_train_inference_python.md @@ -15,6 +15,7 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho | FOMM |FOMM | 生成 | 支持 | 多机多卡 | | | | BasicVSR |BasicVSR | 超分 | 支持 | 多机多卡 | | | |PP-MSVSR|PP-MSVSR | 超分| +|SinGAN|SinGAN | 生成|支持| - 预测相关:预测功能汇总如下, @@ -55,25 +56,25 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho ```shell bash test_tipc/prepare.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'lite_train_lite_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'lite_train_lite_infer' -``` +``` - 模式2:lite_train_whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理; ```shell bash test_tipc/prepare.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'lite_train_whole_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'lite_train_whole_infer' -``` +``` - 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度; ```shell bash test_tipc/prepare.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'whole_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'whole_infer' -``` +``` - 模式4:whole_train_whole_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度; ```shell bash test_tipc/prepare.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'whole_train_whole_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/basicvsr/train_infer_python.txt 'whole_train_whole_infer' -``` +``` 运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如'lite_train_lite_infer'模式下,会运行训练+inference的链条,因此,在`test_tipc/output`文件夹有以下文件: ``` diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index cf481b40950b2fa60f647759e12c57847627be09..dd1e840be0524dc60d5a9e8f00914c55ee47a805 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -66,7 +66,13 @@ if [ ${MODE} = "lite_train_lite_infer" ];then rm -rf ./data/DIV2K* wget -nc -P ./data/ https://paddlegan.bj.bcebos.com/datasets/DIV2KandSet14.tar --no-check-certificate cd ./data/ && tar xf DIV2KandSet14.tar && cd ../ ;; - esac + singan) + rm -rf ./data/SinGAN* + wget -nc -P ./data/ https://paddlegan.bj.bcebos.com/datasets/singan-official_images.zip --no-check-certificate + cd ./data/ && unzip -q singan-official_images.zip && cd ../ ;; + mkdir -p ./data/singan + mv ./data/SinGAN-official_images/Images/stone.png ./data/singan + esac elif [ ${MODE} = "whole_train_whole_infer" ];then if [ ${model_name} == "pix2pix" ]; then rm -rf ./data/facades* @@ -76,6 +82,12 @@ elif [ ${MODE} = "whole_train_whole_infer" ];then rm -rf ./data/horse2zebra* wget -nc -P ./data/ https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/horse2zebra.zip --no-check-certificate cd ./data/ && unzip horse2zebra.zip && cd ../ + elif [ ${model_name} == "singan" ]; then + rm -rf ./data/SinGAN* + wget -nc -P ./data/ https://paddlegan.bj.bcebos.com/datasets/singan-official_images.zip --no-check-certificate + cd ./data/ && unzip -q singan-official_images.zip && cd ../ + mkdir -p ./data/singan + mv ./data/SinGAN-official_images/Images/stone.png ./data/singan fi elif [ ${MODE} = "lite_train_whole_infer" ];then if [ ${model_name} == "pix2pix" ]; then @@ -102,6 +114,12 @@ elif [ ${MODE} = "lite_train_whole_infer" ];then rm -rf ./data/reds* wget -nc -P ./data/ https://paddlegan.bj.bcebos.com/datasets/reds_lite.tar --no-check-certificate cd ./data/ && tar xf reds_lite.tar && cd ../ + elif [ ${model_name} == "singan" ]; then + rm -rf ./data/SinGAN* + wget -nc -P ./data/ https://paddlegan.bj.bcebos.com/datasets/singan-official_images.zip --no-check-certificate + cd ./data/ && unzip -q singan-official_images.zip && cd ../ + mkdir -p ./data/singan + mv ./data/SinGAN-official_images/Images/stone.png ./data/singan fi elif [ ${MODE} = "whole_infer" ];then if [ ${model_name} = "pix2pix" ]; then @@ -145,6 +163,14 @@ elif [ ${MODE} = "whole_infer" ];then wget -nc -P ./inference https://paddlegan.bj.bcebos.com/static_model/msvsr.tar --no-check-certificate cd ./inference && tar xf msvsr.tar && cd ../ cd ./data/ && tar xf reds_lite.tar && cd ../ + elif [ ${model_name} == "singan" ]; then + rm -rf ./data/SinGAN* + wget -nc -P ./data/ https://paddlegan.bj.bcebos.com/datasets/singan-official_images.zip --no-check-certificate + wget -nc -P ./inference https://paddlegan.bj.bcebos.com/datasets/singan.zip --no-check-certificate + cd ./data/ && unzip -q singan-official_images.zip && cd ../ + cd ./inference/ && unzip -q singan.zip && cd ../ + mkdir -p ./data/singan + mv ./data/SinGAN-official_images/Images/stone.png ./data/singan fi fi diff --git a/test_tipc/readme.md b/test_tipc/readme.md index 2fd44e4b9060b7a54c3e145d8871046674f70dcb..3d4778c32a1b4185ad495fc610b9e85e6b04b90a 100644 --- a/test_tipc/readme.md +++ b/test_tipc/readme.md @@ -25,6 +25,7 @@ | FOMM |FOMM | 生成 | 支持 | 多机多卡 | | | | BasicVSR |BasicVSR | 超分 | 支持 | 多机多卡 | | | |PP-MSVSR|PP-MSVSR | 超分| +|SinGAN|SinGAN | 生成| 支持 | diff --git a/test_tipc/results/python_singan_results_fp32.txt b/test_tipc/results/python_singan_results_fp32.txt new file mode 100644 index 0000000000000000000000000000000000000000..575aabd0cec4551b638ee62f66b87d0d8a8fef5c --- /dev/null +++ b/test_tipc/results/python_singan_results_fp32.txt @@ -0,0 +1 @@ +Metric fid: 124.0369 diff --git a/tools/inference.py b/tools/inference.py index 6322acd52c06c55e5b5a46c0aa69a083e81cf54a..bf66a85fba8ded22a8be9f2186b553642ad50b3f 100644 --- a/tools/inference.py +++ b/tools/inference.py @@ -15,7 +15,7 @@ from ppgan.metrics import build_metric MODEL_CLASSES = ["pix2pix", "cyclegan", "wav2lip", "esrgan", \ - "edvr", "fom", "stylegan2", "basicvsr", "msvsr"] + "edvr", "fom", "stylegan2", "basicvsr", "msvsr", "singan"] def parse_args(): @@ -304,6 +304,15 @@ def main(): metric_file = os.path.join(args.output_path, model_type, "metric.txt") for metric in metrics.values(): metric.update(out_img, gt_img, is_seq=True) + elif model_type == "singan": + predictor.run() + prediction = output_handle.copy_to_cpu() + prediction = paddle.to_tensor(prediction) + image_numpy = tensor2img(prediction, min_max) + save_image(image_numpy, os.path.join(args.output_path, "singan/{}.png".format(i))) + metric_file = os.path.join(args.output_path, "singan/metric.txt") + for metric in metrics.values(): + metric.update(prediction, data['A']) if metrics: log_file = open(metric_file, 'a')