diff --git a/README.md b/README.md
index 00315532aca018bbe65e6f178e112d65efca49f0..53ab209d476e72d3d2f977d2a30e166393061513 100644
--- a/README.md
+++ b/README.md
@@ -85,6 +85,13 @@ GAN--生成对抗网络,被“卷积网络之父”**Yann LeCun(杨立昆)
+### 人脸动漫化
+
+
+
+
+
+
## 版本更新
- v0.1.0 (2020.11.02)
diff --git a/README_en.md b/README_en.md
index 3ff2ca83a0c56a7288c21b1f37baabf4e175fc88..84b01549e90171d5073de32025948f85de2c551e 100644
--- a/README_en.md
+++ b/README_en.md
@@ -74,6 +74,12 @@ GAN-Generative Adversarial Network, was praised by "the Father of Convolutional
+### Face cartoonization
+
+
+
+
+
## Changelog
- v0.1.0 (2020.11.02)
diff --git a/configs/ugatit_selfie2anime_light.yaml b/configs/ugatit_selfie2anime_light.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ad0de434dd78eae2613ccf5993fe1e3a034422a2
--- /dev/null
+++ b/configs/ugatit_selfie2anime_light.yaml
@@ -0,0 +1,85 @@
+epochs: 300
+output_dir: output_dir
+adv_weight: 1.0
+cycle_weight: 10.0
+identity_weight: 10.0
+cam_weight: 1000.0
+
+model:
+ name: UGATITModel
+ generator:
+ name: ResnetUGATITGenerator
+ input_nc: 3
+ output_nc: 3
+ ngf: 64
+ n_blocks: 4
+ img_size: 256
+ light: True
+ discriminator_g:
+ name: UGATITDiscriminator
+ input_nc: 3
+ ndf: 64
+ n_layers: 7
+ discriminator_l:
+ name: UGATITDiscriminator
+ input_nc: 3
+ ndf: 64
+ n_layers: 5
+
+dataset:
+ train:
+ name: UnpairedDataset
+ dataroot: data/selfie2anime
+ num_workers: 0
+ phase: train
+ max_dataset_size: inf
+ direction: AtoB
+ input_nc: 3
+ output_nc: 3
+ serial_batches: False
+ transforms:
+ - name: Resize
+ size: [286, 286]
+ interpolation: 'bilinear' #'bicubic' #cv2.INTER_CUBIC
+ - name: RandomCrop
+ size: [256, 256]
+ - name: RandomHorizontalFlip
+ prob: 0.5
+ - name: Transpose
+ - name: Normalize
+ mean: [127.5, 127.5, 127.5]
+ std: [127.5, 127.5, 127.5]
+ test:
+ name: SingleDataset
+ dataroot: data/selfie2anime/testA
+ max_dataset_size: inf
+ direction: AtoB
+ input_nc: 3
+ output_nc: 3
+ serial_batches: False
+ transforms:
+ - name: Resize
+ size: [256, 256]
+ interpolation: 'bilinear' #cv2.INTER_CUBIC
+ - name: Transpose
+ - name: Normalize
+ mean: [127.5, 127.5, 127.5]
+ std: [127.5, 127.5, 127.5]
+
+optimizer:
+ name: Adam
+ beta1: 0.5
+ weight_decay: 0.0001
+
+lr_scheduler:
+ name: linear
+ learning_rate: 0.0001
+ start_epoch: 150
+ decay_epochs: 150
+
+log_config:
+ interval: 10
+ visiual_interval: 500
+
+snapshot_config:
+ interval: 30
diff --git a/docs/imgs/ugatit.png b/docs/imgs/ugatit.png
new file mode 100644
index 0000000000000000000000000000000000000000..a13d7847f0e6489675f08217c74511e5e760b8aa
Binary files /dev/null and b/docs/imgs/ugatit.png differ
diff --git a/ppgan/apps/edvr_predictor.py b/ppgan/apps/edvr_predictor.py
index 695ca41829712dc6160a160e92542cc92de7e6a7..0a6425dc289158bd57caa00d65d8d15a76917cad 100644
--- a/ppgan/apps/edvr_predictor.py
+++ b/ppgan/apps/edvr_predictor.py
@@ -186,7 +186,6 @@ class EDVRPredictor(BasePredictor):
period = cur_time - prev_time
periods.append(period)
- # print('Processed {} samples'.format(infer_iter + 1))
frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png')
vid_out_path = os.path.join(self.output,
'{}_edvr_out.mp4'.format(base_name))
diff --git a/ppgan/engine/trainer.py b/ppgan/engine/trainer.py
index 1fe21e84247c62e7681cd06ff0e78b4693149b1a..375af05d329c58fe289977007e763264b9663894 100644
--- a/ppgan/engine/trainer.py
+++ b/ppgan/engine/trainer.py
@@ -17,8 +17,9 @@ import time
import copy
import logging
-import paddle
+import datetime
+import paddle
from paddle.distributed import ParallelEnv
from ..datasets.builder import build_dataloader
@@ -64,6 +65,9 @@ class Trainer:
self.local_rank = ParallelEnv().local_rank
# time count
+ self.steps_per_epoch = len(self.train_dataloader)
+ self.total_steps = self.epochs * self.steps_per_epoch
+
self.time_count = {}
self.best_metric = {}
@@ -219,7 +223,14 @@ class Trainer:
message += 'reader_cost: %.5f sec ' % self.data_time
if hasattr(self, 'ips'):
- message += 'ips: %.5f images/s' % self.ips
+ message += 'ips: %.5f images/s ' % self.ips
+
+ if hasattr(self, 'step_time'):
+ cur_step = self.steps_per_epoch * (self.current_epoch -
+ 1) + self.batch_id
+ eta = self.step_time * (self.total_steps - cur_step - 1)
+ eta_str = str(datetime.timedelta(seconds=int(eta)))
+ message += f'eta: {eta_str}'
# print the message
self.logger.info(message)
diff --git a/ppgan/models/__init__.py b/ppgan/models/__init__.py
index 4a2d433c3c8bf60b7ee0f79c4845b4707c4974c9..5fea006c40d7d878a7673076a93e5a832034d1f5 100644
--- a/ppgan/models/__init__.py
+++ b/ppgan/models/__init__.py
@@ -18,4 +18,5 @@ from .pix2pix_model import Pix2PixModel
from .srgan_model import SRGANModel
from .sr_model import SRModel
from .makeup_model import MakeupModel
+from .ugatit_model import UGATITModel
from .dc_gan_model import DCGANModel
diff --git a/ppgan/models/discriminators/__init__.py b/ppgan/models/discriminators/__init__.py
index caa2f8b410f7b9a7c49b099dd08b6a4b70c2391f..9d8653fd56ad538356d19e69b5647e45d28f6f1a 100644
--- a/ppgan/models/discriminators/__init__.py
+++ b/ppgan/models/discriminators/__init__.py
@@ -13,4 +13,5 @@
# limitations under the License.
from .nlayers import NLayerDiscriminator
+from .discriminator_ugatit import UGATITDiscriminator
from .dcdiscriminator import DCDiscriminator
diff --git a/ppgan/models/discriminators/discriminator_ugatit.py b/ppgan/models/discriminators/discriminator_ugatit.py
new file mode 100644
index 0000000000000000000000000000000000000000..d08615925e17a58cf301b758f90b8752e806edcb
--- /dev/null
+++ b/ppgan/models/discriminators/discriminator_ugatit.py
@@ -0,0 +1,96 @@
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from ...modules.utils import spectral_norm
+from .builder import DISCRIMINATORS
+
+
+@DISCRIMINATORS.register()
+class UGATITDiscriminator(nn.Layer):
+ def __init__(self, input_nc, ndf=64, n_layers=5):
+ super(UGATITDiscriminator, self).__init__()
+ model = [
+ nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
+ spectral_norm(
+ nn.Conv2D(input_nc,
+ ndf,
+ kernel_size=4,
+ stride=2,
+ padding=0,
+ bias_attr=True)),
+ nn.LeakyReLU(0.2)
+ ]
+
+ for i in range(1, n_layers - 2):
+ mult = 2**(i - 1)
+ model += [
+ nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
+ spectral_norm(
+ nn.Conv2D(ndf * mult,
+ ndf * mult * 2,
+ kernel_size=4,
+ stride=2,
+ padding=0,
+ bias_attr=True)),
+ nn.LeakyReLU(0.2)
+ ]
+
+ mult = 2**(n_layers - 2 - 1)
+ model += [
+ nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
+ spectral_norm(
+ nn.Conv2D(ndf * mult,
+ ndf * mult * 2,
+ kernel_size=4,
+ stride=1,
+ padding=0,
+ bias_attr=True)),
+ nn.LeakyReLU(0.2)
+ ]
+
+ # Class Activation Map
+ mult = 2**(n_layers - 2)
+ self.gap_fc = spectral_norm(nn.Linear(ndf * mult, 1, bias_attr=False))
+ self.gmp_fc = spectral_norm(nn.Linear(ndf * mult, 1, bias_attr=False))
+ self.conv1x1 = nn.Conv2D(ndf * mult * 2,
+ ndf * mult,
+ kernel_size=1,
+ stride=1,
+ bias_attr=True)
+ self.leaky_relu = nn.LeakyReLU(0.2)
+
+ self.pad = nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect")
+ self.conv = spectral_norm(
+ nn.Conv2D(ndf * mult,
+ 1,
+ kernel_size=4,
+ stride=1,
+ padding=0,
+ bias_attr=False))
+
+ self.model = nn.Sequential(*model)
+
+ def forward(self, input):
+ x = self.model(input)
+
+ gap = F.adaptive_avg_pool2d(x, 1)
+ gap_logit = self.gap_fc(gap.reshape([x.shape[0], -1]))
+ gap_weight = list(self.gap_fc.parameters())[0].transpose([1, 0])
+ gap = x * gap_weight.unsqueeze(2).unsqueeze(3)
+
+ gmp = F.adaptive_max_pool2d(x, 1)
+ gmp_logit = self.gmp_fc(gmp.reshape([x.shape[0], -1]))
+ gmp_weight = list(self.gmp_fc.parameters())[0].transpose([1, 0])
+ gmp = x * gmp_weight.unsqueeze(2).unsqueeze(3)
+
+ cam_logit = paddle.concat([gap_logit, gmp_logit], 1)
+ x = paddle.concat([gap, gmp], 1)
+ x = self.leaky_relu(self.conv1x1(x))
+
+ heatmap = paddle.sum(x, 1, keepdim=True)
+
+ x = self.pad(x)
+ out = self.conv(x)
+
+ return out, cam_logit, heatmap
diff --git a/ppgan/models/generators/__init__.py b/ppgan/models/generators/__init__.py
index e407542479e8fe1707b9977be2a18246d91a9654..a3a1013f0eb629f29750f04be111416949196da4 100644
--- a/ppgan/models/generators/__init__.py
+++ b/ppgan/models/generators/__init__.py
@@ -16,4 +16,5 @@ from .resnet import ResnetGenerator
from .unet import UnetGenerator
from .rrdb_net import RRDBNet
from .makeup import GeneratorPSGANAttention
+from .resnet_ugatit import ResnetUGATITGenerator
from .dcgenerator import DCGenerator
diff --git a/ppgan/models/generators/resnet_ugatit.py b/ppgan/models/generators/resnet_ugatit.py
new file mode 100644
index 0000000000000000000000000000000000000000..187caf4ac15e3e47b0fc5a7b12b4c303ee6cd831
--- /dev/null
+++ b/ppgan/models/generators/resnet_ugatit.py
@@ -0,0 +1,305 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import functools
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from ...modules.norm import build_norm_layer
+from ...modules.utils import spectral_norm
+from .builder import GENERATORS
+
+
+@GENERATORS.register()
+class ResnetUGATITGenerator(nn.Layer):
+ def __init__(self,
+ input_nc,
+ output_nc,
+ ngf=64,
+ n_blocks=6,
+ img_size=256,
+ light=False,
+ norm_type='instance'):
+ assert (n_blocks >= 0)
+ super(ResnetUGATITGenerator, self).__init__()
+ self.input_nc = input_nc
+ self.output_nc = output_nc
+ self.ngf = ngf
+ self.n_blocks = n_blocks
+ self.img_size = img_size
+ self.light = light
+
+ norm_layer = build_norm_layer(norm_type)
+ DownBlock = []
+ DownBlock += [
+ nn.Pad2D(padding=[3, 3, 3, 3], mode="reflect"),
+ nn.Conv2D(input_nc,
+ ngf,
+ kernel_size=7,
+ stride=1,
+ padding=0,
+ bias_attr=False),
+ norm_layer(ngf),
+ nn.ReLU()
+ ]
+
+ # Down-Sampling
+ n_downsampling = 2
+ for i in range(n_downsampling):
+ mult = 2**i
+ DownBlock += [
+ nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
+ nn.Conv2D(ngf * mult,
+ ngf * mult * 2,
+ kernel_size=3,
+ stride=2,
+ padding=0,
+ bias_attr=False),
+ norm_layer(ngf * mult * 2),
+ nn.ReLU()
+ ]
+
+ # Down-Sampling Bottleneck
+ mult = 2**n_downsampling
+ for i in range(n_blocks):
+ DownBlock += [
+ ResnetBlock(ngf * mult, use_bias=False, norm_layer=norm_layer)
+ ]
+
+ # Class Activation Map
+ self.gap_fc = nn.Linear(ngf * mult, 1, bias_attr=False)
+ self.gmp_fc = nn.Linear(ngf * mult, 1, bias_attr=False)
+ self.conv1x1 = nn.Conv2D(ngf * mult * 2,
+ ngf * mult,
+ kernel_size=1,
+ stride=1,
+ bias_attr=True)
+ self.relu = nn.ReLU()
+
+ # Gamma, Beta block
+ if self.light:
+ FC = [
+ nn.Linear(ngf * mult, ngf * mult, bias_attr=False),
+ nn.ReLU(),
+ nn.Linear(ngf * mult, ngf * mult, bias_attr=False),
+ nn.ReLU()
+ ]
+ else:
+ FC = [
+ nn.Linear(img_size // mult * img_size // mult * ngf * mult,
+ ngf * mult,
+ bias_attr=False),
+ nn.ReLU(),
+ nn.Linear(ngf * mult, ngf * mult, bias_attr=False),
+ nn.ReLU()
+ ]
+ self.gamma = nn.Linear(ngf * mult, ngf * mult, bias_attr=False)
+ self.beta = nn.Linear(ngf * mult, ngf * mult, bias_attr=False)
+
+ # Up-Sampling Bottleneck
+ for i in range(n_blocks):
+ setattr(self, 'UpBlock1_' + str(i + 1),
+ ResnetAdaILNBlock(ngf * mult, use_bias=False))
+
+ # Up-Sampling
+ UpBlock2 = []
+ for i in range(n_downsampling):
+ mult = 2**(n_downsampling - i)
+ UpBlock2 += [
+ nn.Upsample(scale_factor=2, mode='nearest'),
+ nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
+ nn.Conv2D(ngf * mult,
+ int(ngf * mult / 2),
+ kernel_size=3,
+ stride=1,
+ padding=0,
+ bias_attr=False),
+ ILN(int(ngf * mult / 2)),
+ nn.ReLU()
+ ]
+
+ UpBlock2 += [
+ nn.Pad2D(padding=[3, 3, 3, 3], mode="reflect"),
+ nn.Conv2D(ngf,
+ output_nc,
+ kernel_size=7,
+ stride=1,
+ padding=0,
+ bias_attr=False),
+ nn.Tanh()
+ ]
+
+ self.DownBlock = nn.Sequential(*DownBlock)
+ self.FC = nn.Sequential(*FC)
+ self.UpBlock2 = nn.Sequential(*UpBlock2)
+
+ def forward(self, input):
+ x = self.DownBlock(input)
+
+ gap = F.adaptive_avg_pool2d(x, 1)
+ gap_logit = self.gap_fc(gap.reshape([x.shape[0], -1]))
+ gap_weight = list(self.gap_fc.parameters())[0].transpose([1, 0])
+ gap = x * gap_weight.unsqueeze(2).unsqueeze(3)
+
+ gmp = F.adaptive_max_pool2d(x, 1)
+ gmp_logit = self.gmp_fc(gmp.reshape([x.shape[0], -1]))
+ gmp_weight = list(self.gmp_fc.parameters())[0].transpose([1, 0])
+ gmp = x * gmp_weight.unsqueeze(2).unsqueeze(3)
+
+ cam_logit = paddle.concat([gap_logit, gmp_logit], 1)
+ x = paddle.concat([gap, gmp], 1)
+ x = self.relu(self.conv1x1(x))
+
+ heatmap = paddle.sum(x, axis=1, keepdim=True)
+
+ if self.light:
+ x_ = F.adaptive_avg_pool2d(x, 1)
+ x_ = self.FC(x_.reshape([x_.shape[0], -1]))
+ else:
+ x_ = self.FC(x.reshape([x.shape[0], -1]))
+ gamma, beta = self.gamma(x_), self.beta(x_)
+
+ for i in range(self.n_blocks):
+ x = getattr(self, 'UpBlock1_' + str(i + 1))(x, gamma, beta)
+ out = self.UpBlock2(x)
+
+ return out, cam_logit, heatmap
+
+
+class ResnetBlock(nn.Layer):
+ def __init__(self, dim, use_bias, norm_layer):
+ super(ResnetBlock, self).__init__()
+ conv_block = []
+ conv_block += [
+ nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
+ nn.Conv2D(dim,
+ dim,
+ kernel_size=3,
+ stride=1,
+ padding=0,
+ bias_attr=use_bias),
+ norm_layer(dim),
+ nn.ReLU()
+ ]
+
+ conv_block += [
+ nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
+ nn.Conv2D(dim,
+ dim,
+ kernel_size=3,
+ stride=1,
+ padding=0,
+ bias_attr=use_bias),
+ norm_layer(dim)
+ ]
+
+ self.conv_block = nn.Sequential(*conv_block)
+
+ def forward(self, x):
+ out = x + self.conv_block(x)
+ return out
+
+
+class ResnetAdaILNBlock(nn.Layer):
+ def __init__(self, dim, use_bias):
+ super(ResnetAdaILNBlock, self).__init__()
+ self.pad1 = nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect")
+ self.conv1 = nn.Conv2D(dim,
+ dim,
+ kernel_size=3,
+ stride=1,
+ padding=0,
+ bias_attr=use_bias)
+ self.norm1 = AdaILN(dim)
+ self.relu1 = nn.ReLU()
+
+ self.pad2 = nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect")
+ self.conv2 = nn.Conv2D(dim,
+ dim,
+ kernel_size=3,
+ stride=1,
+ padding=0,
+ bias_attr=use_bias)
+ self.norm2 = AdaILN(dim)
+
+ def forward(self, x, gamma, beta):
+ out = self.pad1(x)
+ out = self.conv1(out)
+ out = self.norm1(out, gamma, beta)
+ out = self.relu1(out)
+ out = self.pad2(out)
+ out = self.conv2(out)
+ out = self.norm2(out, gamma, beta)
+
+ return out + x
+
+
+class AdaILN(nn.Layer):
+ def __init__(self, num_features, eps=1e-5):
+ super(AdaILN, self).__init__()
+ self.eps = eps
+ shape = (1, num_features, 1, 1)
+
+ self.rho = self.create_parameter(shape)
+ self.rho.set_value(paddle.full(shape, 0.9))
+
+ def forward(self, input, gamma, beta):
+ in_mean, in_var = paddle.mean(input, [2, 3],
+ keepdim=True), paddle.var(input, [2, 3],
+ keepdim=True)
+ out_in = (input - in_mean) / paddle.sqrt(in_var + self.eps)
+ ln_mean, ln_var = paddle.mean(input, [1, 2, 3],
+ keepdim=True), paddle.var(input,
+ [1, 2, 3],
+ keepdim=True)
+ out_ln = (input - ln_mean) / paddle.sqrt(ln_var + self.eps)
+
+ out = self.rho.expand([input.shape[0], -1, -1, -1]) * out_in + (
+ 1 - self.rho.expand([input.shape[0], -1, -1, -1])) * out_ln
+ out = out * gamma.unsqueeze(2).unsqueeze(3) + beta.unsqueeze(
+ 2).unsqueeze(3)
+
+ return out
+
+
+class ILN(nn.Layer):
+ def __init__(self, num_features, eps=1e-5):
+ super(ILN, self).__init__()
+ self.eps = eps
+ shape = (1, num_features, 1, 1)
+ self.rho = self.create_parameter(shape)
+ self.gamma = self.create_parameter(shape)
+ self.beta = self.create_parameter(shape)
+ self.rho.set_value(paddle.full(shape, 0.0))
+ self.gamma.set_value(paddle.full(shape, 1.0))
+ self.beta.set_value(paddle.full(shape, 0.0))
+
+ def forward(self, input):
+ in_mean, in_var = paddle.mean(input, [2, 3],
+ keepdim=True), paddle.var(input, [2, 3],
+ keepdim=True)
+ out_in = (input - in_mean) / paddle.sqrt(in_var + self.eps)
+ ln_mean, ln_var = paddle.mean(input, [1, 2, 3],
+ keepdim=True), paddle.var(input,
+ [1, 2, 3],
+ keepdim=True)
+ out_ln = (input - ln_mean) / paddle.sqrt(ln_var + self.eps)
+ out = self.rho.expand([input.shape[0], -1, -1, -1]) * out_in + (
+ 1 - self.rho.expand([input.shape[0], -1, -1, -1])) * out_ln
+ out = out * self.gamma.expand([input.shape[0], -1, -1, -1
+ ]) + self.beta.expand(
+ [input.shape[0], -1, -1, -1])
+
+ return out
diff --git a/ppgan/models/ugatit_model.py b/ppgan/models/ugatit_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..73587e2cbd767856d260e2088b92359c72a20dd1
--- /dev/null
+++ b/ppgan/models/ugatit_model.py
@@ -0,0 +1,264 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+from .base_model import BaseModel
+
+from .builder import MODELS
+from .generators.builder import build_generator
+from .discriminators.builder import build_discriminator
+from .losses import GANLoss
+
+from ..solver import build_optimizer
+from ..modules.nn import RhoClipper
+from ..modules.init import init_weights
+from ..utils.image_pool import ImagePool
+
+
+@MODELS.register()
+class UGATITModel(BaseModel):
+ """
+ This class implements the UGATIT model, for learning image-to-image translation without paired data.
+
+ UGATIT paper: https://arxiv.org/pdf/1907.10830.pdf
+ """
+ def __init__(self, cfg):
+ """Initialize the CycleGAN class.
+
+ Parameters:
+ opt (config)-- stores all the experiment flags; needs to be a subclass of Dict
+ """
+ super(UGATITModel, self).__init__(cfg)
+
+ # define networks (both Generators and discriminators)
+ # The naming is different from those used in the paper.
+ self.nets['genA2B'] = build_generator(cfg.model.generator)
+ self.nets['genB2A'] = build_generator(cfg.model.generator)
+ init_weights(self.nets['genA2B'])
+ init_weights(self.nets['genB2A'])
+
+ if self.is_train:
+ # define discriminators
+ self.nets['disGA'] = build_discriminator(cfg.model.discriminator_g)
+ self.nets['disGB'] = build_discriminator(cfg.model.discriminator_g)
+ self.nets['disLA'] = build_discriminator(cfg.model.discriminator_l)
+ self.nets['disLB'] = build_discriminator(cfg.model.discriminator_l)
+ init_weights(self.nets['disGA'])
+ init_weights(self.nets['disGB'])
+ init_weights(self.nets['disLA'])
+ init_weights(self.nets['disLB'])
+
+ if self.is_train:
+ # define loss functions
+ self.BCE_loss = nn.BCEWithLogitsLoss()
+ self.L1_loss = nn.L1Loss()
+ self.MSE_loss = nn.MSELoss()
+
+ self.build_lr_scheduler()
+ self.optimizers['optimizer_G'] = build_optimizer(
+ cfg.optimizer,
+ self.lr_scheduler,
+ parameter_list=self.nets['genA2B'].parameters() +
+ self.nets['genB2A'].parameters())
+ self.optimizers['optimizer_D'] = build_optimizer(
+ cfg.optimizer,
+ self.lr_scheduler,
+ parameter_list=self.nets['disGA'].parameters() +
+ self.nets['disGB'].parameters() +
+ self.nets['disLA'].parameters() +
+ self.nets['disLB'].parameters())
+ self.Rho_clipper = RhoClipper(0, 1)
+
+ def set_input(self, input):
+ """Unpack input data from the dataloader and perform necessary pre-processing steps.
+
+ Args:
+ input (dict): include the data itself and its metadata information.
+
+ The option 'direction' can be used to swap domain A and domain B.
+ """
+ mode = 'train' if self.is_train else 'test'
+ AtoB = self.cfg.dataset[mode].direction == 'AtoB'
+
+ if AtoB:
+ if 'A' in input:
+ self.real_A = paddle.to_tensor(input['A'])
+ if 'B' in input:
+ self.real_B = paddle.to_tensor(input['B'])
+ else:
+ if 'B' in input:
+ self.real_A = paddle.to_tensor(input['B'])
+ if 'A' in input:
+ self.real_B = paddle.to_tensor(input['A'])
+
+ if 'A_paths' in input:
+ self.image_paths = input['A_paths']
+ elif 'B_paths' in input:
+ self.image_paths = input['B_paths']
+
+ def forward(self):
+ """Run forward pass; called by both functions and ."""
+ if hasattr(self, 'real_A'):
+ self.fake_A2B, _, _ = self.nets['genA2B'](self.real_A)
+
+ # visual
+ self.visual_items['real_A'] = self.real_A
+ self.visual_items['fake_A2B'] = self.fake_A2B
+
+ if hasattr(self, 'real_B'):
+ self.fake_B2A, _, _ = self.nets['genB2A'](self.real_B)
+
+ # visual
+ self.visual_items['real_B'] = self.real_B
+ self.visual_items['fake_B2A'] = self.fake_B2A
+
+ def test(self):
+ """Forward function used in test time.
+
+ This function wraps function in no_grad() so we don't save intermediate steps for backprop
+ It also calls to produce additional visualization results
+ """
+ self.nets['genA2B'].eval()
+ self.nets['genB2A'].eval()
+ with paddle.no_grad():
+ self.forward()
+ self.compute_visuals()
+
+ self.nets['genA2B'].train()
+ self.nets['genB2A'].train()
+
+ def optimize_parameters(self):
+ """Calculate losses, gradients, and update network weights; called in every training iteration"""
+ def _criterion(loss_func, logit, is_real):
+ if is_real:
+ target = paddle.ones_like(logit)
+ else:
+ target = paddle.zeros_like(logit)
+ return loss_func(logit, target)
+
+ # forward
+ # compute fake images and reconstruction images.
+ self.forward()
+
+ # update D
+ self.optimizers['optimizer_D'].clear_grad()
+ real_GA_logit, real_GA_cam_logit, _ = self.nets['disGA'](self.real_A)
+ real_LA_logit, real_LA_cam_logit, _ = self.nets['disLA'](self.real_A)
+ real_GB_logit, real_GB_cam_logit, _ = self.nets['disGB'](self.real_B)
+ real_LB_logit, real_LB_cam_logit, _ = self.nets['disLB'](self.real_B)
+
+ fake_GA_logit, fake_GA_cam_logit, _ = self.nets['disGA'](self.fake_B2A)
+ fake_LA_logit, fake_LA_cam_logit, _ = self.nets['disLA'](self.fake_B2A)
+ fake_GB_logit, fake_GB_cam_logit, _ = self.nets['disGB'](self.fake_A2B)
+ fake_LB_logit, fake_LB_cam_logit, _ = self.nets['disLB'](self.fake_A2B)
+
+ D_ad_loss_GA = _criterion(self.MSE_loss,
+ real_GA_logit, True) + _criterion(
+ self.MSE_loss, fake_GA_logit, False)
+
+ D_ad_cam_loss_GA = _criterion(
+ self.MSE_loss, real_GA_cam_logit, True) + _criterion(
+ self.MSE_loss, fake_GA_cam_logit, False)
+
+ D_ad_loss_LA = _criterion(self.MSE_loss,
+ real_LA_logit, True) + _criterion(
+ self.MSE_loss, fake_LA_logit, False)
+
+ D_ad_cam_loss_LA = _criterion(
+ self.MSE_loss, real_LA_cam_logit, True) + _criterion(
+ self.MSE_loss, fake_LA_cam_logit, False)
+
+ D_ad_loss_GB = _criterion(self.MSE_loss,
+ real_GB_logit, True) + _criterion(
+ self.MSE_loss, fake_GB_logit, False)
+
+ D_ad_cam_loss_GB = _criterion(
+ self.MSE_loss, real_GB_cam_logit, True) + _criterion(
+ self.MSE_loss, fake_GB_cam_logit, False)
+
+ D_ad_loss_LB = _criterion(self.MSE_loss,
+ real_LB_logit, True) + _criterion(
+ self.MSE_loss, fake_LB_logit, False)
+
+ D_ad_cam_loss_LB = _criterion(
+ self.MSE_loss, real_LB_cam_logit, True) + _criterion(
+ self.MSE_loss, fake_LB_cam_logit, False)
+
+ D_loss_A = self.cfg.adv_weight * (D_ad_loss_GA + D_ad_cam_loss_GA +
+ D_ad_loss_LA + D_ad_cam_loss_LA)
+ D_loss_B = self.cfg.adv_weight * (D_ad_loss_GB + D_ad_cam_loss_GB +
+ D_ad_loss_LB + D_ad_cam_loss_LB)
+
+ Discriminator_loss = D_loss_A + D_loss_B
+ Discriminator_loss.backward()
+ self.optimizers['optimizer_D'].step()
+
+ # update G
+ self.optimizers['optimizer_G'].clear_grad()
+
+ fake_A2B, fake_A2B_cam_logit, _ = self.nets['genA2B'](self.real_A)
+ fake_B2A, fake_B2A_cam_logit, _ = self.nets['genB2A'](self.real_B)
+
+ fake_A2B2A, _, _ = self.nets['genB2A'](fake_A2B)
+ fake_B2A2B, _, _ = self.nets['genA2B'](fake_B2A)
+
+ fake_A2A, fake_A2A_cam_logit, _ = self.nets['genB2A'](self.real_A)
+ fake_B2B, fake_B2B_cam_logit, _ = self.nets['genA2B'](self.real_B)
+
+ fake_GA_logit, fake_GA_cam_logit, _ = self.nets['disGA'](fake_B2A)
+ fake_LA_logit, fake_LA_cam_logit, _ = self.nets['disLA'](fake_B2A)
+ fake_GB_logit, fake_GB_cam_logit, _ = self.nets['disGB'](fake_A2B)
+ fake_LB_logit, fake_LB_cam_logit, _ = self.nets['disLB'](fake_A2B)
+
+ G_ad_loss_GA = _criterion(self.MSE_loss, fake_GA_logit, True)
+ G_ad_cam_loss_GA = _criterion(self.MSE_loss, fake_GA_cam_logit, True)
+ G_ad_loss_LA = _criterion(self.MSE_loss, fake_LA_logit, True)
+ G_ad_cam_loss_LA = _criterion(self.MSE_loss, fake_LA_cam_logit, True)
+ G_ad_loss_GB = _criterion(self.MSE_loss, fake_GB_logit, True)
+ G_ad_cam_loss_GB = _criterion(self.MSE_loss, fake_GB_cam_logit, True)
+ G_ad_loss_LB = _criterion(self.MSE_loss, fake_LB_logit, True)
+ G_ad_cam_loss_LB = _criterion(self.MSE_loss, fake_LB_cam_logit, True)
+
+ G_recon_loss_A = self.L1_loss(fake_A2B2A, self.real_A)
+ G_recon_loss_B = self.L1_loss(fake_B2A2B, self.real_B)
+
+ G_identity_loss_A = self.L1_loss(fake_A2A, self.real_A)
+ G_identity_loss_B = self.L1_loss(fake_B2B, self.real_B)
+
+ G_cam_loss_A = _criterion(self.BCE_loss,
+ fake_B2A_cam_logit, True) + _criterion(
+ self.BCE_loss, fake_A2A_cam_logit, False)
+
+ G_cam_loss_B = _criterion(self.BCE_loss,
+ fake_A2B_cam_logit, True) + _criterion(
+ self.BCE_loss, fake_B2B_cam_logit, False)
+
+ G_loss_A = self.cfg.adv_weight * (
+ G_ad_loss_GA + G_ad_cam_loss_GA + G_ad_loss_LA + G_ad_cam_loss_LA
+ ) + self.cfg.cycle_weight * G_recon_loss_A + self.cfg.identity_weight * G_identity_loss_A + self.cfg.cam_weight * G_cam_loss_A
+ G_loss_B = self.cfg.adv_weight * (
+ G_ad_loss_GB + G_ad_cam_loss_GB + G_ad_loss_LB + G_ad_cam_loss_LB
+ ) + self.cfg.cycle_weight * G_recon_loss_B + self.cfg.identity_weight * G_identity_loss_B + self.cfg.cam_weight * G_cam_loss_B
+
+ Generator_loss = G_loss_A + G_loss_B
+ Generator_loss.backward()
+ self.optimizers['optimizer_G'].step()
+
+ # clip parameter of AdaILN and ILN, applied after optimizer step
+ self.nets['genA2B'].apply(self.Rho_clipper)
+ self.nets['genB2A'].apply(self.Rho_clipper)
+
+ self.losses['discriminator_loss'] = Discriminator_loss
+ self.losses['generator_loss'] = Generator_loss
diff --git a/ppgan/modules/nn.py b/ppgan/modules/nn.py
index f96e92cce68af2ddcd7efc02a7fac9767d2f3dfe..d1c98bc2e8ad66030329bf5af033d5a7fc3d0be7 100644
--- a/ppgan/modules/nn.py
+++ b/ppgan/modules/nn.py
@@ -65,3 +65,17 @@ class Spectralnorm(paddle.nn.Layer):
self.layer.weight = weight
out = self.layer(x)
return out
+
+
+class RhoClipper(object):
+ def __init__(self, min, max):
+ self.clip_min = min
+ self.clip_max = max
+ assert min < max
+
+ def __call__(self, module):
+
+ if hasattr(module, 'rho'):
+ w = module.rho
+ w = w.clip(self.clip_min, self.clip_max)
+ module.rho.set_value(w)
diff --git a/ppgan/modules/utils.py b/ppgan/modules/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3cd10df1f8b65b677283594823db7a916df4ae3a
--- /dev/null
+++ b/ppgan/modules/utils.py
@@ -0,0 +1,147 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import numpy as np
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from .init import normal_
+
+
+class SpectralNorm(object):
+ def __init__(self, name='weight', n_power_iterations=1, dim=0, eps=1e-12):
+ self.name = name
+ self.dim = dim
+ if n_power_iterations <= 0:
+ raise ValueError(
+ 'Expected n_power_iterations to be positive, but '
+ 'got n_power_iterations={}'.format(n_power_iterations))
+ self.n_power_iterations = n_power_iterations
+ self.eps = eps
+
+ def reshape_weight_to_matrix(self, weight):
+ weight_mat = weight
+ if self.dim != 0:
+ # transpose dim to front
+ weight_mat = weight_mat.transpose([
+ self.dim,
+ *[d for d in range(weight_mat.dim()) if d != self.dim]
+ ])
+
+ height = weight_mat.shape[0]
+
+ return weight_mat.reshape([height, -1])
+
+ def compute_weight(self, layer, do_power_iteration):
+ weight = getattr(layer, self.name + '_orig')
+ u = getattr(layer, self.name + '_u')
+ v = getattr(layer, self.name + '_v')
+ weight_mat = self.reshape_weight_to_matrix(weight)
+
+ if do_power_iteration:
+ with paddle.no_grad():
+ for _ in range(self.n_power_iterations):
+ v.set_value(
+ F.normalize(
+ paddle.matmul(weight_mat,
+ u,
+ transpose_x=True,
+ transpose_y=False),
+ axis=0,
+ epsilon=self.eps,
+ ))
+
+ u.set_value(
+ F.normalize(
+ paddle.matmul(weight_mat, v),
+ axis=0,
+ epsilon=self.eps,
+ ))
+ if self.n_power_iterations > 0:
+ u = u.clone()
+ v = v.clone()
+
+ sigma = paddle.dot(u, paddle.mv(weight_mat, v))
+ weight = weight / sigma
+ return weight
+
+ def remove(self, layer):
+ with paddle.no_grad():
+ weight = self.compute_weight(layer, do_power_iteration=False)
+ delattr(layer, self.name)
+ delattr(layer, self.name + '_u')
+ delattr(layer, self.name + '_v')
+ delattr(layer, self.name + '_orig')
+
+ layer.add_parameter(self.name, weight.detach())
+
+ def __call__(self, layer, inputs):
+ setattr(layer, self.name,
+ self.compute_weight(layer, do_power_iteration=layer.training))
+
+ @staticmethod
+ def apply(layer, name, n_power_iterations, dim, eps):
+ for k, hook in layer._forward_pre_hooks.items():
+ if isinstance(hook, SpectralNorm) and hook.name == name:
+ raise RuntimeError("Cannot register two spectral_norm hooks on "
+ "the same parameter {}".format(name))
+
+ fn = SpectralNorm(name, n_power_iterations, dim, eps)
+ weight = layer._parameters[name]
+
+ with paddle.no_grad():
+ weight_mat = fn.reshape_weight_to_matrix(weight)
+ h, w = weight_mat.shape
+
+ # randomly initialize u and v
+ u = layer.create_parameter([h])
+ u = normal_(u, 0., 1.)
+ v = layer.create_parameter([w])
+ v = normal_(v, 0., 1.)
+ u = F.normalize(u, axis=0, epsilon=fn.eps)
+ v = F.normalize(v, axis=0, epsilon=fn.eps)
+
+ # delete fn.name form parameters, otherwise you can not set attribute
+ del layer._parameters[fn.name]
+ layer.add_parameter(fn.name + "_orig", weight)
+ # still need to assign weight back as fn.name because all sorts of
+ # things may assume that it exists, e.g., when initializing weights.
+ # However, we can't directly assign as it could be an Parameter and
+ # gets added as a parameter. Instead, we register weight * 1.0 as a plain
+ # attribute.
+ setattr(layer, fn.name, weight * 1.0)
+ layer.register_buffer(fn.name + "_u", u)
+ layer.register_buffer(fn.name + "_v", v)
+
+ layer.register_forward_pre_hook(fn)
+ return fn
+
+
+def spectral_norm(layer,
+ name='weight',
+ n_power_iterations=1,
+ eps=1e-12,
+ dim=None):
+
+ if dim is None:
+ if isinstance(layer, (nn.Conv1DTranspose, nn.Conv2DTranspose,
+ nn.Conv3DTranspose, nn.Linear)):
+ dim = 1
+ else:
+ dim = 0
+ SpectralNorm.apply(layer, name, n_power_iterations, dim, eps)
+ return layer