diff --git a/README.md b/README.md index 00315532aca018bbe65e6f178e112d65efca49f0..53ab209d476e72d3d2f977d2a30e166393061513 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,13 @@ GAN--生成对抗网络,被“卷积网络之父”**Yann LeCun(杨立昆) +### 人脸动漫化 + +
+ +
+ + ## 版本更新 - v0.1.0 (2020.11.02) diff --git a/README_en.md b/README_en.md index 3ff2ca83a0c56a7288c21b1f37baabf4e175fc88..84b01549e90171d5073de32025948f85de2c551e 100644 --- a/README_en.md +++ b/README_en.md @@ -74,6 +74,12 @@ GAN-Generative Adversarial Network, was praised by "the Father of Convolutional +### Face cartoonization + +
+ +
+ ## Changelog - v0.1.0 (2020.11.02) diff --git a/configs/ugatit_selfie2anime_light.yaml b/configs/ugatit_selfie2anime_light.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad0de434dd78eae2613ccf5993fe1e3a034422a2 --- /dev/null +++ b/configs/ugatit_selfie2anime_light.yaml @@ -0,0 +1,85 @@ +epochs: 300 +output_dir: output_dir +adv_weight: 1.0 +cycle_weight: 10.0 +identity_weight: 10.0 +cam_weight: 1000.0 + +model: + name: UGATITModel + generator: + name: ResnetUGATITGenerator + input_nc: 3 + output_nc: 3 + ngf: 64 + n_blocks: 4 + img_size: 256 + light: True + discriminator_g: + name: UGATITDiscriminator + input_nc: 3 + ndf: 64 + n_layers: 7 + discriminator_l: + name: UGATITDiscriminator + input_nc: 3 + ndf: 64 + n_layers: 5 + +dataset: + train: + name: UnpairedDataset + dataroot: data/selfie2anime + num_workers: 0 + phase: train + max_dataset_size: inf + direction: AtoB + input_nc: 3 + output_nc: 3 + serial_batches: False + transforms: + - name: Resize + size: [286, 286] + interpolation: 'bilinear' #'bicubic' #cv2.INTER_CUBIC + - name: RandomCrop + size: [256, 256] + - name: RandomHorizontalFlip + prob: 0.5 + - name: Transpose + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] + test: + name: SingleDataset + dataroot: data/selfie2anime/testA + max_dataset_size: inf + direction: AtoB + input_nc: 3 + output_nc: 3 + serial_batches: False + transforms: + - name: Resize + size: [256, 256] + interpolation: 'bilinear' #cv2.INTER_CUBIC + - name: Transpose + - name: Normalize + mean: [127.5, 127.5, 127.5] + std: [127.5, 127.5, 127.5] + +optimizer: + name: Adam + beta1: 0.5 + weight_decay: 0.0001 + +lr_scheduler: + name: linear + learning_rate: 0.0001 + start_epoch: 150 + decay_epochs: 150 + +log_config: + interval: 10 + visiual_interval: 500 + +snapshot_config: + interval: 30 diff --git a/docs/imgs/ugatit.png b/docs/imgs/ugatit.png new file mode 100644 index 
0000000000000000000000000000000000000000..a13d7847f0e6489675f08217c74511e5e760b8aa Binary files /dev/null and b/docs/imgs/ugatit.png differ diff --git a/ppgan/apps/edvr_predictor.py b/ppgan/apps/edvr_predictor.py index 695ca41829712dc6160a160e92542cc92de7e6a7..0a6425dc289158bd57caa00d65d8d15a76917cad 100644 --- a/ppgan/apps/edvr_predictor.py +++ b/ppgan/apps/edvr_predictor.py @@ -186,7 +186,6 @@ class EDVRPredictor(BasePredictor): period = cur_time - prev_time periods.append(period) - # print('Processed {} samples'.format(infer_iter + 1)) frame_pattern_combined = os.path.join(pred_frame_path, '%08d.png') vid_out_path = os.path.join(self.output, '{}_edvr_out.mp4'.format(base_name)) diff --git a/ppgan/engine/trainer.py b/ppgan/engine/trainer.py index 1fe21e84247c62e7681cd06ff0e78b4693149b1a..375af05d329c58fe289977007e763264b9663894 100644 --- a/ppgan/engine/trainer.py +++ b/ppgan/engine/trainer.py @@ -17,8 +17,9 @@ import time import copy import logging -import paddle +import datetime +import paddle from paddle.distributed import ParallelEnv from ..datasets.builder import build_dataloader @@ -64,6 +65,9 @@ class Trainer: self.local_rank = ParallelEnv().local_rank # time count + self.steps_per_epoch = len(self.train_dataloader) + self.total_steps = self.epochs * self.steps_per_epoch + self.time_count = {} self.best_metric = {} @@ -219,7 +223,14 @@ class Trainer: message += 'reader_cost: %.5f sec ' % self.data_time if hasattr(self, 'ips'): - message += 'ips: %.5f images/s' % self.ips + message += 'ips: %.5f images/s ' % self.ips + + if hasattr(self, 'step_time'): + cur_step = self.steps_per_epoch * (self.current_epoch - + 1) + self.batch_id + eta = self.step_time * (self.total_steps - cur_step - 1) + eta_str = str(datetime.timedelta(seconds=int(eta))) + message += f'eta: {eta_str}' # print the message self.logger.info(message) diff --git a/ppgan/models/__init__.py b/ppgan/models/__init__.py index 
# U-GAT-IT discriminator: PatchGAN body plus a Class Activation Map (CAM)
# auxiliary classifier. Paper: https://arxiv.org/abs/1907.10830
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from ...modules.utils import spectral_norm
from .builder import DISCRIMINATORS


@DISCRIMINATORS.register()
class UGATITDiscriminator(nn.Layer):
    """PatchGAN discriminator with a CAM head.

    Args:
        input_nc (int): number of channels of the input image.
        ndf (int): number of filters of the first conv layer.
        n_layers (int): network depth; the config uses 5 for the "local"
            discriminator and 7 for the "global" one.

    forward() returns a 3-tuple ``(patch_logits, cam_logits, heatmap)``.
    """
    def __init__(self, input_nc, ndf=64, n_layers=5):
        super(UGATITDiscriminator, self).__init__()
        # Stem: reflect-pad + spectral-normalized stride-2 conv.
        model = [
            nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
            spectral_norm(
                nn.Conv2D(input_nc,
                          ndf,
                          kernel_size=4,
                          stride=2,
                          padding=0,
                          bias_attr=True)),
            nn.LeakyReLU(0.2)
        ]

        # Downsampling tower: each stride-2 conv doubles the channel count.
        for i in range(1, n_layers - 2):
            mult = 2**(i - 1)
            model += [
                nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
                spectral_norm(
                    nn.Conv2D(ndf * mult,
                              ndf * mult * 2,
                              kernel_size=4,
                              stride=2,
                              padding=0,
                              bias_attr=True)),
                nn.LeakyReLU(0.2)
            ]

        # Final stride-1 conv: doubles channels while keeping spatial size.
        mult = 2**(n_layers - 2 - 1)
        model += [
            nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
            spectral_norm(
                nn.Conv2D(ndf * mult,
                          ndf * mult * 2,
                          kernel_size=4,
                          stride=1,
                          padding=0,
                          bias_attr=True)),
            nn.LeakyReLU(0.2)
        ]

        # Class Activation Map head: two 1-logit linear classifiers over
        # globally avg/max pooled features; their weights are reused in
        # forward() to re-weight the feature map channel-wise.
        mult = 2**(n_layers - 2)
        self.gap_fc = spectral_norm(nn.Linear(ndf * mult, 1, bias_attr=False))
        self.gmp_fc = spectral_norm(nn.Linear(ndf * mult, 1, bias_attr=False))
        # Fuses the concatenated avg/max CAM branches back to `ndf * mult`.
        self.conv1x1 = nn.Conv2D(ndf * mult * 2,
                                 ndf * mult,
                                 kernel_size=1,
                                 stride=1,
                                 bias_attr=True)
        self.leaky_relu = nn.LeakyReLU(0.2)

        # Final 1-channel "real/fake" patch prediction.
        self.pad = nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect")
        self.conv = spectral_norm(
            nn.Conv2D(ndf * mult,
                      1,
                      kernel_size=4,
                      stride=1,
                      padding=0,
                      bias_attr=False))

        self.model = nn.Sequential(*model)

    def forward(self, input):
        """Return (patch logits, CAM logits, attention heatmap)."""
        x = self.model(input)

        # Global-average-pooling CAM branch: the classifier weight vector
        # re-weights feature channels (note: with spectral_norm applied,
        # parameters()[0] is presumably the underlying weight — verify).
        gap = F.adaptive_avg_pool2d(x, 1)
        gap_logit = self.gap_fc(gap.reshape([x.shape[0], -1]))
        gap_weight = list(self.gap_fc.parameters())[0].transpose([1, 0])
        gap = x * gap_weight.unsqueeze(2).unsqueeze(3)

        # Global-max-pooling CAM branch (same scheme).
        gmp = F.adaptive_max_pool2d(x, 1)
        gmp_logit = self.gmp_fc(gmp.reshape([x.shape[0], -1]))
        gmp_weight = list(self.gmp_fc.parameters())[0].transpose([1, 0])
        gmp = x * gmp_weight.unsqueeze(2).unsqueeze(3)

        cam_logit = paddle.concat([gap_logit, gmp_logit], 1)
        x = paddle.concat([gap, gmp], 1)
        x = self.leaky_relu(self.conv1x1(x))

        # Channel-summed attention map, returned for visualization only.
        heatmap = paddle.sum(x, 1, keepdim=True)

        x = self.pad(x)
        out = self.conv(x)

        return out, cam_logit, heatmap
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import functools
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from ...modules.norm import build_norm_layer
from ...modules.utils import spectral_norm
from .builder import GENERATORS


@GENERATORS.register()
class ResnetUGATITGenerator(nn.Layer):
    """U-GAT-IT generator (https://arxiv.org/abs/1907.10830).

    Encoder + residual bottleneck, a CAM attention module, an MLP that
    predicts per-sample AdaILN gamma/beta, and a decoder of AdaILN residual
    blocks followed by nearest-neighbor upsampling.

    Args:
        input_nc (int): input image channels.
        output_nc (int): output image channels.
        ngf (int): base number of generator filters.
        n_blocks (int): number of residual blocks in encoder and decoder.
        img_size (int): input spatial size; only used to size the non-light
            fully-connected gamma/beta head.
        light (bool): if True, pool features before the FC head (much
            smaller FC — the "light" model variant).
        norm_type (str): norm layer name resolved by build_norm_layer.

    forward() returns ``(image, cam_logits, heatmap)``.
    """
    def __init__(self,
                 input_nc,
                 output_nc,
                 ngf=64,
                 n_blocks=6,
                 img_size=256,
                 light=False,
                 norm_type='instance'):
        assert (n_blocks >= 0)
        super(ResnetUGATITGenerator, self).__init__()
        self.input_nc = input_nc
        self.output_nc = output_nc
        self.ngf = ngf
        self.n_blocks = n_blocks
        self.img_size = img_size
        self.light = light

        norm_layer = build_norm_layer(norm_type)
        # Encoder stem: 7x7 conv at full resolution.
        DownBlock = []
        DownBlock += [
            nn.Pad2D(padding=[3, 3, 3, 3], mode="reflect"),
            nn.Conv2D(input_nc,
                      ngf,
                      kernel_size=7,
                      stride=1,
                      padding=0,
                      bias_attr=False),
            norm_layer(ngf),
            nn.ReLU()
        ]

        # Down-Sampling: two stride-2 convs, channels double each time.
        n_downsampling = 2
        for i in range(n_downsampling):
            mult = 2**i
            DownBlock += [
                nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
                nn.Conv2D(ngf * mult,
                          ngf * mult * 2,
                          kernel_size=3,
                          stride=2,
                          padding=0,
                          bias_attr=False),
                norm_layer(ngf * mult * 2),
                nn.ReLU()
            ]

        # Down-Sampling Bottleneck: plain residual blocks.
        mult = 2**n_downsampling
        for i in range(n_blocks):
            DownBlock += [
                ResnetBlock(ngf * mult, use_bias=False, norm_layer=norm_layer)
            ]

        # Class Activation Map: 1-logit classifiers over pooled features
        # whose weights re-weight the feature map in forward().
        self.gap_fc = nn.Linear(ngf * mult, 1, bias_attr=False)
        self.gmp_fc = nn.Linear(ngf * mult, 1, bias_attr=False)
        self.conv1x1 = nn.Conv2D(ngf * mult * 2,
                                 ngf * mult,
                                 kernel_size=1,
                                 stride=1,
                                 bias_attr=True)
        self.relu = nn.ReLU()

        # Gamma, Beta block: MLP producing AdaILN modulation parameters.
        if self.light:
            # Light variant: operates on pooled (ngf*mult) features.
            FC = [
                nn.Linear(ngf * mult, ngf * mult, bias_attr=False),
                nn.ReLU(),
                nn.Linear(ngf * mult, ngf * mult, bias_attr=False),
                nn.ReLU()
            ]
        else:
            # Full variant: flattens the whole (H/mult x W/mult) feature map.
            FC = [
                nn.Linear(img_size // mult * img_size // mult * ngf * mult,
                          ngf * mult,
                          bias_attr=False),
                nn.ReLU(),
                nn.Linear(ngf * mult, ngf * mult, bias_attr=False),
                nn.ReLU()
            ]
        self.gamma = nn.Linear(ngf * mult, ngf * mult, bias_attr=False)
        self.beta = nn.Linear(ngf * mult, ngf * mult, bias_attr=False)

        # Up-Sampling Bottleneck: AdaILN residual blocks, registered as
        # attributes because each needs the (gamma, beta) extra inputs.
        for i in range(n_blocks):
            setattr(self, 'UpBlock1_' + str(i + 1),
                    ResnetAdaILNBlock(ngf * mult, use_bias=False))

        # Up-Sampling: nearest-neighbor upsample + conv, channels halve.
        UpBlock2 = []
        for i in range(n_downsampling):
            mult = 2**(n_downsampling - i)
            UpBlock2 += [
                nn.Upsample(scale_factor=2, mode='nearest'),
                nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
                nn.Conv2D(ngf * mult,
                          int(ngf * mult / 2),
                          kernel_size=3,
                          stride=1,
                          padding=0,
                          bias_attr=False),
                ILN(int(ngf * mult / 2)),
                nn.ReLU()
            ]

        # Output head: 7x7 conv + tanh to [-1, 1] image range.
        UpBlock2 += [
            nn.Pad2D(padding=[3, 3, 3, 3], mode="reflect"),
            nn.Conv2D(ngf,
                      output_nc,
                      kernel_size=7,
                      stride=1,
                      padding=0,
                      bias_attr=False),
            nn.Tanh()
        ]

        self.DownBlock = nn.Sequential(*DownBlock)
        self.FC = nn.Sequential(*FC)
        self.UpBlock2 = nn.Sequential(*UpBlock2)

    def forward(self, input):
        """Translate `input`; return (image, cam_logits, heatmap)."""
        x = self.DownBlock(input)

        # CAM average-pooling branch.
        gap = F.adaptive_avg_pool2d(x, 1)
        gap_logit = self.gap_fc(gap.reshape([x.shape[0], -1]))
        gap_weight = list(self.gap_fc.parameters())[0].transpose([1, 0])
        gap = x * gap_weight.unsqueeze(2).unsqueeze(3)

        # CAM max-pooling branch.
        gmp = F.adaptive_max_pool2d(x, 1)
        gmp_logit = self.gmp_fc(gmp.reshape([x.shape[0], -1]))
        gmp_weight = list(self.gmp_fc.parameters())[0].transpose([1, 0])
        gmp = x * gmp_weight.unsqueeze(2).unsqueeze(3)

        cam_logit = paddle.concat([gap_logit, gmp_logit], 1)
        x = paddle.concat([gap, gmp], 1)
        x = self.relu(self.conv1x1(x))

        # Attention heatmap for visualization.
        heatmap = paddle.sum(x, axis=1, keepdim=True)

        # Predict AdaILN gamma/beta from (pooled or flattened) features.
        if self.light:
            x_ = F.adaptive_avg_pool2d(x, 1)
            x_ = self.FC(x_.reshape([x_.shape[0], -1]))
        else:
            x_ = self.FC(x.reshape([x.shape[0], -1]))
        gamma, beta = self.gamma(x_), self.beta(x_)

        # Decoder: AdaILN residual blocks, then upsampling head.
        for i in range(self.n_blocks):
            x = getattr(self, 'UpBlock1_' + str(i + 1))(x, gamma, beta)
        out = self.UpBlock2(x)

        return out, cam_logit, heatmap


class ResnetBlock(nn.Layer):
    """Standard two-conv residual block with reflect padding."""
    def __init__(self, dim, use_bias, norm_layer):
        super(ResnetBlock, self).__init__()
        conv_block = []
        conv_block += [
            nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
            nn.Conv2D(dim,
                      dim,
                      kernel_size=3,
                      stride=1,
                      padding=0,
                      bias_attr=use_bias),
            norm_layer(dim),
            nn.ReLU()
        ]

        conv_block += [
            nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
            nn.Conv2D(dim,
                      dim,
                      kernel_size=3,
                      stride=1,
                      padding=0,
                      bias_attr=use_bias),
            norm_layer(dim)
        ]

        self.conv_block = nn.Sequential(*conv_block)

    def forward(self, x):
        # Identity skip connection.
        out = x + self.conv_block(x)
        return out


class ResnetAdaILNBlock(nn.Layer):
    """Residual block whose norms are AdaILN, modulated by (gamma, beta)."""
    def __init__(self, dim, use_bias):
        super(ResnetAdaILNBlock, self).__init__()
        self.pad1 = nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect")
        self.conv1 = nn.Conv2D(dim,
                               dim,
                               kernel_size=3,
                               stride=1,
                               padding=0,
                               bias_attr=use_bias)
        self.norm1 = AdaILN(dim)
        self.relu1 = nn.ReLU()

        self.pad2 = nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect")
        self.conv2 = nn.Conv2D(dim,
                               dim,
                               kernel_size=3,
                               stride=1,
                               padding=0,
                               bias_attr=use_bias)
        self.norm2 = AdaILN(dim)

    def forward(self, x, gamma, beta):
        out = self.pad1(x)
        out = self.conv1(out)
        out = self.norm1(out, gamma, beta)
        out = self.relu1(out)
        out = self.pad2(out)
        out = self.conv2(out)
        out = self.norm2(out, gamma, beta)

        # Residual connection.
        return out + x


class AdaILN(nn.Layer):
    """Adaptive Instance-Layer Normalization.

    Blends instance-norm and layer-norm outputs with a learned, clipped
    ratio `rho` (kept in [0, 1] by RhoClipper during training), then applies
    externally supplied per-sample gamma/beta.
    """
    def __init__(self, num_features, eps=1e-5):
        super(AdaILN, self).__init__()
        self.eps = eps
        shape = (1, num_features, 1, 1)

        # rho starts at 0.9: mostly instance norm at initialization.
        self.rho = self.create_parameter(shape)
        self.rho.set_value(paddle.full(shape, 0.9))

    def forward(self, input, gamma, beta):
        # Instance norm: statistics over spatial dims (per sample, per channel).
        in_mean, in_var = paddle.mean(input, [2, 3],
                                      keepdim=True), paddle.var(input, [2, 3],
                                                                keepdim=True)
        out_in = (input - in_mean) / paddle.sqrt(in_var + self.eps)
        # Layer norm: statistics over channel + spatial dims (per sample).
        ln_mean, ln_var = paddle.mean(input, [1, 2, 3],
                                      keepdim=True), paddle.var(input,
                                                                [1, 2, 3],
                                                                keepdim=True)
        out_ln = (input - ln_mean) / paddle.sqrt(ln_var + self.eps)

        # rho-weighted mixture, then per-sample affine modulation.
        out = self.rho.expand([input.shape[0], -1, -1, -1]) * out_in + (
            1 - self.rho.expand([input.shape[0], -1, -1, -1])) * out_ln
        out = out * gamma.unsqueeze(2).unsqueeze(3) + beta.unsqueeze(
            2).unsqueeze(3)

        return out


class ILN(nn.Layer):
    """Instance-Layer Normalization with learned rho/gamma/beta parameters.

    Like AdaILN but the affine parameters are owned by the layer itself;
    rho starts at 0 (pure layer norm) and is clipped to [0, 1] in training.
    """
    def __init__(self, num_features, eps=1e-5):
        super(ILN, self).__init__()
        self.eps = eps
        shape = (1, num_features, 1, 1)
        self.rho = self.create_parameter(shape)
        self.gamma = self.create_parameter(shape)
        self.beta = self.create_parameter(shape)
        self.rho.set_value(paddle.full(shape, 0.0))
        self.gamma.set_value(paddle.full(shape, 1.0))
        self.beta.set_value(paddle.full(shape, 0.0))

    def forward(self, input):
        # Instance-norm statistics (spatial dims).
        in_mean, in_var = paddle.mean(input, [2, 3],
                                      keepdim=True), paddle.var(input, [2, 3],
                                                                keepdim=True)
        out_in = (input - in_mean) / paddle.sqrt(in_var + self.eps)
        # Layer-norm statistics (channel + spatial dims).
        ln_mean, ln_var = paddle.mean(input, [1, 2, 3],
                                      keepdim=True), paddle.var(input,
                                                                [1, 2, 3],
                                                                keepdim=True)
        out_ln = (input - ln_mean) / paddle.sqrt(ln_var + self.eps)
        out = self.rho.expand([input.shape[0], -1, -1, -1]) * out_in + (
            1 - self.rho.expand([input.shape[0], -1, -1, -1])) * out_ln
        out = out * self.gamma.expand([input.shape[0], -1, -1, -1
                                       ]) + self.beta.expand(
                                           [input.shape[0], -1, -1, -1])

        return out
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
from .base_model import BaseModel

from .builder import MODELS
from .generators.builder import build_generator
from .discriminators.builder import build_discriminator
from .losses import GANLoss

from ..solver import build_optimizer
from ..modules.nn import RhoClipper
from ..modules.init import init_weights
from ..utils.image_pool import ImagePool


@MODELS.register()
class UGATITModel(BaseModel):
    """
    This class implements the UGATIT model, for learning image-to-image translation without paired data.

    UGATIT paper: https://arxiv.org/pdf/1907.10830.pdf
    """
    def __init__(self, cfg):
        """Initialize the UGATIT model class.

        Parameters:
            cfg (config) -- stores all the experiment flags; needs to be a subclass of Dict
        """
        super(UGATITModel, self).__init__(cfg)

        # define networks (both Generators and discriminators)
        # A2B / B2A denote the two translation directions between domains.
        self.nets['genA2B'] = build_generator(cfg.model.generator)
        self.nets['genB2A'] = build_generator(cfg.model.generator)
        init_weights(self.nets['genA2B'])
        init_weights(self.nets['genB2A'])

        if self.is_train:
            # define discriminators: G* are global (deeper), L* are local
            # (shallower) discriminators, one pair per domain.
            self.nets['disGA'] = build_discriminator(cfg.model.discriminator_g)
            self.nets['disGB'] = build_discriminator(cfg.model.discriminator_g)
            self.nets['disLA'] = build_discriminator(cfg.model.discriminator_l)
            self.nets['disLB'] = build_discriminator(cfg.model.discriminator_l)
            init_weights(self.nets['disGA'])
            init_weights(self.nets['disGB'])
            init_weights(self.nets['disLA'])
            init_weights(self.nets['disLB'])

        if self.is_train:
            # define loss functions: BCE for CAM logits, L1 for cycle and
            # identity reconstructions, MSE for LSGAN adversarial terms.
            self.BCE_loss = nn.BCEWithLogitsLoss()
            self.L1_loss = nn.L1Loss()
            self.MSE_loss = nn.MSELoss()

            self.build_lr_scheduler()
            self.optimizers['optimizer_G'] = build_optimizer(
                cfg.optimizer,
                self.lr_scheduler,
                parameter_list=self.nets['genA2B'].parameters() +
                self.nets['genB2A'].parameters())
            self.optimizers['optimizer_D'] = build_optimizer(
                cfg.optimizer,
                self.lr_scheduler,
                parameter_list=self.nets['disGA'].parameters() +
                self.nets['disGB'].parameters() +
                self.nets['disLA'].parameters() +
                self.nets['disLB'].parameters())
            # Keeps AdaILN/ILN `rho` parameters inside [0, 1] after each step.
            self.Rho_clipper = RhoClipper(0, 1)

    def set_input(self, input):
        """Unpack input data from the dataloader and perform necessary pre-processing steps.

        Args:
            input (dict): include the data itself and its metadata information.

        The option 'direction' can be used to swap domain A and domain B.
        """
        mode = 'train' if self.is_train else 'test'
        AtoB = self.cfg.dataset[mode].direction == 'AtoB'

        # Either side may be absent (e.g. single-domain test datasets).
        if AtoB:
            if 'A' in input:
                self.real_A = paddle.to_tensor(input['A'])
            if 'B' in input:
                self.real_B = paddle.to_tensor(input['B'])
        else:
            if 'B' in input:
                self.real_A = paddle.to_tensor(input['B'])
            if 'A' in input:
                self.real_B = paddle.to_tensor(input['A'])

        if 'A_paths' in input:
            self.image_paths = input['A_paths']
        elif 'B_paths' in input:
            self.image_paths = input['B_paths']

    def forward(self):
        """Run forward pass; called by both optimize_parameters and test."""
        if hasattr(self, 'real_A'):
            self.fake_A2B, _, _ = self.nets['genA2B'](self.real_A)

            # visual
            self.visual_items['real_A'] = self.real_A
            self.visual_items['fake_A2B'] = self.fake_A2B

        if hasattr(self, 'real_B'):
            self.fake_B2A, _, _ = self.nets['genB2A'](self.real_B)

            # visual
            self.visual_items['real_B'] = self.real_B
            self.visual_items['fake_B2A'] = self.fake_B2A

    def test(self):
        """Forward function used in test time.

        This function wraps forward() in no_grad() so we don't save intermediate steps for backprop
        It also calls compute_visuals() to produce additional visualization results
        """
        self.nets['genA2B'].eval()
        self.nets['genB2A'].eval()
        with paddle.no_grad():
            self.forward()
            self.compute_visuals()

        # Restore training mode after inference.
        self.nets['genA2B'].train()
        self.nets['genB2A'].train()

    def optimize_parameters(self):
        """Calculate losses, gradients, and update network weights; called in every training iteration"""
        def _criterion(loss_func, logit, is_real):
            # LSGAN/BCE target: all-ones for real, all-zeros for fake.
            if is_real:
                target = paddle.ones_like(logit)
            else:
                target = paddle.zeros_like(logit)
            return loss_func(logit, target)

        # forward
        # compute fake images and reconstruction images.
        self.forward()

        # update D: real/fake adversarial + CAM-logit losses for all four
        # discriminators (global/local x domain A/B).
        self.optimizers['optimizer_D'].clear_grad()
        real_GA_logit, real_GA_cam_logit, _ = self.nets['disGA'](self.real_A)
        real_LA_logit, real_LA_cam_logit, _ = self.nets['disLA'](self.real_A)
        real_GB_logit, real_GB_cam_logit, _ = self.nets['disGB'](self.real_B)
        real_LB_logit, real_LB_cam_logit, _ = self.nets['disLB'](self.real_B)

        fake_GA_logit, fake_GA_cam_logit, _ = self.nets['disGA'](self.fake_B2A)
        fake_LA_logit, fake_LA_cam_logit, _ = self.nets['disLA'](self.fake_B2A)
        fake_GB_logit, fake_GB_cam_logit, _ = self.nets['disGB'](self.fake_A2B)
        fake_LB_logit, fake_LB_cam_logit, _ = self.nets['disLB'](self.fake_A2B)

        D_ad_loss_GA = _criterion(self.MSE_loss,
                                  real_GA_logit, True) + _criterion(
                                      self.MSE_loss, fake_GA_logit, False)

        D_ad_cam_loss_GA = _criterion(
            self.MSE_loss, real_GA_cam_logit, True) + _criterion(
                self.MSE_loss, fake_GA_cam_logit, False)

        D_ad_loss_LA = _criterion(self.MSE_loss,
                                  real_LA_logit, True) + _criterion(
                                      self.MSE_loss, fake_LA_logit, False)

        D_ad_cam_loss_LA = _criterion(
            self.MSE_loss, real_LA_cam_logit, True) + _criterion(
                self.MSE_loss, fake_LA_cam_logit, False)

        D_ad_loss_GB = _criterion(self.MSE_loss,
                                  real_GB_logit, True) + _criterion(
                                      self.MSE_loss, fake_GB_logit, False)

        D_ad_cam_loss_GB = _criterion(
            self.MSE_loss, real_GB_cam_logit, True) + _criterion(
                self.MSE_loss, fake_GB_cam_logit, False)

        D_ad_loss_LB = _criterion(self.MSE_loss,
                                  real_LB_logit, True) + _criterion(
                                      self.MSE_loss, fake_LB_logit, False)

        D_ad_cam_loss_LB = _criterion(
            self.MSE_loss, real_LB_cam_logit, True) + _criterion(
                self.MSE_loss, fake_LB_cam_logit, False)

        D_loss_A = self.cfg.adv_weight * (D_ad_loss_GA + D_ad_cam_loss_GA +
                                          D_ad_loss_LA + D_ad_cam_loss_LA)
        D_loss_B = self.cfg.adv_weight * (D_ad_loss_GB + D_ad_cam_loss_GB +
                                          D_ad_loss_LB + D_ad_cam_loss_LB)

        Discriminator_loss = D_loss_A + D_loss_B
        Discriminator_loss.backward()
        self.optimizers['optimizer_D'].step()

        # update G: recompute fakes (D step freed the earlier graph),
        # plus cycle reconstructions and identity mappings.
        self.optimizers['optimizer_G'].clear_grad()

        fake_A2B, fake_A2B_cam_logit, _ = self.nets['genA2B'](self.real_A)
        fake_B2A, fake_B2A_cam_logit, _ = self.nets['genB2A'](self.real_B)

        fake_A2B2A, _, _ = self.nets['genB2A'](fake_A2B)
        fake_B2A2B, _, _ = self.nets['genA2B'](fake_B2A)

        # Identity terms: generator applied to its own target domain.
        fake_A2A, fake_A2A_cam_logit, _ = self.nets['genB2A'](self.real_A)
        fake_B2B, fake_B2B_cam_logit, _ = self.nets['genA2B'](self.real_B)

        fake_GA_logit, fake_GA_cam_logit, _ = self.nets['disGA'](fake_B2A)
        fake_LA_logit, fake_LA_cam_logit, _ = self.nets['disLA'](fake_B2A)
        fake_GB_logit, fake_GB_cam_logit, _ = self.nets['disGB'](fake_A2B)
        fake_LB_logit, fake_LB_cam_logit, _ = self.nets['disLB'](fake_A2B)

        G_ad_loss_GA = _criterion(self.MSE_loss, fake_GA_logit, True)
        G_ad_cam_loss_GA = _criterion(self.MSE_loss, fake_GA_cam_logit, True)
        G_ad_loss_LA = _criterion(self.MSE_loss, fake_LA_logit, True)
        G_ad_cam_loss_LA = _criterion(self.MSE_loss, fake_LA_cam_logit, True)
        G_ad_loss_GB = _criterion(self.MSE_loss, fake_GB_logit, True)
        G_ad_cam_loss_GB = _criterion(self.MSE_loss, fake_GB_cam_logit, True)
        G_ad_loss_LB = _criterion(self.MSE_loss, fake_LB_logit, True)
        G_ad_cam_loss_LB = _criterion(self.MSE_loss, fake_LB_cam_logit, True)

        # Cycle-consistency reconstruction losses.
        G_recon_loss_A = self.L1_loss(fake_A2B2A, self.real_A)
        G_recon_loss_B = self.L1_loss(fake_B2A2B, self.real_B)

        G_identity_loss_A = self.L1_loss(fake_A2A, self.real_A)
        G_identity_loss_B = self.L1_loss(fake_B2B, self.real_B)

        # CAM losses: translated input should score high, identity low.
        G_cam_loss_A = _criterion(self.BCE_loss,
                                  fake_B2A_cam_logit, True) + _criterion(
                                      self.BCE_loss, fake_A2A_cam_logit, False)

        G_cam_loss_B = _criterion(self.BCE_loss,
                                  fake_A2B_cam_logit, True) + _criterion(
                                      self.BCE_loss, fake_B2B_cam_logit, False)

        G_loss_A = self.cfg.adv_weight * (
            G_ad_loss_GA + G_ad_cam_loss_GA + G_ad_loss_LA + G_ad_cam_loss_LA
        ) + self.cfg.cycle_weight * G_recon_loss_A + self.cfg.identity_weight * G_identity_loss_A + self.cfg.cam_weight * G_cam_loss_A
        G_loss_B = self.cfg.adv_weight * (
            G_ad_loss_GB + G_ad_cam_loss_GB + G_ad_loss_LB + G_ad_cam_loss_LB
        ) + self.cfg.cycle_weight * G_recon_loss_B + self.cfg.identity_weight * G_identity_loss_B + self.cfg.cam_weight * G_cam_loss_B

        Generator_loss = G_loss_A + G_loss_B
        Generator_loss.backward()
        self.optimizers['optimizer_G'].step()

        # clip parameter of AdaILN and ILN, applied after optimizer step
        self.nets['genA2B'].apply(self.Rho_clipper)
        self.nets['genB2A'].apply(self.Rho_clipper)

        self.losses['discriminator_loss'] = Discriminator_loss
        self.losses['generator_loss'] = Generator_loss
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import numpy as np

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from .init import normal_


class SpectralNorm(object):
    """Spectral normalization forward pre-hook (PyTorch-style port).

    Estimates the largest singular value of the layer's weight via power
    iteration and divides the weight by it on every forward pass, which
    constrains the layer's Lipschitz constant (used by GAN discriminators).
    """
    def __init__(self, name='weight', n_power_iterations=1, dim=0, eps=1e-12):
        self.name = name
        self.dim = dim
        if n_power_iterations <= 0:
            raise ValueError(
                'Expected n_power_iterations to be positive, but '
                'got n_power_iterations={}'.format(n_power_iterations))
        self.n_power_iterations = n_power_iterations
        self.eps = eps

    def reshape_weight_to_matrix(self, weight):
        """Flatten the weight to 2-D with `self.dim` as the leading axis."""
        weight_mat = weight
        if self.dim != 0:
            # transpose dim to front
            weight_mat = weight_mat.transpose([
                self.dim,
                *[d for d in range(weight_mat.dim()) if d != self.dim]
            ])

        height = weight_mat.shape[0]

        return weight_mat.reshape([height, -1])

    def compute_weight(self, layer, do_power_iteration):
        """Return `weight_orig / sigma`, optionally refining u, v in place.

        Power iteration mutates the persistent `u`/`v` buffers; gradients do
        not flow through the iteration itself (it runs under no_grad).
        """
        weight = getattr(layer, self.name + '_orig')
        u = getattr(layer, self.name + '_u')
        v = getattr(layer, self.name + '_v')
        weight_mat = self.reshape_weight_to_matrix(weight)

        if do_power_iteration:
            with paddle.no_grad():
                for _ in range(self.n_power_iterations):
                    # v <- normalize(W^T u); u <- normalize(W v)
                    v.set_value(
                        F.normalize(
                            paddle.matmul(weight_mat,
                                          u,
                                          transpose_x=True,
                                          transpose_y=False),
                            axis=0,
                            epsilon=self.eps,
                        ))

                    u.set_value(
                        F.normalize(
                            paddle.matmul(weight_mat, v),
                            axis=0,
                            epsilon=self.eps,
                        ))
                if self.n_power_iterations > 0:
                    # Clone so autograd does not alias the updated buffers.
                    u = u.clone()
                    v = v.clone()

        # sigma ~ largest singular value of the weight matrix.
        sigma = paddle.dot(u, paddle.mv(weight_mat, v))
        weight = weight / sigma
        return weight

    def remove(self, layer):
        """Undo the hook: bake the normalized weight back as a parameter."""
        with paddle.no_grad():
            weight = self.compute_weight(layer, do_power_iteration=False)
        delattr(layer, self.name)
        delattr(layer, self.name + '_u')
        delattr(layer, self.name + '_v')
        delattr(layer, self.name + '_orig')

        layer.add_parameter(self.name, weight.detach())

    def __call__(self, layer, inputs):
        # Recompute the normalized weight before every forward pass; power
        # iteration only runs while the layer is in training mode.
        setattr(layer, self.name,
                self.compute_weight(layer, do_power_iteration=layer.training))

    @staticmethod
    def apply(layer, name, n_power_iterations, dim, eps):
        """Attach a SpectralNorm hook to `layer`'s parameter `name`."""
        for k, hook in layer._forward_pre_hooks.items():
            if isinstance(hook, SpectralNorm) and hook.name == name:
                raise RuntimeError("Cannot register two spectral_norm hooks on "
                                   "the same parameter {}".format(name))

        fn = SpectralNorm(name, n_power_iterations, dim, eps)
        weight = layer._parameters[name]

        with paddle.no_grad():
            weight_mat = fn.reshape_weight_to_matrix(weight)
        h, w = weight_mat.shape

        # randomly initialize `u` and `v`
        u = layer.create_parameter([h])
        u = normal_(u, 0., 1.)
        v = layer.create_parameter([w])
        v = normal_(v, 0., 1.)
        u = F.normalize(u, axis=0, epsilon=fn.eps)
        v = F.normalize(v, axis=0, epsilon=fn.eps)

        # delete fn.name from parameters, otherwise you can not set attribute
        del layer._parameters[fn.name]
        layer.add_parameter(fn.name + "_orig", weight)
        # still need to assign weight back as fn.name because all sorts of
        # things may assume that it exists, e.g., when initializing weights.
        # However, we can't directly assign as it could be a Parameter and
        # gets added as a parameter. Instead, we register weight * 1.0 as a
        # plain attribute.
        setattr(layer, fn.name, weight * 1.0)
        layer.register_buffer(fn.name + "_u", u)
        layer.register_buffer(fn.name + "_v", v)

        layer.register_forward_pre_hook(fn)
        return fn


def spectral_norm(layer,
                  name='weight',
                  n_power_iterations=1,
                  eps=1e-12,
                  dim=None):
    """Apply spectral normalization to `layer` and return it.

    For transposed convolutions and Linear layers the output dimension is
    axis 1, so the weight matrix is flattened along that axis instead of 0.
    """
    if dim is None:
        if isinstance(layer, (nn.Conv1DTranspose, nn.Conv2DTranspose,
                              nn.Conv3DTranspose, nn.Linear)):
            dim = 1
        else:
            dim = 0
    SpectralNorm.apply(layer, name, n_power_iterations, dim, eps)
    return layer