diff --git a/applications/DAIN/my_args.py b/applications/DAIN/my_args.py index 7a5df29b04f6c4d4aeb3b61dcb19a10c5c803b88..448c3c2215c4a5fcba61bcb79ca242d3a3db1b18 100644 --- a/applications/DAIN/my_args.py +++ b/applications/DAIN/my_args.py @@ -90,4 +90,7 @@ parser.add_argument('--use_cuda', type=bool, help='use cuda or not') parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not') - +parser.add_argument('--remove_duplicates', + default=True, + type=bool, + help='remove duplicate frames or not') diff --git a/applications/DAIN/predict.py b/applications/DAIN/predict.py index f43a794150d52e452fdb7d5c3aadcc93cbbda3da..39290433b2b0cfbc2bfcce18d800740987c1d131 100644 --- a/applications/DAIN/predict.py +++ b/applications/DAIN/predict.py @@ -80,7 +80,9 @@ class VideoFrameInterp(object): video_path, use_gpu=True, key_frame_thread=0., - output_path='output'): + output_path='output', + remove_duplicates=True): self.video_path = video_path + self.remove_duplicates = remove_duplicates self.output_path = os.path.join(output_path, 'DAIN') if model_path is None: @@ -138,6 +140,8 @@ class VideoFrameInterp(object): end = time.time() frames = sorted(glob.glob(os.path.join(out_path, '*.png'))) + if self.remove_duplicates: + frames = remove_duplicates(out_path) img = imread(frames[0]) @@ -199,58 +203,51 @@ class VideoFrameInterp(object): X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255 X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255 - if key_frame: - y_ = [ - np.transpose(255.0 * X0.clip(0, 1.0), (1, 2, 0)) - for i in range(num_frames) - ] - else: - assert (X0.shape[1] == X1.shape[1]) - assert (X0.shape[2] == X1.shape[2]) - - X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \ - (padding_left, padding_right)), mode='edge') - X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \ - (padding_left, padding_right)), mode='edge') - - X0 = np.expand_dims(X0, axis=0) - X1 = np.expand_dims(X1, axis=0) - - X0 = np.expand_dims(X0, axis=0) - X1 = np.expand_dims(X1, axis=0) - - X = np.concatenate((X0, X1), axis=0) - - proc_end = time.time() - o = self.exe.run(self.program, - fetch_list=self.fetch_targets, - feed={"image": X}) - - y_ = o[0] - - proc_timer.update(time.time() - proc_end) - tot_timer.update(time.time() - end) - end = time.time() - - y_ = [ - np.transpose( - 255.0 * item.clip( - 0, 1.0)[0, :, - padding_top:padding_top + int_height, - padding_left:padding_left + int_width], - (1, 2, 0)) for item in y_ - ] - time_offsets = [ - kk * timestep for kk in range(1, 1 + num_frames, 1) - ] - - count = 1 - for item, time_offset in zip(y_, time_offsets): - out_dir = os.path.join( - frame_path_interpolated, vidname, - "{:0>6d}_{:0>4d}.png".format(i, count)) - count = count + 1 - imsave(out_dir, np.round(item).astype(np.uint8)) + assert (X0.shape[1] == X1.shape[1]) + assert (X0.shape[2] == X1.shape[2]) + + X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \ + (padding_left, padding_right)), mode='edge') + X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \ + (padding_left, padding_right)), mode='edge') + + X0 = np.expand_dims(X0, axis=0) + X1 = np.expand_dims(X1, axis=0) + + X0 = np.expand_dims(X0, axis=0) + X1 = np.expand_dims(X1, axis=0) + + X = np.concatenate((X0, X1), axis=0) + + proc_end = time.time() + o = self.exe.run(self.program, + fetch_list=self.fetch_targets, + feed={"image": X}) + + y_ = o[0] + + proc_timer.update(time.time() - proc_end) + tot_timer.update(time.time() - end) + end = time.time() + + y_ = [ + np.transpose( + 255.0 * item.clip( + 0, 1.0)[0, :, padding_top:padding_top + int_height, 
padding_left:padding_left + int_width], + (1, 2, 0)) for item in y_ + ] + time_offsets = [ + kk * timestep for kk in range(1, 1 + num_frames, 1) + ] + + count = 1 + for item, time_offset in zip(y_, time_offsets): + out_dir = os.path.join( + frame_path_interpolated, vidname, + "{:0>6d}_{:0>4d}.png".format(i, count)) + count = count + 1 + imsave(out_dir, np.round(item).astype(np.uint8)) num_frames = int(1.0 / timestep) - 1 @@ -266,14 +263,16 @@ class VideoFrameInterp(object): vidname + '.mp4') if os.path.exists(video_pattern_output): os.remove(video_pattern_output) - frames2video(frame_pattern_combined, video_pattern_output, - r2) + frames2video(frame_pattern_combined, video_pattern_output, r2) return frame_pattern_combined, video_pattern_output if __name__ == '__main__': args = parser.parse_args() - predictor = VideoFrameInterp(args.time_step, args.saved_model, - args.video_path, args.output_path) + predictor = VideoFrameInterp(args.time_step, + args.saved_model, + args.video_path, + args.output_path, + remove_duplicates=args.remove_duplicates) predictor.run() diff --git a/applications/DAIN/util.py b/applications/DAIN/util.py index 24ea2741517660581c12d8b174e3e8af03ae9a8e..dc343ff20668f0b9db3a426a3789943ea4e28cb5 100644 --- a/applications/DAIN/util.py +++ b/applications/DAIN/util.py @@ -1,6 +1,7 @@ import os, sys import glob import shutil +import cv2 class AverageMeter(object): @@ -44,3 +45,37 @@ def combine_frames(input, interpolated, combined, num_frames): except Exception as e: print(e) print(len(frames2), num_frames, i, k, i * num_frames + k) + + +def remove_duplicates(paths): + # Detect near-duplicate frames with a perceptual difference hash (dHash); + # frames that hash identically are deleted except for the first one. + def dhash(image, hash_size=8): + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + resized = cv2.resize(gray, (hash_size + 1, hash_size)) + diff = resized[:, 1:] > resized[:, :-1] + return sum([2**i for (i, v) in enumerate(diff.flatten()) if v]) + + hashes = {} + image_paths = sorted(glob.glob(os.path.join(paths, '*.png'))) + for image_path in image_paths: + image = cv2.imread(image_path) + h = dhash(image) + p = hashes.get(h, []) + p.append(image_path) + hashes[h] = p + + for (h, hashed_paths) in hashes.items(): + if len(hashed_paths) > 1: + for p in hashed_paths[1:]: + os.remove(p) + + # Renumber the surviving frames so the filename sequence stays consecutive. + frames = sorted(glob.glob(os.path.join(paths, '*.png'))) + for fid, frame in enumerate(frames): + new_name = '{:08d}'.format(fid) + '.png' + new_name = os.path.join(paths, new_name) + os.rename(frame, new_name) + + frames = sorted(glob.glob(os.path.join(paths, '*.png'))) + return frames diff --git a/ppgan/datasets/builder.py b/ppgan/datasets/builder.py index 62b5346795c1383d683926e46064b97ea8a14aee..7c0b02bfeed825554bd9756e84b91b1d05d96d1a 100644 --- a/ppgan/datasets/builder.py +++ b/ppgan/datasets/builder.py @@ -56,8 +56,8 @@ class DictDataLoader(): self.dataset = DictDataset(dataset) - place = paddle.fluid.CUDAPlace(ParallelEnv().dev_id) \ - if ParallelEnv().nranks > 1 else paddle.fluid.CUDAPlace(0) + place = paddle.CUDAPlace(ParallelEnv().dev_id) \ + if ParallelEnv().nranks > 1 else paddle.CUDAPlace(0) sampler = DistributedBatchSampler(self.dataset, batch_size=batch_size, diff --git a/ppgan/engine/trainer.py b/ppgan/engine/trainer.py index 650aab765e4841e0a4e62d8bb6f6404a37b69d66..fb3de0bb2655eb0e9505704e7622f1d6e7ebba3a 100644 --- a/ppgan/engine/trainer.py +++ b/ppgan/engine/trainer.py @@ -11,8 +11,10 @@ from ..datasets.builder import build_dataloader from ..models.builder import build_model from ..utils.visual import tensor2img, save_image from ..utils.filesystem import save, load, makedirs +from ..utils.timer import 
TimeAverager from ..metric.psnr_ssim import calculate_psnr, calculate_ssim + class Trainer: def __init__(self, cfg): @@ -51,7 +53,6 @@ class Trainer: self.time_count = {} self.best_metric = {} - def distributed_data_parallel(self): strategy = paddle.distributed.prepare_context() for name in self.model.model_names: @@ -61,29 +62,37 @@ class Trainer: paddle.DataParallel(net, strategy)) def train(self): + reader_cost_averager = TimeAverager() + batch_cost_averager = TimeAverager() for epoch in range(self.start_epoch, self.epochs): self.current_epoch = epoch start_time = step_start_time = time.time() for i, data in enumerate(self.train_dataloader): - data_time = time.time() + reader_cost_averager.record(time.time() - step_start_time) + self.batch_id = i # unpack data from dataset and apply preprocessing # data input should be dict self.model.set_input(data) self.model.optimize_parameters() - self.data_time = data_time - step_start_time - self.step_time = time.time() - step_start_time + batch_cost_averager.record(time.time() - step_start_time) if i % self.log_interval == 0: + self.data_time = reader_cost_averager.get_average() + self.step_time = batch_cost_averager.get_average() self.print_log() + reader_cost_averager.reset() + batch_cost_averager.reset() + if i % self.visual_interval == 0: self.visual('visual_train') step_start_time = time.time() - self.logger.info('train one epoch time: {}'.format(time.time() - - start_time)) + + self.logger.info( + 'train one epoch time: {}'.format(time.time() - start_time)) if self.validate_interval > -1 and epoch % self.validate_interval: self.validate() self.model.lr_scheduler.step() @@ -93,7 +102,8 @@ class Trainer: def validate(self): if not hasattr(self, 'val_dataloader'): - self.val_dataloader = build_dataloader(self.cfg.dataset.val, is_train=False) + self.val_dataloader = build_dataloader( + self.cfg.dataset.val, is_train=False) metric_result = {} @@ -106,7 +116,7 @@ class Trainer: visual_results = {} current_paths = self.model.get_image_paths() current_visuals = self.model.get_current_visuals() - + for j in range(len(current_paths)): short_path = os.path.basename(current_paths[j]) basename = os.path.splitext(short_path)[0] @@ -115,31 +125,43 @@ class Trainer: visual_results.update({name: img_tensor[j]}) if 'psnr' in self.cfg.validate.metrics: if 'psnr' not in metric_result: - metric_result['psnr'] = calculate_psnr(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.psnr) + metric_result['psnr'] = calculate_psnr( + tensor2img(current_visuals['output'][j], (0., 1.)), + tensor2img(current_visuals['gt'][j], (0., 1.)), + **self.cfg.validate.metrics.psnr) else: - metric_result['psnr'] += calculate_psnr(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.psnr) + metric_result['psnr'] += calculate_psnr( + tensor2img(current_visuals['output'][j], (0., 1.)), + tensor2img(current_visuals['gt'][j], (0., 1.)), + **self.cfg.validate.metrics.psnr) if 'ssim' in self.cfg.validate.metrics: if 'ssim' not in metric_result: - metric_result['ssim'] = calculate_ssim(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.ssim) + metric_result['ssim'] = calculate_ssim( + tensor2img(current_visuals['output'][j], (0., 1.)), + tensor2img(current_visuals['gt'][j], (0., 1.)), + **self.cfg.validate.metrics.ssim) else: - metric_result['ssim'] += 
calculate_ssim(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.ssim) - + metric_result['ssim'] += calculate_ssim( + tensor2img(current_visuals['output'][j], (0., 1.)), + tensor2img(current_visuals['gt'][j], (0., 1.)), + **self.cfg.validate.metrics.ssim) + self.visual('visual_val', visual_results=visual_results) if i % self.log_interval == 0: - self.logger.info('val iter: [%d/%d]' % - (i, len(self.val_dataloader))) - + self.logger.info( + 'val iter: [%d/%d]' % (i, len(self.val_dataloader))) + for metric_name in metric_result.keys(): metric_result[metric_name] /= len(self.val_dataloader.dataset) - self.logger.info('Epoch {} validate end: {}'.format(self.current_epoch, metric_result)) - + self.logger.info('Epoch {} validate end: {}'.format( + self.current_epoch, metric_result)) def test(self): if not hasattr(self, 'test_dataloader'): - self.test_dataloader = build_dataloader(self.cfg.dataset.test, - is_train=False) + self.test_dataloader = build_dataloader( + self.cfg.dataset.test, is_train=False) # data[0]: img, data[1]: img path index # test batch size must be 1 @@ -163,8 +185,8 @@ class Trainer: self.visual('visual_test', visual_results=visual_results) if i % self.log_interval == 0: - self.logger.info('Test iter: [%d/%d]' % - (i, len(self.test_dataloader))) + self.logger.info( + 'Test iter: [%d/%d]' % (i, len(self.test_dataloader))) def print_log(self): losses = self.model.get_current_losses() @@ -266,6 +288,7 @@ class Trainer: for name in self.model.model_names: if isinstance(name, str): - self.logger.info('laod model {} {} params!'.format(self.cfg.model.name, 'net' + name)) + self.logger.info('load model {} {} params!'.format( + self.cfg.model.name, 'net' + name)) net = getattr(self.model, 'net' + name) net.set_dict(state_dicts['net' + name]) diff --git a/ppgan/metric/README.md b/ppgan/metric/README.md index d27e99d639bfed7b63ff90fdd1d54e12b45f78e0..08fe7e700a48b8e8a9ad3053d03138498c1f5b61 100644 --- a/ppgan/metric/README.md +++ b/ppgan/metric/README.md @@ -8,3 +8,18 @@ wget https://paddlegan.bj.bcebos.com/InceptionV3.pdparams ``` python test_fid_score.py --image_data_path1 /path/to/dataset1 --image_data_path2 /path/to/dataset2 --inference_model ./InceptionV3.pdparams ``` + +### Inception-V3 weights converted from torchvision + +Download: https://aistudio.baidu.com/aistudio/datasetdetail/51890 + +These model weights were converted from the official torchvision Inception-v3 model. Both BigGAN and StarGAN-v2 use them to calculate FID scores.
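+
+For example, the `--style stargan` option added in this change can be pointed at these converted weights like so (the dataset paths and the weights path are placeholders for wherever you unpacked the download):
+
+```
+python test_fid_score.py --image_data_path1 /path/to/dataset1 --image_data_path2 /path/to/dataset2 --inference_model /path/to/converted_inception_v3 --style stargan
+```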
+ +Note that these weights are different from the ones above, which were converted from an unofficial TensorFlow implementation. + diff --git a/ppgan/metric/compute_fid.py b/ppgan/metric/compute_fid.py index c8fc8059e2658768b1f07436f2bca6e08446014c..3e1d013ed2dfcc75ec034e30c886a6f3c2619efd 100644 --- a/ppgan/metric/compute_fid.py +++ b/ppgan/metric/compute_fid.py @@ -16,15 +16,18 @@ import os import fnmatch import numpy as np import cv2 +from PIL import Image from cv2 import imread from scipy import linalg import paddle.fluid as fluid from inception import InceptionV3 from paddle.fluid.dygraph.base import to_variable - -def tqdm(x): - return x +try: + from tqdm import tqdm +except ImportError: + def tqdm(x): + return x """ based on https://github.com/mit-han-lab/gan-compression/blob/master/metric/fid_score.py """ @@ -128,7 +131,7 @@ def calculate_fid_given_img(img_fake, return fid_value -def _get_activations(files, model, batch_size, dims, use_gpu, premodel_path): +def _get_activations(files, model, batch_size, dims, use_gpu, premodel_path, style=None): if len(files) % batch_size != 0: print(('Warning: number of images is not a multiple of the ' 'batch size. Some samples are going to be ignored.')) @@ -144,8 +147,23 @@ def _get_activations(files, model, batch_size, dims, use_gpu, premodel_path): for i in tqdm(range(n_batches)): start = i * batch_size end = start + batch_size - images = np.array( - [imread(str(f)).astype(np.float32) for f in files[start:end]]) + + # same as the official StarGAN-v2 implementation: resize to 256 first, then to 299 (resample=2 is PIL bilinear) + if style == 'stargan': + img_list = [] + for f in files[start:end]: + im = Image.open(str(f)).convert('RGB') + if im.size[0] != 299: + im = im.resize((256, 256), 2) + im = im.resize((299, 299), 2) + + img_list.append(np.array(im).astype('float32')) + + images = np.array( + img_list) + else: + images = np.array( + [imread(str(f)).astype(np.float32) for f in files[start:end]]) if len(images.shape) != 4: images = imread(str(files[start])) @@ -155,33 +173,53 @@ images = images.transpose((0, 3, 1, 2)) images /= 255 - images = to_variable(images) - param_dict, _ = fluid.load_dygraph(premodel_path) - model.set_dict(param_dict) - model.eval() + # imagenet normalization + if style == 'stargan': + mean = np.array([0.485, 0.456, 0.406]).astype('float32') + std = np.array([0.229, 0.224, 0.225]).astype('float32') + images[:] = (images[:] - mean[:, None, None]) / std[:, None, None] - pred = model(images)[0][0].numpy() + if style == 'stargan': + pred_arr[start:end] = inception_infer(images, premodel_path) + else: + with fluid.dygraph.guard(): + images = to_variable(images) + param_dict, _ = fluid.load_dygraph(premodel_path) + model.set_dict(param_dict) + model.eval() - pred_arr[start:end] = pred.reshape(end - start, -1) + pred = model(images)[0][0].numpy() + + pred_arr[start:end] = pred.reshape(end - start, -1) return pred_arr +def inception_infer(x, model_path): + exe = fluid.Executor() + [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(model_path, exe) + results = exe.run(inference_program, + feed={feed_target_names[0]: x}, + fetch_list=fetch_targets) + return results[0] + + def _calculate_activation_statistics(files, model, premodel_path, batch_size=50, dims=2048, - use_gpu=False): + use_gpu=False, + style=None): act = _get_activations(files, model, batch_size, dims, use_gpu, - premodel_path) + premodel_path, style) mu = np.mean(act, axis=0) + sigma = np.cov(act, 
rowvar=False) return mu, sigma def _compute_statistics_of_path(path, model, batch_size, dims, use_gpu, - premodel_path): + premodel_path, style=None): if path.endswith('.npz'): f = np.load(path) m, s = f['mu'][:], f['sigma'][:] @@ -193,7 +231,7 @@ def _compute_statistics_of_path(path, model, batch_size, dims, use_gpu, filenames, '*.jpg') or fnmatch.filter(filenames, '*.png'): files.append(os.path.join(root, filename)) m, s = _calculate_activation_statistics(files, model, premodel_path, - batch_size, dims, use_gpu) + batch_size, dims, use_gpu, style) return m, s @@ -202,7 +240,8 @@ def calculate_fid_given_paths(paths, batch_size, use_gpu, dims, - model=None): + model=None, + style = None): assert os.path.exists( premodel_path ), 'pretrain_model path {} is not exists! Please download it first'.format( @@ -211,14 +250,15 @@ def calculate_fid_given_paths(paths, if not os.path.exists(p): raise RuntimeError('Invalid path: %s' % p) - if model is None: - block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims] - model = InceptionV3([block_idx], class_dim=1008) + if model is None and style != 'stargan': + with fluid.dygraph.guard(): + block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims] + model = InceptionV3([block_idx], class_dim=1008) m1, s1 = _compute_statistics_of_path(paths[0], model, batch_size, dims, - use_gpu, premodel_path) + use_gpu, premodel_path, style) m2, s2 = _compute_statistics_of_path(paths[1], model, batch_size, dims, - use_gpu, premodel_path) + use_gpu, premodel_path, style) fid_value = _calculate_frechet_distance(m1, s1, m2, s2) return fid_value diff --git a/ppgan/metric/test_fid_score.py b/ppgan/metric/test_fid_score.py index e8abccaaf3e8c4bda5a5c51e7621014b12a0664d..36412a55104085154dc7dac3b7d923a369ceab07 100644 --- a/ppgan/metric/test_fid_score.py +++ b/ppgan/metric/test_fid_score.py @@ -38,6 +38,9 @@ def parse_args(): type=int, default=1, help='sample number in a batch for inference.') + parser.add_argument('--style', + type=str, + help='calculation style: stargan or default (gan-compression style)') args = parser.parse_args() return args @@ -50,10 +53,9 @@ def main(): inference_model_path = args.inference_model batch_size = args.batch_size - with fluid.dygraph.guard(): - fid_value = calculate_fid_given_paths(paths, inference_model_path, - batch_size, args.use_gpu, 2048) - print('FID: ', fid_value) + fid_value = calculate_fid_given_paths(paths, inference_model_path, + batch_size, args.use_gpu, 2048, style=args.style) + print('FID: ', fid_value) if __name__ == "__main__": diff --git a/ppgan/models/backbones/resnet_backbone.py b/ppgan/models/backbones/resnet_backbone.py index 6c5195a0ea90961eba5c8939ab376af539d2e79d..d198f2d050fb0214b0d4e69bc7e2b5838940b51d 100644 --- a/ppgan/models/backbones/resnet_backbone.py +++ b/ppgan/models/backbones/resnet_backbone.py @@ -1,18 +1,22 @@ import paddle import paddle.nn as nn - -__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', - 'resnet152'] +__all__ = [ + 'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152' +] def conv3x3(in_planes, out_planes, stride=1): "3x3 convolution with padding" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=1, bias_attr=False) + return nn.Conv2d(in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias_attr=False) -class BasicBlock(paddle.fluid.Layer): +class BasicBlock(nn.Layer): expansion = 1 def __init__(self, inplanes, planes, stride=1, downsample=None): @@ -44,17 +48,24 @@ class BasicBlock(paddle.fluid.Layer): return out -class 
Bottleneck(paddle.fluid.Layer): +class Bottleneck(nn.Layer): expansion = 4 def __init__(self, inplanes, planes, stride=1, downsample=None): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias_attr=False) self.bn1 = nn.BatchNorm(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias_attr=False) + self.conv2 = nn.Conv2d(planes, + planes, + kernel_size=3, + stride=stride, + padding=1, + bias_attr=False) self.bn2 = nn.BatchNorm(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias_attr=False) + self.conv3 = nn.Conv2d(planes, + planes * 4, + kernel_size=1, + bias_attr=False) self.bn3 = nn.BatchNorm(planes * 4) self.relu = nn.ReLU() self.downsample = downsample @@ -82,12 +93,15 @@ class Bottleneck(paddle.fluid.Layer): return out -class ResNet(paddle.fluid.Layer): - +class ResNet(nn.Layer): def __init__(self, block, layers, num_classes=1000): self.inplanes = 64 super(ResNet, self).__init__() - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + self.conv1 = nn.Conv2d(3, + 64, + kernel_size=7, + stride=2, + padding=3, bias_attr=False) self.bn1 = nn.BatchNorm(64) self.relu = nn.ReLU() @@ -103,8 +117,11 @@ class ResNet(paddle.fluid.Layer): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias_attr=False), + nn.Conv2d(self.inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias_attr=False), nn.BatchNorm(planes * block.expansion), ) diff --git a/ppgan/models/cycle_gan_model.py b/ppgan/models/cycle_gan_model.py index c6afecdea1a921ec7edb434535b013d55341a7f1..65247a74e759c91c9ed9ae03b53e13d792b8f63c 100644 --- a/ppgan/models/cycle_gan_model.py +++ b/ppgan/models/cycle_gan_model.py @@ -8,6 +8,7 @@ from .discriminators.builder import build_discriminator from .losses import GANLoss from ..solver import build_optimizer +from ..modules.init import init_weights from ..utils.image_pool import ImagePool @@ -56,10 +57,14 @@ class CycleGANModel(BaseModel): # Code (vs. 
paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X) self.netG_A = build_generator(opt.model.generator) self.netG_B = build_generator(opt.model.generator) + init_weights(self.netG_A) + init_weights(self.netG_B) if self.isTrain: # define discriminators self.netD_A = build_discriminator(opt.model.discriminator) self.netD_B = build_discriminator(opt.model.discriminator) + init_weights(self.netD_A) + init_weights(self.netD_B) if self.isTrain: if opt.lambda_identity > 0.0: # only works when input and output images have the same number of channels diff --git a/ppgan/models/discriminators/nlayers.py b/ppgan/models/discriminators/nlayers.py index 55e01a27ea5813c5362e40ca374ad379732f8ad4..9a718115e4c41f491ef92900e76c50d15237f4c4 100644 --- a/ppgan/models/discriminators/nlayers.py +++ b/ppgan/models/discriminators/nlayers.py @@ -1,18 +1,16 @@ -import paddle import functools import numpy as np -import paddle.nn as nn -from ...modules.nn import ReflectionPad2d, LeakyReLU, Dropout, BCEWithLogitsLoss, Pad2D, MSELoss +import paddle +import paddle.nn as nn from ...modules.norm import build_norm_layer from .builder import DISCRIMINATORS @DISCRIMINATORS.register() -class NLayerDiscriminator(paddle.fluid.dygraph.Layer): +class NLayerDiscriminator(nn.Layer): """Defines a PatchGAN discriminator""" - def __init__(self, input_nc, ndf=64, n_layers=3, norm_type='instance'): """Construct a PatchGAN discriminator @@ -24,36 +22,51 @@ class NLayerDiscriminator(paddle.fluid.dygraph.Layer): """ super(NLayerDiscriminator, self).__init__() norm_layer = build_norm_layer(norm_type) - if type(norm_layer) == functools.partial: + if type(norm_layer) == functools.partial: use_bias = norm_layer.func == nn.InstanceNorm else: use_bias = norm_layer == nn.InstanceNorm - + kw = 4 padw = 1 - sequence = [nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), LeakyReLU(0.2, True)] + sequence = [ + nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), + nn.LeakyReLU(0.2) + ] nf_mult = 1 nf_mult_prev = 1 - for n in range(1, n_layers): + for n in range(1, n_layers): nf_mult_prev = nf_mult - nf_mult = min(2 ** n, 8) + nf_mult = min(2**n, 8) sequence += [ - nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, padding=padw, bias_attr=use_bias), + nn.Conv2d(ndf * nf_mult_prev, + ndf * nf_mult, + kernel_size=kw, + stride=2, + padding=padw, + bias_attr=use_bias), norm_layer(ndf * nf_mult), - LeakyReLU(0.2, True) + nn.LeakyReLU(0.2) ] nf_mult_prev = nf_mult - nf_mult = min(2 ** n_layers, 8) + nf_mult = min(2**n_layers, 8) sequence += [ - nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, padding=padw, bias_attr=use_bias), + nn.Conv2d(ndf * nf_mult_prev, + ndf * nf_mult, + kernel_size=kw, + stride=1, + padding=padw, + bias_attr=use_bias), norm_layer(ndf * nf_mult), - LeakyReLU(0.2, True) + nn.LeakyReLU(0.2) ] - sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)] + sequence += [ + nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw) + ] self.model = nn.Sequential(*sequence) def forward(self, input): """Standard forward.""" - return self.model(input) \ No newline at end of file + return self.model(input) diff --git a/ppgan/models/generators/deoldify.py b/ppgan/models/generators/deoldify.py index b7f875364dee3edfedf98c4a9bf89c0a50dd5ad9..a02c4272fc86f932947962348921260b5518d453 100644 --- a/ppgan/models/generators/deoldify.py +++ b/ppgan/models/generators/deoldify.py @@ -432,8 +432,7 @@ class SelfAttention(nn.Layer): self.key = conv1d(n_channels, 
n_channels // 8) self.value = conv1d(n_channels, n_channels) self.gamma = self.create_parameter( - shape=[1], - default_initializer=paddle.fluid.initializer.Constant( + shape=[1], default_initializer=paddle.nn.initializer.Constant( 0.0)) #nn.Parameter(tensor([0.])) def forward(self, x): diff --git a/ppgan/models/generators/mobile_resnet.py b/ppgan/models/generators/mobile_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..61833a0490f9db1430f7d7e5d393600d85a61104 --- /dev/null +++ b/ppgan/models/generators/mobile_resnet.py @@ -0,0 +1,203 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddle +import paddle.nn as nn +import functools +from ...modules.norm import build_norm_layer +from .builder import GENERATORS + +@GENERATORS.register() +class MobileResnetGenerator(nn.Layer): + def __init__(self, + input_channel, + output_nc, + ngf=64, + norm_type='instance', + use_dropout=False, + n_blocks=9, + padding_type='reflect'): + super(MobileResnetGenerator, self).__init__() + + norm_layer = build_norm_layer(norm_type) + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm + else: + use_bias = norm_layer == nn.InstanceNorm + + self.model = nn.LayerList([ + nn.ReflectionPad2d([3, 3, 3, 3]), + nn.Conv2d( + input_channel, + int(ngf), + kernel_size=7, + padding=0, + bias_attr=use_bias), norm_layer(ngf), nn.ReLU() + ]) + + n_downsampling = 2 + for i in range(n_downsampling): + mult = 2**i + self.model.extend([ + nn.Conv2d( + ngf * mult, + ngf * mult * 2, + kernel_size=3, + stride=2, + padding=1, + bias_attr=use_bias), norm_layer(ngf * mult * 2), nn.ReLU() + ]) + + mult = 2**n_downsampling + + for i in range(n_blocks): + self.model.extend([ + MobileResnetBlock( + ngf * mult, + ngf * mult, + padding_type=padding_type, + norm_layer=norm_layer, + use_dropout=use_dropout, + use_bias=use_bias) + ]) + + + for i in range(n_downsampling): + mult = 2**(n_downsampling - i) + output_size = (i + 1) * 128 + self.model.extend([ + nn.ConvTranspose2d( + ngf * mult, + int(ngf * mult / 2), + kernel_size=3, + stride=2, + padding=1, + output_padding=1, + bias_attr=use_bias), norm_layer(int(ngf * mult / 2)), + nn.ReLU() + ]) + + self.model.extend([nn.ReflectionPad2d([3, 3, 3, 3])]) + self.model.extend([nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]) + self.model.extend([nn.Tanh()]) + + def forward(self, inputs): + y = inputs + for sublayer in self.model: + y = sublayer(y) + return y + + +class MobileResnetBlock(nn.Layer): + def __init__(self, in_c, out_c, padding_type, norm_layer, use_dropout, + use_bias): + super(MobileResnetBlock, self).__init__() + self.padding_type = padding_type + self.use_dropout = use_dropout + self.conv_block = nn.LayerList([]) + + p = 0 + if self.padding_type == 'reflect': + self.conv_block.extend([nn.ReflectionPad2d([1, 1, 1, 1])]) + elif self.padding_type == 'replicate': + self.conv_block.extend([nn.ReplicationPad2d([1, 1, 1, 1])]) + elif self.padding_type == 
'zero': + p = 1 + else: + raise NotImplementedError('padding [%s] is not implemented' % + self.padding_type) + + self.conv_block.extend([ + SeparableConv2D( + num_channels=in_c, + num_filters=out_c, + filter_size=3, + padding=p, + stride=1), norm_layer(out_c), nn.ReLU() + ]) + + if self.use_dropout: + self.conv_block.extend([nn.Dropout(0.5)]) + + if self.padding_type == 'reflect': + self.conv_block.extend([nn.ReflectionPad2d([1, 1, 1, 1])]) + elif self.padding_type == 'replicate': + self.conv_block.extend([nn.ReplicationPad2d([1, 1, 1, 1])]) + elif self.padding_type == 'zero': + p = 1 + else: + raise NotImplementedError('padding [%s] is not implemented' % + self.padding_type) + + self.conv_block.extend([ + SeparableConv2D( + num_channels=out_c, + num_filters=in_c, + filter_size=3, + padding=p, + stride=1), norm_layer(in_c) + ]) + + def forward(self, inputs): + y = inputs + for sublayer in self.conv_block: + y = sublayer(y) + out = inputs + y + return out + +class SeparableConv2D(nn.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + padding=0, + norm_layer=nn.InstanceNorm, + use_bias=True, + scale_factor=1, + stddev=0.02): + super(SeparableConv2D, self).__init__() + + self.conv = nn.LayerList([ + nn.Conv2d( + in_channels=num_channels, + out_channels=num_channels * scale_factor, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_channels, + weight_attr=paddle.ParamAttr( + initializer=nn.initializer.Normal( + loc=0.0, scale=stddev)), + bias_attr=use_bias) + ]) + + self.conv.extend([norm_layer(num_channels * scale_factor)]) + + self.conv.extend([ + nn.Conv2d( + in_channels=num_channels * scale_factor, + out_channels=num_filters, + kernel_size=1, + stride=1, + weight_attr=paddle.ParamAttr( + initializer=nn.initializer.Normal( + loc=0.0, scale=stddev)), + bias_attr=use_bias) + ]) + + def forward(self, inputs): + for sublayer in self.conv: + inputs = sublayer(inputs) + return inputs + diff --git a/ppgan/models/generators/remaster.py b/ppgan/models/generators/remaster.py index 8bd84536bafe01c752d34055e5e441d6ce21b5f5..7714f3093a1916f118ce967652c0046a8eaf55e0 100644 --- a/ppgan/models/generators/remaster.py +++ b/ppgan/models/generators/remaster.py @@ -2,43 +2,79 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F + class TempConv(nn.Layer): - def __init__(self, in_planes, out_planes, kernel_size=(1,3,3), stride=(1,1,1), padding=(0,1,1) ): - super(TempConv, self).__init__() - self.conv3d = nn.Conv3d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding) - self.bn = nn.BatchNorm( out_planes ) - def forward(self, x): - return F.elu( self.bn(self.conv3d(x))) + def __init__(self, + in_planes, + out_planes, + kernel_size=(1, 3, 3), + stride=(1, 1, 1), + padding=(0, 1, 1)): + super(TempConv, self).__init__() + self.conv3d = nn.Conv3d(in_planes, + out_planes, + kernel_size=kernel_size, + stride=stride, + padding=padding) + self.bn = nn.BatchNorm(out_planes) + + def forward(self, x): + return F.elu(self.bn(self.conv3d(x))) + class Upsample(nn.Layer): - def __init__(self, in_planes, out_planes, scale_factor=(1,2,2)): - super(Upsample, self).__init__() - self.scale_factor = scale_factor - self.conv3d = nn.Conv3d( in_planes, out_planes, kernel_size=(3,3,3), stride=(1,1,1), padding=(1,1,1) ) - self.bn = nn.BatchNorm( out_planes ) - - def forward(self, x): - out_size = x.shape[2:] - for i in range(3): - out_size[i] = self.scale_factor[i] * out_size[i] - - return F.elu( self.bn( self.conv3d( F.interpolate(x, size=out_size, 
mode='trilinear', align_corners=False, data_format='NCDHW', align_mode=0)))) + def __init__(self, in_planes, out_planes, scale_factor=(1, 2, 2)): + super(Upsample, self).__init__() + self.scale_factor = scale_factor + self.conv3d = nn.Conv3d(in_planes, + out_planes, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1)) + self.bn = nn.BatchNorm(out_planes) + + def forward(self, x): + out_size = x.shape[2:] + for i in range(3): + out_size[i] = self.scale_factor[i] * out_size[i] + + return F.elu( + self.bn( + self.conv3d( + F.interpolate(x, + size=out_size, + mode='trilinear', + align_corners=False, + data_format='NCDHW', + align_mode=0)))) + class UpsampleConcat(nn.Layer): - def __init__(self, in_planes_up, in_planes_flat, out_planes): - super(UpsampleConcat, self).__init__() - self.conv3d = TempConv( in_planes_up + in_planes_flat, out_planes, kernel_size=(3,3,3), stride=(1,1,1), padding=(1,1,1) ) - def forward(self, x1, x2): - scale_factor=(1,2,2) + def __init__(self, in_planes_up, in_planes_flat, out_planes): + super(UpsampleConcat, self).__init__() + self.conv3d = TempConv(in_planes_up + in_planes_flat, + out_planes, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1)) + + def forward(self, x1, x2): + scale_factor = (1, 2, 2) out_size = x1.shape[2:] for i in range(3): out_size[i] = scale_factor[i] * out_size[i] - - x1 = F.interpolate(x1, size=out_size, mode='trilinear', align_corners=False, data_format='NCDHW', align_mode=0) + + x1 = F.interpolate(x1, + size=out_size, + mode='trilinear', + align_corners=False, + data_format='NCDHW', + align_mode=0) x = paddle.concat([x1, x2], axis=1) return self.conv3d(x) -class SourceReferenceAttention(paddle.fluid.dygraph.Layer): + +class SourceReferenceAttention(nn.Layer): """ Source-Reference Attention Layer """ @@ -51,137 +87,166 @@ class SourceReferenceAttention(paddle.fluid.dygraph.Layer): in_planes_r: int Number of input reference feature vector channels. 
""" - super(SourceReferenceAttention,self).__init__() + super(SourceReferenceAttention, self).__init__() self.query_conv = nn.Conv3d(in_channels=in_planes_s, - out_channels=in_planes_s//8, kernel_size=1 ) - self.key_conv = nn.Conv3d(in_channels=in_planes_r, - out_channels=in_planes_r//8, kernel_size=1 ) + out_channels=in_planes_s // 8, + kernel_size=1) + self.key_conv = nn.Conv3d(in_channels=in_planes_r, + out_channels=in_planes_r // 8, + kernel_size=1) self.value_conv = nn.Conv3d(in_channels=in_planes_r, - out_channels=in_planes_r, kernel_size=1 ) - self.gamma = self.create_parameter(shape=[1], dtype=self.query_conv.weight.dtype, - default_initializer=paddle.fluid.initializer.Constant(0.0)) + out_channels=in_planes_r, + kernel_size=1) + self.gamma = self.create_parameter( + shape=[1], + dtype=self.query_conv.weight.dtype, + default_initializer=nn.initializer.Constant(0.0)) def forward(self, source, reference): s_batchsize, sC, sT, sH, sW = source.shape r_batchsize, rC, rT, rH, rW = reference.shape - - proj_query = paddle.reshape(self.query_conv(source), [s_batchsize,-1,sT*sH*sW]) + + proj_query = paddle.reshape(self.query_conv(source), + [s_batchsize, -1, sT * sH * sW]) proj_query = paddle.transpose(proj_query, [0, 2, 1]) - proj_key = paddle.reshape(self.key_conv(reference), [r_batchsize,-1,rT*rW*rH]) - energy = paddle.bmm( proj_query, proj_key ) - attention = F.softmax(energy) - - proj_value = paddle.reshape(self.value_conv(reference), [r_batchsize,-1,rT*rH*rW]) - - out = paddle.bmm(proj_value,paddle.transpose(attention, [0,2,1])) - out = paddle.reshape(out, [s_batchsize, sC, sT, sH, sW]) - out = self.gamma*out + source + proj_key = paddle.reshape(self.key_conv(reference), + [r_batchsize, -1, rT * rW * rH]) + energy = paddle.bmm(proj_query, proj_key) + attention = F.softmax(energy) + + proj_value = paddle.reshape(self.value_conv(reference), + [r_batchsize, -1, rT * rH * rW]) + + out = paddle.bmm(proj_value, paddle.transpose(attention, [0, 2, 1])) + out = paddle.reshape(out, [s_batchsize, sC, sT, sH, sW]) + out = self.gamma * out + source return out, attention -class NetworkR( nn.Layer ): - def __init__(self): - super(NetworkR, self).__init__() - - self.layers = nn.Sequential( - nn.ReplicationPad3d((1,1,1,1,1,1)), - TempConv( 1, 64, kernel_size=(3,3,3), stride=(1,2,2), padding=(0,0,0) ), - TempConv( 64, 128, kernel_size=(3,3,3), padding=(1,1,1) ), - TempConv( 128, 128, kernel_size=(3,3,3), padding=(1,1,1) ), - TempConv( 128, 256, kernel_size=(3,3,3), stride=(1,2,2), padding=(1,1,1) ), - TempConv( 256, 256, kernel_size=(3,3,3), padding=(1,1,1) ), - TempConv( 256, 256, kernel_size=(3,3,3), padding=(1,1,1) ), - TempConv( 256, 256, kernel_size=(3,3,3), padding=(1,1,1) ), - TempConv( 256, 256, kernel_size=(3,3,3), padding=(1,1,1) ), - Upsample( 256, 128 ), - TempConv( 128, 64, kernel_size=(3,3,3), padding=(1,1,1) ), - TempConv( 64, 64, kernel_size=(3,3,3), padding=(1,1,1) ), - Upsample( 64, 16 ), - nn.Conv3d( 16, 1, kernel_size=(3,3,3), stride=(1,1,1), padding=(1,1,1) ) - ) - def forward(self, x): - return paddle.clip((x + paddle.fluid.layers.tanh( self.layers( ((x * 1).detach())-0.4462414 ) )), 0.0, 1.0) - -class NetworkC( nn.Layer ): - def __init__(self): - super(NetworkC, self).__init__() - - self.down1 = nn.Sequential( - nn.ReplicationPad3d((1,1,1,1,0,0)), - TempConv( 1, 64, stride=(1,2,2), padding=(0,0,0) ), - TempConv( 64, 128 ), - TempConv( 128, 128 ), - TempConv( 128, 256, stride=(1,2,2) ), - TempConv( 256, 256 ), - TempConv( 256, 256 ), - TempConv( 256, 512, stride=(1,2,2) ), - 
TempConv( 512, 512 ), - TempConv( 512, 512 ) - ) - self.flat = nn.Sequential( - TempConv( 512, 512 ), - TempConv( 512, 512 ) - ) - self.down2 = nn.Sequential( - TempConv( 512, 512, stride=(1,2,2) ), - TempConv( 512, 512 ), - ) - self.stattn1 = SourceReferenceAttention( 512, 512 ) # Source-Reference Attention - self.stattn2 = SourceReferenceAttention( 512, 512 ) # Source-Reference Attention - self.selfattn1 = SourceReferenceAttention( 512, 512 ) # Self Attention - self.conv1 = TempConv( 512, 512 ) - self.up1 = UpsampleConcat( 512, 512, 512 ) # 1/8 - self.selfattn2 = SourceReferenceAttention( 512, 512 ) # Self Attention - self.conv2 = TempConv( 512, 256, kernel_size=(3,3,3), stride=(1,1,1), padding=(1,1,1) ) - self.up2 = nn.Sequential( - Upsample( 256, 128 ), # 1/4 - TempConv( 128, 64, kernel_size=(3,3,3), stride=(1,1,1), padding=(1,1,1) ) - ) - self.up3 = nn.Sequential( - Upsample( 64, 32 ), # 1/2 - TempConv( 32, 16, kernel_size=(3,3,3), stride=(1,1,1), padding=(1,1,1) ) - ) - self.up4 = nn.Sequential( - Upsample( 16, 8 ), # 1/1 - nn.Conv3d( 8, 2, kernel_size=(3,3,3), stride=(1,1,1), padding=(1,1,1) ) - ) - self.reffeatnet1 = nn.Sequential( - TempConv( 3, 64, stride=(1,2,2) ), - TempConv( 64, 128 ), - TempConv( 128, 128 ), - TempConv( 128, 256, stride=(1,2,2) ), - TempConv( 256, 256 ), - TempConv( 256, 256 ), - TempConv( 256, 512, stride=(1,2,2) ), - TempConv( 512, 512 ), - TempConv( 512, 512 ), - ) - self.reffeatnet2 = nn.Sequential( - TempConv( 512, 512, stride=(1,2,2) ), - TempConv( 512, 512 ), - TempConv( 512, 512 ), - ) - - def forward(self, x, x_refs=None): - x1 = self.down1( x - 0.4462414 ) - if x_refs is not None: - x_refs = paddle.transpose(x_refs, [0, 2, 1, 3, 4]) # [B,T,C,H,W] --> [B,C,T,H,W] - reffeat = self.reffeatnet1( x_refs-0.48 ) - x1, _ = self.stattn1( x1, reffeat ) - - x2 = self.flat( x1 ) - out = self.down2( x1 ) - if x_refs is not None: - reffeat2 = self.reffeatnet2( reffeat ) - out, _ = self.stattn2( out, reffeat2 ) - out = self.conv1( out ) - out, _ = self.selfattn1( out, out ) - out = self.up1( out, x2 ) - out, _ = self.selfattn2( out, out ) - out = self.conv2( out ) - out = self.up2( out ) - out = self.up3( out ) - out = self.up4( out ) - - return F.sigmoid( out ) - \ No newline at end of file +class NetworkR(nn.Layer): + def __init__(self): + super(NetworkR, self).__init__() + + self.layers = nn.Sequential( + nn.ReplicationPad3d((1, 1, 1, 1, 1, 1)), + TempConv(1, + 64, + kernel_size=(3, 3, 3), + stride=(1, 2, 2), + padding=(0, 0, 0)), + TempConv(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv(128, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv(128, + 256, + kernel_size=(3, 3, 3), + stride=(1, 2, 2), + padding=(1, 1, 1)), + TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + Upsample(256, 128), + TempConv(128, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv(64, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + Upsample(64, 16), + nn.Conv3d(16, + 1, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1))) + + def forward(self, x): + return paddle.clip( + (x + F.tanh(self.layers(((x * 1).detach()) - 0.4462414))), 0.0, 1.0) + + +class NetworkC(nn.Layer): + def __init__(self): + super(NetworkC, self).__init__() + + self.down1 = nn.Sequential( + nn.ReplicationPad3d((1, 1, 1, 1, 0, 0)), + TempConv(1, 64, stride=(1, 2, 2), 
padding=(0, 0, 0)), + TempConv(64, 128), TempConv(128, 128), + TempConv(128, 256, stride=(1, 2, 2)), TempConv(256, 256), + TempConv(256, 256), TempConv(256, 512, stride=(1, 2, 2)), + TempConv(512, 512), TempConv(512, 512)) + self.flat = nn.Sequential(TempConv(512, 512), TempConv(512, 512)) + self.down2 = nn.Sequential( + TempConv(512, 512, stride=(1, 2, 2)), + TempConv(512, 512), + ) + self.stattn1 = SourceReferenceAttention( + 512, 512) # Source-Reference Attention + self.stattn2 = SourceReferenceAttention( + 512, 512) # Source-Reference Attention + self.selfattn1 = SourceReferenceAttention(512, 512) # Self Attention + self.conv1 = TempConv(512, 512) + self.up1 = UpsampleConcat(512, 512, 512) # 1/8 + self.selfattn2 = SourceReferenceAttention(512, 512) # Self Attention + self.conv2 = TempConv(512, + 256, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1)) + self.up2 = nn.Sequential( + Upsample(256, 128), # 1/4 + TempConv(128, + 64, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1))) + self.up3 = nn.Sequential( + Upsample(64, 32), # 1/2 + TempConv(32, + 16, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1))) + self.up4 = nn.Sequential( + Upsample(16, 8), # 1/1 + nn.Conv3d(8, + 2, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1))) + self.reffeatnet1 = nn.Sequential( + TempConv(3, 64, stride=(1, 2, 2)), + TempConv(64, 128), + TempConv(128, 128), + TempConv(128, 256, stride=(1, 2, 2)), + TempConv(256, 256), + TempConv(256, 256), + TempConv(256, 512, stride=(1, 2, 2)), + TempConv(512, 512), + TempConv(512, 512), + ) + self.reffeatnet2 = nn.Sequential( + TempConv(512, 512, stride=(1, 2, 2)), + TempConv(512, 512), + TempConv(512, 512), + ) + + def forward(self, x, x_refs=None): + x1 = self.down1(x - 0.4462414) + if x_refs is not None: + x_refs = paddle.transpose( + x_refs, [0, 2, 1, 3, 4]) # [B,T,C,H,W] --> [B,C,T,H,W] + reffeat = self.reffeatnet1(x_refs - 0.48) + x1, _ = self.stattn1(x1, reffeat) + + x2 = self.flat(x1) + out = self.down2(x1) + if x_refs is not None: + reffeat2 = self.reffeatnet2(reffeat) + out, _ = self.stattn2(out, reffeat2) + out = self.conv1(out) + out, _ = self.selfattn1(out, out) + out = self.up1(out, x2) + out, _ = self.selfattn2(out, out) + out = self.conv2(out) + out = self.up2(out) + out = self.up3(out) + out = self.up4(out) + + return F.sigmoid(out) diff --git a/ppgan/models/generators/resnet.py b/ppgan/models/generators/resnet.py index 824fc386d12d405daf115a4d5609912d4c729c82..c08bb33d91f900bd94329031b2cb504022709f4f 100644 --- a/ppgan/models/generators/resnet.py +++ b/ppgan/models/generators/resnet.py @@ -2,20 +2,25 @@ import paddle import paddle.nn as nn import functools -from ...modules.nn import ReflectionPad2d, LeakyReLU, Tanh, Dropout, BCEWithLogitsLoss, Pad2D, MSELoss from ...modules.norm import build_norm_layer from .builder import GENERATORS @GENERATORS.register() -class ResnetGenerator(paddle.fluid.dygraph.Layer): +class ResnetGenerator(nn.Layer): """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations. 
code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style) """ - - def __init__(self, input_nc, output_nc, ngf=64, norm_type='instance', use_dropout=False, n_blocks=6, padding_type='reflect'): + def __init__(self, + input_nc, + output_nc, + ngf=64, + norm_type='instance', + use_dropout=False, + n_blocks=6, + padding_type='reflect'): """Construct a Resnet-based generator Args: @@ -27,7 +32,7 @@ class ResnetGenerator(paddle.fluid.dygraph.Layer): n_blocks (int) -- the number of ResNet blocks padding_type (str) -- the name of padding layer in conv layers: reflect | replicate | zero """ - assert(n_blocks >= 0) + assert (n_blocks >= 0) super(ResnetGenerator, self).__init__() norm_layer = build_norm_layer(norm_type) @@ -36,35 +41,56 @@ class ResnetGenerator(paddle.fluid.dygraph.Layer): else: use_bias = norm_layer == nn.InstanceNorm - model = [ReflectionPad2d(3), - nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias_attr=use_bias), - norm_layer(ngf), - nn.ReLU()] + model = [ + nn.ReflectionPad2d([3, 3, 3, 3]), + nn.Conv2d(input_nc, + ngf, + kernel_size=7, + padding=0, + bias_attr=use_bias), + norm_layer(ngf), + nn.ReLU() + ] n_downsampling = 2 for i in range(n_downsampling): # add downsampling layers - mult = 2 ** i + mult = 2**i model += [ - nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias_attr=use_bias), - norm_layer(ngf * mult * 2), - nn.ReLU()] - - mult = 2 ** n_downsampling - for i in range(n_blocks): # add ResNet blocks + nn.Conv2d(ngf * mult, + ngf * mult * 2, + kernel_size=3, + stride=2, + padding=1, + bias_attr=use_bias), + norm_layer(ngf * mult * 2), + nn.ReLU() + ] + + mult = 2**n_downsampling + for i in range(n_blocks): # add ResNet blocks - model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)] + model += [ + ResnetBlock(ngf * mult, + padding_type=padding_type, + norm_layer=norm_layer, + use_dropout=use_dropout, + use_bias=use_bias) + ] for i in range(n_downsampling): # add upsampling layers - mult = 2 ** (n_downsampling - i) + mult = 2**(n_downsampling - i) model += [ - nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), - kernel_size=3, stride=2, - padding=1, - output_padding=1, - bias_attr=use_bias), - norm_layer(int(ngf * mult / 2)), - nn.ReLU()] - model += [ReflectionPad2d(3)] + nn.ConvTranspose2d(ngf * mult, + int(ngf * mult / 2), + kernel_size=3, + stride=2, + padding=1, + output_padding=1, + bias_attr=use_bias), + norm_layer(int(ngf * mult / 2)), + nn.ReLU() + ] + model += [nn.ReflectionPad2d([3, 3, 3, 3])] model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] model += [nn.Tanh()] @@ -75,9 +101,8 @@ class ResnetGenerator(paddle.fluid.dygraph.Layer): return self.model(x) -class ResnetBlock(paddle.fluid.dygraph.Layer): +class ResnetBlock(nn.Layer): """Define a Resnet block""" - def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias): """Initialize the Resnet block @@ -87,9 +112,11 @@ class ResnetBlock(paddle.fluid.dygraph.Layer): Original Resnet paper: https://arxiv.org/pdf/1512.03385.pdf """ super(ResnetBlock, self).__init__() - self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias) + self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, + use_dropout, use_bias) - def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias): + def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, + use_bias): 
"""Construct a convolutional block. Parameters: @@ -104,28 +131,37 @@ class ResnetBlock(paddle.fluid.dygraph.Layer): conv_block = [] p = 0 if padding_type == 'reflect': - conv_block += [ReflectionPad2d(1)] + conv_block += [nn.ReflectionPad2d([1, 1, 1, 1])] elif padding_type == 'replicate': - conv_block += [ReplicationPad2d(1)] + conv_block += [nn.ReplicationPad2d([1, 1, 1, 1])] elif padding_type == 'zero': p = 1 else: - raise NotImplementedError('padding [%s] is not implemented' % padding_type) - - conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias), norm_layer(dim), nn.ReLU()] + raise NotImplementedError('padding [%s] is not implemented' % + padding_type) + + conv_block += [ + nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias), + norm_layer(dim), + nn.ReLU() + ] if use_dropout: - conv_block += [Dropout(0.5)] - + conv_block += [nn.Dropout(0.5)] + p = 0 if padding_type == 'reflect': - conv_block += [ReflectionPad2d(1)] + conv_block += [nn.ReflectionPad2d([1, 1, 1, 1])] elif padding_type == 'replicate': - conv_block += [ReplicationPad2d(1)] + conv_block += [nn.ReplicationPad2d([1, 1, 1, 1])] elif padding_type == 'zero': p = 1 else: - raise NotImplementedError('padding [%s] is not implemented' % padding_type) - conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias), norm_layer(dim)] + raise NotImplementedError('padding [%s] is not implemented' % + padding_type) + conv_block += [ + nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias), + norm_layer(dim) + ] return nn.Sequential(*conv_block) diff --git a/ppgan/models/generators/unet.py b/ppgan/models/generators/unet.py index f3570619678ac55a671e95ea9cf700f23d1302db..7161353de337b136d58d90f821e344ae07565bfc 100644 --- a/ppgan/models/generators/unet.py +++ b/ppgan/models/generators/unet.py @@ -1,17 +1,21 @@ +import functools import paddle import paddle.nn as nn -import functools -from ...modules.nn import ReflectionPad2d, LeakyReLU, Tanh, Dropout from ...modules.norm import build_norm_layer from .builder import GENERATORS @GENERATORS.register() -class UnetGenerator(paddle.fluid.dygraph.Layer): +class UnetGenerator(nn.Layer): """Create a Unet-based generator""" - - def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_type='batch', use_dropout=False): + def __init__(self, + input_nc, + output_nc, + num_downs, + ngf=64, + norm_type='batch', + use_dropout=False): """Construct a Unet generator Args: input_nc (int) -- the number of channels in input images @@ -27,36 +31,64 @@ class UnetGenerator(paddle.fluid.dygraph.Layer): super(UnetGenerator, self).__init__() norm_layer = build_norm_layer(norm_type) # construct unet structure - unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True) # add the innermost layer - for i in range(num_downs - 5): # add intermediate layers with ngf * 8 filters - unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout) + unet_block = UnetSkipConnectionBlock( + ngf * 8, + ngf * 8, + input_nc=None, + submodule=None, + norm_layer=norm_layer, + innermost=True) # add the innermost layer + for i in range(num_downs - + 5): # add intermediate layers with ngf * 8 filters + unet_block = UnetSkipConnectionBlock(ngf * 8, + ngf * 8, + input_nc=None, + submodule=unet_block, + norm_layer=norm_layer, + use_dropout=use_dropout) # gradually reduce the number of filters from ngf * 8 to ngf - unet_block 
= UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer) - unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer) - unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer) - self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer) # add the outermost layer + unet_block = UnetSkipConnectionBlock(ngf * 4, + ngf * 8, + input_nc=None, + submodule=unet_block, + norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(ngf * 2, + ngf * 4, + input_nc=None, + submodule=unet_block, + norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(ngf, + ngf * 2, + input_nc=None, + submodule=unet_block, + norm_layer=norm_layer) + self.model = UnetSkipConnectionBlock( + output_nc, + ngf, + input_nc=input_nc, + submodule=unet_block, + outermost=True, + norm_layer=norm_layer) # add the outermost layer def forward(self, input): """Standard forward""" - # tmp = self.model._sub_layers['model'][0](input) - # tmp1 = self.model._sub_layers['model'][1](tmp) - # tmp2 = self.model._sub_layers['model'][2](tmp1) - # import pickle - # pickle.dump(tmp2.numpy(), open('/workspace/notebook/align_pix2pix/tmp2-pd.pkl', 'wb')) - # tmp3 = self.model._sub_layers['model'][3](tmp2) - # pickle.dump(tmp3.numpy(), open('/workspace/notebook/align_pix2pix/tmp3-pd.pkl', 'wb')) - # tmp4 = self.model._sub_layers['model'][4](tmp3) return self.model(input) -class UnetSkipConnectionBlock(paddle.fluid.dygraph.Layer): +class UnetSkipConnectionBlock(nn.Layer): """Defines the Unet submodule with skip connection. X -------------------identity---------------------- |-- downsampling -- |submodule| -- upsampling --| """ - - def __init__(self, outer_nc, inner_nc, input_nc=None, - submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm, use_dropout=False): + def __init__(self, + outer_nc, + inner_nc, + input_nc=None, + submodule=None, + outermost=False, + innermost=False, + norm_layer=nn.BatchNorm, + use_dropout=False): """Construct a Unet submodule with skip connections. 
Parameters: @@ -77,36 +109,48 @@ class UnetSkipConnectionBlock(paddle.fluid.dygraph.Layer): use_bias = norm_layer == nn.InstanceNorm if input_nc is None: input_nc = outer_nc - downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4, - stride=2, padding=1, bias_attr=use_bias) - downrelu = LeakyReLU(0.2, True) + downconv = nn.Conv2d(input_nc, + inner_nc, + kernel_size=4, + stride=2, + padding=1, + bias_attr=use_bias) + downrelu = nn.LeakyReLU(0.2) downnorm = norm_layer(inner_nc) - uprelu = nn.ReLU(True) + uprelu = nn.ReLU() upnorm = norm_layer(outer_nc) if outermost: - upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, - kernel_size=4, stride=2, + upconv = nn.ConvTranspose2d(inner_nc * 2, + outer_nc, + kernel_size=4, + stride=2, padding=1) down = [downconv] - up = [uprelu, upconv, Tanh()] + up = [uprelu, upconv, nn.Tanh()] model = down + [submodule] + up elif innermost: - upconv = nn.ConvTranspose2d(inner_nc, outer_nc, - kernel_size=4, stride=2, - padding=1, bias_attr=use_bias) + upconv = nn.ConvTranspose2d(inner_nc, + outer_nc, + kernel_size=4, + stride=2, + padding=1, + bias_attr=use_bias) down = [downrelu, downconv] up = [uprelu, upconv, upnorm] model = down + up else: - upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, - kernel_size=4, stride=2, - padding=1, bias_attr=use_bias) + upconv = nn.ConvTranspose2d(inner_nc * 2, + outer_nc, + kernel_size=4, + stride=2, + padding=1, + bias_attr=use_bias) down = [downrelu, downconv, downnorm] up = [uprelu, upconv, upnorm] if use_dropout: - model = down + [submodule] + up + [Dropout(0.5)] + model = down + [submodule] + up + [nn.Dropout(0.5)] else: model = down + [submodule] + up @@ -115,5 +159,5 @@ class UnetSkipConnectionBlock(paddle.fluid.dygraph.Layer): def forward(self, x): if self.outermost: return self.model(x) - else: # add skip connections + else: # add skip connections return paddle.concat([x, self.model(x)], 1) diff --git a/ppgan/models/losses.py b/ppgan/models/losses.py index 75d7e00f63b73b68c71ac67b8979809dfd3f1983..28c77ca4c49ab5c0aae718830868ecc396236cb8 100644 --- a/ppgan/models/losses.py +++ b/ppgan/models/losses.py @@ -1,17 +1,15 @@ -import paddle -import paddle.nn as nn import numpy as np -from ..modules.nn import BCEWithLogitsLoss +import paddle +import paddle.nn as nn -class GANLoss(paddle.fluid.dygraph.Layer): +class GANLoss(nn.Layer): """Define different GAN objectives. The GANLoss class abstracts away the need to create the target label tensor that has the same size as the input. """ - def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0): """ Initialize the GANLoss class. 
diff --git a/ppgan/models/losses.py b/ppgan/models/losses.py
index 75d7e00f63b73b68c71ac67b8979809dfd3f1983..28c77ca4c49ab5c0aae718830868ecc396236cb8 100644
--- a/ppgan/models/losses.py
+++ b/ppgan/models/losses.py
@@ -1,17 +1,15 @@
-import paddle
-import paddle.nn as nn
 import numpy as np
-from ..modules.nn import BCEWithLogitsLoss
+import paddle
+import paddle.nn as nn
 
 
-class GANLoss(paddle.fluid.dygraph.Layer):
+class GANLoss(nn.Layer):
     """Define different GAN objectives.
 
     The GANLoss class abstracts away the need to create the target label tensor
     that has the same size as the input.
     """
-
     def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0):
         """ Initialize the GANLoss class.
@@ -31,7 +29,7 @@ class GANLoss(paddle.fluid.dygraph.Layer):
         if gan_mode == 'lsgan':
             self.loss = nn.MSELoss()
         elif gan_mode == 'vanilla':
-            self.loss = BCEWithLogitsLoss()
+            self.loss = nn.BCEWithLogitsLoss()
         elif gan_mode in ['wgangp']:
             self.loss = None
         else:
@@ -50,11 +48,17 @@ class GANLoss(paddle.fluid.dygraph.Layer):
 
         if target_is_real:
             if not hasattr(self, 'target_real_tensor'):
-                self.target_real_tensor = paddle.fill_constant(shape=paddle.shape(prediction), value=self.target_real_label, dtype='float32')
+                self.target_real_tensor = paddle.fill_constant(
+                    shape=paddle.shape(prediction),
+                    value=self.target_real_label,
+                    dtype='float32')
             target_tensor = self.target_real_tensor
         else:
             if not hasattr(self, 'target_fake_tensor'):
-                self.target_fake_tensor = paddle.fill_constant(shape=paddle.shape(prediction), value=self.target_fake_label, dtype='float32')
+                self.target_fake_tensor = paddle.fill_constant(
+                    shape=paddle.shape(prediction),
+                    value=self.target_fake_label,
+                    dtype='float32')
             target_tensor = self.target_fake_tensor
 
         # target_tensor.stop_gradient = True
@@ -78,4 +82,4 @@ class GANLoss(paddle.fluid.dygraph.Layer):
             loss = -prediction.mean()
         else:
             loss = prediction.mean()
-        return loss
\ No newline at end of file
+        return loss
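A minimal usage sketch of GANLoss; it assumes the class keeps the standard pix2pix-style `__call__(prediction, target_is_real)` entry point, whose definition lies outside the hunks shown here:

    import paddle

    criterion = GANLoss('vanilla')           # nn.BCEWithLogitsLoss under the hood
    pred_fake = paddle.rand([4, 1, 30, 30])  # e.g. PatchGAN logits (shape arbitrary)

    # target tensors are built lazily with paddle.fill_constant, as above
    loss_D_fake = criterion(pred_fake, False)  # compared against 0.0 labels
    loss_G = criterion(pred_fake, True)        # compared against 1.0 labels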
diff --git a/ppgan/models/pix2pix_model.py b/ppgan/models/pix2pix_model.py
index e9ff9a28d45fc8eb8916eaf254a8a35012440ab4..c68926fdd20ade2f1d4cc02297371ac42fd3eacd 100644
--- a/ppgan/models/pix2pix_model.py
+++ b/ppgan/models/pix2pix_model.py
@@ -8,6 +8,7 @@ from .discriminators.builder import build_discriminator
 from .losses import GANLoss
 
 from ..solver import build_optimizer
+from ..modules.init import init_weights
 from ..utils.image_pool import ImagePool
 
 
@@ -42,12 +43,15 @@ class Pix2PixModel(BaseModel):
 
         # define networks (both generator and discriminator)
         self.netG = build_generator(opt.model.generator)
+        init_weights(self.netG)
 
         # define a discriminator; conditional GANs need to take both input and output images; Therefore, #channels for D is input_nc + output_nc
         if self.isTrain:
             self.netD = build_discriminator(opt.model.discriminator)
+            init_weights(self.netD)
 
         if self.isTrain:
+            self.losses = {}
             # define loss functions
             self.criterionGAN = GANLoss(opt.model.gan_mode)
             self.criterionL1 = paddle.nn.L1Loss()
@@ -79,6 +83,7 @@ class Pix2PixModel(BaseModel):
         AtoB = self.opt.dataset.train.direction == 'AtoB'
         self.real_A = paddle.to_variable(input['A' if AtoB else 'B'])
         self.real_B = paddle.to_variable(input['B' if AtoB else 'A'])
+        self.image_paths = input['A_paths' if AtoB else 'B_paths']
 
     def forward(self):
@@ -118,6 +123,7 @@ class Pix2PixModel(BaseModel):
         # Second, G(A) = B
         self.loss_G_L1 = self.criterionL1(self.fake_B,
                                           self.real_B) * self.opt.lambda_L1
+
         # combine loss and calculate gradients
         self.loss_G = self.loss_G_GAN + self.loss_G_L1
 
diff --git a/ppgan/modules/init.py b/ppgan/modules/init.py
new file mode 100644
index 0000000000000000000000000000000000000000..37e4257a1706d9cc2cdd7fcb50719ef8bdb577bc
--- /dev/null
+++ b/ppgan/modules/init.py
@@ -0,0 +1,300 @@
+import math
+import numpy as np
+
+import paddle
+
+
+def _calculate_fan_in_and_fan_out(tensor):
+    dimensions = len(tensor.shape)
+    if dimensions < 2:
+        raise ValueError(
+            "Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
+        )
+
+    num_input_fmaps = tensor.shape[1]
+    num_output_fmaps = tensor.shape[0]
+    receptive_field_size = 1
+    if len(tensor.shape) > 2:
+        receptive_field_size = paddle.numel(tensor[0][0])
+    fan_in = num_input_fmaps * receptive_field_size
+    fan_out = num_output_fmaps * receptive_field_size
+
+    return fan_in, fan_out
+
+
+def _calculate_correct_fan(tensor, mode):
+    mode = mode.lower()
+    valid_modes = ['fan_in', 'fan_out']
+    if mode not in valid_modes:
+        raise ValueError("Mode {} not supported, please use one of {}".format(
+            mode, valid_modes))
+
+    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
+    return fan_in if mode == 'fan_in' else fan_out
+
+
+def calculate_gain(nonlinearity, param=None):
+    r"""Return the recommended gain value for the given nonlinearity function.
+    The values are as follows:
+
+    ================= ====================================================
+    nonlinearity      gain
+    ================= ====================================================
+    Linear / Identity :math:`1`
+    Conv{1,2,3}D      :math:`1`
+    Sigmoid           :math:`1`
+    Tanh              :math:`\frac{5}{3}`
+    ReLU              :math:`\sqrt{2}`
+    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
+    ================= ====================================================
+
+    Args:
+        nonlinearity: the non-linear function (`nn.functional` name)
+        param: optional parameter for the non-linear function
+
+    """
+    linear_fns = [
+        'linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d',
+        'conv_transpose2d', 'conv_transpose3d'
+    ]
+    if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
+        return 1
+    elif nonlinearity == 'tanh':
+        return 5.0 / 3
+    elif nonlinearity == 'relu':
+        return math.sqrt(2.0)
+    elif nonlinearity == 'leaky_relu':
+        if param is None:
+            negative_slope = 0.01
+        elif not isinstance(param, bool) and isinstance(
+                param, int) or isinstance(param, float):
+            # True/False are instances of int, hence check above
+            negative_slope = param
+        else:
+            raise ValueError(
+                "negative_slope {} not a valid number".format(param))
+        return math.sqrt(2.0 / (1 + negative_slope**2))
+    else:
+        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
+
+
+@paddle.no_grad()
+def constant_(x, value):
+    temp_value = paddle.fill_constant(x.shape, x.dtype, value)
+    x.set_value(temp_value)
+    return x
+
+
+@paddle.no_grad()
+def normal_(x, mean=0., std=1.):
+    temp_value = paddle.normal(mean, std, shape=x.shape)
+    x.set_value(temp_value)
+    return x
+
+
+@paddle.no_grad()
+def uniform_(x, a=-1., b=1.):
+    temp_value = paddle.uniform(min=a, max=b, shape=x.shape)
+    x.set_value(temp_value)
+    return x
+
+
+@paddle.no_grad()
+def xavier_uniform_(x, gain=1.):
+    r"""Fills the input `Tensor` with values according to the method
+    described in `Understanding the difficulty of training deep feedforward
+    neural networks` - Glorot, X. & Bengio, Y. (2010), using a uniform
+    distribution. The resulting tensor will have values sampled from
+    :math:`\mathcal{U}(-a, a)` where
+
+    .. math::
+        a = \text{gain} \times \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}
+
+    Also known as Glorot initialization.
+
+    Args:
+        x: an n-dimensional `paddle.Tensor`
+        gain: an optional scaling factor
+
+    """
+    fan_in, fan_out = _calculate_fan_in_and_fan_out(x)
+    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
+    a = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
+
+    return uniform_(x, -a, a)
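A quick sanity check of xavier_uniform_ against the bound stated in its docstring (a sketch; the Conv2d shape is arbitrary and the dygraph setup mirrors what this patch does in ppgan/utils/setup.py):

    import paddle
    import paddle.nn as nn

    paddle.disable_static()  # dygraph mode, as ppgan/utils/setup.py enables

    conv = nn.Conv2d(3, 64, kernel_size=3)
    xavier_uniform_(conv.weight)  # in place: bound = sqrt(6 / (fan_in + fan_out))

    # fan_in = 3*3*3 = 27, fan_out = 64*3*3 = 576,
    # so bound = sqrt(6 / 603) ~= 0.0998 and every entry should lie inside it
    print(float(paddle.max(paddle.abs(conv.weight)).numpy()))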
+
+
+@paddle.no_grad()
+def xavier_normal_(x, gain=1.):
+    r"""Fills the input `Tensor` with values according to the method
+    described in `Understanding the difficulty of training deep feedforward
+    neural networks` - Glorot, X. & Bengio, Y. (2010), using a normal
+    distribution. The resulting tensor will have values sampled from
+    :math:`\mathcal{N}(0, \text{std}^2)` where
+
+    .. math::
+        \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}
+
+    Also known as Glorot initialization.
+
+    Args:
+        x: an n-dimensional `paddle.Tensor`
+        gain: an optional scaling factor
+
+    """
+    fan_in, fan_out = _calculate_fan_in_and_fan_out(x)
+    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
+
+    return normal_(x, 0., std)
+
+
+@paddle.no_grad()
+def kaiming_uniform_(x, a=0, mode='fan_in', nonlinearity='leaky_relu'):
+    r"""Fills the input `Tensor` with values according to the method
+    described in `Delving deep into rectifiers: Surpassing human-level
+    performance on ImageNet classification` - He, K. et al. (2015), using a
+    uniform distribution. The resulting tensor will have values sampled from
+    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where
+
+    .. math::
+        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}
+
+    Also known as He initialization.
+
+    Args:
+        x: an n-dimensional `paddle.Tensor`
+        a: the negative slope of the rectifier used after this layer (only
+            used with ``'leaky_relu'``)
+        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
+            preserves the magnitude of the variance of the weights in the
+            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
+            backwards pass.
+        nonlinearity: the non-linear function (`nn.functional` name),
+            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
+
+    """
+    fan = _calculate_correct_fan(x, mode)
+    gain = calculate_gain(nonlinearity, a)
+    std = gain / math.sqrt(fan)
+    bound = math.sqrt(
+        3.0) * std  # Calculate uniform bounds from standard deviation
+
+    temp_value = paddle.uniform(x.shape, min=-bound, max=bound)
+    x.set_value(temp_value)
+
+    return x
+
+
+@paddle.no_grad()
+def kaiming_normal_(x, a=0, mode='fan_in', nonlinearity='leaky_relu'):
+    r"""Fills the input `Tensor` with values according to the method
+    described in `Delving deep into rectifiers: Surpassing human-level
+    performance on ImageNet classification` - He, K. et al. (2015), using a
+    normal distribution. The resulting tensor will have values sampled from
+    :math:`\mathcal{N}(0, \text{std}^2)` where
+
+    .. math::
+        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}
+
+    Also known as He initialization.
+
+    Args:
+        x: an n-dimensional `paddle.Tensor`
+        a: the negative slope of the rectifier used after this layer (only
+            used with ``'leaky_relu'``)
+        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
+            preserves the magnitude of the variance of the weights in the
+            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
+            backwards pass.
+        nonlinearity: the non-linear function (`nn.functional` name),
+            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
+
+    """
+    fan = _calculate_correct_fan(x, mode)
+    gain = calculate_gain(nonlinearity, a)
+    std = gain / math.sqrt(fan)
+
+    temp_value = paddle.normal(0, std, shape=x.shape)
+    x.set_value(temp_value)
+    return x
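And the Kaiming counterpart, matched to the LeakyReLU(0.2) slope that this patch uses in the U-Net downsampling path (a sketch with arbitrary shapes):

    import paddle.nn as nn

    conv = nn.Conv2d(64, 128, kernel_size=4)
    # gain = sqrt(2 / (1 + 0.2**2)); std = gain / sqrt(fan_in), fan_in = 64*4*4
    kaiming_normal_(conv.weight, a=0.2, mode='fan_in', nonlinearity='leaky_relu')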
+ + """ + fan = _calculate_correct_fan(x, mode) + gain = calculate_gain(nonlinearity, a) + std = gain / math.sqrt(fan) + + temp_value = paddle.normal(0, std, shape=x.shape) + x.set_value(temp_value) + return x + + +def constant_init(layer, val, bias=0): + if hasattr(layer, 'weight') and layer.weight is not None: + constant_(layer.weight, val) + if hasattr(layer, 'bias') and layer.bias is not None: + constant_(layer.bias, bias) + + +def xavier_init(layer, gain=1, bias=0, distribution='normal'): + assert distribution in ['uniform', 'normal'] + if distribution == 'uniform': + xavier_uniform_(layer.weight, gain=gain) + else: + xavier_normal_(layer.weight, gain=gain) + if hasattr(layer, 'bias') and layer.bias is not None: + constant_(layer.bias, bias) + + +def normal_init(layer, mean=0, std=1, bias=0): + normal_(layer.weight, mean, std) + if hasattr(layer, 'bias') and layer.bias is not None: + constant_(layer.bias, bias) + + +def uniform_init(layer, a=0, b=1, bias=0): + uniform_(layer.weight, a, b) + if hasattr(layer, 'bias') and layer.bias is not None: + constant_(layer.bias, bias) + + +def kaiming_init(layer, + a=0, + mode='fan_out', + nonlinearity='relu', + bias=0, + distribution='normal'): + assert distribution in ['uniform', 'normal'] + if distribution == 'uniform': + kaiming_uniform_(layer.weight, + a=a, + mode=mode, + nonlinearity=nonlinearity) + else: + kaiming_normal_(layer.weight, a=a, mode=mode, nonlinearity=nonlinearity) + if hasattr(layer, 'bias') and layer.bias is not None: + constant_(layer.bias, bias) + + +def init_weights(net, init_type='normal', init_gain=0.02): + """Initialize network weights. + Args: + net (nn.Layer): network to be initialized + init_type (str): the name of an initialization method: normal | xavier | kaiming | orthogonal + init_gain (float): scaling factor for normal, xavier and orthogonal. + We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and kaiming might + work better for some applications. Feel free to try yourself. + """ + def init_func(m): # define the initialization function + classname = m.__class__.__name__ + if hasattr(m, 'weight') and (classname.find('Conv') != -1 + or classname.find('Linear') != -1): + if init_type == 'normal': + normal_(m.weight, 0.0, init_gain) + elif init_type == 'xavier': + xavier_normal_(m.weight, gain=init_gain) + elif init_type == 'kaiming': + kaiming_normal_(m.weight, a=0, mode='fan_in') + else: + raise NotImplementedError( + 'initialization method [%s] is not implemented' % init_type) + if hasattr(m, 'bias') and m.bias is not None: + constant_(m.bias, 0.0) + elif classname.find( + 'BatchNorm' + ) != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies. 
diff --git a/ppgan/modules/nn.py b/ppgan/modules/nn.py
index f867b7284efc5db50e9b7f22dfd6d6c31073e682..81178b12b0e774aba6c62c96bd6260311e1c42d5 100644
--- a/ppgan/modules/nn.py
+++ b/ppgan/modules/nn.py
@@ -1,179 +1,52 @@
 import paddle
+import paddle.nn as nn
 
-from paddle.fluid.dygraph import Layer
-from paddle import fluid
-
-class MSELoss():
-    def __init__(self):
-        pass
-
-    def __call__(self, prediction, label):
-        return fluid.layers.mse_loss(prediction, label)
-
-class L1Loss():
-    def __init__(self):
-        pass
-
-    def __call__(self, prediction, label):
-        return fluid.layers.reduce_mean(fluid.layers.elementwise_sub(prediction, label, act='abs'))
-
-class ReflectionPad2d(Layer):
-    def __init__(self, size):
-        super(ReflectionPad2d, self).__init__()
-        self.size = size
-
-    def forward(self, x):
-        return fluid.layers.pad2d(x, [self.size] * 4, mode="reflect")
-
-
-class LeakyReLU(Layer):
-    def __init__(self, alpha, inplace=False):
-        super(LeakyReLU, self).__init__()
-        self.alpha = alpha
-
-    def forward(self, x):
-        return fluid.layers.leaky_relu(x, self.alpha)
-
-
-class Tanh(Layer):
-    def __init__(self):
-        super(Tanh, self).__init__()
-
-    def forward(self, x):
-        return fluid.layers.tanh(x)
-
-
-class Dropout(Layer):
-    def __init__(self, prob, mode='upscale_in_train'):
-        super(Dropout, self).__init__()
-        self.prob = prob
-        self.mode = mode
-
-    def forward(self, x):
-        return fluid.layers.dropout(x, self.prob, dropout_implementation=self.mode)
-
-
-class BCEWithLogitsLoss():
-    def __init__(self, weight=None, reduction='mean'):
-        self.weight = weight
-        self.reduction = 'mean'
-
-    def __call__(self, x, label):
-        out = paddle.fluid.layers.sigmoid_cross_entropy_with_logits(x, label)
-        if self.reduction == 'sum':
-            return fluid.layers.reduce_sum(out)
-        elif self.reduction == 'mean':
-            return fluid.layers.reduce_mean(out)
-        else:
-            return out
-
-
-class _SpectralNorm(paddle.nn.SpectralNorm):
+class _SpectralNorm(nn.SpectralNorm):
     def __init__(self, weight_shape, dim=0, power_iters=1, eps=1e-12, dtype='float32'):
-        super(_SpectralNorm, self).__init__(weight_shape, dim, power_iters, eps, dtype)
+        super(_SpectralNorm, self).__init__(weight_shape, dim, power_iters, eps,
+                                            dtype)
 
     def forward(self, weight):
-        paddle.fluid.data_feeder.check_variable_and_dtype(weight, "weight", ['float32', 'float64'],
-                                                          'SpectralNorm')
         inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v}
         out = self._helper.create_variable_for_type_inference(self._dtype)
         _power_iters = self._power_iters if self.training else 0
-        self._helper.append_op(
-            type="spectral_norm",
-            inputs=inputs,
-            outputs={"Out": out, },
-            attrs={
-                "dim": self._dim,
-                "power_iters": _power_iters,
-                "eps": self._eps,
-            })
+        self._helper.append_op(type="spectral_norm",
+                               inputs=inputs,
+                               outputs={
+                                   "Out": out,
+                               },
+                               attrs={
+                                   "dim": self._dim,
+                                   "power_iters": _power_iters,
+                                   "eps": self._eps,
+                               })
         return out
 
 
 class Spectralnorm(paddle.nn.Layer):
-
-    def __init__(self,
-                 layer,
-                 dim=0,
-                 power_iters=1,
-                 eps=1e-12,
-                 dtype='float32'):
+    def __init__(self, layer, dim=0, power_iters=1, eps=1e-12, dtype='float32'):
         super(Spectralnorm, self).__init__()
-        self.spectral_norm = _SpectralNorm(layer.weight.shape, dim, power_iters, eps, dtype)
+        self.spectral_norm = _SpectralNorm(layer.weight.shape, dim, power_iters,
+                                           eps, dtype)
         self.dim = dim
         self.power_iters = power_iters
         self.eps = eps
         self.layer = layer
         weight = layer._parameters['weight']
         del layer._parameters['weight']
-        self.weight_orig = self.create_parameter(weight.shape, dtype=weight.dtype)
+        self.weight_orig = self.create_parameter(weight.shape,
+                                                 dtype=weight.dtype)
         self.weight_orig.set_value(weight)
 
-
     def forward(self, x):
         weight = self.spectral_norm(self.weight_orig)
         self.layer.weight = weight
         out = self.layer(x)
         return out
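Spectralnorm re-parameterizes any sublayer that owns a `weight`: the original weight is moved to `weight_orig`, and each forward call feeds its spectrally normalized copy back into the wrapped layer. A minimal sketch (arbitrary shapes, dygraph assumed):

    import paddle
    import paddle.nn as nn

    conv = nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1)
    sn_conv = Spectralnorm(conv)  # dim=0: normalize over output channels

    x = paddle.rand([1, 3, 64, 64])
    y = sn_conv(x)  # one power iteration per call while training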
-
-
-def initial_type(
-        input,
-        op_type,
-        fan_out,
-        init="normal",
-        use_bias=False,
-        filter_size=0,
-        stddev=0.02,
-        name=None):
-    if init == "kaiming":
-        if op_type == 'conv':
-            fan_in = input.shape[1] * filter_size * filter_size
-        elif op_type == 'deconv':
-            fan_in = fan_out * filter_size * filter_size
-        else:
-            if len(input.shape) > 2:
-                fan_in = input.shape[1] * input.shape[2] * input.shape[3]
-            else:
-                fan_in = input.shape[1]
-        bound = 1 / math.sqrt(fan_in)
-        param_attr = fluid.ParamAttr(
-            # name=name + "_w",
-            initializer=fluid.initializer.Uniform(
-                low=-bound, high=bound))
-        if use_bias == True:
-            bias_attr = fluid.ParamAttr(
-                # name=name + '_b',
-                initializer=fluid.initializer.Uniform(
-                    low=-bound, high=bound))
-        else:
-            bias_attr = False
-    else:
-        param_attr = fluid.ParamAttr(
-            # name=name + "_w",
-            initializer=fluid.initializer.NormalInitializer(
-                loc=0.0, scale=stddev))
-        if use_bias == True:
-            bias_attr = fluid.ParamAttr(
-                # name=name + "_b",
-                initializer=fluid.initializer.Constant(0.0))
-        else:
-            bias_attr = False
-    return param_attr, bias_attr
-
-
-class Pad2D(fluid.dygraph.Layer):
-    def __init__(self, paddings, mode, pad_value=0.0):
-        super(Pad2D, self).__init__()
-        self.paddings = paddings
-        self.mode = mode
-
-    def forward(self, x):
-        return fluid.layers.pad2d(x, self.paddings, self.mode)
\ No newline at end of file
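For reference, each wrapper deleted above has a paddle.nn counterpart that the rest of this patch switches to; a sketch of the mapping (the commented names are the removed classes):

    import paddle.nn as nn

    act = nn.LeakyReLU(0.2)       # was LeakyReLU(0.2, True)
    tanh = nn.Tanh()              # was Tanh()
    drop = nn.Dropout(0.5)        # was Dropout(0.5)
    bce = nn.BCEWithLogitsLoss()  # was BCEWithLogitsLoss()
    mse = nn.MSELoss()            # was MSELoss()
    l1 = nn.L1Loss()              # was L1Loss()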
""" if norm_type == 'batch': - norm_layer = functools.partial(nn.BatchNorm, param_attr=paddle.ParamAttr(initializer=paddle.fluid.initializer.NormalInitializer(1.0, 0.02)), bias_attr=paddle.ParamAttr(initializer=paddle.fluid.initializer.Constant(0.0)), trainable_statistics=True) + norm_layer = functools.partial( + nn.BatchNorm, + param_attr=paddle.ParamAttr( + initializer=nn.initializer.Normal(1.0, 0.02)), + bias_attr=paddle.ParamAttr( + initializer=nn.initializer.Constant(0.0)), + trainable_statistics=True) elif norm_type == 'instance': - norm_layer = functools.partial(nn.InstanceNorm, param_attr=paddle.ParamAttr(initializer=paddle.fluid.initializer.Constant(1.0), learning_rate=0.0, trainable=False), bias_attr=paddle.ParamAttr(initializer=paddle.fluid.initializer.Constant(0.0), learning_rate=0.0, trainable=False)) + norm_layer = functools.partial( + nn.InstanceNorm, + param_attr=paddle.ParamAttr( + initializer=nn.initializer.Constant(1.0), + learning_rate=0.0, + trainable=False), + bias_attr=paddle.ParamAttr(initializer=nn.initializer.Constant(0.0), + learning_rate=0.0, + trainable=False)) elif norm_type == 'none': - def norm_layer(x): return Identity() + + def norm_layer(x): + return Identity() else: - raise NotImplementedError('normalization layer [%s] is not found' % norm_type) - return norm_layer \ No newline at end of file + raise NotImplementedError('normalization layer [%s] is not found' % + norm_type) + return norm_layer diff --git a/ppgan/utils/setup.py b/ppgan/utils/setup.py index f663ba960e21c1c1647d7acfdcad7af43d638367..d9a610ec4a2b259b85657b77c61b3d8eb8ddde19 100644 --- a/ppgan/utils/setup.py +++ b/ppgan/utils/setup.py @@ -19,6 +19,6 @@ def setup(args, cfg): logger.info('Configs: {}'.format(cfg)) - place = paddle.fluid.CUDAPlace(ParallelEnv().dev_id) \ - if ParallelEnv().nranks > 1 else paddle.fluid.CUDAPlace(0) + place = paddle.CUDAPlace(ParallelEnv().dev_id) \ + if ParallelEnv().nranks > 1 else paddle.CUDAPlace(0) paddle.disable_static(place) diff --git a/ppgan/utils/timer.py b/ppgan/utils/timer.py new file mode 100644 index 0000000000000000000000000000000000000000..6b277e5e291c70327fb08e899f4606ae08f68f5c --- /dev/null +++ b/ppgan/utils/timer.py @@ -0,0 +1,33 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time + + +class TimeAverager(object): + def __init__(self): + self.reset() + + def reset(self): + self._cnt = 0 + self._total_time = 0 + + def record(self, usetime): + self._cnt += 1 + self._total_time += usetime + + def get_average(self): + if self._cnt == 0: + return 0 + return self._total_time / self._cnt