提交 c1bd1a2a 编写于 作者: L LielinJiang

fix conflict

......@@ -90,4 +90,7 @@ parser.add_argument('--use_cuda',
type=bool,
help='use cuda or not')
parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not')
# NOTE: argparse's ``type=bool`` calls ``bool()`` on the raw string, so every
# non-empty value (including "False" and "0") used to parse as True and the
# option could never be disabled from the command line. Parse the usual
# textual spellings explicitly instead; the default stays True.
parser.add_argument(
    '--remove_duplicates',
    default=True,
    type=lambda value: str(value).strip().lower() in
    ('1', 'true', 't', 'yes', 'y', 'on'),
    help='remove duplicate frames or not')
......@@ -80,7 +80,8 @@ class VideoFrameInterp(object):
video_path,
use_gpu=True,
key_frame_thread=0.,
output_path='output'):
output_path='output',
remove_duplicates=True):
self.video_path = video_path
self.output_path = os.path.join(output_path, 'DAIN')
if model_path is None:
......@@ -138,6 +139,8 @@ class VideoFrameInterp(object):
end = time.time()
frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
if remove_duplicates:
frames = remove_duplicates(out_path)
img = imread(frames[0])
......@@ -199,58 +202,51 @@ class VideoFrameInterp(object):
X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255
if key_frame:
y_ = [
np.transpose(255.0 * X0.clip(0, 1.0), (1, 2, 0))
for i in range(num_frames)
]
else:
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
proc_end = time.time()
o = self.exe.run(self.program,
fetch_list=self.fetch_targets,
feed={"image": X})
y_ = o[0]
proc_timer.update(time.time() - proc_end)
tot_timer.update(time.time() - end)
end = time.time()
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :,
padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>6d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
assert (X0.shape[1] == X1.shape[1])
assert (X0.shape[2] == X1.shape[2])
X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
(padding_left, padding_right)), mode='edge')
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X0 = np.expand_dims(X0, axis=0)
X1 = np.expand_dims(X1, axis=0)
X = np.concatenate((X0, X1), axis=0)
proc_end = time.time()
o = self.exe.run(self.program,
fetch_list=self.fetch_targets,
feed={"image": X})
y_ = o[0]
proc_timer.update(time.time() - proc_end)
tot_timer.update(time.time() - end)
end = time.time()
y_ = [
np.transpose(
255.0 * item.clip(
0, 1.0)[0, :, padding_top:padding_top + int_height,
padding_left:padding_left + int_width],
(1, 2, 0)) for item in y_
]
time_offsets = [
kk * timestep for kk in range(1, 1 + num_frames, 1)
]
count = 1
for item, time_offset in zip(y_, time_offsets):
out_dir = os.path.join(
frame_path_interpolated, vidname,
"{:0>6d}_{:0>4d}.png".format(i, count))
count = count + 1
imsave(out_dir, np.round(item).astype(np.uint8))
num_frames = int(1.0 / timestep) - 1
......@@ -266,14 +262,16 @@ class VideoFrameInterp(object):
vidname + '.mp4')
if os.path.exists(video_pattern_output):
os.remove(video_pattern_output)
frames2video(frame_pattern_combined, video_pattern_output,
r2)
frames2video(frame_pattern_combined, video_pattern_output, r2)
return frame_pattern_combined, video_pattern_output
if __name__ == '__main__':
args = parser.parse_args()
predictor = VideoFrameInterp(args.time_step, args.saved_model,
args.video_path, args.output_path)
predictor = VideoFrameInterp(args.time_step,
args.saved_model,
args.video_path,
args.output_path,
remove_duplicates=args.remove_duplicates)
predictor.run()
import os, sys
import glob
import shutil
import cv2
class AverageMeter(object):
......@@ -44,3 +45,34 @@ def combine_frames(input, interpolated, combined, num_frames):
except Exception as e:
print(e)
print(len(frames2), num_frames, i, k, i * num_frames + k)
def remove_duplicates(paths):
    """Delete PNG frames whose difference hash repeats, then renumber the rest.

    Every ``*.png`` file directly inside ``paths`` is hashed. For each hash
    value only the first file (in sorted-name order) is kept; the others are
    removed from disk. The surviving files are then renamed to consecutive
    ``{:08d}.png`` names starting at 0.

    Args:
        paths: directory that contains the extracted ``*.png`` frames.

    Returns:
        Sorted list of the ``*.png`` paths present in ``paths`` afterwards.
    """
    pattern = os.path.join(paths, '*.png')

    def dhash(image, hash_size=8):
        # Shrink the grayscale image to (hash_size + 1) x hash_size and compare
        # horizontally adjacent pixels; each comparison contributes one bit.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(gray, (hash_size + 1, hash_size))
        diff = resized[:, 1:] > resized[:, :-1]
        value = 0
        for bit, is_set in enumerate(diff.flatten()):
            if is_set:
                value += 2**bit
        return value

    # NOTE(review): grouping is global, so two frames with the same hash are
    # treated as duplicates even when they are far apart in the video --
    # confirm this is intended. ``cv2.imread`` results are not checked either.
    groups = {}
    for image_path in sorted(glob.glob(pattern)):
        frame_hash = dhash(cv2.imread(image_path))
        groups.setdefault(frame_hash, []).append(image_path)

    # Keep the first file of every hash group, drop the remainder.
    for same_hash in groups.values():
        for extra in same_hash[1:]:
            os.remove(extra)

    # Close the numbering gaps left by the deletions.
    for index, old_path in enumerate(sorted(glob.glob(pattern))):
        renamed = os.path.join(paths, '{:08d}'.format(index) + '.png')
        os.rename(old_path, renamed)

    return sorted(glob.glob(pattern))
......@@ -56,8 +56,8 @@ class DictDataLoader():
self.dataset = DictDataset(dataset)
place = paddle.fluid.CUDAPlace(ParallelEnv().dev_id) \
if ParallelEnv().nranks > 1 else paddle.fluid.CUDAPlace(0)
place = paddle.CUDAPlace(ParallelEnv().dev_id) \
if ParallelEnv().nranks > 1 else paddle.CUDAPlace(0)
sampler = DistributedBatchSampler(self.dataset,
batch_size=batch_size,
......
......@@ -11,8 +11,10 @@ from ..datasets.builder import build_dataloader
from ..models.builder import build_model
from ..utils.visual import tensor2img, save_image
from ..utils.filesystem import save, load, makedirs
from ..utils.timer import TimeAverager
from ..metric.psnr_ssim import calculate_psnr, calculate_ssim
class Trainer:
def __init__(self, cfg):
......@@ -51,7 +53,6 @@ class Trainer:
self.time_count = {}
self.best_metric = {}
def distributed_data_parallel(self):
strategy = paddle.distributed.prepare_context()
for name in self.model.model_names:
......@@ -61,29 +62,37 @@ class Trainer:
paddle.DataParallel(net, strategy))
def train(self):
reader_cost_averager = TimeAverager()
batch_cost_averager = TimeAverager()
for epoch in range(self.start_epoch, self.epochs):
self.current_epoch = epoch
start_time = step_start_time = time.time()
for i, data in enumerate(self.train_dataloader):
data_time = time.time()
reader_cost_averager.record(time.time() - step_start_time)
self.batch_id = i
# unpack data from dataset and apply preprocessing
# data input should be dict
self.model.set_input(data)
self.model.optimize_parameters()
self.data_time = data_time - step_start_time
self.step_time = time.time() - step_start_time
batch_cost_averager.record(time.time() - step_start_time)
if i % self.log_interval == 0:
self.data_time = reader_cost_averager.get_average()
self.step_time = batch_cost_averager.get_average()
self.print_log()
reader_cost_averager.reset()
batch_cost_averager.reset()
if i % self.visual_interval == 0:
self.visual('visual_train')
step_start_time = time.time()
self.logger.info('train one epoch time: {}'.format(time.time() -
start_time))
self.logger.info(
'train one epoch time: {}'.format(time.time() - start_time))
if self.validate_interval > -1 and epoch % self.validate_interval:
self.validate()
self.model.lr_scheduler.step()
......@@ -93,7 +102,8 @@ class Trainer:
def validate(self):
if not hasattr(self, 'val_dataloader'):
self.val_dataloader = build_dataloader(self.cfg.dataset.val, is_train=False)
self.val_dataloader = build_dataloader(
self.cfg.dataset.val, is_train=False)
metric_result = {}
......@@ -106,7 +116,7 @@ class Trainer:
visual_results = {}
current_paths = self.model.get_image_paths()
current_visuals = self.model.get_current_visuals()
for j in range(len(current_paths)):
short_path = os.path.basename(current_paths[j])
basename = os.path.splitext(short_path)[0]
......@@ -115,31 +125,43 @@ class Trainer:
visual_results.update({name: img_tensor[j]})
if 'psnr' in self.cfg.validate.metrics:
if 'psnr' not in metric_result:
metric_result['psnr'] = calculate_psnr(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.psnr)
metric_result['psnr'] = calculate_psnr(
tensor2img(current_visuals['output'][j], (0., 1.)),
tensor2img(current_visuals['gt'][j], (0., 1.)),
**self.cfg.validate.metrics.psnr)
else:
metric_result['psnr'] += calculate_psnr(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.psnr)
metric_result['psnr'] += calculate_psnr(
tensor2img(current_visuals['output'][j], (0., 1.)),
tensor2img(current_visuals['gt'][j], (0., 1.)),
**self.cfg.validate.metrics.psnr)
if 'ssim' in self.cfg.validate.metrics:
if 'ssim' not in metric_result:
metric_result['ssim'] = calculate_ssim(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.ssim)
metric_result['ssim'] = calculate_ssim(
tensor2img(current_visuals['output'][j], (0., 1.)),
tensor2img(current_visuals['gt'][j], (0., 1.)),
**self.cfg.validate.metrics.ssim)
else:
metric_result['ssim'] += calculate_ssim(tensor2img(current_visuals['output'][j], (0., 1.)), tensor2img(current_visuals['gt'][j], (0., 1.)), **self.cfg.validate.metrics.ssim)
metric_result['ssim'] += calculate_ssim(
tensor2img(current_visuals['output'][j], (0., 1.)),
tensor2img(current_visuals['gt'][j], (0., 1.)),
**self.cfg.validate.metrics.ssim)
self.visual('visual_val', visual_results=visual_results)
if i % self.log_interval == 0:
self.logger.info('val iter: [%d/%d]' %
(i, len(self.val_dataloader)))
self.logger.info(
'val iter: [%d/%d]' % (i, len(self.val_dataloader)))
for metric_name in metric_result.keys():
metric_result[metric_name] /= len(self.val_dataloader.dataset)
self.logger.info('Epoch {} validate end: {}'.format(self.current_epoch, metric_result))
self.logger.info('Epoch {} validate end: {}'.format(
self.current_epoch, metric_result))
def test(self):
if not hasattr(self, 'test_dataloader'):
self.test_dataloader = build_dataloader(self.cfg.dataset.test,
is_train=False)
self.test_dataloader = build_dataloader(
self.cfg.dataset.test, is_train=False)
# data[0]: img, data[1]: img path index
# test batch size must be 1
......@@ -163,8 +185,8 @@ class Trainer:
self.visual('visual_test', visual_results=visual_results)
if i % self.log_interval == 0:
self.logger.info('Test iter: [%d/%d]' %
(i, len(self.test_dataloader)))
self.logger.info(
'Test iter: [%d/%d]' % (i, len(self.test_dataloader)))
def print_log(self):
losses = self.model.get_current_losses()
......@@ -266,6 +288,7 @@ class Trainer:
for name in self.model.model_names:
if isinstance(name, str):
self.logger.info('laod model {} {} params!'.format(self.cfg.model.name, 'net' + name))
self.logger.info('laod model {} {} params!'.format(
self.cfg.model.name, 'net' + name))
net = getattr(self.model, 'net' + name)
net.set_dict(state_dicts['net' + name])
......@@ -8,3 +8,12 @@ wget https://paddlegan.bj.bcebos.com/InceptionV3.pdparams
```
python test_fid_score.py --image_data_path1 /path/to/dataset1 --image_data_path2 /path/to/dataset2 --inference_model ./InceptionV3.pdparams
```
### Inception-V3 weights converted from torchvision
Download: https://aistudio.baidu.com/aistudio/datasetdetail/51890
This weights file is converted from the official torchvision Inception-v3 model. Both BigGAN and StarGAN-v2 use it to calculate the FID score.
Note that these weights are different from the ones above (which were converted from an unofficial TensorFlow version).
......@@ -16,15 +16,18 @@ import os
import fnmatch
import numpy as np
import cv2
from PIL import Image
from cv2 import imread
from scipy import linalg
import paddle.fluid as fluid
from inception import InceptionV3
from paddle.fluid.dygraph.base import to_variable
def tqdm(x):
return x
try:
from tqdm import tqdm
except:
def tqdm(x):
return x
""" based on https://github.com/mit-han-lab/gan-compression/blob/master/metric/fid_score.py
......@@ -128,7 +131,7 @@ def calculate_fid_given_img(img_fake,
return fid_value
def _get_activations(files, model, batch_size, dims, use_gpu, premodel_path):
def _get_activations(files, model, batch_size, dims, use_gpu, premodel_path, style=None):
if len(files) % batch_size != 0:
print(('Warning: number of images is not a multiple of the '
'batch size. Some samples are going to be ignored.'))
......@@ -144,8 +147,23 @@ def _get_activations(files, model, batch_size, dims, use_gpu, premodel_path):
for i in tqdm(range(n_batches)):
start = i * batch_size
end = start + batch_size
images = np.array(
[imread(str(f)).astype(np.float32) for f in files[start:end]])
# same as stargan-v2 official implementation: resize to 256 first, then resize to 299
if style == 'stargan':
img_list = []
for f in files[start:end]:
im = Image.open(str(f)).convert('RGB')
if im.size[0] != 299:
im = im.resize((256, 256), 2)
im = im.resize((299, 299), 2)
img_list.append(np.array(im).astype('float32'))
images = np.array(
img_list)
else:
images = np.array(
[imread(str(f)).astype(np.float32) for f in files[start:end]])
if len(images.shape) != 4:
images = imread(str(files[start]))
......@@ -155,33 +173,53 @@ def _get_activations(files, model, batch_size, dims, use_gpu, premodel_path):
images = images.transpose((0, 3, 1, 2))
images /= 255
images = to_variable(images)
param_dict, _ = fluid.load_dygraph(premodel_path)
model.set_dict(param_dict)
model.eval()
# imagenet normalization
if style == 'stargan':
mean = np.array([0.485, 0.456, 0.406]).astype('float32')
std = np.array([0.229, 0.224, 0.225]).astype('float32')
images[:] = (images[:] - mean[:, None, None]) / std[:, None, None]
pred = model(images)[0][0].numpy()
if style=='stargan':
pred_arr[start:end] = inception_infer(images, premodel_path)
else:
with fluid.dygraph.guard():
images = to_variable(images)
param_dict, _ = fluid.load_dygraph(premodel_path)
model.set_dict(param_dict)
model.eval()
pred_arr[start:end] = pred.reshape(end - start, -1)
pred = model(images)[0][0].numpy()
pred_arr[start:end] = pred.reshape(end - start, -1)
return pred_arr
def inception_infer(x, model_path):
    """Run the inference model stored at ``model_path`` on ``x``.

    Args:
        x: value fed to the model's first feed target.
        model_path: location handed to ``fluid.io.load_inference_model``.

    Returns:
        The first entry of the list returned by ``Executor.run``.
    """
    # NOTE(review): the executor and the program are rebuilt on every call;
    # callers that invoke this once per batch reload the model each time.
    executor = fluid.Executor()
    program, feed_names, fetch_vars = fluid.io.load_inference_model(
        model_path, executor)
    outputs = executor.run(program,
                           feed={feed_names[0]: x},
                           fetch_list=fetch_vars)
    return outputs[0]
def _calculate_activation_statistics(files,
model,
premodel_path,
batch_size=50,
dims=2048,
use_gpu=False):
use_gpu=False,
style = None):
act = _get_activations(files, model, batch_size, dims, use_gpu,
premodel_path)
premodel_path, style)
mu = np.mean(act, axis=0)
sigma = np.cov(act, rowvar=False)
return mu, sigma
def _compute_statistics_of_path(path, model, batch_size, dims, use_gpu,
premodel_path):
premodel_path, style=None):
if path.endswith('.npz'):
f = np.load(path)
m, s = f['mu'][:], f['sigma'][:]
......@@ -193,7 +231,7 @@ def _compute_statistics_of_path(path, model, batch_size, dims, use_gpu,
filenames, '*.jpg') or fnmatch.filter(filenames, '*.png'):
files.append(os.path.join(root, filename))
m, s = _calculate_activation_statistics(files, model, premodel_path,
batch_size, dims, use_gpu)
batch_size, dims, use_gpu, style)
return m, s
......@@ -202,7 +240,8 @@ def calculate_fid_given_paths(paths,
batch_size,
use_gpu,
dims,
model=None):
model=None,
style = None):
assert os.path.exists(
premodel_path
), 'pretrain_model path {} is not exists! Please download it first'.format(
......@@ -211,14 +250,15 @@ def calculate_fid_given_paths(paths,
if not os.path.exists(p):
raise RuntimeError('Invalid path: %s' % p)
if model is None:
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
model = InceptionV3([block_idx], class_dim=1008)
if model is None and style != 'stargan':
with fluid.dygraph.guard():
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
model = InceptionV3([block_idx], class_dim=1008)
m1, s1 = _compute_statistics_of_path(paths[0], model, batch_size, dims,
use_gpu, premodel_path)
use_gpu, premodel_path, style)
m2, s2 = _compute_statistics_of_path(paths[1], model, batch_size, dims,
use_gpu, premodel_path)
use_gpu, premodel_path, style)
fid_value = _calculate_frechet_distance(m1, s1, m2, s2)
return fid_value
......@@ -38,6 +38,9 @@ def parse_args():
type=int,
default=1,
help='sample number in a batch for inference.')
parser.add_argument('--style',
type=str,
help='calculation style: stargan or default (gan-compression style)')
args = parser.parse_args()
return args
......@@ -50,10 +53,9 @@ def main():
inference_model_path = args.inference_model
batch_size = args.batch_size
with fluid.dygraph.guard():
fid_value = calculate_fid_given_paths(paths, inference_model_path,
batch_size, args.use_gpu, 2048)
print('FID: ', fid_value)
fid_value = calculate_fid_given_paths(paths, inference_model_path,
batch_size, args.use_gpu, 2048, style=args.style)
print('FID: ', fid_value)
if __name__ == "__main__":
......
import paddle
import paddle.nn as nn
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
__all__ = [
'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'
]
def conv3x3(in_planes, out_planes, stride=1):
    """Build a 3x3 convolution with padding 1 and no bias.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    # The function used to contain the same ``return`` twice; the second copy
    # was unreachable, so a single statement is kept.
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=3,
                     stride=stride,
                     padding=1,
                     bias_attr=False)
class BasicBlock(paddle.fluid.Layer):
class BasicBlock(nn.Layer):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
......@@ -44,17 +48,24 @@ class BasicBlock(paddle.fluid.Layer):
return out
class Bottleneck(paddle.fluid.Layer):
class Bottleneck(nn.Layer):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias_attr=False)
self.bn1 = nn.BatchNorm(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias_attr=False)
self.conv2 = nn.Conv2d(planes,
planes,
kernel_size=3,
stride=stride,
padding=1,
bias_attr=False)
self.bn2 = nn.BatchNorm(planes)
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias_attr=False)
self.conv3 = nn.Conv2d(planes,
planes * 4,
kernel_size=1,
bias_attr=False)
self.bn3 = nn.BatchNorm(planes * 4)
self.relu = nn.ReLU()
self.downsample = downsample
......@@ -82,12 +93,15 @@ class Bottleneck(paddle.fluid.Layer):
return out
class ResNet(paddle.fluid.Layer):
class ResNet(nn.Layer):
def __init__(self, block, layers, num_classes=1000):
self.inplanes = 64
super(ResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
self.conv1 = nn.Conv2d(3,
64,
kernel_size=7,
stride=2,
padding=3,
bias_attr=False)
self.bn1 = nn.BatchNorm(64)
self.relu = nn.ReLU()
......@@ -103,8 +117,11 @@ class ResNet(paddle.fluid.Layer):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias_attr=False),
nn.Conv2d(self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias_attr=False),
nn.BatchNorm(planes * block.expansion),
)
......
......@@ -8,6 +8,7 @@ from .discriminators.builder import build_discriminator
from .losses import GANLoss
from ..solver import build_optimizer
from ..modules.init import init_weights
from ..utils.image_pool import ImagePool
......@@ -56,10 +57,14 @@ class CycleGANModel(BaseModel):
# Code (vs. paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X)
self.netG_A = build_generator(opt.model.generator)
self.netG_B = build_generator(opt.model.generator)
init_weights(self.netG_A)
init_weights(self.netG_B)
if self.isTrain: # define discriminators
self.netD_A = build_discriminator(opt.model.discriminator)
self.netD_B = build_discriminator(opt.model.discriminator)
init_weights(self.netD_A)
init_weights(self.netD_B)
if self.isTrain:
if opt.lambda_identity > 0.0: # only works when input and output images have the same number of channels
......
import paddle
import functools
import numpy as np
import paddle.nn as nn
from ...modules.nn import ReflectionPad2d, LeakyReLU, Dropout, BCEWithLogitsLoss, Pad2D, MSELoss
import paddle
import paddle.nn as nn
from ...modules.norm import build_norm_layer
from .builder import DISCRIMINATORS
@DISCRIMINATORS.register()
class NLayerDiscriminator(paddle.fluid.dygraph.Layer):
class NLayerDiscriminator(nn.Layer):
"""Defines a PatchGAN discriminator"""
def __init__(self, input_nc, ndf=64, n_layers=3, norm_type='instance'):
"""Construct a PatchGAN discriminator
......@@ -24,36 +22,51 @@ class NLayerDiscriminator(paddle.fluid.dygraph.Layer):
"""
super(NLayerDiscriminator, self).__init__()
norm_layer = build_norm_layer(norm_type)
if type(norm_layer) == functools.partial:
if type(norm_layer) == functools.partial:
use_bias = norm_layer.func == nn.InstanceNorm
else:
use_bias = norm_layer == nn.InstanceNorm
kw = 4
padw = 1
sequence = [nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), LeakyReLU(0.2, True)]
sequence = [
nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw),
nn.LeakyReLU(0.2)
]
nf_mult = 1
nf_mult_prev = 1
for n in range(1, n_layers):
for n in range(1, n_layers):
nf_mult_prev = nf_mult
nf_mult = min(2 ** n, 8)
nf_mult = min(2**n, 8)
sequence += [
nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, padding=padw, bias_attr=use_bias),
nn.Conv2d(ndf * nf_mult_prev,
ndf * nf_mult,
kernel_size=kw,
stride=2,
padding=padw,
bias_attr=use_bias),
norm_layer(ndf * nf_mult),
LeakyReLU(0.2, True)
nn.LeakyReLU(0.2)
]
nf_mult_prev = nf_mult
nf_mult = min(2 ** n_layers, 8)
nf_mult = min(2**n_layers, 8)
sequence += [
nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, padding=padw, bias_attr=use_bias),
nn.Conv2d(ndf * nf_mult_prev,
ndf * nf_mult,
kernel_size=kw,
stride=1,
padding=padw,
bias_attr=use_bias),
norm_layer(ndf * nf_mult),
LeakyReLU(0.2, True)
nn.LeakyReLU(0.2)
]
sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)]
sequence += [
nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)
]
self.model = nn.Sequential(*sequence)
def forward(self, input):
"""Standard forward."""
return self.model(input)
\ No newline at end of file
return self.model(input)
......@@ -432,8 +432,7 @@ class SelfAttention(nn.Layer):
self.key = conv1d(n_channels, n_channels // 8)
self.value = conv1d(n_channels, n_channels)
self.gamma = self.create_parameter(
shape=[1],
default_initializer=paddle.fluid.initializer.Constant(
shape=[1], default_initializer=paddle.nn.initializer.Constant(
0.0)) #nn.Parameter(tensor([0.]))
def forward(self, x):
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import functools
from ...modules.norm import build_norm_layer
from .builder import GENERATORS
@GENERATORS.register()
class MobileResnetGenerator(nn.Layer):
    """ResNet-style image generator built from separable-convolution blocks.

    The layers are kept in a flat ``nn.LayerList`` and applied in order:
    a reflection-padded 7x7 stem, two stride-2 convolutions that double the
    channel count, ``n_blocks`` ``MobileResnetBlock`` layers, two stride-2
    transposed convolutions that halve the channel count, and a final
    reflection-padded 7x7 convolution followed by ``tanh``.

    Args:
        input_channel: number of channels of the input image.
        output_nc: number of channels of the generated image.
        ngf: number of filters produced by the stem convolution.
        norm_type: name handed to ``build_norm_layer`` to select the norm.
        use_dropout: forwarded to every ``MobileResnetBlock``.
        n_blocks: number of residual blocks.
        padding_type: padding mode forwarded to every ``MobileResnetBlock``.
    """
    def __init__(self,
                 input_channel,
                 output_nc,
                 ngf=64,
                 norm_type='instance',
                 use_dropout=False,
                 n_blocks=9,
                 padding_type='reflect'):
        super(MobileResnetGenerator, self).__init__()

        norm_layer = build_norm_layer(norm_type)
        # Convolutions get a bias only when the norm layer is instance norm.
        # The comparison must use ``nn.InstanceNorm``: the bare name
        # ``InstanceNorm`` is never imported here and raised NameError.
        norm_cls = (norm_layer.func
                    if isinstance(norm_layer, functools.partial) else norm_layer)
        use_bias = norm_cls == nn.InstanceNorm

        # Stem: pad by 3 so the 7x7 convolution keeps the spatial size.
        self.model = nn.LayerList([
            nn.ReflectionPad2d([3, 3, 3, 3]),
            nn.Conv2d(input_channel,
                      int(ngf),
                      kernel_size=7,
                      padding=0,
                      bias_attr=use_bias),
            norm_layer(ngf),
            nn.ReLU()
        ])

        # Downsampling: each step uses stride 2 and doubles the channels.
        n_downsampling = 2
        for i in range(n_downsampling):
            mult = 2**i
            self.model.extend([
                nn.Conv2d(ngf * mult,
                          ngf * mult * 2,
                          kernel_size=3,
                          stride=2,
                          padding=1,
                          bias_attr=use_bias),
                norm_layer(ngf * mult * 2),
                nn.ReLU()
            ])

        # Residual blocks at the downsampled resolution.
        mult = 2**n_downsampling
        for i in range(n_blocks):
            self.model.extend([
                MobileResnetBlock(ngf * mult,
                                  ngf * mult,
                                  padding_type=padding_type,
                                  norm_layer=norm_layer,
                                  use_dropout=use_dropout,
                                  use_bias=use_bias)
            ])

        # Upsampling: each transposed convolution halves the channels.
        for i in range(n_downsampling):
            mult = 2**(n_downsampling - i)
            self.model.extend([
                nn.ConvTranspose2d(ngf * mult,
                                   int(ngf * mult / 2),
                                   kernel_size=3,
                                   stride=2,
                                   padding=1,
                                   output_padding=1,
                                   bias_attr=use_bias),
                norm_layer(int(ngf * mult / 2)),
                nn.ReLU()
            ])

        # Output head.
        self.model.extend([nn.ReflectionPad2d([3, 3, 3, 3])])
        self.model.extend([nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)])
        self.model.extend([nn.Tanh()])

    def forward(self, inputs):
        """Apply every layer of ``self.model`` in order and return the result."""
        y = inputs
        for sublayer in self.model:
            y = sublayer(y)
        return y
class MobileResnetBlock(nn.Layer):
    """Residual block made of two padded separable convolutions.

    Layer order: padding, ``SeparableConv2D`` (``in_c`` -> ``out_c``), norm,
    ReLU, dropout, padding, ``SeparableConv2D`` (``out_c`` -> ``in_c``), norm.
    For ``padding_type='zero'`` no padding layer is added and the
    convolutions pad by 1 themselves. ``forward`` adds the block input to the
    result.

    Args:
        in_c: channels of the block input (and output).
        out_c: channels between the two separable convolutions.
        padding_type: ``'reflect'``, ``'replicate'`` or ``'zero'``.
        norm_layer: callable that builds a norm layer for a channel count.
        use_dropout: stored on the instance (see the note in ``__init__``).
        use_bias: accepted for interface compatibility.

    Raises:
        NotImplementedError: if ``padding_type`` is not one of the three
            supported values.
    """
    def __init__(self, in_c, out_c, padding_type, norm_layer, use_dropout,
                 use_bias):
        super(MobileResnetBlock, self).__init__()
        self.padding_type = padding_type
        # NOTE(review): ``use_dropout`` is only stored -- the dropout layer
        # below is added unconditionally -- and ``use_bias`` is never read.
        # Confirm whether both should be honoured.
        self.use_dropout = use_dropout
        self.conv_block = nn.LayerList([])

        # First half: pad -> separable conv -> norm -> ReLU.
        conv_padding = self._append_padding()
        self.conv_block.extend([
            SeparableConv2D(num_channels=in_c,
                            num_filters=out_c,
                            filter_size=3,
                            padding=conv_padding,
                            stride=1),
            norm_layer(out_c),
            nn.ReLU()
        ])

        self.conv_block.extend([nn.Dropout(0.5)])

        # Second half: pad -> separable conv -> norm (no activation).
        conv_padding = self._append_padding()
        self.conv_block.extend([
            SeparableConv2D(num_channels=out_c,
                            num_filters=in_c,
                            filter_size=3,
                            padding=conv_padding,
                            stride=1),
            norm_layer(in_c)
        ])

    def _append_padding(self):
        """Append the explicit padding layer, if any; return the conv padding."""
        if self.padding_type == 'zero':
            # Zero padding is performed by the convolution itself.
            return 1
        if self.padding_type == 'reflect':
            pad_layer = nn.ReflectionPad2d([1, 1, 1, 1])
        elif self.padding_type == 'replicate':
            pad_layer = nn.ReplicationPad2d([1, 1, 1, 1])
        else:
            raise NotImplementedError('padding [%s] is not implemented' %
                                      self.padding_type)
        self.conv_block.extend([pad_layer])
        return 0

    def forward(self, inputs):
        """Run the conv block on ``inputs`` and add the skip connection."""
        residual = inputs
        for layer in self.conv_block:
            residual = layer(residual)
        return inputs + residual
class SeparableConv2D(nn.Layer):
    """Grouped convolution, norm layer and 1x1 convolution applied in sequence.

    The first convolution uses ``groups=num_channels`` and produces
    ``num_channels * scale_factor`` channels; after the norm layer a 1x1
    convolution maps them to ``num_filters`` channels. Both convolution
    weights are initialised from a normal distribution with mean 0 and
    standard deviation ``stddev``.

    Args:
        num_channels: number of input channels.
        num_filters: number of output channels.
        filter_size: kernel size of the grouped convolution.
        stride: stride of the grouped convolution.
        padding: padding of the grouped convolution.
        norm_layer: callable that builds the norm layer for a channel count.
        use_bias: ``bias_attr`` passed to both convolutions.
        scale_factor: channel multiplier of the grouped convolution.
        stddev: standard deviation of the weight initialiser.
    """

    # The default used to be the bare name ``InstanceNorm``, which is never
    # imported in this module; default arguments are evaluated when the class
    # body runs, so importing the module raised NameError.
    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 padding=0,
                 norm_layer=nn.InstanceNorm,
                 use_bias=True,
                 scale_factor=1,
                 stddev=0.02):
        super(SeparableConv2D, self).__init__()

        self.conv = nn.LayerList([
            nn.Conv2d(in_channels=num_channels,
                      out_channels=num_channels * scale_factor,
                      kernel_size=filter_size,
                      stride=stride,
                      padding=padding,
                      groups=num_channels,
                      weight_attr=paddle.ParamAttr(
                          initializer=nn.initializer.Normal(loc=0.0,
                                                            scale=stddev)),
                      bias_attr=use_bias)
        ])

        self.conv.extend([norm_layer(num_channels * scale_factor)])

        self.conv.extend([
            nn.Conv2d(in_channels=num_channels * scale_factor,
                      out_channels=num_filters,
                      kernel_size=1,
                      stride=1,
                      weight_attr=paddle.ParamAttr(
                          initializer=nn.initializer.Normal(loc=0.0,
                                                            scale=stddev)),
                      bias_attr=use_bias)
        ])

    def forward(self, inputs):
        """Apply the three sub-layers in order and return the result."""
        for sublayer in self.conv:
            inputs = sublayer(inputs)
        return inputs
......@@ -2,43 +2,79 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
class TempConv(nn.Layer):
    """3-D convolution followed by batch norm and an ELU activation.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        kernel_size: 3-tuple kernel size of the convolution.
        stride: 3-tuple stride of the convolution.
        padding: 3-tuple padding of the convolution.
    """

    # ``__init__`` and ``forward`` were each defined twice with identical
    # behaviour; only one definition of each is kept.
    def __init__(self,
                 in_planes,
                 out_planes,
                 kernel_size=(1, 3, 3),
                 stride=(1, 1, 1),
                 padding=(0, 1, 1)):
        super(TempConv, self).__init__()
        self.conv3d = nn.Conv3d(in_planes,
                                out_planes,
                                kernel_size=kernel_size,
                                stride=stride,
                                padding=padding)
        self.bn = nn.BatchNorm(out_planes)

    def forward(self, x):
        """Return ``elu(bn(conv3d(x)))``."""
        return F.elu(self.bn(self.conv3d(x)))
class Upsample(nn.Layer):
    """Trilinear upsampling followed by a 3x3x3 convolution, batch norm and ELU.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        scale_factor: per-axis multipliers applied to the three trailing
            dimensions of the input (the tensor is interpolated with
            ``data_format='NCDHW'``).
    """

    # ``__init__`` and ``forward`` were each defined twice with identical
    # behaviour; only one definition of each is kept.
    def __init__(self, in_planes, out_planes, scale_factor=(1, 2, 2)):
        super(Upsample, self).__init__()
        self.scale_factor = scale_factor
        self.conv3d = nn.Conv3d(in_planes,
                                out_planes,
                                kernel_size=(3, 3, 3),
                                stride=(1, 1, 1),
                                padding=(1, 1, 1))
        self.bn = nn.BatchNorm(out_planes)

    def forward(self, x):
        """Resize ``x`` by ``scale_factor`` and apply conv, norm and ELU."""
        # Build a fresh list instead of assigning into ``x.shape[2:]`` so the
        # computation does not depend on ``shape`` being a mutable list.
        out_size = [
            factor * extent
            for factor, extent in zip(self.scale_factor, x.shape[2:])
        ]
        upsampled = F.interpolate(x,
                                  size=out_size,
                                  mode='trilinear',
                                  align_corners=False,
                                  data_format='NCDHW',
                                  align_mode=0)
        return F.elu(self.bn(self.conv3d(upsampled)))
class UpsampleConcat(nn.Layer):
    """Upsample one feature map, concatenate it with another and convolve.

    Args:
        in_planes_up: channels of the tensor that gets upsampled (``x1``).
        in_planes_flat: channels of the tensor it is concatenated with (``x2``).
        out_planes: number of output channels of the convolution.
    """

    # The class body held duplicated/truncated copies of ``__init__`` and
    # ``forward`` plus a repeated interpolate call; a single definition of
    # each is kept.
    def __init__(self, in_planes_up, in_planes_flat, out_planes):
        super(UpsampleConcat, self).__init__()
        self.conv3d = TempConv(in_planes_up + in_planes_flat,
                               out_planes,
                               kernel_size=(3, 3, 3),
                               stride=(1, 1, 1),
                               padding=(1, 1, 1))

    def forward(self, x1, x2):
        """Scale ``x1`` by (1, 2, 2), concat with ``x2`` on axis 1, convolve."""
        scale_factor = (1, 2, 2)
        # Target size for the three trailing dimensions of ``x1``.
        out_size = [
            factor * extent for factor, extent in zip(scale_factor, x1.shape[2:])
        ]
        x1 = F.interpolate(x1,
                           size=out_size,
                           mode='trilinear',
                           align_corners=False,
                           data_format='NCDHW',
                           align_mode=0)
        x = paddle.concat([x1, x2], axis=1)
        return self.conv3d(x)
class SourceReferenceAttention(paddle.fluid.dygraph.Layer):
class SourceReferenceAttention(nn.Layer):
"""
Source-Reference Attention Layer
"""
......@@ -51,137 +87,166 @@ class SourceReferenceAttention(paddle.fluid.dygraph.Layer):
in_planes_r: int
Number of input reference feature vector channels.
"""
super(SourceReferenceAttention,self).__init__()
super(SourceReferenceAttention, self).__init__()
self.query_conv = nn.Conv3d(in_channels=in_planes_s,
out_channels=in_planes_s//8, kernel_size=1 )
self.key_conv = nn.Conv3d(in_channels=in_planes_r,
out_channels=in_planes_r//8, kernel_size=1 )
out_channels=in_planes_s // 8,
kernel_size=1)
self.key_conv = nn.Conv3d(in_channels=in_planes_r,
out_channels=in_planes_r // 8,
kernel_size=1)
self.value_conv = nn.Conv3d(in_channels=in_planes_r,
out_channels=in_planes_r, kernel_size=1 )
self.gamma = self.create_parameter(shape=[1], dtype=self.query_conv.weight.dtype,
default_initializer=paddle.fluid.initializer.Constant(0.0))
out_channels=in_planes_r,
kernel_size=1)
self.gamma = self.create_parameter(
shape=[1],
dtype=self.query_conv.weight.dtype,
default_initializer=nn.initializer.Constant(0.0))
def forward(self, source, reference):
s_batchsize, sC, sT, sH, sW = source.shape
r_batchsize, rC, rT, rH, rW = reference.shape
proj_query = paddle.reshape(self.query_conv(source), [s_batchsize,-1,sT*sH*sW])
proj_query = paddle.reshape(self.query_conv(source),
[s_batchsize, -1, sT * sH * sW])
proj_query = paddle.transpose(proj_query, [0, 2, 1])
proj_key = paddle.reshape(self.key_conv(reference), [r_batchsize,-1,rT*rW*rH])
energy = paddle.bmm( proj_query, proj_key )
attention = F.softmax(energy)
proj_value = paddle.reshape(self.value_conv(reference), [r_batchsize,-1,rT*rH*rW])
out = paddle.bmm(proj_value,paddle.transpose(attention, [0,2,1]))
out = paddle.reshape(out, [s_batchsize, sC, sT, sH, sW])
out = self.gamma*out + source
proj_key = paddle.reshape(self.key_conv(reference),
[r_batchsize, -1, rT * rW * rH])
energy = paddle.bmm(proj_query, proj_key)
attention = F.softmax(energy)
proj_value = paddle.reshape(self.value_conv(reference),
[r_batchsize, -1, rT * rH * rW])
out = paddle.bmm(proj_value, paddle.transpose(attention, [0, 2, 1]))
out = paddle.reshape(out, [s_batchsize, sC, sT, sH, sW])
out = self.gamma * out + source
return out, attention
class NetworkR(nn.Layer):
    """3-D convolutional network that adds a bounded correction to its input.

    ``forward`` feeds a detached, offset copy of the input through
    ``self.layers``, squashes the result with ``tanh``, adds it to the
    original input and clips the sum to ``[0, 1]``.
    """

    def __init__(self):
        super(NetworkR, self).__init__()
        # Keyword sets shared by several stages; stage order is significant
        # because nn.Sequential names sub-layers by position.
        k3 = (3, 3, 3)
        same = dict(kernel_size=k3, padding=(1, 1, 1))
        halve = dict(kernel_size=k3, stride=(1, 2, 2))

        stages = [
            nn.ReplicationPad3d((1, 1, 1, 1, 1, 1)),
            # The explicit replication pad above replaces this conv's padding.
            TempConv(1, 64, padding=(0, 0, 0), **halve),
            TempConv(64, 128, **same),
            TempConv(128, 128, **same),
            TempConv(128, 256, padding=(1, 1, 1), **halve),
        ]
        stages += [TempConv(256, 256, **same) for _ in range(4)]
        stages += [
            Upsample(256, 128),
            TempConv(128, 64, **same),
            TempConv(64, 64, **same),
            Upsample(64, 16),
            nn.Conv3d(16,
                      1,
                      kernel_size=k3,
                      stride=(1, 1, 1),
                      padding=(1, 1, 1)),
        ]
        self.layers = nn.Sequential(*stages)

    def forward(self, x):
        """Return ``clip(x + tanh(layers(detach(x) - 0.4462414)), 0, 1)``."""
        # The copy fed to the layers is detached, so gradients reach ``x``
        # only through the additive skip term below.
        # NOTE(review): 0.4462414 is presumably an intensity mean used for
        # centering -- confirm against the training code.
        centered = (x * 1).detach() - 0.4462414
        correction = paddle.fluid.layers.tanh(self.layers(centered))
        return paddle.clip(x + correction, 0.0, 1.0)
class NetworkC(nn.Layer):
    """Encoder/decoder 3-D conv network with optional reference attention.

    The input is encoded by ``down1``/``down2``, optionally attended against
    features of ``x_refs`` (``stattn1``/``stattn2``), passed through two
    self-attention stages and decoded by ``up1``..``up4``.  The output is a
    sigmoid-activated tensor with 2 channels (see the last ``nn.Conv3d``).
    """

    def __init__(self):
        super(NetworkC, self).__init__()
        halve = (1, 2, 2)  # stride used by every down-sampling stage
        k3 = dict(kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))

        # Attribute creation order is kept as-is: it fixes the order in which
        # sub-layers (and their parameters) are registered.
        self.down1 = nn.Sequential(
            nn.ReplicationPad3d((1, 1, 1, 1, 0, 0)),
            TempConv(1, 64, stride=halve, padding=(0, 0, 0)),
            TempConv(64, 128),
            TempConv(128, 128),
            TempConv(128, 256, stride=halve),
            TempConv(256, 256),
            TempConv(256, 256),
            TempConv(256, 512, stride=halve),
            TempConv(512, 512),
            TempConv(512, 512),
        )
        self.flat = nn.Sequential(
            TempConv(512, 512),
            TempConv(512, 512),
        )
        self.down2 = nn.Sequential(
            TempConv(512, 512, stride=halve),
            TempConv(512, 512),
        )

        # Source-Reference Attention
        self.stattn1 = SourceReferenceAttention(512, 512)
        # Source-Reference Attention
        self.stattn2 = SourceReferenceAttention(512, 512)

        # Self Attention
        self.selfattn1 = SourceReferenceAttention(512, 512)
        self.conv1 = TempConv(512, 512)
        self.up1 = UpsampleConcat(512, 512, 512)  # 1/8
        # Self Attention
        self.selfattn2 = SourceReferenceAttention(512, 512)
        self.conv2 = TempConv(512, 256, **k3)
        self.up2 = nn.Sequential(
            Upsample(256, 128),  # 1/4
            TempConv(128, 64, **k3),
        )
        self.up3 = nn.Sequential(
            Upsample(64, 32),  # 1/2
            TempConv(32, 16, **k3),
        )
        self.up4 = nn.Sequential(
            Upsample(16, 8),  # 1/1
            nn.Conv3d(8, 2, **k3),
        )

        self.reffeatnet1 = nn.Sequential(
            TempConv(3, 64, stride=halve),
            TempConv(64, 128),
            TempConv(128, 128),
            TempConv(128, 256, stride=halve),
            TempConv(256, 256),
            TempConv(256, 256),
            TempConv(256, 512, stride=halve),
            TempConv(512, 512),
            TempConv(512, 512),
        )
        self.reffeatnet2 = nn.Sequential(
            TempConv(512, 512, stride=halve),
            TempConv(512, 512),
            TempConv(512, 512),
        )

    def forward(self, x, x_refs=None):
        """Run the network; reference attention is applied only with ``x_refs``.

        Args:
            x: input tensor fed to ``down1`` after subtracting 0.4462414.
            x_refs: optional reference tensor laid out as [B,T,C,H,W]; it is
                transposed to [B,C,T,H,W] before feature extraction.

        Returns:
            ``F.sigmoid`` of the ``up4`` output.
        """
        with_refs = x_refs is not None

        x1 = self.down1(x - 0.4462414)
        if with_refs:
            # [B,T,C,H,W] --> [B,C,T,H,W]
            x_refs = paddle.transpose(x_refs, [0, 2, 1, 3, 4])
            reffeat = self.reffeatnet1(x_refs - 0.48)
            x1, _ = self.stattn1(x1, reffeat)

        # x2 is the skip branch consumed by up1; `out` is the deeper branch.
        x2 = self.flat(x1)
        out = self.down2(x1)
        if with_refs:
            reffeat2 = self.reffeatnet2(reffeat)
            out, _ = self.stattn2(out, reffeat2)

        out = self.conv1(out)
        out, _ = self.selfattn1(out, out)
        out = self.up1(out, x2)
        out, _ = self.selfattn2(out, out)
        out = self.conv2(out)
        for decoder_stage in (self.up2, self.up3, self.up4):
            out = decoder_stage(out)
        return F.sigmoid(out)
\ No newline at end of file
class NetworkR(nn.Layer):
    """3-D convolutional network that adds a bounded correction to its input.

    ``forward`` feeds a detached, offset copy of the input through
    ``self.layers``, squashes the result with ``tanh``, adds it to the
    original input and clips the sum to ``[0, 1]``.
    """

    def __init__(self):
        super(NetworkR, self).__init__()
        # Keyword sets shared by several stages; stage order is significant
        # because nn.Sequential names sub-layers by position.
        k3 = (3, 3, 3)
        same = dict(kernel_size=k3, padding=(1, 1, 1))
        halve = dict(kernel_size=k3, stride=(1, 2, 2))

        stages = [
            nn.ReplicationPad3d((1, 1, 1, 1, 1, 1)),
            # The explicit replication pad above replaces this conv's padding.
            TempConv(1, 64, padding=(0, 0, 0), **halve),
            TempConv(64, 128, **same),
            TempConv(128, 128, **same),
            TempConv(128, 256, padding=(1, 1, 1), **halve),
        ]
        stages += [TempConv(256, 256, **same) for _ in range(4)]
        stages += [
            Upsample(256, 128),
            TempConv(128, 64, **same),
            TempConv(64, 64, **same),
            Upsample(64, 16),
            nn.Conv3d(16,
                      1,
                      kernel_size=k3,
                      stride=(1, 1, 1),
                      padding=(1, 1, 1)),
        ]
        self.layers = nn.Sequential(*stages)

    def forward(self, x):
        """Return ``clip(x + tanh(layers(detach(x) - 0.4462414)), 0, 1)``."""
        # The copy fed to the layers is detached, so gradients reach ``x``
        # only through the additive skip term below.
        # NOTE(review): 0.4462414 is presumably an intensity mean used for
        # centering -- confirm against the training code.
        centered = (x * 1).detach() - 0.4462414
        correction = F.tanh(self.layers(centered))
        return paddle.clip(x + correction, 0.0, 1.0)
class NetworkC(nn.Layer):
    """Encoder/decoder 3-D conv network with optional reference attention.

    The input is encoded by ``down1``/``down2``, optionally attended against
    features of ``x_refs`` (``stattn1``/``stattn2``), passed through two
    self-attention stages and decoded by ``up1``..``up4``.  The output is a
    sigmoid-activated tensor with 2 channels (see the last ``nn.Conv3d``).
    """

    def __init__(self):
        super(NetworkC, self).__init__()
        halve = (1, 2, 2)  # stride used by every down-sampling stage
        k3 = dict(kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))

        # Attribute creation order is kept as-is: it fixes the order in which
        # sub-layers (and their parameters) are registered.
        self.down1 = nn.Sequential(
            nn.ReplicationPad3d((1, 1, 1, 1, 0, 0)),
            TempConv(1, 64, stride=halve, padding=(0, 0, 0)),
            TempConv(64, 128),
            TempConv(128, 128),
            TempConv(128, 256, stride=halve),
            TempConv(256, 256),
            TempConv(256, 256),
            TempConv(256, 512, stride=halve),
            TempConv(512, 512),
            TempConv(512, 512),
        )
        self.flat = nn.Sequential(
            TempConv(512, 512),
            TempConv(512, 512),
        )
        self.down2 = nn.Sequential(
            TempConv(512, 512, stride=halve),
            TempConv(512, 512),
        )

        # Source-Reference Attention
        self.stattn1 = SourceReferenceAttention(512, 512)
        # Source-Reference Attention
        self.stattn2 = SourceReferenceAttention(512, 512)

        # Self Attention
        self.selfattn1 = SourceReferenceAttention(512, 512)
        self.conv1 = TempConv(512, 512)
        self.up1 = UpsampleConcat(512, 512, 512)  # 1/8
        # Self Attention
        self.selfattn2 = SourceReferenceAttention(512, 512)
        self.conv2 = TempConv(512, 256, **k3)
        self.up2 = nn.Sequential(
            Upsample(256, 128),  # 1/4
            TempConv(128, 64, **k3),
        )
        self.up3 = nn.Sequential(
            Upsample(64, 32),  # 1/2
            TempConv(32, 16, **k3),
        )
        self.up4 = nn.Sequential(
            Upsample(16, 8),  # 1/1
            nn.Conv3d(8, 2, **k3),
        )

        self.reffeatnet1 = nn.Sequential(
            TempConv(3, 64, stride=halve),
            TempConv(64, 128),
            TempConv(128, 128),
            TempConv(128, 256, stride=halve),
            TempConv(256, 256),
            TempConv(256, 256),
            TempConv(256, 512, stride=halve),
            TempConv(512, 512),
            TempConv(512, 512),
        )
        self.reffeatnet2 = nn.Sequential(
            TempConv(512, 512, stride=halve),
            TempConv(512, 512),
            TempConv(512, 512),
        )

    def forward(self, x, x_refs=None):
        """Run the network; reference attention is applied only with ``x_refs``.

        Args:
            x: input tensor fed to ``down1`` after subtracting 0.4462414.
            x_refs: optional reference tensor laid out as [B,T,C,H,W]; it is
                transposed to [B,C,T,H,W] before feature extraction.

        Returns:
            ``F.sigmoid`` of the ``up4`` output.
        """
        with_refs = x_refs is not None

        x1 = self.down1(x - 0.4462414)
        if with_refs:
            # [B,T,C,H,W] --> [B,C,T,H,W]
            x_refs = paddle.transpose(x_refs, [0, 2, 1, 3, 4])
            reffeat = self.reffeatnet1(x_refs - 0.48)
            x1, _ = self.stattn1(x1, reffeat)

        # x2 is the skip branch consumed by up1; `out` is the deeper branch.
        x2 = self.flat(x1)
        out = self.down2(x1)
        if with_refs:
            reffeat2 = self.reffeatnet2(reffeat)
            out, _ = self.stattn2(out, reffeat2)

        out = self.conv1(out)
        out, _ = self.selfattn1(out, out)
        out = self.up1(out, x2)
        out, _ = self.selfattn2(out, out)
        out = self.conv2(out)
        for decoder_stage in (self.up2, self.up3, self.up4):
            out = decoder_stage(out)
        return F.sigmoid(out)
......@@ -2,20 +2,25 @@ import paddle
import paddle.nn as nn
import functools
from ...modules.nn import ReflectionPad2d, LeakyReLU, Tanh, Dropout, BCEWithLogitsLoss, Pad2D, MSELoss
from ...modules.norm import build_norm_layer
from .builder import GENERATORS
@GENERATORS.register()
class ResnetGenerator(paddle.fluid.dygraph.Layer):
class ResnetGenerator(nn.Layer):
"""Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations.
code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style)
"""
def __init__(self, input_nc, output_nc, ngf=64, norm_type='instance', use_dropout=False, n_blocks=6, padding_type='reflect'):
def __init__(self,
input_nc,
output_nc,
ngf=64,
norm_type='instance',
use_dropout=False,
n_blocks=6,
padding_type='reflect'):
"""Construct a Resnet-based generator
Args:
......@@ -27,7 +32,7 @@ class ResnetGenerator(paddle.fluid.dygraph.Layer):
n_blocks (int) -- the number of ResNet blocks
padding_type (str) -- the name of padding layer in conv layers: reflect | replicate | zero
"""
assert(n_blocks >= 0)
assert (n_blocks >= 0)
super(ResnetGenerator, self).__init__()
norm_layer = build_norm_layer(norm_type)
......@@ -36,35 +41,56 @@ class ResnetGenerator(paddle.fluid.dygraph.Layer):
else:
use_bias = norm_layer == nn.InstanceNorm
model = [ReflectionPad2d(3),
nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias_attr=use_bias),
norm_layer(ngf),
nn.ReLU()]
model = [
nn.ReflectionPad2d([3, 3, 3, 3]),
nn.Conv2d(input_nc,
ngf,
kernel_size=7,
padding=0,
bias_attr=use_bias),
norm_layer(ngf),
nn.ReLU()
]
n_downsampling = 2
for i in range(n_downsampling): # add downsampling layers
mult = 2 ** i
mult = 2**i
model += [
nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias_attr=use_bias),
norm_layer(ngf * mult * 2),
nn.ReLU()]
mult = 2 ** n_downsampling
for i in range(n_blocks): # add ResNet blocks
nn.Conv2d(ngf * mult,
ngf * mult * 2,
kernel_size=3,
stride=2,
padding=1,
bias_attr=use_bias),
norm_layer(ngf * mult * 2),
nn.ReLU()
]
mult = 2**n_downsampling
for i in range(n_blocks): # add ResNet blocks
model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]
model += [
ResnetBlock(ngf * mult,
padding_type=padding_type,
norm_layer=norm_layer,
use_dropout=use_dropout,
use_bias=use_bias)
]
for i in range(n_downsampling): # add upsampling layers
mult = 2 ** (n_downsampling - i)
mult = 2**(n_downsampling - i)
model += [
nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
kernel_size=3, stride=2,
padding=1,
output_padding=1,
bias_attr=use_bias),
norm_layer(int(ngf * mult / 2)),
nn.ReLU()]
model += [ReflectionPad2d(3)]
nn.ConvTranspose2d(ngf * mult,
int(ngf * mult / 2),
kernel_size=3,
stride=2,
padding=1,
output_padding=1,
bias_attr=use_bias),
norm_layer(int(ngf * mult / 2)),
nn.ReLU()
]
model += [nn.ReflectionPad2d([3, 3, 3, 3])]
model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
model += [nn.Tanh()]
......@@ -75,9 +101,8 @@ class ResnetGenerator(paddle.fluid.dygraph.Layer):
return self.model(x)
class ResnetBlock(paddle.fluid.dygraph.Layer):
class ResnetBlock(nn.Layer):
"""Define a Resnet block"""
def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias):
"""Initialize the Resnet block
......@@ -87,9 +112,11 @@ class ResnetBlock(paddle.fluid.dygraph.Layer):
Original Resnet paper: https://arxiv.org/pdf/1512.03385.pdf
"""
super(ResnetBlock, self).__init__()
self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias)
self.conv_block = self.build_conv_block(dim, padding_type, norm_layer,
use_dropout, use_bias)
def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias):
def build_conv_block(self, dim, padding_type, norm_layer, use_dropout,
use_bias):
"""Construct a convolutional block.
Parameters:
......@@ -104,28 +131,37 @@ class ResnetBlock(paddle.fluid.dygraph.Layer):
conv_block = []
p = 0
if padding_type == 'reflect':
conv_block += [ReflectionPad2d(1)]
conv_block += [nn.ReflectionPad2d([1, 1, 1, 1])]
elif padding_type == 'replicate':
conv_block += [ReplicationPad2d(1)]
conv_block += [nn.ReplicationPad2d([1, 1, 1, 1])]
elif padding_type == 'zero':
p = 1
else:
raise NotImplementedError('padding [%s] is not implemented' % padding_type)
conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias), norm_layer(dim), nn.ReLU()]
raise NotImplementedError('padding [%s] is not implemented' %
padding_type)
conv_block += [
nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias),
norm_layer(dim),
nn.ReLU()
]
if use_dropout:
conv_block += [Dropout(0.5)]
conv_block += [nn.Dropout(0.5)]
p = 0
if padding_type == 'reflect':
conv_block += [ReflectionPad2d(1)]
conv_block += [nn.ReflectionPad2d([1, 1, 1, 1])]
elif padding_type == 'replicate':
conv_block += [ReplicationPad2d(1)]
conv_block += [nn.ReplicationPad2d([1, 1, 1, 1])]
elif padding_type == 'zero':
p = 1
else:
raise NotImplementedError('padding [%s] is not implemented' % padding_type)
conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias), norm_layer(dim)]
raise NotImplementedError('padding [%s] is not implemented' %
padding_type)
conv_block += [
nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias),
norm_layer(dim)
]
return nn.Sequential(*conv_block)
......
import functools
import paddle
import paddle.nn as nn
import functools
from ...modules.nn import ReflectionPad2d, LeakyReLU, Tanh, Dropout
from ...modules.norm import build_norm_layer
from .builder import GENERATORS
@GENERATORS.register()
class UnetGenerator(paddle.fluid.dygraph.Layer):
class UnetGenerator(nn.Layer):
"""Create a Unet-based generator"""
def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_type='batch', use_dropout=False):
def __init__(self,
input_nc,
output_nc,
num_downs,
ngf=64,
norm_type='batch',
use_dropout=False):
"""Construct a Unet generator
Args:
input_nc (int) -- the number of channels in input images
......@@ -27,36 +31,64 @@ class UnetGenerator(paddle.fluid.dygraph.Layer):
super(UnetGenerator, self).__init__()
norm_layer = build_norm_layer(norm_type)
# construct unet structure
unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True) # add the innermost layer
for i in range(num_downs - 5): # add intermediate layers with ngf * 8 filters
unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
unet_block = UnetSkipConnectionBlock(
ngf * 8,
ngf * 8,
input_nc=None,
submodule=None,
norm_layer=norm_layer,
innermost=True) # add the innermost layer
for i in range(num_downs -
5): # add intermediate layers with ngf * 8 filters
unet_block = UnetSkipConnectionBlock(ngf * 8,
ngf * 8,
input_nc=None,
submodule=unet_block,
norm_layer=norm_layer,
use_dropout=use_dropout)
# gradually reduce the number of filters from ngf * 8 to ngf
unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer) # add the outermost layer
unet_block = UnetSkipConnectionBlock(ngf * 4,
ngf * 8,
input_nc=None,
submodule=unet_block,
norm_layer=norm_layer)
unet_block = UnetSkipConnectionBlock(ngf * 2,
ngf * 4,
input_nc=None,
submodule=unet_block,
norm_layer=norm_layer)
unet_block = UnetSkipConnectionBlock(ngf,
ngf * 2,
input_nc=None,
submodule=unet_block,
norm_layer=norm_layer)
self.model = UnetSkipConnectionBlock(
output_nc,
ngf,
input_nc=input_nc,
submodule=unet_block,
outermost=True,
norm_layer=norm_layer) # add the outermost layer
def forward(self, input):
"""Standard forward"""
# tmp = self.model._sub_layers['model'][0](input)
# tmp1 = self.model._sub_layers['model'][1](tmp)
# tmp2 = self.model._sub_layers['model'][2](tmp1)
# import pickle
# pickle.dump(tmp2.numpy(), open('/workspace/notebook/align_pix2pix/tmp2-pd.pkl', 'wb'))
# tmp3 = self.model._sub_layers['model'][3](tmp2)
# pickle.dump(tmp3.numpy(), open('/workspace/notebook/align_pix2pix/tmp3-pd.pkl', 'wb'))
# tmp4 = self.model._sub_layers['model'][4](tmp3)
return self.model(input)
class UnetSkipConnectionBlock(paddle.fluid.dygraph.Layer):
class UnetSkipConnectionBlock(nn.Layer):
"""Defines the Unet submodule with skip connection.
X -------------------identity----------------------
|-- downsampling -- |submodule| -- upsampling --|
"""
def __init__(self, outer_nc, inner_nc, input_nc=None,
submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm, use_dropout=False):
def __init__(self,
outer_nc,
inner_nc,
input_nc=None,
submodule=None,
outermost=False,
innermost=False,
norm_layer=nn.BatchNorm,
use_dropout=False):
"""Construct a Unet submodule with skip connections.
Parameters:
......@@ -77,36 +109,48 @@ class UnetSkipConnectionBlock(paddle.fluid.dygraph.Layer):
use_bias = norm_layer == nn.InstanceNorm
if input_nc is None:
input_nc = outer_nc
downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
stride=2, padding=1, bias_attr=use_bias)
downrelu = LeakyReLU(0.2, True)
downconv = nn.Conv2d(input_nc,
inner_nc,
kernel_size=4,
stride=2,
padding=1,
bias_attr=use_bias)
downrelu = nn.LeakyReLU(0.2)
downnorm = norm_layer(inner_nc)
uprelu = nn.ReLU(True)
uprelu = nn.ReLU()
upnorm = norm_layer(outer_nc)
if outermost:
upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
kernel_size=4, stride=2,
upconv = nn.ConvTranspose2d(inner_nc * 2,
outer_nc,
kernel_size=4,
stride=2,
padding=1)
down = [downconv]
up = [uprelu, upconv, Tanh()]
up = [uprelu, upconv, nn.Tanh()]
model = down + [submodule] + up
elif innermost:
upconv = nn.ConvTranspose2d(inner_nc, outer_nc,
kernel_size=4, stride=2,
padding=1, bias_attr=use_bias)
upconv = nn.ConvTranspose2d(inner_nc,
outer_nc,
kernel_size=4,
stride=2,
padding=1,
bias_attr=use_bias)
down = [downrelu, downconv]
up = [uprelu, upconv, upnorm]
model = down + up
else:
upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
kernel_size=4, stride=2,
padding=1, bias_attr=use_bias)
upconv = nn.ConvTranspose2d(inner_nc * 2,
outer_nc,
kernel_size=4,
stride=2,
padding=1,
bias_attr=use_bias)
down = [downrelu, downconv, downnorm]
up = [uprelu, upconv, upnorm]
if use_dropout:
model = down + [submodule] + up + [Dropout(0.5)]
model = down + [submodule] + up + [nn.Dropout(0.5)]
else:
model = down + [submodule] + up
......@@ -115,5 +159,5 @@ class UnetSkipConnectionBlock(paddle.fluid.dygraph.Layer):
def forward(self, x):
if self.outermost:
return self.model(x)
else: # add skip connections
else: # add skip connections
return paddle.concat([x, self.model(x)], 1)
import paddle
import paddle.nn as nn
import numpy as np
from ..modules.nn import BCEWithLogitsLoss
import paddle
import paddle.nn as nn
class GANLoss(paddle.fluid.dygraph.Layer):
class GANLoss(nn.Layer):
"""Define different GAN objectives.
The GANLoss class abstracts away the need to create the target label tensor
that has the same size as the input.
"""
def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0):
""" Initialize the GANLoss class.
......@@ -31,7 +29,7 @@ class GANLoss(paddle.fluid.dygraph.Layer):
if gan_mode == 'lsgan':
self.loss = nn.MSELoss()
elif gan_mode == 'vanilla':
self.loss = BCEWithLogitsLoss()
self.loss = nn.BCEWithLogitsLoss()
elif gan_mode in ['wgangp']:
self.loss = None
else:
......@@ -50,11 +48,17 @@ class GANLoss(paddle.fluid.dygraph.Layer):
if target_is_real:
if not hasattr(self, 'target_real_tensor'):
self.target_real_tensor = paddle.fill_constant(shape=paddle.shape(prediction), value=self.target_real_label, dtype='float32')
self.target_real_tensor = paddle.fill_constant(
shape=paddle.shape(prediction),
value=self.target_real_label,
dtype='float32')
target_tensor = self.target_real_tensor
else:
if not hasattr(self, 'target_fake_tensor'):
self.target_fake_tensor = paddle.fill_constant(shape=paddle.shape(prediction), value=self.target_fake_label, dtype='float32')
self.target_fake_tensor = paddle.fill_constant(
shape=paddle.shape(prediction),
value=self.target_fake_label,
dtype='float32')
target_tensor = self.target_fake_tensor
# target_tensor.stop_gradient = True
......@@ -78,4 +82,4 @@ class GANLoss(paddle.fluid.dygraph.Layer):
loss = -prediction.mean()
else:
loss = prediction.mean()
return loss
\ No newline at end of file
return loss
......@@ -8,6 +8,7 @@ from .discriminators.builder import build_discriminator
from .losses import GANLoss
from ..solver import build_optimizer
from ..modules.init import init_weights
from ..utils.image_pool import ImagePool
......@@ -42,12 +43,15 @@ class Pix2PixModel(BaseModel):
# define networks (both generator and discriminator)
self.netG = build_generator(opt.model.generator)
init_weights(self.netG)
# define a discriminator; conditional GANs need to take both input and output images; Therefore, #channels for D is input_nc + output_nc
if self.isTrain:
self.netD = build_discriminator(opt.model.discriminator)
init_weights(self.netD)
if self.isTrain:
self.losses = {}
# define loss functions
self.criterionGAN = GANLoss(opt.model.gan_mode)
self.criterionL1 = paddle.nn.L1Loss()
......@@ -79,6 +83,7 @@ class Pix2PixModel(BaseModel):
AtoB = self.opt.dataset.train.direction == 'AtoB'
self.real_A = paddle.to_variable(input['A' if AtoB else 'B'])
self.real_B = paddle.to_variable(input['B' if AtoB else 'A'])
self.image_paths = input['A_paths' if AtoB else 'B_paths']
def forward(self):
......@@ -118,6 +123,7 @@ class Pix2PixModel(BaseModel):
# Second, G(A) = B
self.loss_G_L1 = self.criterionL1(self.fake_B,
self.real_B) * self.opt.lambda_L1
# combine loss and calculate gradients
self.loss_G = self.loss_G_GAN + self.loss_G_L1
......
import math
import numpy as np
import paddle
def _calculate_fan_in_and_fan_out(tensor):
dimensions = len(tensor.shape)
if dimensions < 2:
raise ValueError(
"Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
)
num_input_fmaps = tensor.shape[1]
num_output_fmaps = tensor.shape[0]
receptive_field_size = 1
if len(tensor.shape) > 2:
receptive_field_size = paddle.numel(tensor[0][0])
fan_in = num_input_fmaps * receptive_field_size
fan_out = num_output_fmaps * receptive_field_size
return fan_in, fan_out
def _calculate_correct_fan(tensor, mode):
mode = mode.lower()
valid_modes = ['fan_in', 'fan_out']
if mode not in valid_modes:
raise ValueError("Mode {} not supported, please use one of {}".format(
mode, valid_modes))
fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
return fan_in if mode == 'fan_in' else fan_out
def calculate_gain(nonlinearity, param=None):
    r"""Return the recommended gain value for the given nonlinearity function.
    The values are as follows:

    ================= ====================================================
    nonlinearity      gain
    ================= ====================================================
    Linear / Identity :math:`1`
    Conv{1,2,3}D      :math:`1`
    Sigmoid           :math:`1`
    Tanh              :math:`\frac{5}{3}`
    ReLU              :math:`\sqrt{2}`
    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
    ================= ====================================================

    Args:
        nonlinearity: the non-linear function (`nn.functional` name)
        param: optional parameter for the non-linear function; only used as
            the negative slope of ``'leaky_relu'`` (0.01 when omitted)

    Raises:
        ValueError: if ``nonlinearity`` is not supported, or if ``param`` is
            not an int/float (bools are rejected) for ``'leaky_relu'``.
    """
    linear_fns = [
        'linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d',
        'conv_transpose2d', 'conv_transpose3d'
    ]
    if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
        return 1
    elif nonlinearity == 'tanh':
        return 5.0 / 3
    elif nonlinearity == 'relu':
        return math.sqrt(2.0)
    elif nonlinearity == 'leaky_relu':
        if param is None:
            negative_slope = 0.01
        elif (not isinstance(param, bool)
              and isinstance(param, int)) or isinstance(param, float):
            # True/False are instances of int, hence the explicit bool check
            negative_slope = param
        else:
            raise ValueError(
                "negative_slope {} not a valid number".format(param))
        return math.sqrt(2.0 / (1 + negative_slope**2))
    else:
        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
@paddle.no_grad()
def constant_(x, value):
    """Overwrite ``x`` in place with ``value`` everywhere and return ``x``.

    The fill tensor is built with ``x``'s own shape and dtype and copied into
    ``x`` via ``set_value``; gradient tracking is disabled by the decorator.
    """
    x.set_value(paddle.fill_constant(x.shape, x.dtype, value))
    return x
@paddle.no_grad()
def normal_(x, mean=0., std=1.):
    """Overwrite ``x`` in place with samples from ``paddle.normal`` and return it.

    Args:
        x: tensor to fill; only its shape is used to size the sample.
        mean: mean passed to ``paddle.normal``.
        std: standard deviation passed to ``paddle.normal``.
    """
    x.set_value(paddle.normal(mean, std, shape=x.shape))
    return x
@paddle.no_grad()
def uniform_(x, a=-1., b=1.):
    """Overwrite ``x`` in place with samples from ``paddle.uniform`` and return it.

    Args:
        x: tensor to fill; only its shape is used to size the sample.
        a: lower bound, passed as ``min``.
        b: upper bound, passed as ``max``.
    """
    x.set_value(paddle.uniform(min=a, max=b, shape=x.shape))
    return x
@paddle.no_grad()
def xavier_uniform_(x, gain=1.):
    r"""Fills the input `Tensor` with values according to the method
    described in `Understanding the difficulty of training deep feedforward
    neural networks` - Glorot, X. & Bengio, Y. (2010), using a uniform
    distribution. The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-a, a)` where

    .. math::
        a = \text{gain} \times \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        x: an n-dimensional `paddle.Tensor`
        gain: an optional scaling factor

    Returns:
        ``x``, filled in place.
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(x)
    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
    # Calculate uniform bounds from standard deviation
    a = math.sqrt(3.0) * std

    return uniform_(x, -a, a)
@paddle.no_grad()
def xavier_normal_(x, gain=1.):
    r"""Fills the input `Tensor` with values according to the method
    described in `Understanding the difficulty of training deep feedforward
    neural networks` - Glorot, X. & Bengio, Y. (2010), using a normal
    distribution. The resulting tensor will have values sampled from
    :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}

    Also known as Glorot initialization.

    Args:
        x: an n-dimensional `paddle.Tensor`
        gain: an optional scaling factor

    Returns:
        ``x``, filled in place.
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(x)
    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))

    return normal_(x, 0., std)
@paddle.no_grad()
def kaiming_uniform_(x, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    r"""Fills the input `Tensor` with values according to the method
    described in `Delving deep into rectifiers: Surpassing human-level
    performance on ImageNet classification` - He, K. et al. (2015), using a
    uniform distribution. The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where

    .. math::
        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        x: an n-dimensional `paddle.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).

    Returns:
        ``x``, filled in place.

    Raises:
        ValueError: propagated from ``_calculate_correct_fan`` /
            ``calculate_gain`` for an unsupported ``mode`` or ``nonlinearity``.
    """
    fan = _calculate_correct_fan(x, mode)
    gain = calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    # Calculate uniform bounds from standard deviation
    bound = math.sqrt(3.0) * std

    # Same fill as the other uniform initialisers in this module.
    return uniform_(x, -bound, bound)
@paddle.no_grad()
def kaiming_normal_(x, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    r"""Fills the input `Tensor` with values according to the method
    described in `Delving deep into rectifiers: Surpassing human-level
    performance on ImageNet classification` - He, K. et al. (2015), using a
    normal distribution. The resulting tensor will have values sampled from
    :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        x: an n-dimensional `paddle.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).

    Returns:
        ``x``, filled in place.

    Raises:
        ValueError: propagated from ``_calculate_correct_fan`` /
            ``calculate_gain`` for an unsupported ``mode`` or ``nonlinearity``.
    """
    fan = _calculate_correct_fan(x, mode)
    gain = calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)

    # Same fill as the other normal initialisers in this module.
    return normal_(x, 0, std)
def constant_init(layer, val, bias=0):
    """Fill ``layer.weight`` with ``val`` and ``layer.bias`` with ``bias``.

    Either parameter is skipped when the attribute is missing or ``None``.
    """
    targets = (('weight', val), ('bias', bias))
    for attr_name, fill_value in targets:
        param = getattr(layer, attr_name, None)
        if param is not None:
            constant_(param, fill_value)
def xavier_init(layer, gain=1, bias=0, distribution='normal'):
    """Xavier-initialise ``layer.weight`` and set ``layer.bias`` to a constant.

    Args:
        layer: object with a ``weight`` parameter and optionally a ``bias``.
        gain: scaling factor forwarded to the Xavier fill.
        bias: constant written to ``layer.bias`` when it exists and is not None.
        distribution (str): ``'uniform'`` or ``'normal'``.
    """
    assert distribution in ['uniform', 'normal']
    fill = xavier_uniform_ if distribution == 'uniform' else xavier_normal_
    fill(layer.weight, gain=gain)

    if getattr(layer, 'bias', None) is not None:
        constant_(layer.bias, bias)
def normal_init(layer, mean=0, std=1, bias=0):
    """Fill ``layer.weight`` from a normal distribution and set the bias.

    ``layer.bias`` is set to the constant ``bias`` only when the attribute
    exists and is not ``None``.
    """
    normal_(layer.weight, mean, std)

    bias_param = getattr(layer, 'bias', None)
    if bias_param is not None:
        constant_(bias_param, bias)
def uniform_init(layer, a=0, b=1, bias=0):
    """Fill ``layer.weight`` uniformly between ``a`` and ``b`` and set the bias.

    ``layer.bias`` is set to the constant ``bias`` only when the attribute
    exists and is not ``None``.
    """
    uniform_(layer.weight, a, b)

    bias_param = getattr(layer, 'bias', None)
    if bias_param is not None:
        constant_(bias_param, bias)
def kaiming_init(layer,
                 a=0,
                 mode='fan_out',
                 nonlinearity='relu',
                 bias=0,
                 distribution='normal'):
    """Kaiming-initialise ``layer.weight`` and set ``layer.bias`` to a constant.

    Args:
        layer: object with a ``weight`` parameter and optionally a ``bias``.
        a: negative slope forwarded to the Kaiming fill.
        mode (str): ``'fan_in'`` or ``'fan_out'``, forwarded to the fill.
        nonlinearity (str): forwarded to the fill.
        bias: constant written to ``layer.bias`` when it exists and is not None.
        distribution (str): ``'uniform'`` or ``'normal'``.
    """
    assert distribution in ['uniform', 'normal']
    fill = kaiming_uniform_ if distribution == 'uniform' else kaiming_normal_
    fill(layer.weight, a=a, mode=mode, nonlinearity=nonlinearity)

    if getattr(layer, 'bias', None) is not None:
        constant_(layer.bias, bias)
def init_weights(net, init_type='normal', init_gain=0.02):
    """Initialize network weights.

    Conv*/Linear* layers (matched by class name) get their weight filled
    according to ``init_type`` and their bias, if any, zeroed.  BatchNorm*
    layers get weight ~ N(1, init_gain) and bias 0; a BatchNorm parameter
    that is missing or ``None`` is skipped.

    Args:
        net (nn.Layer): network to be initialized; only ``net.apply`` is used
        init_type (str): the name of an initialization method: normal | xavier | kaiming
        init_gain (float): scaling factor for normal and xavier.

    We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and kaiming might
    work better for some applications. Feel free to try yourself.

    Raises:
        NotImplementedError: when ``init_type`` is not one of the supported
            names; raised lazily, on the first Conv/Linear layer visited.
    """
    def init_func(m):  # define the initialization function
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and (classname.find('Conv') != -1
                                     or classname.find('Linear') != -1):
            if init_type == 'normal':
                normal_(m.weight, 0.0, init_gain)
            elif init_type == 'xavier':
                xavier_normal_(m.weight, gain=init_gain)
            elif init_type == 'kaiming':
                kaiming_normal_(m.weight, a=0, mode='fan_in')
            else:
                raise NotImplementedError(
                    'initialization method [%s] is not implemented' % init_type)
            if hasattr(m, 'bias') and m.bias is not None:
                constant_(m.bias, 0.0)
        elif classname.find('BatchNorm') != -1:
            # BatchNorm Layer's weight is not a matrix; only normal distribution applies.
            # Either parameter may be absent/None, so guard each one.
            if getattr(m, 'weight', None) is not None:
                normal_(m.weight, 1.0, init_gain)
            if getattr(m, 'bias', None) is not None:
                constant_(m.bias, 0.0)

    print('initialize network with %s' % init_type)
    net.apply(init_func)  # apply the initialization function <init_func>
import paddle
import paddle.nn as nn
from paddle.fluid.dygraph import Layer
from paddle import fluid
class MSELoss():
    """Callable wrapper that forwards to ``fluid.layers.mse_loss``."""

    def __init__(self):
        # Stateless: nothing to configure.
        pass

    def __call__(self, prediction, label):
        """Return ``fluid.layers.mse_loss(prediction, label)``."""
        loss = fluid.layers.mse_loss(prediction, label)
        return loss
class L1Loss():
    """Callable returning the mean of ``|prediction - label|``."""

    def __init__(self):
        # Stateless: nothing to configure.
        pass

    def __call__(self, prediction, label):
        """Subtract element-wise with an ``abs`` activation, then ``reduce_mean``."""
        abs_diff = fluid.layers.elementwise_sub(prediction, label, act='abs')
        return fluid.layers.reduce_mean(abs_diff)
class ReflectionPad2d(Layer):
    """Layer applying ``fluid.layers.pad2d`` in reflect mode, same size on all sides."""

    def __init__(self, size):
        super(ReflectionPad2d, self).__init__()
        # One padding amount, replicated for the four pad2d entries in forward.
        self.size = size

    def forward(self, x):
        """Return ``x`` reflect-padded by ``self.size`` on each of its four sides."""
        paddings = [self.size] * 4
        return fluid.layers.pad2d(x, paddings, mode="reflect")
class LeakyReLU(Layer):
    """Layer wrapper around ``fluid.layers.leaky_relu`` with a fixed ``alpha``."""

    def __init__(self, alpha, inplace=False):
        # ``inplace`` is accepted but never read by this class.
        super(LeakyReLU, self).__init__()
        self.alpha = alpha

    def forward(self, x):
        """Return ``fluid.layers.leaky_relu(x, self.alpha)``."""
        activated = fluid.layers.leaky_relu(x, self.alpha)
        return activated
class Tanh(Layer):
    """Layer wrapper around ``fluid.layers.tanh``."""

    def __init__(self):
        super(Tanh, self).__init__()

    def forward(self, x):
        """Return ``fluid.layers.tanh(x)``."""
        activated = fluid.layers.tanh(x)
        return activated
class Dropout(Layer):
    """Layer wrapper around ``fluid.layers.dropout``.

    Args:
        prob: dropout probability forwarded to ``fluid.layers.dropout``.
        mode: value forwarded as ``dropout_implementation``.
    """

    def __init__(self, prob, mode='upscale_in_train'):
        super(Dropout, self).__init__()
        self.prob, self.mode = prob, mode

    def forward(self, x):
        """Apply dropout to ``x`` with the configured probability and mode."""
        return fluid.layers.dropout(x,
                                    self.prob,
                                    dropout_implementation=self.mode)
class BCEWithLogitsLoss():
    """Sigmoid cross-entropy on logits with a configurable reduction.

    Args:
        weight: stored on the instance; not used by ``__call__``.
        reduction (str): ``'mean'`` (default) or ``'sum'``; any other value
            returns the unreduced element-wise loss.
    """

    def __init__(self, weight=None, reduction='mean'):
        self.weight = weight
        # Honour the caller's choice instead of always forcing 'mean'.
        self.reduction = reduction

    def __call__(self, x, label):
        """Return the loss of logits ``x`` against ``label``, reduced per ``self.reduction``."""
        out = paddle.fluid.layers.sigmoid_cross_entropy_with_logits(x, label)
        if self.reduction == 'sum':
            return fluid.layers.reduce_sum(out)
        elif self.reduction == 'mean':
            return fluid.layers.reduce_mean(out)
        else:
            return out
class _SpectralNorm(nn.SpectralNorm):
    """``nn.SpectralNorm`` variant that skips power iterations outside training.

    The constructor arguments are forwarded unchanged to ``nn.SpectralNorm``.
    """

    def __init__(self,
                 weight_shape,
                 dim=0,
                 power_iters=1,
                 eps=1e-12,
                 dtype='float32'):
        super(_SpectralNorm, self).__init__(weight_shape, dim, power_iters, eps,
                                            dtype)

    def forward(self, weight):
        """Append a single ``spectral_norm`` op for ``weight`` and return its output.

        Args:
            weight: variable to normalize; its dtype is checked to be
                ``float32`` or ``float64``.
        """
        paddle.fluid.data_feeder.check_variable_and_dtype(
            weight, "weight", ['float32', 'float64'], 'SpectralNorm')
        inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v}
        out = self._helper.create_variable_for_type_inference(self._dtype)
        # Run power iterations only while training; pass 0 otherwise.
        _power_iters = self._power_iters if self.training else 0
        # The op must be appended exactly once per forward call.
        self._helper.append_op(type="spectral_norm",
                               inputs=inputs,
                               outputs={
                                   "Out": out,
                               },
                               attrs={
                                   "dim": self._dim,
                                   "power_iters": _power_iters,
                                   "eps": self._eps,
                               })

        return out
class Spectralnorm(paddle.nn.Layer):
    """Wrap ``layer`` so that its weight is spectrally normalized on each call.

    On construction the wrapped layer's ``weight`` entry is removed from its
    ``_parameters`` dict and a parameter of the same shape and dtype, set to
    the same value, is created on this wrapper as ``weight_orig``.

    Args:
        layer: layer exposing a ``weight`` parameter.
        dim, power_iters, eps, dtype: forwarded to ``_SpectralNorm``.
    """

    def __init__(self, layer, dim=0, power_iters=1, eps=1e-12, dtype='float32'):
        super(Spectralnorm, self).__init__()
        self.spectral_norm = _SpectralNorm(layer.weight.shape, dim, power_iters,
                                           eps, dtype)
        self.dim = dim
        self.power_iters = power_iters
        self.eps = eps
        self.layer = layer
        # Move the raw weight from the wrapped layer onto this wrapper.
        weight = layer._parameters['weight']
        del layer._parameters['weight']
        self.weight_orig = self.create_parameter(weight.shape,
                                                 dtype=weight.dtype)
        self.weight_orig.set_value(weight)

    def forward(self, x):
        """Normalize ``weight_orig``, assign it to the layer, and run the layer on ``x``."""
        weight = self.spectral_norm(self.weight_orig)
        self.layer.weight = weight
        out = self.layer(x)
        return out
def initial_type(input,
                 op_type,
                 fan_out,
                 init="normal",
                 use_bias=False,
                 filter_size=0,
                 stddev=0.02,
                 name=None):
    """Build the weight and bias ``ParamAttr`` for a layer.

    Args:
        input: object with a ``shape`` attribute; ``shape[1]`` (and, for the
            fallback case with more than two dimensions, ``shape[2]`` and
            ``shape[3]``) are used to compute ``fan_in``.
        op_type: ``'conv'`` or ``'deconv'``; any other value uses the
            fallback ``fan_in`` derived from ``input.shape`` alone.
        fan_out: used for ``fan_in`` when ``op_type == 'deconv'``.
        init: ``"kaiming"`` selects ``Uniform(-bound, bound)`` with
            ``bound = 1 / sqrt(fan_in)``; any other value selects a normal
            initializer (``loc=0.0``, ``scale=stddev``) for the weight and a
            constant ``0.0`` for the bias.
        use_bias: when truthy a bias ``ParamAttr`` is returned, otherwise
            ``False`` is returned in its place.
        filter_size: kernel size used for ``fan_in`` with conv/deconv.
        stddev: scale of the normal initializer.
        name: accepted but not used.

    Returns:
        A ``(param_attr, bias_attr)`` tuple; ``bias_attr`` is ``False`` when
        no bias is requested.

    Raises:
        ZeroDivisionError: with ``init="kaiming"`` when ``fan_in`` is 0, for
            example conv/deconv with the default ``filter_size=0``.
    """
    # Imported here because ``math`` is not among the module-level imports
    # visible for this file.
    import math

    if init == "kaiming":
        if op_type == 'conv':
            fan_in = input.shape[1] * filter_size * filter_size
        elif op_type == 'deconv':
            fan_in = fan_out * filter_size * filter_size
        elif len(input.shape) > 2:
            fan_in = input.shape[1] * input.shape[2] * input.shape[3]
        else:
            fan_in = input.shape[1]
        bound = 1 / math.sqrt(fan_in)

        def make_weight_init():
            return fluid.initializer.Uniform(low=-bound, high=bound)

        def make_bias_init():
            return fluid.initializer.Uniform(low=-bound, high=bound)
    else:

        def make_weight_init():
            return fluid.initializer.NormalInitializer(loc=0.0, scale=stddev)

        def make_bias_init():
            return fluid.initializer.Constant(0.0)

    param_attr = fluid.ParamAttr(initializer=make_weight_init())
    # The bias initializer is only constructed when a bias is requested.
    if use_bias:
        bias_attr = fluid.ParamAttr(initializer=make_bias_init())
    else:
        bias_attr = False
    return param_attr, bias_attr
class Pad2D(fluid.dygraph.Layer):
    """Layer wrapper around ``fluid.layers.pad2d``.

    Args:
        paddings: forwarded as the ``paddings`` argument of ``pad2d``.
        mode: forwarded as the ``mode`` argument of ``pad2d``.
        pad_value: forwarded as the ``pad_value`` argument of ``pad2d``.
    """

    def __init__(self, paddings, mode, pad_value=0.0):
        super(Pad2D, self).__init__()
        self.paddings = paddings
        self.mode = mode
        # Previously accepted but dropped, so a non-default value had no effect.
        self.pad_value = pad_value

    def forward(self, x):
        """Return ``x`` padded according to the stored settings."""
        return fluid.layers.pad2d(x,
                                  self.paddings,
                                  mode=self.mode,
                                  pad_value=self.pad_value)
\ No newline at end of file
......@@ -3,7 +3,7 @@ import functools
import paddle.nn as nn
class Identity(nn.Layer):
    """Layer that returns its input unchanged."""

    def forward(self, x):
        """Return ``x`` as-is."""
        return x
......@@ -18,11 +18,28 @@ def build_norm_layer(norm_type='instance'):
For InstanceNorm, we do not use learnable affine parameters. We do not track running statistics.
"""
if norm_type == 'batch':
norm_layer = functools.partial(nn.BatchNorm, param_attr=paddle.ParamAttr(initializer=paddle.fluid.initializer.NormalInitializer(1.0, 0.02)), bias_attr=paddle.ParamAttr(initializer=paddle.fluid.initializer.Constant(0.0)), trainable_statistics=True)
norm_layer = functools.partial(
nn.BatchNorm,
param_attr=paddle.ParamAttr(
initializer=nn.initializer.Normal(1.0, 0.02)),
bias_attr=paddle.ParamAttr(
initializer=nn.initializer.Constant(0.0)),
trainable_statistics=True)
elif norm_type == 'instance':
norm_layer = functools.partial(nn.InstanceNorm, param_attr=paddle.ParamAttr(initializer=paddle.fluid.initializer.Constant(1.0), learning_rate=0.0, trainable=False), bias_attr=paddle.ParamAttr(initializer=paddle.fluid.initializer.Constant(0.0), learning_rate=0.0, trainable=False))
norm_layer = functools.partial(
nn.InstanceNorm,
param_attr=paddle.ParamAttr(
initializer=nn.initializer.Constant(1.0),
learning_rate=0.0,
trainable=False),
bias_attr=paddle.ParamAttr(initializer=nn.initializer.Constant(0.0),
learning_rate=0.0,
trainable=False))
elif norm_type == 'none':
def norm_layer(x): return Identity()
def norm_layer(x):
return Identity()
else:
raise NotImplementedError('normalization layer [%s] is not found' % norm_type)
return norm_layer
\ No newline at end of file
raise NotImplementedError('normalization layer [%s] is not found' %
norm_type)
return norm_layer
......@@ -19,6 +19,6 @@ def setup(args, cfg):
logger.info('Configs: {}'.format(cfg))
place = paddle.fluid.CUDAPlace(ParallelEnv().dev_id) \
if ParallelEnv().nranks > 1 else paddle.fluid.CUDAPlace(0)
place = paddle.CUDAPlace(ParallelEnv().dev_id) \
if ParallelEnv().nranks > 1 else paddle.CUDAPlace(0)
paddle.disable_static(place)
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
class TimeAverager(object):
    """Accumulate elapsed-time samples and report their arithmetic mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Discard every recorded sample."""
        self._cnt, self._total_time = 0, 0

    def record(self, usetime):
        """Count one more sample and add ``usetime`` to the running total."""
        self._cnt += 1
        self._total_time += usetime

    def get_average(self):
        """Return the mean of the recorded samples, or 0 if there are none."""
        if self._cnt != 0:
            return self._total_time / self._cnt
        return 0
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册