Commit c1bd1a2a authored by: LielinJiang

fix conflict

@@ -90,4 +90,7 @@ parser.add_argument('--use_cuda',
                    type=bool,
                    help='use cuda or not')
parser.add_argument('--use_cudnn', default=1, type=int, help='use cudnn or not')
parser.add_argument('--remove_duplicates',
                    default=True,
                    type=bool,
                    help='remove duplicate frames or not')
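One behavior of the new flag worth noting: argparse's `type=bool` converts with `bool(str)`, so only an empty string maps to False. A minimal standalone sketch (illustrative, not part of this commit):

```python
# Illustrative only: any non-empty string passed on the command line yields True.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--remove_duplicates', default=True, type=bool)

print(parser.parse_args([]).remove_duplicates)                                 # True (default)
print(parser.parse_args(['--remove_duplicates', 'False']).remove_duplicates)   # still True
```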
@@ -80,7 +80,8 @@ class VideoFrameInterp(object):
                 video_path,
                 use_gpu=True,
                 key_frame_thread=0.,
                 output_path='output',
                 remove_duplicates=True):
        self.video_path = video_path
        self.output_path = os.path.join(output_path, 'DAIN')
        if model_path is None:
@@ -138,6 +139,8 @@ class VideoFrameInterp(object):
        end = time.time()
        frames = sorted(glob.glob(os.path.join(out_path, '*.png')))
        if remove_duplicates:
            frames = remove_duplicates(out_path)

        img = imread(frames[0])
@@ -199,58 +202,51 @@ class VideoFrameInterp(object):
            X0 = img_first.astype('float32').transpose((2, 0, 1)) / 255
            X1 = img_second.astype('float32').transpose((2, 0, 1)) / 255

            assert (X0.shape[1] == X1.shape[1])
            assert (X0.shape[2] == X1.shape[2])

            X0 = np.pad(X0, ((0,0), (padding_top, padding_bottom), \
                (padding_left, padding_right)), mode='edge')
            X1 = np.pad(X1, ((0,0), (padding_top, padding_bottom), \
                (padding_left, padding_right)), mode='edge')

            X0 = np.expand_dims(X0, axis=0)
            X1 = np.expand_dims(X1, axis=0)

            X0 = np.expand_dims(X0, axis=0)
            X1 = np.expand_dims(X1, axis=0)

            X = np.concatenate((X0, X1), axis=0)

            proc_end = time.time()
            o = self.exe.run(self.program,
                             fetch_list=self.fetch_targets,
                             feed={"image": X})

            y_ = o[0]

            proc_timer.update(time.time() - proc_end)
            tot_timer.update(time.time() - end)
            end = time.time()

            y_ = [
                np.transpose(
                    255.0 * item.clip(
                        0, 1.0)[0, :, padding_top:padding_top + int_height,
                                padding_left:padding_left + int_width],
                    (1, 2, 0)) for item in y_
            ]
            time_offsets = [
                kk * timestep for kk in range(1, 1 + num_frames, 1)
            ]

            count = 1
            for item, time_offset in zip(y_, time_offsets):
                out_dir = os.path.join(
                    frame_path_interpolated, vidname,
                    "{:0>6d}_{:0>4d}.png".format(i, count))
                count = count + 1
                imsave(out_dir, np.round(item).astype(np.uint8))

        num_frames = int(1.0 / timestep) - 1
@@ -266,14 +262,16 @@ class VideoFrameInterp(object):
                                            vidname + '.mp4')
        if os.path.exists(video_pattern_output):
            os.remove(video_pattern_output)
        frames2video(frame_pattern_combined, video_pattern_output, r2)

        return frame_pattern_combined, video_pattern_output


if __name__ == '__main__':
    args = parser.parse_args()
    predictor = VideoFrameInterp(args.time_step,
                                 args.saved_model,
                                 args.video_path,
                                 args.output_path,
                                 remove_duplicates=args.remove_duplicates)
    predictor.run()
import os, sys
import glob
import shutil
import cv2


class AverageMeter(object):
@@ -44,3 +45,34 @@ def combine_frames(input, interpolated, combined, num_frames):
        except Exception as e:
            print(e)
            print(len(frames2), num_frames, i, k, i * num_frames + k)
def remove_duplicates(paths):
def dhash(image, hash_size=8):
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
resized = cv2.resize(gray, (hash_size + 1, hash_size))
diff = resized[:, 1:] > resized[:, :-1]
return sum([2**i for (i, v) in enumerate(diff.flatten()) if v])
hashes = {}
image_paths = sorted(glob.glob(os.path.join(paths, '*.png')))
for image_path in image_paths:
image = cv2.imread(image_path)
h = dhash(image)
p = hashes.get(h, [])
p.append(image_path)
hashes[h] = p
for (h, hashed_paths) in hashes.items():
if len(hashed_paths) > 1:
for p in hashed_paths[1:]:
os.remove(p)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
for fid, frame in enumerate(frames):
new_name = '{:08d}'.format(fid) + '.png'
new_name = os.path.join(paths, new_name)
os.rename(frame, new_name)
frames = sorted(glob.glob(os.path.join(paths, '*.png')))
return frames
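A hedged usage sketch of the helper above (the frames directory is a placeholder; duplicates by dHash are deleted on disk and the survivors are renamed to a contiguous sequence):

```python
# Hypothetical usage: `frame_dir` contains the PNG frames extracted from a video.
frame_dir = 'output/DAIN/frames-raw/my_video'   # placeholder path
frames = remove_duplicates(frame_dir)
print('kept {} unique frames'.format(len(frames)))
```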
@@ -56,8 +56,8 @@ class DictDataLoader():
        self.dataset = DictDataset(dataset)

        place = paddle.CUDAPlace(ParallelEnv().dev_id) \
                    if ParallelEnv().nranks > 1 else paddle.CUDAPlace(0)

        sampler = DistributedBatchSampler(self.dataset,
                                          batch_size=batch_size,
    ...
@@ -11,8 +11,10 @@ from ..datasets.builder import build_dataloader
from ..models.builder import build_model
from ..utils.visual import tensor2img, save_image
from ..utils.filesystem import save, load, makedirs
from ..utils.timer import TimeAverager
from ..metric.psnr_ssim import calculate_psnr, calculate_ssim


class Trainer:
    def __init__(self, cfg):
@@ -51,7 +53,6 @@ class Trainer:
        self.time_count = {}
        self.best_metric = {}

    def distributed_data_parallel(self):
        strategy = paddle.distributed.prepare_context()
        for name in self.model.model_names:
@@ -61,29 +62,37 @@ class Trainer:
                    paddle.DataParallel(net, strategy))

    def train(self):
        reader_cost_averager = TimeAverager()
        batch_cost_averager = TimeAverager()

        for epoch in range(self.start_epoch, self.epochs):
            self.current_epoch = epoch
            start_time = step_start_time = time.time()
            for i, data in enumerate(self.train_dataloader):
                reader_cost_averager.record(time.time() - step_start_time)

                self.batch_id = i
                # unpack data from dataset and apply preprocessing
                # data input should be dict
                self.model.set_input(data)
                self.model.optimize_parameters()

                batch_cost_averager.record(time.time() - step_start_time)
                if i % self.log_interval == 0:
                    self.data_time = reader_cost_averager.get_average()
                    self.step_time = batch_cost_averager.get_average()
                    self.print_log()
                    reader_cost_averager.reset()
                    batch_cost_averager.reset()

                if i % self.visual_interval == 0:
                    self.visual('visual_train')
                step_start_time = time.time()

            self.logger.info(
                'train one epoch time: {}'.format(time.time() - start_time))
            if self.validate_interval > -1 and epoch % self.validate_interval:
                self.validate()
            self.model.lr_scheduler.step()
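`TimeAverager` comes from `..utils.timer` and is not shown in this diff; a minimal sketch consistent with how it is used in the loop above (accumulate per-step costs, report the mean, reset each log interval) could be:

```python
class TimeAverager(object):
    """Minimal sketch (an assumption, not the repository's actual implementation)."""
    def __init__(self):
        self.reset()

    def reset(self):
        self._total_time = 0.0
        self._count = 0

    def record(self, elapsed):
        # elapsed: a wall-clock duration in seconds for one step (or one data read)
        self._total_time += elapsed
        self._count += 1

    def get_average(self):
        if self._count == 0:
            return 0.0
        return self._total_time / self._count
```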
@@ -93,7 +102,8 @@ class Trainer:
    def validate(self):
        if not hasattr(self, 'val_dataloader'):
            self.val_dataloader = build_dataloader(
                self.cfg.dataset.val, is_train=False)

        metric_result = {}
@@ -106,7 +116,7 @@ class Trainer:
            visual_results = {}
            current_paths = self.model.get_image_paths()
            current_visuals = self.model.get_current_visuals()

            for j in range(len(current_paths)):
                short_path = os.path.basename(current_paths[j])
                basename = os.path.splitext(short_path)[0]
@@ -115,31 +125,43 @@ class Trainer:
                    visual_results.update({name: img_tensor[j]})
                if 'psnr' in self.cfg.validate.metrics:
                    if 'psnr' not in metric_result:
                        metric_result['psnr'] = calculate_psnr(
                            tensor2img(current_visuals['output'][j], (0., 1.)),
                            tensor2img(current_visuals['gt'][j], (0., 1.)),
                            **self.cfg.validate.metrics.psnr)
                    else:
                        metric_result['psnr'] += calculate_psnr(
                            tensor2img(current_visuals['output'][j], (0., 1.)),
                            tensor2img(current_visuals['gt'][j], (0., 1.)),
                            **self.cfg.validate.metrics.psnr)
                if 'ssim' in self.cfg.validate.metrics:
                    if 'ssim' not in metric_result:
                        metric_result['ssim'] = calculate_ssim(
                            tensor2img(current_visuals['output'][j], (0., 1.)),
                            tensor2img(current_visuals['gt'][j], (0., 1.)),
                            **self.cfg.validate.metrics.ssim)
                    else:
                        metric_result['ssim'] += calculate_ssim(
                            tensor2img(current_visuals['output'][j], (0., 1.)),
                            tensor2img(current_visuals['gt'][j], (0., 1.)),
                            **self.cfg.validate.metrics.ssim)

            self.visual('visual_val', visual_results=visual_results)

            if i % self.log_interval == 0:
                self.logger.info(
                    'val iter: [%d/%d]' % (i, len(self.val_dataloader)))

        for metric_name in metric_result.keys():
            metric_result[metric_name] /= len(self.val_dataloader.dataset)

        self.logger.info('Epoch {} validate end: {}'.format(
            self.current_epoch, metric_result))

    def test(self):
        if not hasattr(self, 'test_dataloader'):
            self.test_dataloader = build_dataloader(
                self.cfg.dataset.test, is_train=False)

        # data[0]: img, data[1]: img path index
        # test batch size must be 1
@@ -163,8 +185,8 @@ class Trainer:
            self.visual('visual_test', visual_results=visual_results)

            if i % self.log_interval == 0:
                self.logger.info(
                    'Test iter: [%d/%d]' % (i, len(self.test_dataloader)))

    def print_log(self):
        losses = self.model.get_current_losses()
@@ -266,6 +288,7 @@ class Trainer:
        for name in self.model.model_names:
            if isinstance(name, str):
                self.logger.info('load model {} {} params!'.format(
                    self.cfg.model.name, 'net' + name))
                net = getattr(self.model, 'net' + name)
                net.set_dict(state_dicts['net' + name])
@@ -8,3 +8,12 @@ wget https://paddlegan.bj.bcebos.com/InceptionV3.pdparams
```
python test_fid_score.py --image_data_path1 /path/to/dataset1 --image_data_path2 /path/to/dataset2 --inference_model ./InceptionV3.pdparams
```
### Inception-V3 weights converted from torchvision

Download: https://aistudio.baidu.com/aistudio/datasetdetail/51890

These weights are converted from the official torchvision Inception-v3 model; both BigGAN and StarGAN-v2 use them to calculate the FID score.

Note that these weights differ from the ones above, which are converted from an unofficial TensorFlow implementation.
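With the `--style` option introduced below, the torchvision-converted weights can be plugged into the same script. A hedged example invocation (paths are placeholders; this style expects an inference model loadable by `fluid.io.load_inference_model`):

```
python test_fid_score.py --image_data_path1 /path/to/dataset1 --image_data_path2 /path/to/dataset2 --inference_model /path/to/torchvision_inception_v3 --style stargan
```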
@@ -16,15 +16,18 @@ import os
import fnmatch
import numpy as np
import cv2
from PIL import Image
from cv2 import imread
from scipy import linalg

import paddle.fluid as fluid
from inception import InceptionV3
from paddle.fluid.dygraph.base import to_variable

try:
    from tqdm import tqdm
except:
    def tqdm(x):
        return x

""" based on https://github.com/mit-han-lab/gan-compression/blob/master/metric/fid_score.py
@@ -128,7 +131,7 @@ def calculate_fid_given_img(img_fake,
    return fid_value


def _get_activations(files, model, batch_size, dims, use_gpu, premodel_path, style=None):
    if len(files) % batch_size != 0:
        print(('Warning: number of images is not a multiple of the '
               'batch size. Some samples are going to be ignored.'))
@@ -144,8 +147,23 @@ def _get_activations(files, model, batch_size, dims, use_gpu, premodel_path):
    for i in tqdm(range(n_batches)):
        start = i * batch_size
        end = start + batch_size

        # same as stargan-v2 official implementation: resize to 256 first, then resize to 299
        if style == 'stargan':
            img_list = []
            for f in files[start:end]:
                im = Image.open(str(f)).convert('RGB')
                if im.size[0] != 299:
                    im = im.resize((256, 256), 2)
                    im = im.resize((299, 299), 2)
                img_list.append(np.array(im).astype('float32'))
            images = np.array(img_list)
        else:
            images = np.array(
                [imread(str(f)).astype(np.float32) for f in files[start:end]])

        if len(images.shape) != 4:
            images = imread(str(files[start]))
@@ -155,33 +173,53 @@ def _get_activations(files, model, batch_size, dims, use_gpu, premodel_path):
        images = images.transpose((0, 3, 1, 2))
        images /= 255

        # imagenet normalization
        if style == 'stargan':
            mean = np.array([0.485, 0.456, 0.406]).astype('float32')
            std = np.array([0.229, 0.224, 0.225]).astype('float32')
            images[:] = (images[:] - mean[:, None, None]) / std[:, None, None]

        if style == 'stargan':
            pred_arr[start:end] = inception_infer(images, premodel_path)
        else:
            with fluid.dygraph.guard():
                images = to_variable(images)
                param_dict, _ = fluid.load_dygraph(premodel_path)
                model.set_dict(param_dict)
                model.eval()

                pred = model(images)[0][0].numpy()

                pred_arr[start:end] = pred.reshape(end - start, -1)

    return pred_arr
def inception_infer(x, model_path):
exe = fluid.Executor()
[inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(model_path, exe)
results = exe.run(inference_program,
feed={feed_target_names[0]: x},
fetch_list=fetch_targets)
return results[0]
def _calculate_activation_statistics(files,
                                     model,
                                     premodel_path,
                                     batch_size=50,
                                     dims=2048,
                                     use_gpu=False,
                                     style=None):
    act = _get_activations(files, model, batch_size, dims, use_gpu,
                           premodel_path, style)
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma


def _compute_statistics_of_path(path, model, batch_size, dims, use_gpu,
                                premodel_path, style=None):
    if path.endswith('.npz'):
        f = np.load(path)
        m, s = f['mu'][:], f['sigma'][:]
@@ -193,7 +231,7 @@ def _compute_statistics_of_path(path, model, batch_size, dims, use_gpu,
                    filenames, '*.jpg') or fnmatch.filter(filenames, '*.png'):
                files.append(os.path.join(root, filename))
        m, s = _calculate_activation_statistics(files, model, premodel_path,
                                                batch_size, dims, use_gpu, style)
    return m, s
@@ -202,7 +240,8 @@ def calculate_fid_given_paths(paths,
                              batch_size,
                              use_gpu,
                              dims,
                              model=None,
                              style=None):
    assert os.path.exists(
        premodel_path
    ), 'pretrain_model path {} is not exists! Please download it first'.format(
@@ -211,14 +250,15 @@ def calculate_fid_given_paths(paths,
        if not os.path.exists(p):
            raise RuntimeError('Invalid path: %s' % p)

    if model is None and style != 'stargan':
        with fluid.dygraph.guard():
            block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
            model = InceptionV3([block_idx], class_dim=1008)

    m1, s1 = _compute_statistics_of_path(paths[0], model, batch_size, dims,
                                         use_gpu, premodel_path, style)
    m2, s2 = _compute_statistics_of_path(paths[1], model, batch_size, dims,
                                         use_gpu, premodel_path, style)
    fid_value = _calculate_frechet_distance(m1, s1, m2, s2)
    return fid_value
@@ -38,6 +38,9 @@ def parse_args():
                        type=int,
                        default=1,
                        help='sample number in a batch for inference.')
    parser.add_argument('--style',
                        type=str,
                        help='calculation style: stargan or default (gan-compression style)')
    args = parser.parse_args()
    return args
@@ -50,10 +53,9 @@ def main():
    inference_model_path = args.inference_model
    batch_size = args.batch_size

    fid_value = calculate_fid_given_paths(paths, inference_model_path,
                                          batch_size, args.use_gpu, 2048,
                                          style=args.style)
    print('FID: ', fid_value)


if __name__ == "__main__":
    ...
import paddle
import paddle.nn as nn

__all__ = [
    'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'
]


def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=3,
                     stride=stride,
                     padding=1,
                     bias_attr=False)


class BasicBlock(nn.Layer):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
@@ -44,17 +48,24 @@ class BasicBlock(paddle.fluid.Layer):
        return out


class Bottleneck(nn.Layer):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias_attr=False)
        self.bn1 = nn.BatchNorm(planes)
        self.conv2 = nn.Conv2d(planes,
                               planes,
                               kernel_size=3,
                               stride=stride,
                               padding=1,
                               bias_attr=False)
        self.bn2 = nn.BatchNorm(planes)
        self.conv3 = nn.Conv2d(planes,
                               planes * 4,
                               kernel_size=1,
                               bias_attr=False)
        self.bn3 = nn.BatchNorm(planes * 4)
        self.relu = nn.ReLU()
        self.downsample = downsample
@@ -82,12 +93,15 @@ class Bottleneck(paddle.fluid.Layer):
        return out


class ResNet(nn.Layer):
    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias_attr=False)
        self.bn1 = nn.BatchNorm(64)
        self.relu = nn.ReLU()
@@ -103,8 +117,11 @@ class ResNet(paddle.fluid.Layer):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes,
                          planes * block.expansion,
                          kernel_size=1,
                          stride=stride,
                          bias_attr=False),
                nn.BatchNorm(planes * block.expansion),
            )
    ...
@@ -8,6 +8,7 @@ from .discriminators.builder import build_discriminator
from .losses import GANLoss

from ..solver import build_optimizer
from ..modules.init import init_weights
from ..utils.image_pool import ImagePool
@@ -56,10 +57,14 @@ class CycleGANModel(BaseModel):
        # Code (vs. paper): G_A (G), G_B (F), D_A (D_Y), D_B (D_X)
        self.netG_A = build_generator(opt.model.generator)
        self.netG_B = build_generator(opt.model.generator)
        init_weights(self.netG_A)
        init_weights(self.netG_B)

        if self.isTrain:  # define discriminators
            self.netD_A = build_discriminator(opt.model.discriminator)
            self.netD_B = build_discriminator(opt.model.discriminator)
            init_weights(self.netD_A)
            init_weights(self.netD_B)

        if self.isTrain:
            if opt.lambda_identity > 0.0:  # only works when input and output images have the same number of channels
    ...
import functools
import numpy as np

import paddle
import paddle.nn as nn

from ...modules.norm import build_norm_layer
from .builder import DISCRIMINATORS


@DISCRIMINATORS.register()
class NLayerDiscriminator(nn.Layer):
    """Defines a PatchGAN discriminator"""
    def __init__(self, input_nc, ndf=64, n_layers=3, norm_type='instance'):
        """Construct a PatchGAN discriminator
@@ -24,36 +22,51 @@ class NLayerDiscriminator(paddle.fluid.dygraph.Layer):
        """
        super(NLayerDiscriminator, self).__init__()
        norm_layer = build_norm_layer(norm_type)
        if type(norm_layer) == functools.partial:
            use_bias = norm_layer.func == nn.InstanceNorm
        else:
            use_bias = norm_layer == nn.InstanceNorm

        kw = 4
        padw = 1
        sequence = [
            nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw),
            nn.LeakyReLU(0.2)
        ]
        nf_mult = 1
        nf_mult_prev = 1
        for n in range(1, n_layers):
            nf_mult_prev = nf_mult
            nf_mult = min(2**n, 8)
            sequence += [
                nn.Conv2d(ndf * nf_mult_prev,
                          ndf * nf_mult,
                          kernel_size=kw,
                          stride=2,
                          padding=padw,
                          bias_attr=use_bias),
                norm_layer(ndf * nf_mult),
                nn.LeakyReLU(0.2)
            ]

        nf_mult_prev = nf_mult
        nf_mult = min(2**n_layers, 8)
        sequence += [
            nn.Conv2d(ndf * nf_mult_prev,
                      ndf * nf_mult,
                      kernel_size=kw,
                      stride=1,
                      padding=padw,
                      bias_attr=use_bias),
            norm_layer(ndf * nf_mult),
            nn.LeakyReLU(0.2)
        ]

        sequence += [
            nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)
        ]
        self.model = nn.Sequential(*sequence)

    def forward(self, input):
        """Standard forward."""
        return self.model(input)
@@ -432,8 +432,7 @@ class SelfAttention(nn.Layer):
        self.key = conv1d(n_channels, n_channels // 8)
        self.value = conv1d(n_channels, n_channels)
        self.gamma = self.create_parameter(
            shape=[1], default_initializer=paddle.nn.initializer.Constant(
                0.0))  #nn.Parameter(tensor([0.]))

    def forward(self, x):
    ...
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import functools
from ...modules.norm import build_norm_layer
from .builder import GENERATORS
@GENERATORS.register()
class MobileResnetGenerator(nn.Layer):
def __init__(self,
input_channel,
output_nc,
ngf=64,
norm_type='instance',
use_dropout=False,
n_blocks=9,
padding_type='reflect'):
super(MobileResnetGenerator, self).__init__()
norm_layer = build_norm_layer(norm_type)
        if type(norm_layer) == functools.partial:
            use_bias = norm_layer.func == nn.InstanceNorm
        else:
            use_bias = norm_layer == nn.InstanceNorm
self.model = nn.LayerList([
nn.ReflectionPad2d([3, 3, 3, 3]),
nn.Conv2d(
input_channel,
int(ngf),
kernel_size=7,
padding=0,
bias_attr=use_bias), norm_layer(ngf), nn.ReLU()
])
n_downsampling = 2
for i in range(n_downsampling):
mult = 2**i
self.model.extend([
nn.Conv2d(
ngf * mult,
ngf * mult * 2,
kernel_size=3,
stride=2,
padding=1,
bias_attr=use_bias), norm_layer(ngf * mult * 2), nn.ReLU()
])
mult = 2**n_downsampling
for i in range(n_blocks):
self.model.extend([
MobileResnetBlock(
ngf * mult,
ngf * mult,
padding_type=padding_type,
norm_layer=norm_layer,
use_dropout=use_dropout,
use_bias=use_bias)
])
for i in range(n_downsampling):
mult = 2**(n_downsampling - i)
output_size = (i + 1) * 128
self.model.extend([
nn.ConvTranspose2d(
ngf * mult,
int(ngf * mult / 2),
kernel_size=3,
stride=2,
padding=1,
output_padding=1,
bias_attr=use_bias), norm_layer(int(ngf * mult / 2)),
nn.ReLU()
])
self.model.extend([nn.ReflectionPad2d([3, 3, 3, 3])])
self.model.extend([nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)])
self.model.extend([nn.Tanh()])
def forward(self, inputs):
y = inputs
for sublayer in self.model:
y = sublayer(y)
return y
class MobileResnetBlock(nn.Layer):
def __init__(self, in_c, out_c, padding_type, norm_layer, use_dropout,
use_bias):
super(MobileResnetBlock, self).__init__()
self.padding_type = padding_type
self.use_dropout = use_dropout
self.conv_block = nn.LayerList([])
p = 0
if self.padding_type == 'reflect':
self.conv_block.extend([nn.ReflectionPad2d([1, 1, 1, 1])])
elif self.padding_type == 'replicate':
self.conv_block.extend([nn.ReplicationPad2d([1, 1, 1, 1])])
elif self.padding_type == 'zero':
p = 1
else:
raise NotImplementedError('padding [%s] is not implemented' %
self.padding_type)
self.conv_block.extend([
SeparableConv2D(
num_channels=in_c,
num_filters=out_c,
filter_size=3,
padding=p,
stride=1), norm_layer(out_c), nn.ReLU()
])
self.conv_block.extend([nn.Dropout(0.5)])
if self.padding_type == 'reflect':
self.conv_block.extend([nn.ReflectionPad2d([1, 1, 1, 1])])
elif self.padding_type == 'replicate':
self.conv_block.extend([nn.ReplicationPad2d([1, 1, 1, 1])])
elif self.padding_type == 'zero':
p = 1
else:
raise NotImplementedError('padding [%s] is not implemented' %
self.padding_type)
self.conv_block.extend([
SeparableConv2D(
num_channels=out_c,
num_filters=in_c,
filter_size=3,
padding=p,
stride=1), norm_layer(in_c)
])
def forward(self, inputs):
y = inputs
for sublayer in self.conv_block:
y = sublayer(y)
out = inputs + y
return out
class SeparableConv2D(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
padding=0,
                 norm_layer=nn.InstanceNorm,
use_bias=True,
scale_factor=1,
stddev=0.02):
super(SeparableConv2D, self).__init__()
self.conv = nn.LayerList([
nn.Conv2d(
in_channels=num_channels,
out_channels=num_channels * scale_factor,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=num_channels,
weight_attr=paddle.ParamAttr(
initializer=nn.initializer.Normal(
loc=0.0, scale=stddev)),
bias_attr=use_bias)
])
self.conv.extend([norm_layer(num_channels * scale_factor)])
self.conv.extend([
nn.Conv2d(
in_channels=num_channels * scale_factor,
out_channels=num_filters,
kernel_size=1,
stride=1,
weight_attr=paddle.ParamAttr(
initializer=nn.initializer.Normal(
loc=0.0, scale=stddev)),
bias_attr=use_bias)
])
def forward(self, inputs):
for sublayer in self.conv:
inputs = sublayer(inputs)
return inputs
@@ -2,43 +2,79 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F


class TempConv(nn.Layer):
    def __init__(self,
                 in_planes,
                 out_planes,
                 kernel_size=(1, 3, 3),
                 stride=(1, 1, 1),
                 padding=(0, 1, 1)):
        super(TempConv, self).__init__()
        self.conv3d = nn.Conv3d(in_planes,
                                out_planes,
                                kernel_size=kernel_size,
                                stride=stride,
                                padding=padding)
        self.bn = nn.BatchNorm(out_planes)

    def forward(self, x):
        return F.elu(self.bn(self.conv3d(x)))


class Upsample(nn.Layer):
    def __init__(self, in_planes, out_planes, scale_factor=(1, 2, 2)):
        super(Upsample, self).__init__()
        self.scale_factor = scale_factor
        self.conv3d = nn.Conv3d(in_planes,
                                out_planes,
                                kernel_size=(3, 3, 3),
                                stride=(1, 1, 1),
                                padding=(1, 1, 1))
        self.bn = nn.BatchNorm(out_planes)

    def forward(self, x):
        out_size = x.shape[2:]
        for i in range(3):
            out_size[i] = self.scale_factor[i] * out_size[i]

        return F.elu(
            self.bn(
                self.conv3d(
                    F.interpolate(x,
                                  size=out_size,
                                  mode='trilinear',
                                  align_corners=False,
                                  data_format='NCDHW',
                                  align_mode=0))))


class UpsampleConcat(nn.Layer):
    def __init__(self, in_planes_up, in_planes_flat, out_planes):
        super(UpsampleConcat, self).__init__()
        self.conv3d = TempConv(in_planes_up + in_planes_flat,
                               out_planes,
                               kernel_size=(3, 3, 3),
                               stride=(1, 1, 1),
                               padding=(1, 1, 1))

    def forward(self, x1, x2):
        scale_factor = (1, 2, 2)
        out_size = x1.shape[2:]
        for i in range(3):
            out_size[i] = scale_factor[i] * out_size[i]
        x1 = F.interpolate(x1,
                           size=out_size,
                           mode='trilinear',
                           align_corners=False,
                           data_format='NCDHW',
                           align_mode=0)
        x = paddle.concat([x1, x2], axis=1)
        return self.conv3d(x)


class SourceReferenceAttention(nn.Layer):
    """
    Source-Reference Attention Layer
    """
@@ -51,137 +87,166 @@ class SourceReferenceAttention(paddle.fluid.dygraph.Layer):
        in_planes_r: int
            Number of input reference feature vector channels.
        """
        super(SourceReferenceAttention, self).__init__()
        self.query_conv = nn.Conv3d(in_channels=in_planes_s,
                                    out_channels=in_planes_s // 8,
                                    kernel_size=1)
        self.key_conv = nn.Conv3d(in_channels=in_planes_r,
                                  out_channels=in_planes_r // 8,
                                  kernel_size=1)
        self.value_conv = nn.Conv3d(in_channels=in_planes_r,
                                    out_channels=in_planes_r,
                                    kernel_size=1)
        self.gamma = self.create_parameter(
            shape=[1],
            dtype=self.query_conv.weight.dtype,
            default_initializer=nn.initializer.Constant(0.0))

    def forward(self, source, reference):
        s_batchsize, sC, sT, sH, sW = source.shape
        r_batchsize, rC, rT, rH, rW = reference.shape

        proj_query = paddle.reshape(self.query_conv(source),
                                    [s_batchsize, -1, sT * sH * sW])
        proj_query = paddle.transpose(proj_query, [0, 2, 1])
        proj_key = paddle.reshape(self.key_conv(reference),
                                  [r_batchsize, -1, rT * rW * rH])
        energy = paddle.bmm(proj_query, proj_key)
        attention = F.softmax(energy)

        proj_value = paddle.reshape(self.value_conv(reference),
                                    [r_batchsize, -1, rT * rH * rW])

        out = paddle.bmm(proj_value, paddle.transpose(attention, [0, 2, 1]))
        out = paddle.reshape(out, [s_batchsize, sC, sT, sH, sW])
        out = self.gamma * out + source
        return out, attention


class NetworkR(nn.Layer):
    def __init__(self):
        super(NetworkR, self).__init__()

        self.layers = nn.Sequential(
            nn.ReplicationPad3d((1, 1, 1, 1, 1, 1)),
            TempConv(1,
                     64,
                     kernel_size=(3, 3, 3),
                     stride=(1, 2, 2),
                     padding=(0, 0, 0)),
            TempConv(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            TempConv(128, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            TempConv(128,
                     256,
                     kernel_size=(3, 3, 3),
                     stride=(1, 2, 2),
                     padding=(1, 1, 1)),
            TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            Upsample(256, 128),
            TempConv(128, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            TempConv(64, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            Upsample(64, 16),
            nn.Conv3d(16,
                      1,
                      kernel_size=(3, 3, 3),
                      stride=(1, 1, 1),
                      padding=(1, 1, 1)))

    def forward(self, x):
        return paddle.clip(
            (x + F.tanh(self.layers(((x * 1).detach()) - 0.4462414))), 0.0, 1.0)


class NetworkC(nn.Layer):
    def __init__(self):
        super(NetworkC, self).__init__()

        self.down1 = nn.Sequential(
            nn.ReplicationPad3d((1, 1, 1, 1, 0, 0)),
            TempConv(1, 64, stride=(1, 2, 2), padding=(0, 0, 0)),
            TempConv(64, 128), TempConv(128, 128),
            TempConv(128, 256, stride=(1, 2, 2)), TempConv(256, 256),
            TempConv(256, 256), TempConv(256, 512, stride=(1, 2, 2)),
            TempConv(512, 512), TempConv(512, 512))
        self.flat = nn.Sequential(TempConv(512, 512), TempConv(512, 512))
        self.down2 = nn.Sequential(
            TempConv(512, 512, stride=(1, 2, 2)),
            TempConv(512, 512),
        )
        self.stattn1 = SourceReferenceAttention(
            512, 512)  # Source-Reference Attention
        self.stattn2 = SourceReferenceAttention(
            512, 512)  # Source-Reference Attention
        self.selfattn1 = SourceReferenceAttention(512, 512)  # Self Attention
        self.conv1 = TempConv(512, 512)
        self.up1 = UpsampleConcat(512, 512, 512)  # 1/8
        self.selfattn2 = SourceReferenceAttention(512, 512)  # Self Attention
        self.conv2 = TempConv(512,
                              256,
                              kernel_size=(3, 3, 3),
                              stride=(1, 1, 1),
                              padding=(1, 1, 1))
        self.up2 = nn.Sequential(
            Upsample(256, 128),  # 1/4
            TempConv(128,
                     64,
                     kernel_size=(3, 3, 3),
                     stride=(1, 1, 1),
                     padding=(1, 1, 1)))
        self.up3 = nn.Sequential(
            Upsample(64, 32),  # 1/2
            TempConv(32,
                     16,
                     kernel_size=(3, 3, 3),
                     stride=(1, 1, 1),
                     padding=(1, 1, 1)))
        self.up4 = nn.Sequential(
            Upsample(16, 8),  # 1/1
            nn.Conv3d(8,
                      2,
                      kernel_size=(3, 3, 3),
                      stride=(1, 1, 1),
                      padding=(1, 1, 1)))
        self.reffeatnet1 = nn.Sequential(
            TempConv(3, 64, stride=(1, 2, 2)),
            TempConv(64, 128),
            TempConv(128, 128),
            TempConv(128, 256, stride=(1, 2, 2)),
            TempConv(256, 256),
            TempConv(256, 256),
            TempConv(256, 512, stride=(1, 2, 2)),
            TempConv(512, 512),
            TempConv(512, 512),
        )
        self.reffeatnet2 = nn.Sequential(
            TempConv(512, 512, stride=(1, 2, 2)),
            TempConv(512, 512),
            TempConv(512, 512),
        )

    def forward(self, x, x_refs=None):
        x1 = self.down1(x - 0.4462414)

        if x_refs is not None:
            x_refs = paddle.transpose(
                x_refs, [0, 2, 1, 3, 4])  # [B,T,C,H,W] --> [B,C,T,H,W]
            reffeat = self.reffeatnet1(x_refs - 0.48)
            x1, _ = self.stattn1(x1, reffeat)

        x2 = self.flat(x1)
        out = self.down2(x1)
        if x_refs is not None:
            reffeat2 = self.reffeatnet2(reffeat)
            out, _ = self.stattn2(out, reffeat2)
        out = self.conv1(out)
        out, _ = self.selfattn1(out, out)
        out = self.up1(out, x2)
        out, _ = self.selfattn2(out, out)
        out = self.conv2(out)
        out = self.up2(out)
        out = self.up3(out)
        out = self.up4(out)

        return F.sigmoid(out)
@@ -2,20 +2,25 @@ import paddle
import paddle.nn as nn
import functools

from ...modules.norm import build_norm_layer
from .builder import GENERATORS


@GENERATORS.register()
class ResnetGenerator(nn.Layer):
    """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations.

    code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style)
    """
    def __init__(self,
                 input_nc,
                 output_nc,
                 ngf=64,
                 norm_type='instance',
                 use_dropout=False,
                 n_blocks=6,
                 padding_type='reflect'):
        """Construct a Resnet-based generator

        Args:
@@ -27,7 +32,7 @@ class ResnetGenerator(paddle.fluid.dygraph.Layer):
            n_blocks (int)      -- the number of ResNet blocks
            padding_type (str)  -- the name of padding layer in conv layers: reflect | replicate | zero
        """
        assert (n_blocks >= 0)
        super(ResnetGenerator, self).__init__()

        norm_layer = build_norm_layer(norm_type)
@@ -36,35 +41,56 @@ class ResnetGenerator(paddle.fluid.dygraph.Layer):
        else:
            use_bias = norm_layer == nn.InstanceNorm

        model = [
            nn.ReflectionPad2d([3, 3, 3, 3]),
            nn.Conv2d(input_nc,
                      ngf,
                      kernel_size=7,
                      padding=0,
                      bias_attr=use_bias),
            norm_layer(ngf),
            nn.ReLU()
        ]

        n_downsampling = 2
        for i in range(n_downsampling):  # add downsampling layers
            mult = 2**i
            model += [
                nn.Conv2d(ngf * mult,
                          ngf * mult * 2,
                          kernel_size=3,
                          stride=2,
                          padding=1,
                          bias_attr=use_bias),
                norm_layer(ngf * mult * 2),
                nn.ReLU()
            ]

        mult = 2**n_downsampling
        for i in range(n_blocks):  # add ResNet blocks
            model += [
                ResnetBlock(ngf * mult,
                            padding_type=padding_type,
                            norm_layer=norm_layer,
                            use_dropout=use_dropout,
                            use_bias=use_bias)
            ]

        for i in range(n_downsampling):  # add upsampling layers
            mult = 2**(n_downsampling - i)
            model += [
                nn.ConvTranspose2d(ngf * mult,
                                   int(ngf * mult / 2),
                                   kernel_size=3,
                                   stride=2,
                                   padding=1,
                                   output_padding=1,
                                   bias_attr=use_bias),
                norm_layer(int(ngf * mult / 2)),
                nn.ReLU()
            ]
        model += [nn.ReflectionPad2d([3, 3, 3, 3])]
        model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
        model += [nn.Tanh()]
@@ -75,9 +101,8 @@ class ResnetGenerator(paddle.fluid.dygraph.Layer):
        return self.model(x)


class ResnetBlock(nn.Layer):
    """Define a Resnet block"""
    def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias):
        """Initialize the Resnet block
@@ -87,9 +112,11 @@ class ResnetBlock(paddle.fluid.dygraph.Layer):
        Original Resnet paper: https://arxiv.org/pdf/1512.03385.pdf
        """
        super(ResnetBlock, self).__init__()
        self.conv_block = self.build_conv_block(dim, padding_type, norm_layer,
                                                use_dropout, use_bias)

    def build_conv_block(self, dim, padding_type, norm_layer, use_dropout,
                         use_bias):
        """Construct a convolutional block.

        Parameters:
@@ -104,28 +131,37 @@ class ResnetBlock(paddle.fluid.dygraph.Layer):
        conv_block = []
        p = 0
        if padding_type == 'reflect':
            conv_block += [nn.ReflectionPad2d([1, 1, 1, 1])]
        elif padding_type == 'replicate':
            conv_block += [nn.ReplicationPad2d([1, 1, 1, 1])]
        elif padding_type == 'zero':
            p = 1
        else:
            raise NotImplementedError('padding [%s] is not implemented' %
                                      padding_type)

        conv_block += [
            nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias),
            norm_layer(dim),
            nn.ReLU()
        ]
        if use_dropout:
            conv_block += [nn.Dropout(0.5)]

        p = 0
        if padding_type == 'reflect':
            conv_block += [nn.ReflectionPad2d([1, 1, 1, 1])]
        elif padding_type == 'replicate':
            conv_block += [nn.ReplicationPad2d([1, 1, 1, 1])]
        elif padding_type == 'zero':
            p = 1
        else:
            raise NotImplementedError('padding [%s] is not implemented' %
                                      padding_type)
        conv_block += [
            nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias),
            norm_layer(dim)
        ]

        return nn.Sequential(*conv_block)
    ...
import functools

import paddle
import paddle.nn as nn

from ...modules.norm import build_norm_layer
from .builder import GENERATORS


@GENERATORS.register()
class UnetGenerator(nn.Layer):
    """Create a Unet-based generator"""
    def __init__(self,
                 input_nc,
                 output_nc,
                 num_downs,
                 ngf=64,
                 norm_type='batch',
                 use_dropout=False):
        """Construct a Unet generator

        Args:
            input_nc (int) -- the number of channels in input images
...@@ -27,36 +31,64 @@ class UnetGenerator(paddle.fluid.dygraph.Layer):
        super(UnetGenerator, self).__init__()
        norm_layer = build_norm_layer(norm_type)
        # construct unet structure
        unet_block = UnetSkipConnectionBlock(
            ngf * 8,
            ngf * 8,
            input_nc=None,
            submodule=None,
            norm_layer=norm_layer,
            innermost=True)  # add the innermost layer
        # add intermediate layers with ngf * 8 filters
        for i in range(num_downs - 5):
            unet_block = UnetSkipConnectionBlock(ngf * 8,
                                                 ngf * 8,
                                                 input_nc=None,
                                                 submodule=unet_block,
                                                 norm_layer=norm_layer,
                                                 use_dropout=use_dropout)
        # gradually reduce the number of filters from ngf * 8 to ngf
        unet_block = UnetSkipConnectionBlock(ngf * 4,
                                             ngf * 8,
                                             input_nc=None,
                                             submodule=unet_block,
                                             norm_layer=norm_layer)
        unet_block = UnetSkipConnectionBlock(ngf * 2,
                                             ngf * 4,
                                             input_nc=None,
                                             submodule=unet_block,
                                             norm_layer=norm_layer)
        unet_block = UnetSkipConnectionBlock(ngf,
                                             ngf * 2,
                                             input_nc=None,
                                             submodule=unet_block,
                                             norm_layer=norm_layer)
        self.model = UnetSkipConnectionBlock(
            output_nc,
            ngf,
            input_nc=input_nc,
            submodule=unet_block,
            outermost=True,
            norm_layer=norm_layer)  # add the outermost layer
    def forward(self, input):
        """Standard forward"""
        return self.model(input)
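# -- Hedged note (not from this diff): every UnetSkipConnectionBlock halves H and W with
# its stride-2 conv, so num_downs fixes the input resolution the generator expects.
num_downs, input_size = 8, 256
print(input_size // 2 ** num_downs)   # 1 -> a 256x256 input shrinks to 1x1 at the innermost block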
class UnetSkipConnectionBlock(nn.Layer):
    """Defines the Unet submodule with skip connection.

        X -------------------identity----------------------
        |-- downsampling -- |submodule| -- upsampling --|
    """
    def __init__(self,
                 outer_nc,
                 inner_nc,
                 input_nc=None,
                 submodule=None,
                 outermost=False,
                 innermost=False,
                 norm_layer=nn.BatchNorm,
                 use_dropout=False):
        """Construct a Unet submodule with skip connections.

        Parameters:
...@@ -77,36 +109,48 @@ class UnetSkipConnectionBlock(paddle.fluid.dygraph.Layer):
        use_bias = norm_layer == nn.InstanceNorm
        if input_nc is None:
            input_nc = outer_nc
        downconv = nn.Conv2d(input_nc,
                             inner_nc,
                             kernel_size=4,
                             stride=2,
                             padding=1,
                             bias_attr=use_bias)
        downrelu = nn.LeakyReLU(0.2)
        downnorm = norm_layer(inner_nc)
        uprelu = nn.ReLU()
        upnorm = norm_layer(outer_nc)

        if outermost:
            upconv = nn.ConvTranspose2d(inner_nc * 2,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1)
            down = [downconv]
            up = [uprelu, upconv, nn.Tanh()]
            model = down + [submodule] + up
        elif innermost:
            upconv = nn.ConvTranspose2d(inner_nc,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1,
                                        bias_attr=use_bias)
            down = [downrelu, downconv]
            up = [uprelu, upconv, upnorm]
            model = down + up
        else:
            upconv = nn.ConvTranspose2d(inner_nc * 2,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1,
                                        bias_attr=use_bias)
            down = [downrelu, downconv, downnorm]
            up = [uprelu, upconv, upnorm]
            if use_dropout:
                model = down + [submodule] + up + [nn.Dropout(0.5)]
            else:
                model = down + [submodule] + up
...@@ -115,5 +159,5 @@ class UnetSkipConnectionBlock(paddle.fluid.dygraph.Layer):
    def forward(self, x):
        if self.outermost:
            return self.model(x)
        else:  # add skip connections
            return paddle.concat([x, self.model(x)], 1)
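# -- Hedged sketch (numpy stand-in): the skip connection concatenates input and output
# along the channel axis, which is why every non-outermost upconv above expects
# inner_nc * 2 input channels.
import numpy as np

x = np.zeros((1, 64, 32, 32), dtype='float32')
y = np.zeros((1, 64, 32, 32), dtype='float32')   # stand-in for self.model(x)
print(np.concatenate([x, y], axis=1).shape)      # (1, 128, 32, 32)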
import numpy as np

import paddle
import paddle.nn as nn


class GANLoss(nn.Layer):
    """Define different GAN objectives.

    The GANLoss class abstracts away the need to create the target label tensor
    that has the same size as the input.
    """
    def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0):
        """ Initialize the GANLoss class.
...@@ -31,7 +29,7 @@ class GANLoss(paddle.fluid.dygraph.Layer):
        if gan_mode == 'lsgan':
            self.loss = nn.MSELoss()
        elif gan_mode == 'vanilla':
            self.loss = nn.BCEWithLogitsLoss()
        elif gan_mode in ['wgangp']:
            self.loss = None
        else:
...@@ -50,11 +48,17 @@ class GANLoss(paddle.fluid.dygraph.Layer):
        if target_is_real:
            if not hasattr(self, 'target_real_tensor'):
                self.target_real_tensor = paddle.fill_constant(
                    shape=paddle.shape(prediction),
                    value=self.target_real_label,
                    dtype='float32')
            target_tensor = self.target_real_tensor
        else:
            if not hasattr(self, 'target_fake_tensor'):
                self.target_fake_tensor = paddle.fill_constant(
                    shape=paddle.shape(prediction),
                    value=self.target_fake_label,
                    dtype='float32')
            target_tensor = self.target_fake_tensor

        # target_tensor.stop_gradient = True
...@@ -78,4 +82,4 @@ class GANLoss(paddle.fluid.dygraph.Layer):
            loss = -prediction.mean()
        else:
            loss = prediction.mean()
        return loss
\ No newline at end of file
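# -- Hedged sketch of what the 'lsgan' branch reduces to: an MSE between the
# discriminator scores and a constant target tensor (1.0 for real, 0.0 for fake).
# Shapes are illustrative PatchGAN-style outputs, not taken from this diff.
import numpy as np

prediction = np.random.rand(4, 1, 30, 30).astype('float32')
target_real = np.full_like(prediction, 1.0)      # mirrors the fill_constant above
loss_real = np.mean((prediction - target_real) ** 2)
print(loss_real)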
...@@ -8,6 +8,7 @@ from .discriminators.builder import build_discriminator
from .losses import GANLoss
from ..solver import build_optimizer
from ..modules.init import init_weights
from ..utils.image_pool import ImagePool
...@@ -42,12 +43,15 @@ class Pix2PixModel(BaseModel):
        # define networks (both generator and discriminator)
        self.netG = build_generator(opt.model.generator)
        init_weights(self.netG)

        # define a discriminator; conditional GANs need to take both input and output images; Therefore, #channels for D is input_nc + output_nc
        if self.isTrain:
            self.netD = build_discriminator(opt.model.discriminator)
            init_weights(self.netD)

        if self.isTrain:
            self.losses = {}
            # define loss functions
            self.criterionGAN = GANLoss(opt.model.gan_mode)
            self.criterionL1 = paddle.nn.L1Loss()
...@@ -79,6 +83,7 @@ class Pix2PixModel(BaseModel):
        AtoB = self.opt.dataset.train.direction == 'AtoB'
        self.real_A = paddle.to_variable(input['A' if AtoB else 'B'])
        self.real_B = paddle.to_variable(input['B' if AtoB else 'A'])

        self.image_paths = input['A_paths' if AtoB else 'B_paths']

    def forward(self):
...@@ -118,6 +123,7 @@ class Pix2PixModel(BaseModel):
        # Second, G(A) = B
        self.loss_G_L1 = self.criterionL1(self.fake_B,
                                          self.real_B) * self.opt.lambda_L1

        # combine loss and calculate gradients
        self.loss_G = self.loss_G_GAN + self.loss_G_L1
...
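# -- Hedged recap (values illustrative; lambda_L1 really comes from opt.lambda_L1):
# the generator objective assembled above is loss_G = loss_G_GAN + lambda_L1 * L1(G(A), B).
lambda_L1 = 100.0                     # common pix2pix default, assumed here
loss_G_GAN, raw_l1 = 0.7, 0.02
loss_G_L1 = raw_l1 * lambda_L1
loss_G = loss_G_GAN + loss_G_L1
print(loss_G)                         # 2.7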
import math
import numpy as np
import paddle
def _calculate_fan_in_and_fan_out(tensor):
dimensions = len(tensor.shape)
if dimensions < 2:
raise ValueError(
"Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
)
num_input_fmaps = tensor.shape[1]
num_output_fmaps = tensor.shape[0]
receptive_field_size = 1
if len(tensor.shape) > 2:
receptive_field_size = paddle.numel(tensor[0][0])
fan_in = num_input_fmaps * receptive_field_size
fan_out = num_output_fmaps * receptive_field_size
return fan_in, fan_out
def _calculate_correct_fan(tensor, mode):
mode = mode.lower()
valid_modes = ['fan_in', 'fan_out']
if mode not in valid_modes:
raise ValueError("Mode {} not supported, please use one of {}".format(
mode, valid_modes))
fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
return fan_in if mode == 'fan_in' else fan_out
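# Worked example for the two helpers above, using a conv weight of shape
# [out_channels, in_channels, kH, kW] = [64, 3, 7, 7]:
num_out, num_in, kh, kw = 64, 3, 7, 7
receptive_field_size = kh * kw
fan_in = num_in * receptive_field_size     # 147
fan_out = num_out * receptive_field_size   # 3136
print(fan_in, fan_out)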
def calculate_gain(nonlinearity, param=None):
"""Return the recommended gain value for the given nonlinearity function.
The values are as follows:
================= ====================================================
nonlinearity gain
================= ====================================================
Linear / Identity :math:`1`
Conv{1,2,3}D :math:`1`
Sigmoid :math:`1`
Tanh :math:`\frac{5}{3}`
ReLU :math:`\sqrt{2}`
Leaky Relu :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
================= ====================================================
Args:
nonlinearity: the non-linear function (`nn.functional` name)
param: optional parameter for the non-linear function
"""
linear_fns = [
'linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d',
'conv_transpose2d', 'conv_transpose3d'
]
if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
return 1
elif nonlinearity == 'tanh':
return 5.0 / 3
elif nonlinearity == 'relu':
return math.sqrt(2.0)
elif nonlinearity == 'leaky_relu':
if param is None:
negative_slope = 0.01
elif not isinstance(param, bool) and isinstance(
param, int) or isinstance(param, float):
# True/False are instances of int, hence check above
negative_slope = param
else:
raise ValueError(
"negative_slope {} not a valid number".format(param))
return math.sqrt(2.0 / (1 + negative_slope**2))
else:
raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
@paddle.no_grad()
def constant_(x, value):
temp_value = paddle.fill_constant(x.shape, x.dtype, value)
x.set_value(temp_value)
return x
@paddle.no_grad()
def normal_(x, mean=0., std=1.):
temp_value = paddle.normal(mean, std, shape=x.shape)
x.set_value(temp_value)
return x
@paddle.no_grad()
def uniform_(x, a=-1., b=1.):
temp_value = paddle.uniform(min=a, max=b, shape=x.shape)
x.set_value(temp_value)
return x
@paddle.no_grad()
def xavier_uniform_(x, gain=1.):
"""Fills the input `Tensor` with values according to the method
described in `Understanding the difficulty of training deep feedforward
neural networks` - Glorot, X. & Bengio, Y. (2010), using a uniform
distribution. The resulting tensor will have values sampled from
:math:`\mathcal{U}(-a, a)` where
.. math::
a = \text{gain} \times \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}
Also known as Glorot initialization.
Args:
x: an n-dimensional `paddle.Tensor`
gain: an optional scaling factor
"""
fan_in, fan_out = _calculate_fan_in_and_fan_out(x)
std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
a = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation
return uniform_(x, -a, a)
@paddle.no_grad()
def xavier_normal_(x, gain=1.):
"""Fills the input `Tensor` with values according to the method
described in `Understanding the difficulty of training deep feedforward
neural networks` - Glorot, X. & Bengio, Y. (2010), using a normal
distribution. The resulting tensor will have values sampled from
:math:`\mathcal{N}(0, \text{std}^2)` where
.. math::
\text{std} = \text{gain} \times \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}
Also known as Glorot initialization.
Args:
tensor: an n-dimensional `paddle.Tensor`
gain: an optional scaling factor
"""
fan_in, fan_out = _calculate_fan_in_and_fan_out(x)
std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
return normal_(x, 0., std)
@paddle.no_grad()
def kaiming_uniform_(x, a=0, mode='fan_in', nonlinearity='leaky_relu'):
"""Fills the input `Tensor` with values according to the method
described in `Delving deep into rectifiers: Surpassing human-level
performance on ImageNet classification` - He, K. et al. (2015), using a
uniform distribution. The resulting tensor will have values sampled from
:math:`\mathcal{U}(-\text{bound}, \text{bound})` where
.. math::
\text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}
Also known as He initialization.
Args:
x: an n-dimensional `paddle.Tensor`
a: the negative slope of the rectifier used after this layer (only
used with ``'leaky_relu'``)
mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
preserves the magnitude of the variance of the weights in the
forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
backwards pass.
nonlinearity: the non-linear function (`nn.functional` name),
recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
"""
fan = _calculate_correct_fan(x, mode)
gain = calculate_gain(nonlinearity, a)
std = gain / math.sqrt(fan)
bound = math.sqrt(
3.0) * std # Calculate uniform bounds from standard deviation
temp_value = paddle.uniform(x.shape, min=-bound, max=bound)
x.set_value(temp_value)
return x
@paddle.no_grad()
def kaiming_normal_(x, a=0, mode='fan_in', nonlinearity='leaky_relu'):
"""Fills the input `Tensor` with values according to the method
described in `Delving deep into rectifiers: Surpassing human-level
performance on ImageNet classification` - He, K. et al. (2015), using a
normal distribution. The resulting tensor will have values sampled from
:math:`\mathcal{N}(0, \text{std}^2)` where
.. math::
\text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}
Also known as He initialization.
Args:
x: an n-dimensional `paddle.Tensor`
a: the negative slope of the rectifier used after this layer (only
used with ``'leaky_relu'``)
mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
preserves the magnitude of the variance of the weights in the
forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
backwards pass.
nonlinearity: the non-linear function (`nn.functional` name),
recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
"""
fan = _calculate_correct_fan(x, mode)
gain = calculate_gain(nonlinearity, a)
std = gain / math.sqrt(fan)
temp_value = paddle.normal(0, std, shape=x.shape)
x.set_value(temp_value)
return x
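# Hedged numeric check of the He formulas used above, for a [64, 3, 7, 7] conv weight
# with the default nonlinearity ('leaky_relu' with a=0, i.e. gain = sqrt(2)):
import math

fan_in = 3 * 7 * 7
gain = math.sqrt(2.0)
std = gain / math.sqrt(fan_in)       # std used by kaiming_normal_
bound = math.sqrt(3.0) * std         # bound used by kaiming_uniform_
print(std, bound)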
def constant_init(layer, val, bias=0):
if hasattr(layer, 'weight') and layer.weight is not None:
constant_(layer.weight, val)
if hasattr(layer, 'bias') and layer.bias is not None:
constant_(layer.bias, bias)
def xavier_init(layer, gain=1, bias=0, distribution='normal'):
assert distribution in ['uniform', 'normal']
if distribution == 'uniform':
xavier_uniform_(layer.weight, gain=gain)
else:
xavier_normal_(layer.weight, gain=gain)
if hasattr(layer, 'bias') and layer.bias is not None:
constant_(layer.bias, bias)
def normal_init(layer, mean=0, std=1, bias=0):
normal_(layer.weight, mean, std)
if hasattr(layer, 'bias') and layer.bias is not None:
constant_(layer.bias, bias)
def uniform_init(layer, a=0, b=1, bias=0):
uniform_(layer.weight, a, b)
if hasattr(layer, 'bias') and layer.bias is not None:
constant_(layer.bias, bias)
def kaiming_init(layer,
a=0,
mode='fan_out',
nonlinearity='relu',
bias=0,
distribution='normal'):
assert distribution in ['uniform', 'normal']
if distribution == 'uniform':
kaiming_uniform_(layer.weight,
a=a,
mode=mode,
nonlinearity=nonlinearity)
else:
kaiming_normal_(layer.weight, a=a, mode=mode, nonlinearity=nonlinearity)
if hasattr(layer, 'bias') and layer.bias is not None:
constant_(layer.bias, bias)
def init_weights(net, init_type='normal', init_gain=0.02):
"""Initialize network weights.
Args:
net (nn.Layer): network to be initialized
init_type (str): the name of an initialization method: normal | xavier | kaiming | orthogonal
init_gain (float): scaling factor for normal, xavier and orthogonal.
We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and kaiming might
work better for some applications. Feel free to try yourself.
"""
def init_func(m): # define the initialization function
classname = m.__class__.__name__
if hasattr(m, 'weight') and (classname.find('Conv') != -1
or classname.find('Linear') != -1):
if init_type == 'normal':
normal_(m.weight, 0.0, init_gain)
elif init_type == 'xavier':
xavier_normal_(m.weight, gain=init_gain)
elif init_type == 'kaiming':
kaiming_normal_(m.weight, a=0, mode='fan_in')
else:
raise NotImplementedError(
'initialization method [%s] is not implemented' % init_type)
if hasattr(m, 'bias') and m.bias is not None:
constant_(m.bias, 0.0)
elif classname.find(
'BatchNorm'
) != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies.
normal_(m.weight, 1.0, init_gain)
constant_(m.bias, 0.0)
print('initialize network with %s' % init_type)
net.apply(init_func) # apply the initialization function <init_func>
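# -- Hedged usage sketch (assumes the paddle dev API this repo targets, where
# paddle.nn exposes Conv2d and BatchNorm): init_weights visits every sublayer via net.apply.
import paddle.nn as nn

net = nn.Sequential(nn.Conv2d(3, 64, kernel_size=3), nn.BatchNorm(64), nn.ReLU())
init_weights(net, init_type='normal', init_gain=0.02)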
import paddle
import paddle.nn as nn
from paddle.fluid.dygraph import Layer
from paddle import fluid
class MSELoss():
def __init__(self):
pass
def __call__(self, prediction, label):
return fluid.layers.mse_loss(prediction, label)
class L1Loss():
def __init__(self):
pass
def __call__(self, prediction, label):
return fluid.layers.reduce_mean(fluid.layers.elementwise_sub(prediction, label, act='abs'))
class ReflectionPad2d(Layer):
def __init__(self, size):
super(ReflectionPad2d, self).__init__()
self.size = size
def forward(self, x):
return fluid.layers.pad2d(x, [self.size] * 4, mode="reflect")
class LeakyReLU(Layer):
def __init__(self, alpha, inplace=False):
super(LeakyReLU, self).__init__()
self.alpha = alpha
def forward(self, x):
return fluid.layers.leaky_relu(x, self.alpha)
class Tanh(Layer):
def __init__(self):
super(Tanh, self).__init__()
def forward(self, x):
return fluid.layers.tanh(x)
class Dropout(Layer):
def __init__(self, prob, mode='upscale_in_train'):
super(Dropout, self).__init__()
self.prob = prob
self.mode = mode
def forward(self, x):
return fluid.layers.dropout(x, self.prob, dropout_implementation=self.mode)
class BCEWithLogitsLoss():
def __init__(self, weight=None, reduction='mean'):
self.weight = weight
        self.reduction = reduction
def __call__(self, x, label):
out = paddle.fluid.layers.sigmoid_cross_entropy_with_logits(x, label)
if self.reduction == 'sum':
return fluid.layers.reduce_sum(out)
elif self.reduction == 'mean':
return fluid.layers.reduce_mean(out)
else:
return out
class _SpectralNorm(paddle.nn.SpectralNorm):
    def __init__(self,
                 weight_shape,
                 dim=0,
                 power_iters=1,
                 eps=1e-12,
                 dtype='float32'):
        super(_SpectralNorm, self).__init__(weight_shape, dim, power_iters, eps,
                                            dtype)

    def forward(self, weight):
        paddle.fluid.data_feeder.check_variable_and_dtype(
            weight, "weight", ['float32', 'float64'], 'SpectralNorm')
        inputs = {'Weight': weight, 'U': self.weight_u, 'V': self.weight_v}
        out = self._helper.create_variable_for_type_inference(self._dtype)
        _power_iters = self._power_iters if self.training else 0
        self._helper.append_op(type="spectral_norm",
                               inputs=inputs,
                               outputs={
                                   "Out": out,
                               },
                               attrs={
                                   "dim": self._dim,
                                   "power_iters": _power_iters,
                                   "eps": self._eps,
                               })
        return out


class Spectralnorm(paddle.nn.Layer):
    def __init__(self,
                 layer,
                 dim=0,
                 power_iters=1,
                 eps=1e-12,
                 dtype='float32'):
        super(Spectralnorm, self).__init__()
        self.spectral_norm = _SpectralNorm(layer.weight.shape, dim, power_iters,
                                           eps, dtype)
        self.dim = dim
        self.power_iters = power_iters
        self.eps = eps
        self.layer = layer
        weight = layer._parameters['weight']
        del layer._parameters['weight']
        self.weight_orig = self.create_parameter(weight.shape,
                                                 dtype=weight.dtype)
        self.weight_orig.set_value(weight)

    def forward(self, x):
        weight = self.spectral_norm(self.weight_orig)
        self.layer.weight = weight
        out = self.layer(x)
        return out
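# -- Hedged usage sketch (paddle dev API as used in this file): spectral norm is applied
# by wrapping an existing layer; the wrapper recomputes the normalized weight each call.
import paddle
import paddle.nn as nn

conv = nn.Conv2d(3, 64, kernel_size=3, padding=1)
sn_conv = Spectralnorm(conv, power_iters=1)
out = sn_conv(paddle.randn([1, 3, 32, 32]))
print(out.shape)   # [1, 64, 32, 32]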
def initial_type(
input,
op_type,
fan_out,
init="normal",
use_bias=False,
filter_size=0,
stddev=0.02,
name=None):
if init == "kaiming":
if op_type == 'conv':
fan_in = input.shape[1] * filter_size * filter_size
elif op_type == 'deconv':
fan_in = fan_out * filter_size * filter_size
else:
if len(input.shape) > 2:
fan_in = input.shape[1] * input.shape[2] * input.shape[3]
else:
fan_in = input.shape[1]
bound = 1 / math.sqrt(fan_in)
param_attr = fluid.ParamAttr(
# name=name + "_w",
initializer=fluid.initializer.Uniform(
low=-bound, high=bound))
if use_bias == True:
bias_attr = fluid.ParamAttr(
# name=name + '_b',
initializer=fluid.initializer.Uniform(
low=-bound, high=bound))
else:
bias_attr = False
else:
param_attr = fluid.ParamAttr(
# name=name + "_w",
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=stddev))
if use_bias == True:
bias_attr = fluid.ParamAttr(
# name=name + "_b",
initializer=fluid.initializer.Constant(0.0))
else:
bias_attr = False
return param_attr, bias_attr
class Pad2D(fluid.dygraph.Layer):
def __init__(self, paddings, mode, pad_value=0.0):
super(Pad2D, self).__init__()
self.paddings = paddings
self.mode = mode
def forward(self, x):
return fluid.layers.pad2d(x, self.paddings, self.mode)
\ No newline at end of file
...@@ -3,7 +3,7 @@ import functools
import paddle.nn as nn


class Identity(nn.Layer):
    def forward(self, x):
        return x
...@@ -18,11 +18,28 @@ def build_norm_layer(norm_type='instance'):
    For InstanceNorm, we do not use learnable affine parameters. We do not track running statistics.
    """
    if norm_type == 'batch':
        norm_layer = functools.partial(
            nn.BatchNorm,
            param_attr=paddle.ParamAttr(
                initializer=nn.initializer.Normal(1.0, 0.02)),
            bias_attr=paddle.ParamAttr(
                initializer=nn.initializer.Constant(0.0)),
            trainable_statistics=True)
    elif norm_type == 'instance':
        norm_layer = functools.partial(
            nn.InstanceNorm,
            param_attr=paddle.ParamAttr(
                initializer=nn.initializer.Constant(1.0),
                learning_rate=0.0,
                trainable=False),
            bias_attr=paddle.ParamAttr(initializer=nn.initializer.Constant(0.0),
                                       learning_rate=0.0,
                                       trainable=False))
    elif norm_type == 'none':

        def norm_layer(x):
            return Identity()
    else:
        raise NotImplementedError('normalization layer [%s] is not found' %
                                  norm_type)
    return norm_layer
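# -- Hedged usage sketch (same module context as above): the factory returns a
# partially-applied class, so callers only pass the channel count, e.g. norm_layer(dim)
# in the generators.
norm_layer = build_norm_layer('instance')
norm = norm_layer(64)   # InstanceNorm over 64 channels with frozen affine params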
...@@ -19,6 +19,6 @@ def setup(args, cfg):
    logger.info('Configs: {}'.format(cfg))

    place = paddle.CUDAPlace(ParallelEnv().dev_id) \
        if ParallelEnv().nranks > 1 else paddle.CUDAPlace(0)
    paddle.disable_static(place)
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
class TimeAverager(object):
def __init__(self):
self.reset()
def reset(self):
self._cnt = 0
self._total_time = 0
def record(self, usetime):
self._cnt += 1
self._total_time += usetime
def get_average(self):
if self._cnt == 0:
return 0
return self._total_time / self._cnt
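# Usage sketch: average the cost of a few steps.
import time

averager = TimeAverager()
for _ in range(3):
    start = time.time()
    time.sleep(0.01)                  # stand-in for one training step
    averager.record(time.time() - start)
print(averager.get_average())         # roughly 0.01 seconds per step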