提交 bf0899a5 编写于 作者: Z zhumanyu 提交者: lvmengsi

Spade (#3343)

* add SPADE
上级 9219a777
......@@ -23,6 +23,7 @@ import struct
import os
import paddle
import random
import sys
def RandomCrop(img, crop_w, crop_h):
......@@ -58,6 +59,19 @@ def get_preprocess_param(load_size, crop_size):
}
def get_preprocess_param(load_width, load_height, crop_width, crop_height):
    """Sample random preprocessing parameters for a resize-then-crop pipeline.

    Args:
        load_width (int): width the image is resized to before cropping.
        load_height (int): height the image is resized to before cropping.
        crop_width (int): width of the crop taken from the resized image.
        crop_height (int): height of the crop taken from the resized image.

    Returns:
        dict: {"crop_pos": (x, y), "flip": flip} where (x, y) is the
        top-left corner of the crop and flip says whether to mirror.
    """
    # Sample each axis independently.  The original code keyed BOTH x and y
    # on crop_width == load_width, which crashed with
    # np.random.randint(0, 0) whenever only one dimension had no slack
    # (e.g. load_height == crop_height but load_width > crop_width).
    if load_width <= crop_width:
        x = 0
    else:
        x = np.random.randint(0, load_width - crop_width)
    if load_height <= crop_height:
        y = 0
    else:
        y = np.random.randint(0, load_height - crop_height)
    flip = np.random.rand() > 0.5
    return {
        "crop_pos": (x, y),
        "flip": flip}
class reader_creator(object):
''' read and preprocess dataset'''
......@@ -209,6 +223,125 @@ class pair_reader_creator(reader_creator):
return reader
class triplex_reader_creator(reader_creator):
    '''Read and preprocess (label map, photo, instance map) triplets.

    Each line of the list file holds three tab-separated paths relative to
    ``image_dir``: a semantic label map, the matching RGB photo and an
    instance-id map.  ``make_reader`` yields batches of one-hot labels,
    normalized photos and instance edge maps (SPADE-style input).
    '''

    def __init__(self,
                 image_dir,
                 list_filename,
                 shuffle=False,
                 batch_size=1,
                 mode="TRAIN"):
        # All list loading / state handling lives in the base reader_creator.
        super(triplex_reader_creator, self).__init__(
            image_dir,
            list_filename,
            shuffle=shuffle,
            batch_size=batch_size,
            mode=mode)

    def make_reader(self, args, return_name=False):
        # Returns a generator producing, per batch:
        #   batch_out_1: one-hot label maps, shape (label_nc, H, W)
        #   batch_out_2: RGB photos scaled to [-1, 1], shape (3, H, W)
        #   batch_out_3: instance edge maps, shape (1, H, W)
        #                (omitted entirely when args.no_instance is set)
        # plus the source file names when return_name is True.
        print(self.image_dir, self.list_filename)
        print("files length:", len(self.lines))

        def reader():
            batch_out_1 = []
            batch_out_2 = []
            batch_out_3 = []
            batch_out_name = []
            if self.shuffle:
                np.random.shuffle(self.lines)
            for line in self.lines:
                files = line.strip('\n\r\t ').split('\t')
                if len(files) != 3:
                    # Malformed list line: abort instead of silently skipping.
                    print("files is not equal to 3!")
                    sys.exit(-1)
                # files = [label map, photo, instance map]
                img1 = Image.open(os.path.join(self.image_dir, files[
                    0]))
                img2 = Image.open(os.path.join(self.image_dir, files[
                    1])).convert('RGB')
                if not args.no_instance:
                    img3 = Image.open(os.path.join(self.image_dir, files[
                        2]))
                if self.mode == "TRAIN":
                    # Resize to load size, then crop to crop size.  NEAREST
                    # keeps label/instance ids intact; BICUBIC is only for
                    # the photo.
                    param = get_preprocess_param(args.load_width, args.load_height,
                                                 args.crop_width, args.crop_height)
                    img1 = img1.resize((args.load_width, args.load_height),
                                       Image.NEAREST)
                    img2 = img2.resize((args.load_width, args.load_height),
                                       Image.BICUBIC)
                    if not args.no_instance:
                        img3 = img3.resize((args.load_width, args.load_height),
                                           Image.NEAREST)
                    if args.crop_type == 'Centor':
                        img1 = CentorCrop(img1, args.crop_width, args.crop_height)
                        img2 = CentorCrop(img2, args.crop_width, args.crop_height)
                        if not args.no_instance:
                            img3 = CentorCrop(img3, args.crop_width, args.crop_height)
                    elif args.crop_type == 'Random':
                        x = param['crop_pos'][0]
                        y = param['crop_pos'][1]
                        img1 = img1.crop(
                            (x, y, x + args.crop_width, y + args.crop_height))
                        img2 = img2.crop(
                            (x, y, x + args.crop_width, y + args.crop_height))
                        if not args.no_instance:
                            img3 = img3.crop(
                                (x, y, x + args.crop_width, y + args.crop_height))
                else:
                    # TEST/inference: plain resize to crop size, no cropping.
                    img1 = img1.resize((args.crop_width, args.crop_height),
                                       Image.NEAREST)
                    img2 = img2.resize((args.crop_width, args.crop_height),
                                       Image.BICUBIC)
                    if not args.no_instance:
                        img3 = img3.resize((args.crop_width, args.crop_height),
                                           Image.NEAREST)
                img1 = np.array(img1)
                # One-hot encode the label map along a new leading channel
                # axis (assumes label ids are < args.label_nc — TODO confirm).
                index = img1[np.newaxis, :,:]
                input_label = np.zeros((args.label_nc, index.shape[1], index.shape[2]))
                np.put_along_axis(input_label,index,1.0,0)
                img1 = input_label
                # Photo: [0, 255] -> [-1, 1], HWC -> CHW.
                img2 = (np.array(img2).astype('float32') / 255.0 - 0.5) / 0.5
                img2 = img2.transpose([2, 0, 1])
                if not args.no_instance:
                    img3 = np.array(img3)[:, :, np.newaxis]
                    img3 = img3.transpose([2, 0, 1])
                    ### Extract an edge map from the instance map: a pixel is
                    ### an edge when it differs from a horizontal or vertical
                    ### neighbour.
                    edge = np.zeros(img3.shape)
                    edge = edge.astype('int8')
                    edge[:, :, 1:] = edge[:, :, 1:] | (img3[:, :, 1:] != img3[:, :, :-1])
                    edge[:, :, :-1] = edge[:, :, :-1] | (img3[:, :, 1:] != img3[:, :, :-1])
                    edge[:, 1:, :] = edge[:, 1:, :] | (img3[:, 1:, :] != img3[:, :-1, :])
                    edge[:, :-1, :] = edge[:, :-1, :] | (img3[:, 1:, :] != img3[:, :-1, :])
                    img3 = edge.astype('float32')
                    ### end edge extraction
                batch_out_1.append(img1)
                batch_out_2.append(img2)
                if not args.no_instance:
                    batch_out_3.append(img3)
                if return_name:
                    batch_out_name.append(os.path.basename(files[0]))
                if len(batch_out_1) == self.batch_size:
                    # NOTE(review): a trailing partial batch (fewer than
                    # batch_size samples) is silently dropped.
                    if return_name:
                        if not args.no_instance:
                            yield batch_out_1, batch_out_2, batch_out_3, batch_out_name
                        else:
                            yield batch_out_1, batch_out_2, batch_out_name
                        batch_out_name = []
                    else:
                        if not args.no_instance:
                            yield batch_out_1, batch_out_2, batch_out_3
                        else:
                            yield batch_out_1, batch_out_2
                    batch_out_1 = []
                    batch_out_2 = []
                    batch_out_3 = []

        return reader
class celeba_reader_creator(reader_creator):
''' read and preprocess dataset'''
......@@ -461,6 +594,33 @@ class data_reader(object):
mode="TEST")
reader_test = test_reader.make_reader(
self.cfg, return_name=True)
batch_num = train_reader.len()
reader = train_reader.make_reader(self.cfg)
return reader, reader_test, batch_num
elif self.cfg.model_net in ['SPADE']:
dataset_dir = os.path.join(self.cfg.data_dir, self.cfg.dataset)
train_list = os.path.join(dataset_dir, 'train.txt')
if self.cfg.train_list is not None:
train_list = self.cfg.train_list
train_reader = triplex_reader_creator(
image_dir=dataset_dir,
list_filename=train_list,
shuffle=self.cfg.shuffle,
batch_size=self.cfg.batch_size,
mode="TRAIN")
reader_test = None
if self.cfg.run_test:
test_list = os.path.join(dataset_dir, "test.txt")
if self.cfg.test_list is not None:
test_list = self.cfg.test_list
test_reader = triplex_reader_creator(
image_dir=dataset_dir,
list_filename=test_list,
shuffle=False,
batch_size=1,
mode="TEST")
reader_test = test_reader.make_reader(
self.cfg, return_name=True)
id2name = test_reader.id2name
batch_num = train_reader.len()
reader = train_reader.make_reader(self.cfg)
......
......@@ -26,7 +26,7 @@ import numpy as np
import imageio
import glob
from util.config import add_arguments, print_arguments
from data_reader import celeba_reader_creator, reader_creator
from data_reader import celeba_reader_creator, reader_creator, triplex_reader_creator
from util.utility import check_attribute_conflict, check_gpu, save_batch_image
from util import utility
import copy
......@@ -44,13 +44,19 @@ add_arg('init_model', str, None, "The init model file of d
add_arg('output', str, "./infer_result", "The directory the infer result to be saved to.")
add_arg('input_style', str, "A", "The style of the input, A or B")
add_arg('norm_type', str, "batch_norm", "Which normalization to used")
add_arg('crop_type', str, None, "Which crop type to use")
add_arg('use_gpu', bool, True, "Whether to use GPU to train.")
add_arg('dropout', bool, False, "Whether to use dropout")
add_arg('g_base_dims', int, 64, "Base channels in CycleGAN generator")
add_arg('ngf', int, 64, "Base channels in SPADE generator")
add_arg('c_dim', int, 13, "the size of attrs")
add_arg('use_gru', bool, False, "Whether to use GRU")
add_arg('crop_size', int, 178, "crop size")
add_arg('image_size', int, 128, "image size")
add_arg('load_height', int, 128, "image size")
add_arg('load_width', int, 128, "image size")
add_arg('crop_height', int, 128, "height of crop size")
add_arg('crop_width', int, 128, "width of crop size")
add_arg('selected_attrs', str,
"Bald,Bangs,Black_Hair,Blond_Hair,Brown_Hair,Bushy_Eyebrows,Eyeglasses,Male,Mouth_Slightly_Open,Mustache,No_Beard,Pale_Skin,Young",
"the attributes we selected to change")
......@@ -60,6 +66,8 @@ add_arg('dataset_dir', str, "./data/celeba/", "the datase
add_arg('n_layers', int, 5, "default layers in generotor")
add_arg('gru_n_layers', int, 4, "default layers of GRU in generotor")
add_arg('noise_size', int, 100, "the noise dimension")
add_arg('label_nc', int, 36, "label numbers of SPADE")
add_arg('no_instance', type=bool, default=False, help="Whether to use instance label.")
# yapf: enable
......@@ -159,6 +167,13 @@ def infer(args):
from network.DCGAN_network import DCGAN_model
model = DCGAN_model(args.n_samples)
fake = model.network_G(noise, name="G")
elif args.model_net == 'SPADE':
from network.SPADE_network import SPADE_model
model = SPADE_model()
input_label = fluid.layers.data(name='input_label', shape=data_shape, dtype='float32')
input_ins = fluid.layers.data(name='input_ins', shape=data_shape, dtype='float32')
input_ = fluid.layers.concat([input_label, input_ins], 1)
fake = model.network_G(input_, "generator", cfg=args, is_test=True)
else:
raise NotImplementedError("model_net {} is not support".format(
args.model_net))
......@@ -294,6 +309,33 @@ def infer(args):
imageio.imwrite(
os.path.join(args.output, "fake_" + image_name), (
(fake_temp + 1) * 127.5).astype(np.uint8))
elif args.model_net == 'SPADE':
test_reader = triplex_reader_creator(
image_dir=args.dataset_dir,
list_filename=args.test_list,
shuffle=False,
batch_size=1,
mode="TEST")
reader_test = test_reader.make_reader(
args, return_name=True)
for data in zip(reader_test()):
data_A, data_B, data_C, name = data[0]
name = name[0]
tensor_A = fluid.LoDTensor()
tensor_C = fluid.LoDTensor()
tensor_A.set(data_A, place)
tensor_C.set(data_C, place)
fake_B_temp = exe.run(
fetch_list=[fake.name],
feed={"input_label": tensor_A,
"input_ins": tensor_C})
fake_B_temp = np.squeeze(fake_B_temp[0]).transpose([1, 2, 0])
input_B_temp = np.squeeze(data_B[0]).transpose([1, 2, 0])
imageio.imwrite(args.output + "/fakeB_" + "_" + name, (
(fake_B_temp + 1) * 127.5).astype(np.uint8))
imageio.imwrite(args.output + "/real_" + "_" + name, (
(input_B_temp + 1) * 127.5).astype(np.uint8))
elif args.model_net == 'CGAN':
noise_data = np.random.uniform(
......
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from .base_network import conv2d, deconv2d, norm_layer, conv2d_spectral_norm
import paddle.fluid as fluid
import numpy as np
class SPADE_model(object):
    """SPADE generator / multi-scale discriminator graph builders
    (PaddlePaddle fluid)."""

    def __init__(self):
        pass

    def network_G(self, input, name, cfg, is_test=False):
        """Build the SPADE generator.

        Args:
            input: concatenated one-hot label map (and instance edge map).
            name (str): parameter name prefix.
            cfg: config namespace; reads cfg.ngf, cfg.crop_width/height.
            is_test (bool): inference flag forwarded to the norm layers.

        Returns:
            tanh-activated RGB image variable in [-1, 1].
        """
        nf = cfg.ngf
        # Five 2x upsamplings below, so start from crop size / 2**5.
        num_up_layers = 5
        sw = cfg.crop_width // (2**num_up_layers)
        sh = cfg.crop_height // (2**num_up_layers)
        seg = input
        # Downsampled segmentation map itself seeds the feature map.
        x = fluid.layers.resize_nearest(seg, out_shape=(sh, sw), align_corners=False)
        x = conv2d(x, 16*nf,3,padding=1,name=name + "_fc",use_bias=True, is_test=is_test)
        x = self.SPADEResnetBlock(x, seg, 16 * nf, 16 * nf, cfg, name=name+"_head_0", is_test=is_test)
        x = fluid.layers.resize_nearest(x, scale=2, align_corners=False)
        x = self.SPADEResnetBlock(x, seg, 16 * nf, 16 * nf, cfg, name=name+"_G_middle_0", is_test=is_test)
        x = self.SPADEResnetBlock(x, seg, 16 * nf, 16 * nf, cfg, name=name+"_G_middle_1", is_test=is_test)
        x = fluid.layers.resize_nearest(x, scale=2, align_corners=False)
        x = self.SPADEResnetBlock(x, seg, 16 * nf, 8 * nf, cfg, name=name+"_up_0", is_test=is_test)
        x = fluid.layers.resize_nearest(x, scale=2, align_corners=False)
        x = self.SPADEResnetBlock(x, seg, 8 * nf, 4 * nf, cfg, name=name+"_up_1", is_test=is_test)
        x = fluid.layers.resize_nearest(x, scale=2, align_corners=False)
        x = self.SPADEResnetBlock(x, seg, 4 * nf, 2 * nf, cfg, name=name+"_up_2", is_test=is_test)
        x = fluid.layers.resize_nearest(x, scale=2, align_corners=False)
        x = self.SPADEResnetBlock(x, seg, 2 * nf, 1 * nf, cfg, name=name+"_up_3", is_test=is_test)
        x = fluid.layers.leaky_relu(
            x, alpha=0.2, name=name + '_conv_img_leaky_relu')
        x = conv2d(x, 3,3,padding=1,name=name + "_conv_img",use_bias=True, is_test=is_test)
        x = fluid.layers.tanh(x)
        return x

    def SPADEResnetBlock(self, x, seg, fin, fout, opt, name, is_test=False):
        """Residual block with SPADE normalization and spectral-norm convs.

        Args:
            x: input feature map (fin channels).
            seg: full-resolution segmentation input, resized inside SPADE.
            fin/fout (int): input / output channel counts.
            opt: config namespace (label_nc, no_instance).
        """
        # The shortcut needs a learned 1x1 projection only when the channel
        # count changes.
        learn_shortcut = (fin != fout)
        fmiddle = min(fin, fout)
        # NOTE(review): semantic_nc is computed but never used in this block.
        semantic_nc = opt.label_nc + (0 if opt.no_instance else 1)
        if learn_shortcut:
            x_s = self.SPADE(x, seg, fin, name=name+".norm_s", is_test=is_test)
            x_s = conv2d_spectral_norm(x_s, fout,1,use_bias=False, name=name + ".conv_s", is_test=is_test)
        else:
            x_s = x
        dx = self.SPADE(x, seg, fin, name=name+".norm_0", is_test=is_test)
        dx = fluid.layers.leaky_relu(dx, alpha=0.2, name=name+'_leaky_relu0')
        dx = conv2d_spectral_norm(dx, fmiddle,3,padding=1,name=name + ".conv_0", use_bias=True, is_test=is_test)
        dx = self.SPADE(dx, seg, fmiddle, name=name+".norm_1", is_test=is_test)
        dx = fluid.layers.leaky_relu(dx, alpha=0.2, name=name+'_leaky_relu1')
        dx = conv2d_spectral_norm(dx, fout,3,padding=1,name=name + ".conv_1", use_bias=True, is_test=is_test)
        output = dx + x_s
        return output

    def SPADE(self, input, seg_map, norm_nc, name, is_test=False):
        """Spatially-adaptive denormalization: parameter-free batch norm of
        ``input`` followed by a per-pixel scale (gamma) and shift (beta)
        predicted from the segmentation map."""
        nhidden = 128
        ks = 3
        pw = ks // 2
        # Bring the segmentation map to the spatial size of the features.
        seg_map = fluid.layers.resize_nearest(seg_map, out_shape=input.shape[2:], align_corners=False)
        actv = conv2d(seg_map, nhidden, ks, padding=pw, activation_fn='relu', name=name+".mlp_shared.0", use_bias=True)
        gamma = conv2d(actv, norm_nc, ks, padding=pw, name=name+".mlp_gamma", use_bias=True)
        beta = conv2d(actv, norm_nc, ks, padding=pw, name=name+".mlp_beta", use_bias=True)
        # Frozen scale/bias (trainable=False) makes the batch norm
        # "parameter-free"; all modulation comes from gamma/beta above.
        param_attr = fluid.ParamAttr(
            name=name + ".param_free_norm.weight",
            initializer=fluid.initializer.Constant(value=1.0), trainable=False)
        bias_attr = fluid.ParamAttr(
            name=name+".param_free_norm.bias", initializer=fluid.initializer.Constant(0.0), trainable=False)
        norm = fluid.layers.batch_norm(input=input, name=name, param_attr=param_attr,
            bias_attr=bias_attr, moving_mean_name=name+".param_free_norm.running_mean", moving_variance_name=name+".param_free_norm.running_var", is_test=is_test)
        out = norm * (1 + gamma) + beta
        return out

    def network_D(self, input, name, cfg):
        """Multi-scale PatchGAN discriminator: num_D copies run on
        progressively average-pooled input; returns a list of per-scale
        layer-output lists (used for feature matching)."""
        num_D = 2
        result = []
        for i in range(num_D):
            # build_discriminator_Nlayers is a module-level helper.
            out = build_discriminator_Nlayers(input, name=name+"_%d"%i)
            result.append(out)
            input = fluid.layers.pool2d(input, pool_size=3, pool_type="avg", pool_stride=2, pool_padding=1, name=name+"_pool%d"%i)
        return result
def build_discriminator_Nlayers(input,
                                name="discriminator",
                                d_nlayers=4,
                                d_base_dims=64,
                                norm_type='instance_norm'):
    """N-layer PatchGAN discriminator (pix2pixHD style).

    Args:
        input: input image/feature variable (NCHW).
        name (str): parameter name prefix.
        d_nlayers (int): number of downsampling conv layers.
        d_base_dims (int): channels of the first conv; doubled per layer,
            capped at 512.
        norm_type (str): normalization used in the intermediate layers.

    Returns:
        list: every layer's output — intermediate activations feed the GAN
        feature-matching loss; the last element is the 1-channel patch
        score map.
    """
    kw = 4
    nf = d_base_dims
    res_list = []
    # First layer: plain strided conv (no norm), leaky relu.
    # (Removed unused locals `padw` and `d_dims` from the original.)
    res1 = conv2d(
        input,
        nf,
        kw,
        2,
        0.02,
        1,
        name=name + ".model0.0",
        activation_fn='leaky_relu',
        relufactor=0.2,
        use_bias=True)
    res_list.append(res1)
    # Middle layers: spectral-norm convs; the final one keeps stride 1.
    for i in range(1, d_nlayers):
        conv_name = name + ".model{}.0.0".format(i)
        nf = min(nf*2, 512)
        stride = 1 if i == d_nlayers - 1 else 2
        dis_output = conv2d_spectral_norm(
            res_list[-1],
            nf,
            kw,
            stride,
            0.02,
            1,
            name=conv_name,
            norm=norm_type,
            activation_fn='leaky_relu',
            relufactor=0.2,
            use_bias=False, norm_affine=False)
        res_list.append(dis_output)
    # Final 1-channel patch prediction map.
    o_c4 = conv2d(
        res_list[-1],
        1,
        4,
        1,
        0.02,
        1,
        name + ".model{}.0".format(d_nlayers),
        use_bias=True)
    res_list.append(o_c4)
    return res_list
......@@ -34,12 +34,18 @@ def cal_padding(img_size, stride, filter_size, dilation=1):
return out_size // 2, out_size - out_size // 2
def norm_layer(input, norm_type='batch_norm', name=None, is_test=False):
def norm_layer(input, norm_type='batch_norm', name=None, is_test=False, affine=True):
if norm_type == 'batch_norm':
if affine == True:
param_attr = fluid.ParamAttr(
name=name + '_w', initializer=fluid.initializer.Constant(1.0))
bias_attr = fluid.ParamAttr(
name=name + '_b', initializer=fluid.initializer.Constant(value=0.0))
else:
param_attr = fluid.ParamAttr(
name=name + '_w', initializer=fluid.initializer.Constant(1.0), trainable=False)
bias_attr = fluid.ParamAttr(
name=name + '_b', initializer=fluid.initializer.Constant(value=0.0), trainable=False)
return fluid.layers.batch_norm(
input,
param_attr=param_attr,
......@@ -58,6 +64,7 @@ def norm_layer(input, norm_type='batch_norm', name=None, is_test=False):
if name is not None:
scale_name = name + "_scale"
offset_name = name + "_offset"
if affine:
scale_param = fluid.ParamAttr(
name=scale_name,
initializer=fluid.initializer.Constant(1.0),
......@@ -66,6 +73,15 @@ def norm_layer(input, norm_type='batch_norm', name=None, is_test=False):
name=offset_name,
initializer=fluid.initializer.Constant(0.0),
trainable=True)
else:
scale_param = fluid.ParamAttr(
name=scale_name,
initializer=fluid.initializer.Constant(1.0),
trainable=False)
offset_param = fluid.ParamAttr(
name=offset_name,
initializer=fluid.initializer.Constant(0.0),
trainable=False)
scale = helper.create_parameter(
attr=scale_param, shape=input.shape[1:2], dtype=dtype)
offset = helper.create_parameter(
......@@ -375,3 +391,149 @@ def conv_and_pool(x, num_filters, name, stddev=0.02, act=None):
bias_attr=bias_attr,
act=act)
return out
def conv2d_spectral_norm(input,
                         num_filters=64,
                         filter_size=7,
                         stride=1,
                         stddev=0.02,
                         padding=0,
                         name="conv2d_spectral_norm",
                         norm=None,
                         activation_fn=None,
                         relufactor=0.0,
                         use_bias=False,
                         padding_type=None,
                         initial="normal",
                         is_test=False, norm_affine=True):
    """2-D convolution whose filter is wrapped in spectral normalization.

    Creates the raw filter as a parameter named ``<name>.weight_orig``,
    applies ``fluid.layers.spectral_norm`` to it, then convolves via
    conv2d_with_filter; optional norm layer and activation follow.

    Returns the output variable of the (normalized, activated) convolution.
    """
    b, c, h, w = input.shape
    helper = fluid.layer_helper.LayerHelper("conv2d_spectral_norm", **locals())
    dtype = helper.input_dtype()
    # NOTE(review): the raw filter is initialized to a constant 1.0 and the
    # `stddev`, `initial` and `padding_type` arguments are ignored —
    # presumably a random Normal(0, stddev) init was intended; confirm
    # before relying on training from scratch.
    weight_param = fluid.ParamAttr(
        name=name+".weight_orig",
        initializer=fluid.initializer.Constant(1.0),
        trainable=True)
    weight = helper.create_parameter(
        attr=weight_param, shape=(num_filters, c, filter_size, filter_size), dtype=dtype)
    # Spectral norm over dim=0 (the output-filter axis).
    weight_spectral_norm = fluid.layers.spectral_norm(weight, dim=0, name=name+".spectral_norm")
    weight = weight_spectral_norm
    if use_bias:
        bias_attr = fluid.ParamAttr(
            name=name + "_b", initializer=fluid.initializer.Constant(0.0))
    else:
        bias_attr = False
    conv = conv2d_with_filter(input, weight, stride, padding, bias_attr=bias_attr, name=name)
    if norm is not None:
        conv = norm_layer(
            input=conv, norm_type=norm, name=name + "_norm", is_test=is_test, affine=norm_affine)
    if activation_fn == 'relu':
        conv = fluid.layers.relu(conv, name=name + '_relu')
    elif activation_fn == 'leaky_relu':
        conv = fluid.layers.leaky_relu(
            conv, alpha=relufactor, name=name + '_leaky_relu')
    elif activation_fn == 'tanh':
        conv = fluid.layers.tanh(conv, name=name + '_tanh')
    elif activation_fn == 'sigmoid':
        conv = fluid.layers.sigmoid(conv, name=name + '_sigmoid')
    elif activation_fn is None:
        # No activation requested.
        pass
    else:
        raise NotImplementedError("activation: [%s] is not support" %
                                  activation_fn)
    return conv
def conv2d_with_filter(input,
                       filter,
                       stride=1,
                       padding=0,
                       dilation=1,
                       groups=None,
                       bias_attr=None,
                       use_cudnn=True,
                       act=None,
                       name=None):
    """
    Similar with conv2d, this is a convolution2D layers. Difference
    is filter can be token as input directly instead of setting filter size
    and number of fliters. Filter is a 4-D tensor with shape
    [num_filter, num_channel, filter_size_h, filter_size_w].
    Args:
        input (Variable): The input image with [N, C, H, W] format.
        filter(Variable): The input filter with [N, C, H, W] format.
        stride (int|tuple): The stride size. If stride is a tuple, it must
            contain two integers, (stride_H, stride_W). Otherwise, the
            stride_H = stride_W = stride. Default: stride = 1.
        padding (int|tuple): The padding size. If padding is a tuple, it must
            contain two integers, (padding_H, padding_W). Otherwise, the
            padding_H = padding_W = padding. Default: padding = 0.
        dilation (int|tuple): The dilation size. If dilation is a tuple, it must
            contain two integers, (dilation_H, dilation_W). Otherwise, the
            dilation_H = dilation_W = dilation. Default: dilation = 1.
        bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv2d.
            If it is set to False, no bias will be added to the output units.
            If it is set to None or one attribute of ParamAttr, conv2d
            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
            is not set, the bias is initialized zero. Default: None.
        use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
            library is installed. Default: True
        act (str): Activation type, if it is set to None, activation is not appended.
            Default: None
        name (str|None): A name for this layer(optional). If set None, the layer
            will be named automatically. Default: None
    Returns:
        Variable: The tensor variable storing the convolution and \
                  non-linearity activation result.
    Raises:
        ValueError: If the shapes of input, filter_size, stride, padding and
                    groups mismatch.
    Examples:
        .. code-block:: python
          data = fluid.layers.data(name='data', shape=[3, 32, 32], \
                                dtype='float32')
          filter = fluid.layers.data(name='filter',shape=[10,3,3,3], \
                                dtype='float32',append_batch_size=False)
          conv2d = fluid.layers.conv2d(input=data,
                                       filter=filter,
                                       act="relu")
    """
    helper = fluid.layer_helper.LayerHelper("conv2d_with_filter", **locals())
    num_channels = input.shape[1]
    num_filters = filter.shape[0]
    num_filter_channels = filter.shape[1]
    l_type = 'conv2d'
    # Use the depthwise kernel only for a fully grouped conv without cudnn.
    if (num_channels == groups and num_filters % num_channels == 0 and
            not use_cudnn):
        l_type = 'depthwise_conv2d'
    if groups is None:
        assert num_filter_channels == num_channels
    else:
        if num_channels % groups != 0:
            raise ValueError("num_channels must be divisible by groups.")
        if num_channels // groups != num_filter_channels:
            raise ValueError("num_filter_channels must equal to num_channels\
                              divided by groups.")
    # Normalize scalar arguments to (H, W) pairs.
    stride = fluid.layers.utils.convert_to_list(stride, 2, 'stride')
    padding = fluid.layers.utils.convert_to_list(padding, 2, 'padding')
    dilation = fluid.layers.utils.convert_to_list(dilation, 2, 'dilation')
    if not isinstance(use_cudnn, bool):
        raise ValueError("use_cudnn should be True or False")
    pre_bias = helper.create_variable_for_type_inference(dtype=input.dtype)
    # Emit the conv op directly so the externally supplied filter variable
    # is used instead of a freshly created parameter.
    helper.append_op(
        type=l_type,
        inputs={
            'Input': input,
            'Filter': filter,
        },
        outputs={"Output": pre_bias},
        attrs={
            'strides': stride,
            'paddings': padding,
            'dilations': dilation,
            'groups': groups,
            'use_cudnn': use_cudnn,
            'use_mkldnn': False
        })
    pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
    return helper.append_activation(pre_act)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
__all__ = ["VGGNet", "VGG11", "VGG13", "VGG16", "VGG19"]
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class VGGNet():
    """Frozen VGG feature extractor used for the perceptual (VGG) loss.

    All conv weights are created with trainable=False and are expected to
    be filled from a pretrained model via load_vars.
    """

    def __init__(self, layers=16, name=""):
        # layers selects the VGG variant (11/13/16/19); name prefixes every
        # parameter name — load_vars only loads vars starting with "vgg".
        self.params = train_parameters
        self.layers = layers
        self.name=name

    def net(self, input, class_dim=1000):
        """Build the conv trunk and return intermediate relu features.

        NOTE(review): only layers==16 and layers==19 return a value; for
        11/13 this falls through and returns None — confirm before using
        those variants.  class_dim is unused (no classifier head is built).
        """
        layers = self.layers
        vgg_spec = {
            11: ([1, 1, 2, 2, 2]),
            13: ([2, 2, 2, 2, 2]),
            16: ([2, 2, 3, 3, 3]),
            19: ([2, 2, 4, 4, 4])
        }
        assert layers in vgg_spec.keys(), \
            "supported layers are {} but input layer is {}".format(vgg_spec.keys(), layers)
        nums = vgg_spec[layers]
        # Each conv_block returns (first relu activation, pooled output).
        conv1, res = self.conv_block(input, 64, nums[0], name=self.name+"_conv1_")
        conv2, res = self.conv_block(res, 128, nums[1], name=self.name+"_conv2_")
        conv3, res = self.conv_block(res, 256, nums[2], name=self.name+"_conv3_")
        conv4, res = self.conv_block(res, 512, nums[3], name=self.name+"_conv4_")
        conv5, res = self.conv_block(res, 512, nums[4], name=self.name+"_conv5_")
        if self.layers == 16:
            return [conv1, conv2, conv3]
        elif self.layers == 19:
            return [conv1, conv2, conv3, conv4, conv5]

    def conv_block(self, input, num_filter, groups, name=""):
        # Stack `groups` 3x3 relu convs (frozen weights, no bias), remember
        # the first relu output, then 2x2 max-pool.
        conv = input
        for i in range(groups):
            conv = fluid.layers.conv2d(
                input=conv,
                num_filters=num_filter,
                filter_size=3,
                stride=1,
                padding=1,
                act='relu',
                param_attr=fluid.param_attr.ParamAttr(
                    name=name + str(i + 1) + "_weights", trainable=False),
                bias_attr=False
            )
            if i == 0:
                relu_res = conv
        return relu_res, fluid.layers.pool2d(
            input=conv, pool_size=2, pool_type='max', pool_stride=2)

    def load_vars(self, exe, program, pretrained_model):
        """Load pretrained values for every parameter named vgg*."""
        vars = []
        for var in program.list_vars():
            if fluid.io.is_parameter(var) and var.name.startswith("vgg"):
                vars.append(var)
                print(var.name)
        fluid.io.load_vars(exe, pretrained_model, program, vars)
# Factory functions for the VGG variants.  `name` defaults keep the original
# behavior (VGG11/13 previously took no name, i.e. ""), while now allowing a
# caller to prefix parameter names consistently with VGG16/VGG19.
def VGG11(name=""):
    """VGG-11 feature extractor (unnamed by default — see VGGNet.load_vars)."""
    model = VGGNet(layers=11, name=name)
    return model


def VGG13(name=""):
    """VGG-13 feature extractor (unnamed by default — see VGGNet.load_vars)."""
    model = VGGNet(layers=13, name=name)
    return model


def VGG16(name="vgg16"):
    """VGG-16 feature extractor with parameters prefixed by `name`."""
    model = VGGNet(layers=16, name=name)
    return model


def VGG19(name="vgg19"):
    """VGG-19 feature extractor with parameters prefixed by `name`."""
    model = VGGNet(layers=19, name=name)
    return model
# Aggressive garbage collection of intermediate tensors to cut GPU memory use.
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
# Start with a tiny GPU memory fraction and let the allocator grow on demand.
export FLAGS_fraction_of_gpu_memory_to_use=0.01
# Train SPADE on Cityscapes: resize to 1124x612, random-crop to 1024x512,
# 36 semantic label classes, single-image batches for 200 epochs on GPU 0.
CUDA_VISIBLE_DEVICES=0 python train.py --model_net SPADE --dataset cityscapes --train_list train_list --test_list val_list --crop_type Random --batch_size 1 --epoch 200 --load_height 612 --load_width 1124 --crop_height 512 --crop_width 1024 --label_nc 36
......@@ -30,7 +30,7 @@ import trainer
def train(cfg):
MODELS = [
"CGAN", "DCGAN", "Pix2pix", "CycleGAN", "StarGAN", "AttGAN", "STGAN"
"CGAN", "DCGAN", "Pix2pix", "CycleGAN", "StarGAN", "AttGAN", "STGAN", "SPADE"
]
if cfg.model_net not in MODELS:
raise NotImplementedError("{} is not support!".format(cfg.model_net))
......
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from network.SPADE_network import SPADE_model
from util import utility
import paddle.fluid as fluid
import sys
import time
import network.vgg as vgg
import pickle as pkl
import numpy as np
class GTrainer():
    """Generator-side training program for SPADE.

    Builds the generator graph, runs the shared discriminator on the
    fake/real pair and assembles the generator loss: hinge GAN loss +
    discriminator feature-matching loss + VGG perceptual loss.  Only
    parameters whose names start with "generator" are optimized.
    """

    def __init__(self, input_label, input_img, input_ins, cfg, step_per_epoch):
        """
        Args:
            input_label: one-hot semantic label map variable.
            input_img: ground-truth photo variable.
            input_ins: instance edge map variable (ignored when
                cfg.no_instance is set).
            cfg: config namespace (no_instance, lambda_feat, learning_rate,
                epoch, ...).
            step_per_epoch (int): steps per epoch, drives the lr schedule.
        """
        self.cfg = cfg
        self.program = fluid.default_main_program().clone()
        with fluid.program_guard(self.program):
            model = SPADE_model()
            input = input_label
            if not cfg.no_instance:
                input = fluid.layers.concat([input_label, input_ins], 1)
            self.fake_B = model.network_G(input, "generator", cfg=cfg)
            self.fake_B.persistable = True
            self.infer_program = self.program.clone()
            # Run D once on the concatenated fake+real batch, then split its
            # multi-scale outputs back into fake and real halves.
            fake_concat = fluid.layers.concat([input, self.fake_B], 1)
            real_concat = fluid.layers.concat([input, input_img], 1)
            fake_and_real = fluid.layers.concat([fake_concat, real_concat], 0)
            pred = model.network_D(fake_and_real, "discriminator", cfg)
            if type(pred) == list:
                self.pred_fake = []
                self.pred_real = []
                for p in pred:
                    self.pred_fake.append([tensor[:tensor.shape[0] // 2] for tensor in p])
                    self.pred_real.append([tensor[tensor.shape[0] // 2:] for tensor in p])
            else:
                self.pred_fake = pred[:pred.shape[0] // 2]
                self.pred_real = pred[pred.shape[0] // 2:]
            ### GAN loss (hinge, generator side): maximize D's score on fakes
            if isinstance(self.pred_fake, list):
                self.gan_loss = 0
                for pred_i in self.pred_fake:
                    if isinstance(pred_i, list):
                        # Only the final score map of each scale is used.
                        pred_i = pred_i[-1]
                    loss_i = -1 * fluid.layers.reduce_mean(pred_i)
                    self.gan_loss += loss_i
                self.gan_loss /= len(self.pred_fake)
            else:
                self.gan_loss = -1 * fluid.layers.reduce_mean(self.pred_fake)
            self.gan_loss.persistable = True
            ##### GAN feature-matching loss over D's intermediate features
            num_D = len(self.pred_fake)
            self.gan_feat_loss = 0.0
            for i in range(num_D):
                num_intermediate_outputs = len(self.pred_fake[i]) - 1
                for j in range(num_intermediate_outputs):
                    # BUG FIX: accumulate with '+=' — the original '=' kept
                    # only the last scale's last feature term.
                    self.gan_feat_loss += fluid.layers.reduce_mean(fluid.layers.abs(fluid.layers.elementwise_sub(
                        x=self.pred_fake[i][j], y=self.pred_real[i][j]))) * cfg.lambda_feat / num_D
            self.gan_feat_loss.persistable = True
            ######## VGG perceptual loss (frozen VGG19 features)
            # NOTE(review): cfg.lambda_vgg is never applied here — the five
            # layer weights below are hard-coded; confirm intent.
            weights = [1.0/32, 1.0/16, 1.0/8, 1.0/4, 1.0]
            self.vgg = vgg.VGG19()
            fake_vgg = self.vgg.net(self.fake_B)
            real_vgg = self.vgg.net(input_img)
            self.vgg_loss = 0.0
            for i in range(len(fake_vgg)):
                self.vgg_loss += weights[i] * fluid.layers.reduce_mean(fluid.layers.abs(fluid.layers.elementwise_sub(
                    x=fake_vgg[i], y=real_vgg[i])))
            self.vgg_loss.persistable = True
            self.g_loss = (self.gan_loss + self.gan_feat_loss + self.vgg_loss)/3
            lr = cfg.learning_rate
            # Restrict the optimizer to generator parameters only.
            vars = []
            for var in self.program.list_vars():
                if fluid.io.is_parameter(var) and var.name.startswith(
                        "generator"):
                    vars.append(var.name)
            self.param = vars
            # Constant lr for <=100 epochs, then a per-epoch decaying schedule.
            if cfg.epoch <= 100:
                optimizer = fluid.optimizer.Adam(
                    learning_rate=lr, beta1=0.5, beta2=0.999, name="net_G")
            else:
                optimizer = fluid.optimizer.Adam(
                    learning_rate=fluid.layers.piecewise_decay(
                        boundaries=[99 * step_per_epoch] + [
                            x * step_per_epoch
                            for x in range(100, cfg.epoch - 1)
                        ],
                        values=[lr] + [
                            lr * (1.0 - (x - 99.0) / 101.0)
                            for x in range(100, cfg.epoch)
                        ]),
                    beta1=0.5,
                    beta2=0.999,
                    name="net_G")
            optimizer.minimize(self.g_loss, parameter_list=vars)
class DTrainer():
    """Discriminator-side training program for SPADE.

    Rebuilds the discriminator graph on a fake/real pair and minimizes the
    two-sided hinge loss.  Only parameters whose names start with
    "discriminator" are optimized.
    """

    def __init__(self, input_label, input_img, input_ins, fake_B, cfg, step_per_epoch):
        # fake_B is the generator output fed back in as data (detached from
        # the generator's program).
        self.program = fluid.default_main_program().clone()
        lr = cfg.learning_rate
        with fluid.program_guard(self.program):
            model = SPADE_model()
            input = input_label
            if not cfg.no_instance:
                input = fluid.layers.concat([input_label, input_ins], 1)
            # Run D once on the concatenated fake+real batch, then split its
            # multi-scale outputs back into fake and real halves.
            fake_concat = fluid.layers.concat([input, fake_B], 1)
            real_concat = fluid.layers.concat([input, input_img], 1)
            fake_and_real = fluid.layers.concat([fake_concat, real_concat], 0)
            pred = model.network_D(fake_and_real, "discriminator", cfg)
            if type(pred) == list:
                self.pred_fake = []
                self.pred_real = []
                for p in pred:
                    self.pred_fake.append([tensor[:tensor.shape[0] // 2] for tensor in p])
                    self.pred_real.append([tensor[tensor.shape[0] // 2:] for tensor in p])
            else:
                self.pred_fake = pred[:pred.shape[0] // 2]
                self.pred_real = pred[pred.shape[0] // 2:]
            ##### Hinge GAN loss, fake side: mean(max(0, 1 + D(fake)))
            # expressed as -mean(min(-D(fake) - 1, 0)).
            self.gan_loss_fake = 0
            for pred_i in self.pred_fake:
                zeros = fluid.layers.fill_constant_batch_size_like(input=pred_i[-1],shape=pred_i[-1].shape,value=0,dtype='float32')
                if isinstance(pred_i, list):
                    # Only the final score map of each scale is used.
                    pred_i = pred_i[-1]
                minval = fluid.layers.elementwise_min(-1 * pred_i-1, zeros)
                loss_i = -1 * fluid.layers.reduce_mean(minval)
                self.gan_loss_fake += loss_i
            self.gan_loss_fake /= len(self.pred_fake)
            ##### Hinge GAN loss, real side: mean(max(0, 1 - D(real)))
            # expressed as -mean(min(D(real) - 1, 0)).
            self.gan_loss_real = 0
            for pred_i in self.pred_real:
                zeros = fluid.layers.fill_constant_batch_size_like(input=pred_i[-1],shape=pred_i[-1].shape,value=0,dtype='float32')
                if isinstance(pred_i, list):
                    pred_i = pred_i[-1]
                minval = fluid.layers.elementwise_min(pred_i-1, zeros)
                loss_i = -1 * fluid.layers.reduce_mean(minval)
                self.gan_loss_real += loss_i
            self.gan_loss_real /= len(self.pred_real)
            self.gan_loss_real.persistable = True
            self.gan_loss_fake.persistable = True
            self.d_loss = 0.5 * (self.gan_loss_real + self.gan_loss_fake)
            # Restrict the optimizer to discriminator parameters only.
            vars = []
            for var in self.program.list_vars():
                if fluid.io.is_parameter(var) and var.name.startswith(
                        "discriminator"):
                    vars.append(var.name)
            self.param = vars
            # Constant lr for <=100 epochs, then a per-epoch decaying schedule.
            if cfg.epoch <= 100:
                optimizer = fluid.optimizer.Adam(
                    learning_rate=lr, beta1=0.5, beta2=0.999, name="net_D")
            else:
                optimizer = fluid.optimizer.Adam(
                    learning_rate=fluid.layers.piecewise_decay(
                        boundaries=[99 * step_per_epoch] + [
                            x * step_per_epoch
                            for x in range(100, cfg.epoch - 1)
                        ],
                        values=[lr] + [
                            lr * (1.0 - (x - 99.0) / 101.0)
                            for x in range(100, cfg.epoch)
                        ]),
                    beta1=0.5,
                    beta2=0.999,
                    name="net_D")
            optimizer.minimize(self.d_loss, parameter_list=vars)
class SPADE(object):
def add_special_args(self, parser):
    """Register SPADE-specific command-line arguments on *parser*.

    Returns the parser so calls can be chained.

    NOTE(review): --no_instance uses type=bool, so any non-empty string
    argument (including "False") parses as True; only the default or an
    empty value yields False — confirm callers rely on the default only.
    """
    # Path to the pretrained VGG19 weights used by the perceptual loss.
    parser.add_argument(
        '--vgg19_pretrain',
        type=str,
        default="./VGG19_pretrained",
        help="VGG19 pretrained model for vgg loss"
    )
    # Load (resize) size and crop size, per axis.
    parser.add_argument(
        '--crop_width',
        type=int,
        default=1024,
        help="crop width for training SPADE")
    parser.add_argument(
        '--crop_height',
        type=int,
        default=512,
        help="crop height for training SPADE")
    parser.add_argument(
        '--load_width',
        type=int,
        default=1124,
        help="load width for training SPADE")
    parser.add_argument(
        '--load_height',
        type=int,
        default=612,
        help="load height for training SPADE")
    # Network architecture knobs.
    parser.add_argument(
        '--d_nlayers',
        type=int,
        default=4,
        help="num of discriminator layers for SPADE")
    parser.add_argument(
        '--label_nc',
        type=int,
        default=36,
        help="label numbers of SPADE")
    parser.add_argument(
        '--ngf',
        type=int,
        default=64,
        help="base channels of generator in SPADE")
    parser.add_argument(
        '--ndf',
        type=int,
        default=64,
        help="base channels of discriminator in SPADE")
    parser.add_argument(
        '--num_D',
        type=int,
        default=2,
        help="number of discriminators in SPADE")
    # Loss weights.
    parser.add_argument(
        '--lambda_feat',
        type=float,
        default=10,
        help="weight term of feature loss")
    parser.add_argument(
        '--lambda_vgg',
        type=float,
        default=10,
        help="weight term of vgg loss")
    parser.add_argument('--no_instance', type=bool, default=False, help="Whether to use instance label.")
    return parser
def __init__(self,
cfg=None,
train_reader=None,
test_reader=None,
batch_num=1):
self.cfg = cfg
self.train_reader = train_reader
self.test_reader = test_reader
self.batch_num = batch_num
def build_model(self):
data_shape = [-1, 3, self.cfg.crop_height, self.cfg.crop_width]
label_shape = [-1, self.cfg.label_nc, self.cfg.crop_height, self.cfg.crop_width]
edge_shape = [-1, 1, self.cfg.crop_height, self.cfg.crop_width]
input_A = fluid.layers.data(
name='input_label', shape=label_shape, dtype='float32')
input_B = fluid.layers.data(
name='input_img', shape=data_shape, dtype='float32')
input_C = fluid.layers.data(
name='input_ins', shape=edge_shape, dtype='float32')
input_fake = fluid.layers.data(
name='input_fake', shape=data_shape, dtype='float32')
gen_trainer = GTrainer(input_A, input_B, input_C, self.cfg, self.batch_num)
dis_trainer = DTrainer(input_A, input_B, input_C, input_fake, self.cfg,
self.batch_num)
py_reader = fluid.io.PyReader(
feed_list=[input_A, input_B, input_C],
capacity=4, ## batch_size * 4
iterable=True,
use_double_buffer=True)
py_reader.decorate_batch_generator(
self.train_reader,
places=fluid.cuda_places()
if self.cfg.use_gpu else fluid.cpu_places())
# prepare environment
place = fluid.CUDAPlace(0) if self.cfg.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
gen_trainer.vgg.load_vars(exe, gen_trainer.program, self.cfg.vgg19_pretrain)
if self.cfg.init_model:
utility.init_checkpoints(self.cfg, exe, gen_trainer, "net_G")
utility.init_checkpoints(self.cfg, exe, dis_trainer, "net_D")
### memory optim
build_strategy = fluid.BuildStrategy()
build_strategy.enable_inplace = False
build_strategy.sync_batch_norm = True
gen_trainer_program = fluid.CompiledProgram(
gen_trainer.program).with_data_parallel(
loss_name=gen_trainer.g_loss.name,
build_strategy=build_strategy)
dis_trainer_program = fluid.CompiledProgram(
dis_trainer.program).with_data_parallel(
loss_name=dis_trainer.d_loss.name,
build_strategy=build_strategy)
t_time = 0
for epoch_id in range(self.cfg.epoch):
batch_id = 0
for tensor in py_reader():
data_A, data_B, data_C = tensor[0]['input_A'], tensor[0]['input_B'], tensor[0]['input_C']
tensor_A = fluid.LoDTensor()
tensor_B = fluid.LoDTensor()
tensor_C = fluid.LoDTensor()
tensor_A.set(data_A, place)
tensor_B.set(data_B, place)
tensor_C.set(data_C, place)
s_time = time.time()
# optimize the generator network
g_loss_gan, g_loss_vgg, g_loss_feat, fake_B_tmp = exe.run(
gen_trainer_program,
fetch_list=[
gen_trainer.gan_loss, gen_trainer.vgg_loss, gen_trainer.gan_feat_loss,
gen_trainer.fake_B
],
feed={"input_label": tensor_A,
"input_img": tensor_B,
"input_ins": tensor_C})
# optimize the discriminator network
d_loss_real, d_loss_fake = exe.run(dis_trainer_program,
fetch_list=[
dis_trainer.gan_loss_real,
dis_trainer.gan_loss_fake
],
feed={
"input_label": tensor_A,
"input_img": tensor_B,
"input_ins": tensor_C,
"input_fake": fake_B_tmp
})
batch_time = time.time() - s_time
t_time += batch_time
if batch_id % self.cfg.print_freq == 0:
print("epoch{}: batch{}: \n\
g_loss_gan: {}; g_loss_vgg: {}; g_loss_feat: {} \n\
d_loss_real: {}; d_loss_fake: {}; \n\
Batch_time_cost: {:.2f}"
.format(epoch_id, batch_id, g_loss_gan[0], g_loss_vgg[
0], g_loss_feat[0], d_loss_real[0], d_loss_fake[0], batch_time))
sys.stdout.flush()
batch_id += 1
if self.cfg.run_test:
test_program = gen_trainer.infer_program
image_name = fluid.layers.data(
name='image_name',
shape=[self.cfg.batch_size],
dtype="int32")
test_py_reader = fluid.io.PyReader(
feed_list=[input_A, input_B, image_name],
capacity=4, ## batch_size * 4
iterable=True,
use_double_buffer=True)
test_py_reader.decorate_batch_generator(
self.test_reader,
places=fluid.cuda_places()
if self.cfg.use_gpu else fluid.cpu_places())
utility.save_test_image(epoch_id, self.cfg, exe, place,
test_program, gen_trainer,
test_py_reader)
if self.cfg.save_checkpoints:
utility.checkpoints(epoch_id, self.cfg, exe, gen_trainer,
"net_G")
utility.checkpoints(epoch_id, self.cfg, exe, dis_trainer,
"net_D")
......@@ -170,6 +170,30 @@ def save_test_image(epoch,
res_inputB = Image.fromarray(((input_B_temp + 1) * 127.5).astype(
np.uint8))
res_inputB.save(os.path.join(out_path, inputB_name))
elif cfg.model_net == "SPADE":
for data in A_test_reader():
data_A, data_B, data_C, name = data[0]['input_A'], data[0]['input_B'], data[0]['input_C'], data[0]['image_name']
tensor_A = fluid.LoDTensor()
tensor_B = fluid.LoDTensor()
tensor_C = fluid.LoDTensor()
tensor_A.set(data_A, place)
tensor_B.set(data_B, place)
tensor_C.set(data_C, place)
fake_B_temp = exe.run(
test_program,
fetch_list=[g_trainer.fake_B],
feed={"input_label": tensor_A,
"input_img": tensor_B,
"input_ins": tensor_C})
fake_B_temp = np.squeeze(fake_B_temp[0]).transpose([1, 2, 0])
input_B_temp = np.squeeze(data_B[0]).transpose([1, 2, 0])
res_fakeB = Image.fromarray(((fake_B_temp + 1) * 127.5).astype(
np.uint8))
res_fakeB.save(out_path+"/fakeB_"+str(epoch)+"_"+name)
res_real = Image.fromarray(((input_B_temp + 1) * 127.5).astype(
np.uint8))
res_real.save(out_path+"/real_"+str(epoch)+"_"+name)
elif cfg.model_net == "StarGAN":
for data in A_test_reader():
real_img, label_org, label_trg, image_name = data[0][
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册