From bf0899a5b62bf39d987e7ed5d947a6bb3a61aae9 Mon Sep 17 00:00:00 2001 From: zhumanyu <259571082@qq.com> Date: Tue, 17 Sep 2019 11:58:50 +0800 Subject: [PATCH] Spade (#3343) * add SPADE --- PaddleCV/PaddleGAN/data_reader.py | 162 +++++++- PaddleCV/PaddleGAN/infer.py | 44 ++- PaddleCV/PaddleGAN/network/SPADE_network.py | 157 ++++++++ PaddleCV/PaddleGAN/network/base_network.py | 188 +++++++++- PaddleCV/PaddleGAN/network/vgg.py | 97 +++++ PaddleCV/PaddleGAN/scripts/run_SPADE.sh | 4 + PaddleCV/PaddleGAN/train.py | 2 +- PaddleCV/PaddleGAN/trainer/SPADE.py | 386 ++++++++++++++++++++ PaddleCV/PaddleGAN/util/utility.py | 24 ++ 9 files changed, 1048 insertions(+), 16 deletions(-) create mode 100644 PaddleCV/PaddleGAN/network/SPADE_network.py create mode 100644 PaddleCV/PaddleGAN/network/vgg.py create mode 100644 PaddleCV/PaddleGAN/scripts/run_SPADE.sh create mode 100644 PaddleCV/PaddleGAN/trainer/SPADE.py diff --git a/PaddleCV/PaddleGAN/data_reader.py b/PaddleCV/PaddleGAN/data_reader.py index 64a666b1..fd5baf3a 100644 --- a/PaddleCV/PaddleGAN/data_reader.py +++ b/PaddleCV/PaddleGAN/data_reader.py @@ -23,6 +23,7 @@ import struct import os import paddle import random +import sys def RandomCrop(img, crop_w, crop_h): @@ -58,6 +59,19 @@ def get_preprocess_param(load_size, crop_size): } +def get_preprocess_param(load_width, load_height, crop_width, crop_height): + if crop_width == load_width: + x = 0 + y = 0 + else: + x = np.random.randint(0, np.maximum(0, load_width - crop_width)) + y = np.random.randint(0, np.maximum(0, load_height - crop_height)) + flip = np.random.rand() > 0.5 + return { + "crop_pos": (x, y), + "flip": flip} + + class reader_creator(object): ''' read and preprocess dataset''' @@ -94,7 +108,7 @@ class reader_creator(object): if self.shuffle: np.random.shuffle(self.lines) - + for i, file in enumerate(self.lines): file = file.strip('\n\r\t ') self.name2id[os.path.basename(file)] = i @@ -209,6 +223,125 @@ class pair_reader_creator(reader_creator): return reader 
+class triplex_reader_creator(reader_creator): + ''' read and preprocess dataset''' + + def __init__(self, + image_dir, + list_filename, + shuffle=False, + batch_size=1, + mode="TRAIN"): + super(triplex_reader_creator, self).__init__( + image_dir, + list_filename, + shuffle=shuffle, + batch_size=batch_size, + mode=mode) + + def make_reader(self, args, return_name=False): + print(self.image_dir, self.list_filename) + print("files length:", len(self.lines)) + + def reader(): + batch_out_1 = [] + batch_out_2 = [] + batch_out_3 = [] + batch_out_name = [] + if self.shuffle: + np.random.shuffle(self.lines) + for line in self.lines: + files = line.strip('\n\r\t ').split('\t') + if len(files) != 3: + print("files is not equal to 3!") + sys.exit(-1) + #label image instance + img1 = Image.open(os.path.join(self.image_dir, files[ + 0])) + img2 = Image.open(os.path.join(self.image_dir, files[ + 1])).convert('RGB') + if not args.no_instance: + img3 = Image.open(os.path.join(self.image_dir, files[ + 2])) + + if self.mode == "TRAIN": + param = get_preprocess_param(args.load_width, args.load_height, + args.crop_width, args.crop_height) + img1 = img1.resize((args.load_width, args.load_height), + Image.NEAREST) + img2 = img2.resize((args.load_width, args.load_height), + Image.BICUBIC) + if not args.no_instance: + img3 = img3.resize((args.load_width, args.load_height), + Image.NEAREST) + if args.crop_type == 'Centor': + img1 = CentorCrop(img1, args.crop_width, args.crop_height) + img2 = CentorCrop(img2, args.crop_width, args.crop_height) + if not args.no_instance: + img3 = CentorCrop(img3, args.crop_width, args.crop_height) + elif args.crop_type == 'Random': + x = param['crop_pos'][0] + y = param['crop_pos'][1] + img1 = img1.crop( + (x, y, x + args.crop_width, y + args.crop_height)) + img2 = img2.crop( + (x, y, x + args.crop_width, y + args.crop_height)) + if not args.no_instance: + img3 = img3.crop( + (x, y, x + args.crop_width, y + args.crop_height)) + else: + img1 = 
img1.resize((args.crop_width, args.crop_height), + Image.NEAREST) + img2 = img2.resize((args.crop_width, args.crop_height), + Image.BICUBIC) + if not args.no_instance: + img3 = img3.resize((args.crop_width, args.crop_height), + Image.NEAREST) + + img1 = np.array(img1) + index = img1[np.newaxis, :,:] + input_label = np.zeros((args.label_nc, index.shape[1], index.shape[2])) + np.put_along_axis(input_label,index,1.0,0) + img1 = input_label + img2 = (np.array(img2).astype('float32') / 255.0 - 0.5) / 0.5 + img2 = img2.transpose([2, 0, 1]) + if not args.no_instance: + img3 = np.array(img3)[:, :, np.newaxis] + img3 = img3.transpose([2, 0, 1]) + ###extracte edge from instance + edge = np.zeros(img3.shape) + edge = edge.astype('int8') + edge[:, :, 1:] = edge[:, :, 1:] | (img3[:, :, 1:] != img3[:, :, :-1]) + edge[:, :, :-1] = edge[:, :, :-1] | (img3[:, :, 1:] != img3[:, :, :-1]) + edge[:, 1:, :] = edge[:, 1:, :] | (img3[:, 1:, :] != img3[:, :-1, :]) + edge[:, :-1, :] = edge[:, :-1, :] | (img3[:, 1:, :] != img3[:, :-1, :]) + img3 = edge.astype('float32') + ###end extracte + batch_out_1.append(img1) + batch_out_2.append(img2) + if not args.no_instance: + batch_out_3.append(img3) + if return_name: + batch_out_name.append(os.path.basename(files[0])) + if len(batch_out_1) == self.batch_size: + if return_name: + if not args.no_instance: + yield batch_out_1, batch_out_2, batch_out_3, batch_out_name + else: + yield batch_out_1, batch_out_2, batch_out_name + batch_out_name = [] + else: + if not args.no_instance: + yield batch_out_1, batch_out_2, batch_out_3 + else: + yield batch_out_1, batch_out_2 + batch_out_1 = [] + batch_out_2 = [] + batch_out_3 = [] + + return reader + + class celeba_reader_creator(reader_creator): ''' read and preprocess dataset''' @@ -461,6 +594,33 @@ class data_reader(object): mode="TEST") reader_test = test_reader.make_reader( self.cfg, return_name=True) + batch_num = train_reader.len() + reader = train_reader.make_reader(self.cfg) + return reader, 
reader_test, batch_num + elif self.cfg.model_net in ['SPADE']: + dataset_dir = os.path.join(self.cfg.data_dir, self.cfg.dataset) + train_list = os.path.join(dataset_dir, 'train.txt') + if self.cfg.train_list is not None: + train_list = self.cfg.train_list + train_reader = triplex_reader_creator( + image_dir=dataset_dir, + list_filename=train_list, + shuffle=self.cfg.shuffle, + batch_size=self.cfg.batch_size, + mode="TRAIN") + reader_test = None + if self.cfg.run_test: + test_list = os.path.join(dataset_dir, "test.txt") + if self.cfg.test_list is not None: + test_list = self.cfg.test_list + test_reader = triplex_reader_creator( + image_dir=dataset_dir, + list_filename=test_list, + shuffle=False, + batch_size=1, + mode="TEST") + reader_test = test_reader.make_reader( + self.cfg, return_name=True) id2name = test_reader.id2name batch_num = train_reader.len() reader = train_reader.make_reader(self.cfg) diff --git a/PaddleCV/PaddleGAN/infer.py b/PaddleCV/PaddleGAN/infer.py index fc906e17..9935e6f2 100644 --- a/PaddleCV/PaddleGAN/infer.py +++ b/PaddleCV/PaddleGAN/infer.py @@ -26,7 +26,7 @@ import numpy as np import imageio import glob from util.config import add_arguments, print_arguments -from data_reader import celeba_reader_creator, reader_creator +from data_reader import celeba_reader_creator, reader_creator, triplex_reader_creator from util.utility import check_attribute_conflict, check_gpu, save_batch_image from util import utility import copy @@ -44,13 +44,19 @@ add_arg('init_model', str, None, "The init model file of d add_arg('output', str, "./infer_result", "The directory the infer result to be saved to.") add_arg('input_style', str, "A", "The style of the input, A or B") add_arg('norm_type', str, "batch_norm", "Which normalization to used") +add_arg('crop_type', str, None, "Which crop type to use") add_arg('use_gpu', bool, True, "Whether to use GPU to train.") add_arg('dropout', bool, False, "Whether to use dropout") add_arg('g_base_dims', int, 64, "Base 
channels in CycleGAN generator") +add_arg('ngf', int, 64, "Base channels in SPADE generator") add_arg('c_dim', int, 13, "the size of attrs") add_arg('use_gru', bool, False, "Whether to use GRU") add_arg('crop_size', int, 178, "crop size") add_arg('image_size', int, 128, "image size") +add_arg('load_height', int, 128, "image size") +add_arg('load_width', int, 128, "image size") +add_arg('crop_height', int, 128, "height of crop size") +add_arg('crop_width', int, 128, "width of crop size") add_arg('selected_attrs', str, "Bald,Bangs,Black_Hair,Blond_Hair,Brown_Hair,Bushy_Eyebrows,Eyeglasses,Male,Mouth_Slightly_Open,Mustache,No_Beard,Pale_Skin,Young", "the attributes we selected to change") @@ -60,6 +66,8 @@ add_arg('dataset_dir', str, "./data/celeba/", "the datase add_arg('n_layers', int, 5, "default layers in generotor") add_arg('gru_n_layers', int, 4, "default layers of GRU in generotor") add_arg('noise_size', int, 100, "the noise dimension") +add_arg('label_nc', int, 36, "label numbers of SPADE") +add_arg('no_instance', type=bool, default=False, help="Whether to use instance label.") # yapf: enable @@ -159,6 +167,13 @@ def infer(args): from network.DCGAN_network import DCGAN_model model = DCGAN_model(args.n_samples) fake = model.network_G(noise, name="G") + elif args.model_net == 'SPADE': + from network.SPADE_network import SPADE_model + model = SPADE_model() + input_label = fluid.layers.data(name='input_label', shape=data_shape, dtype='float32') + input_ins = fluid.layers.data(name='input_ins', shape=data_shape, dtype='float32') + input_ = fluid.layers.concat([input_label, input_ins], 1) + fake = model.network_G(input_, "generator", cfg=args, is_test=True) else: raise NotImplementedError("model_net {} is not support".format( args.model_net)) @@ -294,6 +309,33 @@ def infer(args): imageio.imwrite( os.path.join(args.output, "fake_" + image_name), ( (fake_temp + 1) * 127.5).astype(np.uint8)) + elif args.model_net == 'SPADE': + test_reader = triplex_reader_creator( + 
image_dir=args.dataset_dir, + list_filename=args.test_list, + shuffle=False, + batch_size=1, + mode="TEST") + reader_test = test_reader.make_reader( + args, return_name=True) + for data in zip(reader_test()): + data_A, data_B, data_C, name = data[0] + name = name[0] + tensor_A = fluid.LoDTensor() + tensor_C = fluid.LoDTensor() + tensor_A.set(data_A, place) + tensor_C.set(data_C, place) + fake_B_temp = exe.run( + fetch_list=[fake.name], + feed={"input_label": tensor_A, + "input_ins": tensor_C}) + fake_B_temp = np.squeeze(fake_B_temp[0]).transpose([1, 2, 0]) + input_B_temp = np.squeeze(data_B[0]).transpose([1, 2, 0]) + + imageio.imwrite(args.output + "/fakeB_" + "_" + name, ( + (fake_B_temp + 1) * 127.5).astype(np.uint8)) + imageio.imwrite(args.output + "/real_" + "_" + name, ( + (input_B_temp + 1) * 127.5).astype(np.uint8)) elif args.model_net == 'CGAN': noise_data = np.random.uniform( diff --git a/PaddleCV/PaddleGAN/network/SPADE_network.py b/PaddleCV/PaddleGAN/network/SPADE_network.py new file mode 100644 index 00000000..32f89b70 --- /dev/null +++ b/PaddleCV/PaddleGAN/network/SPADE_network.py @@ -0,0 +1,157 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from .base_network import conv2d, deconv2d, norm_layer, conv2d_spectral_norm +import paddle.fluid as fluid +import numpy as np + +class SPADE_model(object): + def __init__(self): + pass + + def network_G(self, input, name, cfg, is_test=False): + nf = cfg.ngf + num_up_layers = 5 + sw = cfg.crop_width // (2**num_up_layers) + sh = cfg.crop_height // (2**num_up_layers) + seg = input + x = fluid.layers.resize_nearest(seg, out_shape=(sh, sw), align_corners=False) + x = conv2d(x, 16*nf,3,padding=1,name=name + "_fc",use_bias=True, is_test=is_test) + x = self.SPADEResnetBlock(x, seg, 16 * nf, 16 * nf, cfg, name=name+"_head_0", is_test=is_test) + x = fluid.layers.resize_nearest(x, scale=2, align_corners=False) + x = self.SPADEResnetBlock(x, seg, 16 * nf, 16 * nf, cfg, name=name+"_G_middle_0", is_test=is_test) + x = self.SPADEResnetBlock(x, seg, 16 * nf, 16 * nf, cfg, name=name+"_G_middle_1", is_test=is_test) + x = fluid.layers.resize_nearest(x, scale=2, align_corners=False) + + x = self.SPADEResnetBlock(x, seg, 16 * nf, 8 * nf, cfg, name=name+"_up_0", is_test=is_test) + x = fluid.layers.resize_nearest(x, scale=2, align_corners=False) + x = self.SPADEResnetBlock(x, seg, 8 * nf, 4 * nf, cfg, name=name+"_up_1", is_test=is_test) + x = fluid.layers.resize_nearest(x, scale=2, align_corners=False) + x = self.SPADEResnetBlock(x, seg, 4 * nf, 2 * nf, cfg, name=name+"_up_2", is_test=is_test) + x = fluid.layers.resize_nearest(x, scale=2, align_corners=False) + x = self.SPADEResnetBlock(x, seg, 2 * nf, 1 * nf, cfg, name=name+"_up_3", is_test=is_test) + x = fluid.layers.leaky_relu( + x, alpha=0.2, name=name + '_conv_img_leaky_relu') + x = conv2d(x, 3,3,padding=1,name=name + "_conv_img",use_bias=True, is_test=is_test) + x = fluid.layers.tanh(x) + + return x + def SPADEResnetBlock(self, x, seg, fin, fout, opt, name, is_test=False): + learn_shortcut = (fin != fout) + fmiddle 
= min(fin, fout) + semantic_nc = opt.label_nc + (0 if opt.no_instance else 1) + if learn_shortcut: + x_s = self.SPADE(x, seg, fin, name=name+".norm_s", is_test=is_test) + x_s = conv2d_spectral_norm(x_s, fout,1,use_bias=False, name=name + ".conv_s", is_test=is_test) + else: + x_s = x + dx = self.SPADE(x, seg, fin, name=name+".norm_0", is_test=is_test) + dx = fluid.layers.leaky_relu(dx, alpha=0.2, name=name+'_leaky_relu0') + dx = conv2d_spectral_norm(dx, fmiddle,3,padding=1,name=name + ".conv_0", use_bias=True, is_test=is_test) + + dx = self.SPADE(dx, seg, fmiddle, name=name+".norm_1", is_test=is_test) + dx = fluid.layers.leaky_relu(dx, alpha=0.2, name=name+'_leaky_relu1') + dx = conv2d_spectral_norm(dx, fout,3,padding=1,name=name + ".conv_1", use_bias=True, is_test=is_test) + + output = dx + x_s + return output + + + def SPADE(self, input, seg_map, norm_nc, name, is_test=False): + nhidden = 128 + ks = 3 + pw = ks // 2 + seg_map = fluid.layers.resize_nearest(seg_map, out_shape=input.shape[2:], align_corners=False) + actv = conv2d(seg_map, nhidden, ks, padding=pw, activation_fn='relu', name=name+".mlp_shared.0", use_bias=True) + gamma = conv2d(actv, norm_nc, ks, padding=pw, name=name+".mlp_gamma", use_bias=True) + beta = conv2d(actv, norm_nc, ks, padding=pw, name=name+".mlp_beta", use_bias=True) + param_attr = fluid.ParamAttr( + name=name + ".param_free_norm.weight", + initializer=fluid.initializer.Constant(value=1.0), trainable=False) + bias_attr = fluid.ParamAttr( + name=name+".param_free_norm.bias", initializer=fluid.initializer.Constant(0.0), trainable=False) + + norm = fluid.layers.batch_norm(input=input, name=name, param_attr=param_attr, + bias_attr=bias_attr, moving_mean_name=name+".param_free_norm.running_mean", moving_variance_name=name+".param_free_norm.running_var", is_test=is_test) + out = norm * (1 + gamma) + beta + return out + + + def network_D(self, input, name, cfg): + num_D = 2 + result = [] + for i in range(num_D): + out = 
build_discriminator_Nlayers(input, name=name+"_%d"%i) + result.append(out) + input = fluid.layers.pool2d(input, pool_size=3, pool_type="avg", pool_stride=2, pool_padding=1, name=name+"_pool%d"%i) + + return result + + + +def build_discriminator_Nlayers(input, + name="discriminator", + d_nlayers=4, + d_base_dims=64, + norm_type='instance_norm'): + kw = 4 + padw = int(np.ceil((kw - 1.0) / 2)) + nf = d_base_dims + res_list = [] + res1 = conv2d( + input, + nf, + kw, + 2, + 0.02, + 1, + name=name + ".model0.0", + activation_fn='leaky_relu', + relufactor=0.2, + use_bias=True) + d_dims = d_base_dims + res_list.append(res1) + for i in range(1, d_nlayers): + conv_name = name + ".model{}.0.0".format(i) + nf = min(nf*2, 512) + stride = 1 if i == d_nlayers - 1 else 2 + dis_output = conv2d_spectral_norm( + res_list[-1], + nf, + kw, + stride, + 0.02, + 1, + name=conv_name, + norm=norm_type, + activation_fn='leaky_relu', + relufactor=0.2, + use_bias=False, norm_affine=False) + res_list.append(dis_output) + o_c4 = conv2d( + res_list[-1], + 1, + 4, + 1, + 0.02, + 1, + name + ".model{}.0".format(d_nlayers), + use_bias=True) + res_list.append(o_c4) + return res_list + diff --git a/PaddleCV/PaddleGAN/network/base_network.py b/PaddleCV/PaddleGAN/network/base_network.py index 6141811e..55622f4c 100644 --- a/PaddleCV/PaddleGAN/network/base_network.py +++ b/PaddleCV/PaddleGAN/network/base_network.py @@ -34,12 +34,18 @@ def cal_padding(img_size, stride, filter_size, dilation=1): return out_size // 2, out_size - out_size // 2 -def norm_layer(input, norm_type='batch_norm', name=None, is_test=False): +def norm_layer(input, norm_type='batch_norm', name=None, is_test=False, affine=True): if norm_type == 'batch_norm': - param_attr = fluid.ParamAttr( - name=name + '_w', initializer=fluid.initializer.Constant(1.0)) - bias_attr = fluid.ParamAttr( - name=name + '_b', initializer=fluid.initializer.Constant(value=0.0)) + if affine == True: + param_attr = fluid.ParamAttr( + name=name + '_w', 
initializer=fluid.initializer.Constant(1.0)) + bias_attr = fluid.ParamAttr( + name=name + '_b', initializer=fluid.initializer.Constant(value=0.0)) + else: + param_attr = fluid.ParamAttr( + name=name + '_w', initializer=fluid.initializer.Constant(1.0), trainable=False) + bias_attr = fluid.ParamAttr( + name=name + '_b', initializer=fluid.initializer.Constant(value=0.0), trainable=False) return fluid.layers.batch_norm( input, param_attr=param_attr, @@ -58,14 +64,24 @@ def norm_layer(input, norm_type='batch_norm', name=None, is_test=False): if name is not None: scale_name = name + "_scale" offset_name = name + "_offset" - scale_param = fluid.ParamAttr( - name=scale_name, - initializer=fluid.initializer.Constant(1.0), - trainable=True) - offset_param = fluid.ParamAttr( - name=offset_name, - initializer=fluid.initializer.Constant(0.0), - trainable=True) + if affine: + scale_param = fluid.ParamAttr( + name=scale_name, + initializer=fluid.initializer.Constant(1.0), + trainable=True) + offset_param = fluid.ParamAttr( + name=offset_name, + initializer=fluid.initializer.Constant(0.0), + trainable=True) + else: + scale_param = fluid.ParamAttr( + name=scale_name, + initializer=fluid.initializer.Constant(1.0), + trainable=False) + offset_param = fluid.ParamAttr( + name=offset_name, + initializer=fluid.initializer.Constant(0.0), + trainable=False) scale = helper.create_parameter( attr=scale_param, shape=input.shape[1:2], dtype=dtype) offset = helper.create_parameter( @@ -375,3 +391,149 @@ def conv_and_pool(x, num_filters, name, stddev=0.02, act=None): bias_attr=bias_attr, act=act) return out +def conv2d_spectral_norm(input, + num_filters=64, + filter_size=7, + stride=1, + stddev=0.02, + padding=0, + name="conv2d_spectral_norm", + norm=None, + activation_fn=None, + relufactor=0.0, + use_bias=False, + padding_type=None, + initial="normal", + is_test=False, norm_affine=True): + b, c, h, w = input.shape + height = num_filters + width = c * filter_size * filter_size + helper = 
fluid.layer_helper.LayerHelper("conv2d_spectral_norm", **locals()) + dtype = helper.input_dtype() + weight_param = fluid.ParamAttr( + name=name+".weight_orig", + initializer=fluid.initializer.Constant(1.0), + trainable=True) + weight = helper.create_parameter( + attr=weight_param, shape=(num_filters, c, filter_size, filter_size), dtype=dtype) + weight_spectral_norm = fluid.layers.spectral_norm(weight, dim=0, name=name+".spectral_norm") + weight = weight_spectral_norm + if use_bias: + bias_attr = fluid.ParamAttr( + name=name + "_b", initializer=fluid.initializer.Constant(0.0)) + else: + bias_attr = False + conv = conv2d_with_filter(input, weight, stride, padding, bias_attr=bias_attr, name=name) + if norm is not None: + conv = norm_layer( + input=conv, norm_type=norm, name=name + "_norm", is_test=is_test, affine=norm_affine) + if activation_fn == 'relu': + conv = fluid.layers.relu(conv, name=name + '_relu') + elif activation_fn == 'leaky_relu': + conv = fluid.layers.leaky_relu( + conv, alpha=relufactor, name=name + '_leaky_relu') + elif activation_fn == 'tanh': + conv = fluid.layers.tanh(conv, name=name + '_tanh') + elif activation_fn == 'sigmoid': + conv = fluid.layers.sigmoid(conv, name=name + '_sigmoid') + elif activation_fn == None: + conv = conv + else: + raise NotImplementedError("activation: [%s] is not support" % + activation_fn) + return conv + + +def conv2d_with_filter(input, + filter, + stride=1, + padding=0, + dilation=1, + groups=None, + bias_attr=None, + use_cudnn=True, + act=None, + name=None): + """ + Similar with conv2d, this is a convolution2D layers. Difference + is filter can be token as input directly instead of setting filter size + and number of fliters. Filter is a 4-D tensor with shape + [num_filter, num_channel, filter_size_h, filter_size_w]. + Args: + input (Variable): The input image with [N, C, H, W] format. + filter(Variable): The input filter with [N, C, H, W] format. + stride (int|tuple): The stride size. 
If stride is a tuple, it must + contain two integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. Default: stride = 1. + padding (int|tuple): The padding size. If padding is a tuple, it must + contain two integers, (padding_H, padding_W). Otherwise, the + padding_H = padding_W = padding. Default: padding = 0. + dilation (int|tuple): The dilation size. If dilation is a tuple, it must + contain two integers, (dilation_H, dilation_W). Otherwise, the + dilation_H = dilation_W = dilation. Default: dilation = 1. + bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv2d. + If it is set to False, no bias will be added to the output units. + If it is set to None or one attribute of ParamAttr, conv2d + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. Default: None. + use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + act (str): Activation type, if it is set to None, activation is not appended. + Default: None + name (str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Default: None + Returns: + Variable: The tensor variable storing the convolution and \ + non-linearity activation result. + Raises: + ValueError: If the shapes of input, filter_size, stride, padding and + groups mismatch. + Examples: + .. 
code-block:: python + data = fluid.layers.data(name='data', shape=[3, 32, 32], \ + dtype='float32') + filter = fluid.layers.data(name='filter',shape=[10,3,3,3], \ + dtype='float32',append_batch_size=False) + conv2d = fluid.layers.conv2d(input=data, + filter=filter, + act="relu") + """ + helper = fluid.layer_helper.LayerHelper("conv2d_with_filter", **locals()) + num_channels = input.shape[1] + num_filters = filter.shape[0] + num_filter_channels = filter.shape[1] + l_type = 'conv2d' + if (num_channels == groups and num_filters % num_channels == 0 and + not use_cudnn): + l_type = 'depthwise_conv2d' + if groups is None: + assert num_filter_channels == num_channels + else: + if num_channels % groups != 0: + raise ValueError("num_channels must be divisible by groups.") + if num_channels // groups != num_filter_channels: + raise ValueError("num_filter_channels must equal to num_channels\ + divided by groups.") + stride = fluid.layers.utils.convert_to_list(stride, 2, 'stride') + padding = fluid.layers.utils.convert_to_list(padding, 2, 'padding') + dilation = fluid.layers.utils.convert_to_list(dilation, 2, 'dilation') + if not isinstance(use_cudnn, bool): + raise ValueError("use_cudnn should be True or False") + pre_bias = helper.create_variable_for_type_inference(dtype=input.dtype) + helper.append_op( + type=l_type, + inputs={ + 'Input': input, + 'Filter': filter, + }, + outputs={"Output": pre_bias}, + attrs={ + 'strides': stride, + 'paddings': padding, + 'dilations': dilation, + 'groups': groups, + 'use_cudnn': use_cudnn, + 'use_mkldnn': False + }) + pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2) + return helper.append_activation(pre_act) diff --git a/PaddleCV/PaddleGAN/network/vgg.py b/PaddleCV/PaddleGAN/network/vgg.py new file mode 100644 index 00000000..0ae578fc --- /dev/null +++ b/PaddleCV/PaddleGAN/network/vgg.py @@ -0,0 +1,97 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import 
paddle +import paddle.fluid as fluid + +__all__ = ["VGGNet", "VGG11", "VGG13", "VGG16", "VGG19"] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class VGGNet(): + def __init__(self, layers=16, name=""): + self.params = train_parameters + self.layers = layers + self.name=name + + def net(self, input, class_dim=1000): + layers = self.layers + vgg_spec = { + 11: ([1, 1, 2, 2, 2]), + 13: ([2, 2, 2, 2, 2]), + 16: ([2, 2, 3, 3, 3]), + 19: ([2, 2, 4, 4, 4]) + } + assert layers in vgg_spec.keys(), \ + "supported layers are {} but input layer is {}".format(vgg_spec.keys(), layers) + + nums = vgg_spec[layers] + conv1, res = self.conv_block(input, 64, nums[0], name=self.name+"_conv1_") + conv2, res = self.conv_block(res, 128, nums[1], name=self.name+"_conv2_") + conv3, res = self.conv_block(res, 256, nums[2], name=self.name+"_conv3_") + conv4, res = self.conv_block(res, 512, nums[3], name=self.name+"_conv4_") + conv5, res = self.conv_block(res, 512, nums[4], name=self.name+"_conv5_") + + if self.layers == 16: + return [conv1, conv2, conv3] + elif self.layers == 19: + return [conv1, conv2, conv3, conv4, conv5] + + def conv_block(self, input, num_filter, groups, name=""): + conv = input + for i in range(groups): + conv = fluid.layers.conv2d( + input=conv, + num_filters=num_filter, + filter_size=3, + stride=1, + padding=1, + act='relu', + param_attr=fluid.param_attr.ParamAttr( + name=name + str(i + 1) + "_weights", trainable=False), + bias_attr=False + ) + if i == 0: + relu_res = conv + return relu_res, fluid.layers.pool2d( + input=conv, pool_size=2, pool_type='max', pool_stride=2) + + + def load_vars(self, exe, program, pretrained_model): + vars = [] + for var in program.list_vars(): + if fluid.io.is_parameter(var) and var.name.startswith("vgg"): 
+ vars.append(var) + print(var.name) + fluid.io.load_vars(exe, pretrained_model, program, vars) + + +def VGG11(): + model = VGGNet(layers=11) + return model + + +def VGG13(): + model = VGGNet(layers=13) + return model + + +def VGG16(): + model = VGGNet(layers=16, name="vgg16") + return model + + +def VGG19(name="vgg19"): + model = VGGNet(layers=19, name=name) + return model diff --git a/PaddleCV/PaddleGAN/scripts/run_SPADE.sh b/PaddleCV/PaddleGAN/scripts/run_SPADE.sh new file mode 100644 index 00000000..3833dc7d --- /dev/null +++ b/PaddleCV/PaddleGAN/scripts/run_SPADE.sh @@ -0,0 +1,4 @@ +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_fraction_of_gpu_memory_to_use=0.01 +CUDA_VISIBLE_DEVICES=0 python train.py --model_net SPADE --dataset cityscapes --train_list train_list --test_list val_list --crop_type Random --batch_size 1 --epoch 200 --load_height 612 --load_width 1124 --crop_height 512 --crop_width 1024 --label_nc 36 diff --git a/PaddleCV/PaddleGAN/train.py b/PaddleCV/PaddleGAN/train.py index b40ffa1b..be10d4ac 100644 --- a/PaddleCV/PaddleGAN/train.py +++ b/PaddleCV/PaddleGAN/train.py @@ -30,7 +30,7 @@ import trainer def train(cfg): MODELS = [ - "CGAN", "DCGAN", "Pix2pix", "CycleGAN", "StarGAN", "AttGAN", "STGAN" + "CGAN", "DCGAN", "Pix2pix", "CycleGAN", "StarGAN", "AttGAN", "STGAN", "SPADE" ] if cfg.model_net not in MODELS: raise NotImplementedError("{} is not support!".format(cfg.model_net)) diff --git a/PaddleCV/PaddleGAN/trainer/SPADE.py b/PaddleCV/PaddleGAN/trainer/SPADE.py new file mode 100644 index 00000000..7ba90fae --- /dev/null +++ b/PaddleCV/PaddleGAN/trainer/SPADE.py @@ -0,0 +1,386 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. 
+#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from network.SPADE_network import SPADE_model +from util import utility +import paddle.fluid as fluid +import sys +import time +import network.vgg as vgg +import pickle as pkl +import numpy as np +class GTrainer(): + def __init__(self, input_label, input_img, input_ins, cfg, step_per_epoch): + self.cfg = cfg + self.program = fluid.default_main_program().clone() + with fluid.program_guard(self.program): + model = SPADE_model() + input = input_label + if not cfg.no_instance: + input = fluid.layers.concat([input_label, input_ins], 1) + self.fake_B = model.network_G(input, "generator", cfg=cfg) + self.fake_B.persistable = True + self.infer_program = self.program.clone() + fake_concat = fluid.layers.concat([input, self.fake_B], 1) + real_concat = fluid.layers.concat([input, input_img], 1) + fake_and_real = fluid.layers.concat([fake_concat, real_concat], 0) + pred = model.network_D(fake_and_real, "discriminator", cfg) + if type(pred) == list: + self.pred_fake = [] + self.pred_real = [] + for p in pred: + self.pred_fake.append([tensor[:tensor.shape[0] // 2] for tensor in p]) + self.pred_real.append([tensor[tensor.shape[0] // 2:] for tensor in p]) + else: + self.pred_fake = pred[:pred.shape[0] // 2] + self.pred_real = pred[pred.shape[0] // 2:] + + ###GAN Loss hinge + if isinstance(self.pred_fake, list): + self.gan_loss = 0 + for pred_i in self.pred_fake: + if isinstance(pred_i, list): + pred_i = pred_i[-1] + loss_i = -1 * fluid.layers.reduce_mean(pred_i) + 
self.gan_loss += loss_i + self.gan_loss /= len(self.pred_fake) + else: + self.gan_loss = -1 * fluid.layers.reduce_mean(self.pred_fake) + self.gan_loss.persistable = True + #####GAN Feat loss + num_D = len(self.pred_fake) + self.gan_feat_loss = 0.0 + for i in range(num_D): + num_intermediate_outputs = len(self.pred_fake[i]) - 1 + for j in range(num_intermediate_outputs): + self.gan_feat_loss += fluid.layers.reduce_mean(fluid.layers.abs(fluid.layers.elementwise_sub( + x=self.pred_fake[i][j], y=self.pred_real[i][j]))) * cfg.lambda_feat / num_D + self.gan_feat_loss.persistable = True + ########VGG Feat loss + weights = [1.0/32, 1.0/16, 1.0/8, 1.0/4, 1.0] + self.vgg = vgg.VGG19() + fake_vgg = self.vgg.net(self.fake_B) + real_vgg = self.vgg.net(input_img) + self.vgg_loss = 0.0 + for i in range(len(fake_vgg)): + self.vgg_loss += weights[i] * fluid.layers.reduce_mean(fluid.layers.abs(fluid.layers.elementwise_sub( + x=fake_vgg[i], y=real_vgg[i]))) + self.vgg_loss.persistable = True + self.g_loss = (self.gan_loss + self.gan_feat_loss + self.vgg_loss)/3 + lr = cfg.learning_rate + vars = [] + for var in self.program.list_vars(): + if fluid.io.is_parameter(var) and var.name.startswith( + "generator"): + vars.append(var.name) + self.param = vars + if cfg.epoch <= 100: + optimizer = fluid.optimizer.Adam( + learning_rate=lr, beta1=0.5, beta2=0.999, name="net_G") + else: + optimizer = fluid.optimizer.Adam( + learning_rate=fluid.layers.piecewise_decay( + boundaries=[99 * step_per_epoch] + [ + x * step_per_epoch + for x in range(100, cfg.epoch - 1) + ], + values=[lr] + [ + lr * (1.0 - (x - 99.0) / 101.0) + for x in range(100, cfg.epoch) + ]), + beta1=0.5, + beta2=0.999, + name="net_G") + optimizer.minimize(self.g_loss, parameter_list=vars) + + +class DTrainer(): + def __init__(self, input_label, input_img, input_ins, fake_B, cfg, step_per_epoch): + self.program = fluid.default_main_program().clone() + lr = cfg.learning_rate + with fluid.program_guard(self.program): + model = 
SPADE_model() + input = input_label + if not cfg.no_instance: + input = fluid.layers.concat([input_label, input_ins], 1) + fake_concat = fluid.layers.concat([input, fake_B], 1) + real_concat = fluid.layers.concat([input, input_img], 1) + fake_and_real = fluid.layers.concat([fake_concat, real_concat], 0) + pred = model.network_D(fake_and_real, "discriminator", cfg) + if type(pred) == list: + self.pred_fake = [] + self.pred_real = [] + for p in pred: + self.pred_fake.append([tensor[:tensor.shape[0] // 2] for tensor in p]) + self.pred_real.append([tensor[tensor.shape[0] // 2:] for tensor in p]) + else: + self.pred_fake = pred[:pred.shape[0] // 2] + self.pred_real = pred[pred.shape[0] // 2:] + + #####gan loss + self.gan_loss_fake = 0 + for pred_i in self.pred_fake: + zeros = fluid.layers.fill_constant_batch_size_like(input=pred_i[-1],shape=pred_i[-1].shape,value=0,dtype='float32') + if isinstance(pred_i, list): + pred_i = pred_i[-1] + minval = fluid.layers.elementwise_min(-1 * pred_i-1, zeros) + loss_i = -1 * fluid.layers.reduce_mean(minval) + self.gan_loss_fake += loss_i + self.gan_loss_fake /= len(self.pred_fake) + + self.gan_loss_real = 0 + for pred_i in self.pred_real: + zeros = fluid.layers.fill_constant_batch_size_like(input=pred_i[-1],shape=pred_i[-1].shape,value=0,dtype='float32') + if isinstance(pred_i, list): + pred_i = pred_i[-1] + minval = fluid.layers.elementwise_min(pred_i-1, zeros) + loss_i = -1 * fluid.layers.reduce_mean(minval) + self.gan_loss_real += loss_i + self.gan_loss_real /= len(self.pred_real) + self.gan_loss_real.persistable = True + self.gan_loss_fake.persistable = True + + self.d_loss = 0.5 * (self.gan_loss_real + self.gan_loss_fake) + vars = [] + for var in self.program.list_vars(): + if fluid.io.is_parameter(var) and var.name.startswith( + "discriminator"): + vars.append(var.name) + self.param = vars + if cfg.epoch <= 100: + optimizer = fluid.optimizer.Adam( + learning_rate=lr, beta1=0.5, beta2=0.999, name="net_D") + else: + optimizer = 
fluid.optimizer.Adam( + learning_rate=fluid.layers.piecewise_decay( + boundaries=[99 * step_per_epoch] + [ + x * step_per_epoch + for x in range(100, cfg.epoch - 1) + ], + values=[lr] + [ + lr * (1.0 - (x - 99.0) / 101.0) + for x in range(100, cfg.epoch) + ]), + beta1=0.5, + beta2=0.999, + name="net_D") + + optimizer.minimize(self.d_loss, parameter_list=vars) + + +class SPADE(object): + def add_special_args(self, parser): + parser.add_argument( + '--vgg19_pretrain', + type=str, + default="./VGG19_pretrained", + help="VGG19 pretrained model for vgg loss" + ) + parser.add_argument( + '--crop_width', + type=int, + default=1024, + help="crop width for training SPADE") + parser.add_argument( + '--crop_height', + type=int, + default=512, + help="crop height for training SPADE") + parser.add_argument( + '--load_width', + type=int, + default=1124, + help="load width for training SPADE") + parser.add_argument( + '--load_height', + type=int, + default=612, + help="load height for training SPADE") + parser.add_argument( + '--d_nlayers', + type=int, + default=4, + help="num of discriminator layers for SPADE") + parser.add_argument( + '--label_nc', + type=int, + default=36, + help="label numbers of SPADE") + parser.add_argument( + '--ngf', + type=int, + default=64, + help="base channels of generator in SPADE") + parser.add_argument( + '--ndf', + type=int, + default=64, + help="base channels of discriminator in SPADE") + parser.add_argument( + '--num_D', + type=int, + default=2, + help="number of discriminators in SPADE") + parser.add_argument( + '--lambda_feat', + type=float, + default=10, + help="weight term of feature loss") + parser.add_argument( + '--lambda_vgg', + type=float, + default=10, + help="weight term of vgg loss") + parser.add_argument('--no_instance', type=bool, default=False, help="Whether to use instance label.") + + return parser + + def __init__(self, + cfg=None, + train_reader=None, + test_reader=None, + batch_num=1): + self.cfg = cfg + self.train_reader = 
train_reader + self.test_reader = test_reader + self.batch_num = batch_num + + def build_model(self): + data_shape = [-1, 3, self.cfg.crop_height, self.cfg.crop_width] + label_shape = [-1, self.cfg.label_nc, self.cfg.crop_height, self.cfg.crop_width] + edge_shape = [-1, 1, self.cfg.crop_height, self.cfg.crop_width] + + input_A = fluid.layers.data( + name='input_label', shape=label_shape, dtype='float32') + input_B = fluid.layers.data( + name='input_img', shape=data_shape, dtype='float32') + input_C = fluid.layers.data( + name='input_ins', shape=edge_shape, dtype='float32') + input_fake = fluid.layers.data( + name='input_fake', shape=data_shape, dtype='float32') + + gen_trainer = GTrainer(input_A, input_B, input_C, self.cfg, self.batch_num) + dis_trainer = DTrainer(input_A, input_B, input_C, input_fake, self.cfg, + self.batch_num) + py_reader = fluid.io.PyReader( + feed_list=[input_A, input_B, input_C], + capacity=4, ## batch_size * 4 + iterable=True, + use_double_buffer=True) + py_reader.decorate_batch_generator( + self.train_reader, + places=fluid.cuda_places() + if self.cfg.use_gpu else fluid.cpu_places()) + + # prepare environment + place = fluid.CUDAPlace(0) if self.cfg.use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + gen_trainer.vgg.load_vars(exe, gen_trainer.program, self.cfg.vgg19_pretrain) + + if self.cfg.init_model: + utility.init_checkpoints(self.cfg, exe, gen_trainer, "net_G") + utility.init_checkpoints(self.cfg, exe, dis_trainer, "net_D") + + ### memory optim + build_strategy = fluid.BuildStrategy() + build_strategy.enable_inplace = False + build_strategy.sync_batch_norm = True + + gen_trainer_program = fluid.CompiledProgram( + gen_trainer.program).with_data_parallel( + loss_name=gen_trainer.g_loss.name, + build_strategy=build_strategy) + dis_trainer_program = fluid.CompiledProgram( + dis_trainer.program).with_data_parallel( + loss_name=dis_trainer.d_loss.name, + build_strategy=build_strategy) + + 
t_time = 0 + + for epoch_id in range(self.cfg.epoch): + batch_id = 0 + for tensor in py_reader(): + data_A, data_B, data_C = tensor[0]['input_A'], tensor[0]['input_B'], tensor[0]['input_C'] + tensor_A = fluid.LoDTensor() + tensor_B = fluid.LoDTensor() + tensor_C = fluid.LoDTensor() + tensor_A.set(data_A, place) + tensor_B.set(data_B, place) + tensor_C.set(data_C, place) + s_time = time.time() + # optimize the generator network + g_loss_gan, g_loss_vgg, g_loss_feat, fake_B_tmp = exe.run( + gen_trainer_program, + fetch_list=[ + gen_trainer.gan_loss, gen_trainer.vgg_loss, gen_trainer.gan_feat_loss, + gen_trainer.fake_B + ], + feed={"input_label": tensor_A, + "input_img": tensor_B, + "input_ins": tensor_C}) + + # optimize the discriminator network + d_loss_real, d_loss_fake = exe.run(dis_trainer_program, + fetch_list=[ + dis_trainer.gan_loss_real, + dis_trainer.gan_loss_fake + ], + feed={ + "input_label": tensor_A, + "input_img": tensor_B, + "input_ins": tensor_C, + "input_fake": fake_B_tmp + }) + + batch_time = time.time() - s_time + t_time += batch_time + if batch_id % self.cfg.print_freq == 0: + print("epoch{}: batch{}: \n\ + g_loss_gan: {}; g_loss_vgg: {}; g_loss_feat: {} \n\ + d_loss_real: {}; d_loss_fake: {}; \n\ + Batch_time_cost: {:.2f}" + .format(epoch_id, batch_id, g_loss_gan[0], g_loss_vgg[ + 0], g_loss_feat[0], d_loss_real[0], d_loss_fake[0], batch_time)) + + sys.stdout.flush() + batch_id += 1 + + if self.cfg.run_test: + test_program = gen_trainer.infer_program + image_name = fluid.layers.data( + name='image_name', + shape=[self.cfg.batch_size], + dtype="int32") + test_py_reader = fluid.io.PyReader( + feed_list=[input_A, input_B, image_name], + capacity=4, ## batch_size * 4 + iterable=True, + use_double_buffer=True) + test_py_reader.decorate_batch_generator( + self.test_reader, + places=fluid.cuda_places() + if self.cfg.use_gpu else fluid.cpu_places()) + utility.save_test_image(epoch_id, self.cfg, exe, place, + test_program, gen_trainer, + test_py_reader) + 
+ if self.cfg.save_checkpoints: + utility.checkpoints(epoch_id, self.cfg, exe, gen_trainer, + "net_G") + utility.checkpoints(epoch_id, self.cfg, exe, dis_trainer, + "net_D") diff --git a/PaddleCV/PaddleGAN/util/utility.py b/PaddleCV/PaddleGAN/util/utility.py index 36bce229..74738154 100644 --- a/PaddleCV/PaddleGAN/util/utility.py +++ b/PaddleCV/PaddleGAN/util/utility.py @@ -170,6 +170,30 @@ def save_test_image(epoch, res_inputB = Image.fromarray(((input_B_temp + 1) * 127.5).astype( np.uint8)) res_inputB.save(os.path.join(out_path, inputB_name)) + elif cfg.model_net == "SPADE": + for data in A_test_reader(): + data_A, data_B, data_C, name = data[0]['input_A'], data[0]['input_B'], data[0]['input_C'], data[0]['image_name'] + tensor_A = fluid.LoDTensor() + tensor_B = fluid.LoDTensor() + tensor_C = fluid.LoDTensor() + tensor_A.set(data_A, place) + tensor_B.set(data_B, place) + tensor_C.set(data_C, place) + fake_B_temp = exe.run( + test_program, + fetch_list=[g_trainer.fake_B], + feed={"input_label": tensor_A, + "input_img": tensor_B, + "input_ins": tensor_C}) + fake_B_temp = np.squeeze(fake_B_temp[0]).transpose([1, 2, 0]) + input_B_temp = np.squeeze(data_B[0]).transpose([1, 2, 0]) + + res_fakeB = Image.fromarray(((fake_B_temp + 1) * 127.5).astype( + np.uint8)) + res_fakeB.save(out_path+"/fakeB_"+str(epoch)+"_"+name) + res_real = Image.fromarray(((input_B_temp + 1) * 127.5).astype( + np.uint8)) + res_real.save(out_path+"/real_"+str(epoch)+"_"+name) elif cfg.model_net == "StarGAN": for data in A_test_reader(): real_img, label_org, label_trg, image_name = data[0][ -- GitLab