From b4d5c316edf3ac637166f0f7a7377baebcc2bd6a Mon Sep 17 00:00:00 2001 From: hypox64 Date: Tue, 14 May 2019 01:32:04 +0800 Subject: [PATCH] Update to Pytorch 1.0 --- CleanMosaic.py | 2 +- README.md | 8 +- models/loadmodel.py | 7 +- models/pix2pix_model.py | 625 ++++++++++++++++++++------------- models/unet_model.py | 2 +- models/unet_parts.py | 9 +- options/addmosaic_options.py | 2 +- options/cleanmosaic_options.py | 2 +- util/image_processing.py | 4 +- util/mosaic.py | 13 +- 10 files changed, 416 insertions(+), 258 deletions(-) diff --git a/CleanMosaic.py b/CleanMosaic.py index c710e3f..7a41bab 100755 --- a/CleanMosaic.py +++ b/CleanMosaic.py @@ -17,7 +17,7 @@ opt = CleanOptions().getparse() def get_mosaic_position(img_origin): mask =runmodel.run_unet_rectim(img_origin,net_mosaic_pos,use_gpu = opt.use_gpu) mask = impro.mask_threshold(mask,10,128) - x,y,size,area = impro.boundingSquare(mask,threshold=128,Ex_mul=1.5) + x,y,size,area = impro.boundingSquare(mask,Ex_mul=1.5) rat = min(img_origin.shape[:2])/128.0 x,y,size = int(rat*x),int(rat*y),int(rat*size) return x,y,size diff --git a/README.md b/README.md index 53eeaaa..d3a2081 100755 --- a/README.md +++ b/README.md @@ -10,9 +10,9 @@ The code do not include the part of training, I will finish it in my free time. ## Prerequisites - Linux, (I didn't try this code on Windows or Mac OS) -- Python 3.5+ +- Python 3.6+ - ffmpeg -- Pytorch 0.4 [(Pytorch 1.0+ is available)](https://github.com/HypoX64/DeepMosaics) +- Pytorch 1.0+ [(Old version codes)](https://github.com/HypoX64/DeepMosaics/tree/Pytorch0.4) - CPU or NVIDIA GPU + CUDA CuDNN ## Getting Started @@ -23,8 +23,8 @@ cd DeepMosaics ``` ### Get pre_trained models and test video You can download pre_trained models and test video and replace the files in the project.
-[[Google Drive]](https://drive.google.com/open?id=1PXt3dE9Eez2xUqpemLJutwTCC0tW-D2g) - [[百度云,提取码z8vz]](https://pan.baidu.com/s/1Wi8T6PE4ExTjrHVQhv3rJA) +[[Google Drive]](https://drive.google.com/open?id=10nARsiZoZGcaKw40nQu9fJuRp1oeabPs) + [[百度云,提取码7thu]](https://pan.baidu.com/s/1IG4bdIiIC9PH9-oEyae5Sg) ### Dependencies This code depends on numpy, scipy, opencv-python, torchvision, available via pip install. diff --git a/models/loadmodel.py b/models/loadmodel.py index 85aa768..c87ff80 100755 --- a/models/loadmodel.py +++ b/models/loadmodel.py @@ -4,9 +4,7 @@ from .unet_model import UNet def pix2pix(model_path,G_model_type,use_gpu = True): gpu_ids=[] - if use_gpu: - gpu_ids=[0] - netG = define_G(3, 3, 64, G_model_type, norm='instance', init_type='normal', gpu_ids=gpu_ids) + netG = define_G(3, 3, 64, G_model_type, norm='batch', init_type='normal', gpu_ids=gpu_ids) netG.load_state_dict(torch.load(model_path)) netG.eval() if use_gpu: @@ -20,6 +18,3 @@ def unet(model_path,use_gpu = True): if use_gpu: net.cuda() return net - - -# def unet(): \ No newline at end of file diff --git a/models/pix2pix_model.py b/models/pix2pix_model.py index b118c6a..3d5db1d 100755 --- a/models/pix2pix_model.py +++ b/models/pix2pix_model.py @@ -2,240 +2,361 @@ import torch import torch.nn as nn from torch.nn import init import functools -from torch.autograd import Variable from torch.optim import lr_scheduler + + ############################################################################### -# Functions +# Helper Functions ############################################################################### -def weights_init_normal(m): - classname = m.__class__.__name__ - # print(classname) - if classname.find('Conv') != -1: - init.normal(m.weight.data, 0.0, 0.02) - elif classname.find('Linear') != -1: - init.normal(m.weight.data, 0.0, 0.02) - elif classname.find('BatchNorm2d') != -1: - init.normal(m.weight.data, 1.0, 0.02) - init.constant(m.bias.data, 0.0) - - -def weights_init_xavier(m): - classname = m.__class__.__name__ - # print(classname) - if classname.find('Conv') != -1: - init.xavier_normal(m.weight.data, gain=0.02) - elif classname.find('Linear') != -1: - init.xavier_normal(m.weight.data, gain=0.02) - elif classname.find('BatchNorm2d') != -1: - init.normal(m.weight.data, 1.0, 0.02) - init.constant(m.bias.data, 0.0) - - -def weights_init_kaiming(m): - classname = m.__class__.__name__ - # print(classname) - if classname.find('Conv') != -1: - init.kaiming_normal(m.weight.data, a=0, mode='fan_in') - elif classname.find('Linear') != -1: - init.kaiming_normal(m.weight.data, a=0, mode='fan_in') - elif classname.find('BatchNorm2d') != -1: - init.normal(m.weight.data, 1.0, 0.02) - init.constant(m.bias.data, 0.0) - - -def weights_init_orthogonal(m): - classname = m.__class__.__name__ - print(classname) - if classname.find('Conv') != -1: - init.orthogonal(m.weight.data, gain=1) - elif classname.find('Linear') != -1: - init.orthogonal(m.weight.data, gain=1) - elif classname.find('BatchNorm2d') != -1: - init.normal(m.weight.data, 1.0, 0.02) - init.constant(m.bias.data, 0.0) - - -def init_weights(net, init_type='normal'): - print('initialization method [%s]' % init_type) - if init_type == 'normal': - net.apply(weights_init_normal) - elif init_type == 'xavier': - net.apply(weights_init_xavier) - elif init_type == 'kaiming': - net.apply(weights_init_kaiming) - elif init_type == 'orthogonal': - net.apply(weights_init_orthogonal) - else: - raise NotImplementedError('initialization method [%s] is not implemented' % init_type) 
+class Identity(nn.Module):
+    def forward(self, x):
+        return x
 
 
 def get_norm_layer(norm_type='instance'):
+    """Return a normalization layer
+
+    Parameters:
+        norm_type (str) -- the name of the normalization layer: batch | instance | none
+
+    For BatchNorm, we use learnable affine parameters and track running statistics (mean/stddev).
+    For InstanceNorm, we do not use learnable affine parameters. We do not track running statistics.
+    """
     if norm_type == 'batch':
-        norm_layer = functools.partial(nn.BatchNorm2d, affine=True)
+        norm_layer = functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True)
     elif norm_type == 'instance':
-        norm_layer = functools.partial(nn.InstanceNorm2d, affine=False)
+        norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
     elif norm_type == 'none':
-        norm_layer = None
+        norm_layer = lambda x: Identity()
     else:
         raise NotImplementedError('normalization layer [%s] is not found' % norm_type)
     return norm_layer
 
 
 def get_scheduler(optimizer, opt):
-    if opt.lr_policy == 'lambda':
+    """Return a learning rate scheduler
+
+    Parameters:
+        optimizer          -- the optimizer of the network
+        opt (option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions.
+                              opt.lr_policy is the name of learning rate policy: linear | step | plateau | cosine
+
+    For 'linear', we keep the same learning rate for the first <opt.niter> epochs
+    and linearly decay the rate to zero over the next <opt.niter_decay> epochs.
+    For other schedulers (step, plateau, and cosine), we use the default PyTorch schedulers.
+    See https://pytorch.org/docs/stable/optim.html for more details.
+    """
+    if opt.lr_policy == 'linear':
         def lambda_rule(epoch):
-            lr_l = 1.0 - max(0, epoch + 1 + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1)
+            lr_l = 1.0 - max(0, epoch + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1)
             return lr_l
         scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
     elif opt.lr_policy == 'step':
         scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1)
     elif opt.lr_policy == 'plateau':
         scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5)
+    elif opt.lr_policy == 'cosine':
+        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt.niter, eta_min=0)
     else:
         return NotImplementedError('learning rate policy [%s] is not implemented', opt.lr_policy)
     return scheduler
 
 
-def define_G(input_nc, output_nc, ngf, which_model_netG, norm='batch', use_dropout=False, init_type='normal', gpu_ids=[]):
-    netG = None
-    use_gpu = len(gpu_ids) > 0
-    norm_layer = get_norm_layer(norm_type=norm)
-
-    if use_gpu:
+def init_weights(net, init_type='normal', init_gain=0.02):
+    """Initialize network weights.
+
+    Parameters:
+        net (network)     -- network to be initialized
+        init_type (str)   -- the name of an initialization method: normal | xavier | kaiming | orthogonal
+        init_gain (float) -- scaling factor for normal, xavier and orthogonal.
+
+    We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and kaiming might
+    work better for some applications. Feel free to try yourself.
+ """ + def init_func(m): # define the initialization function + classname = m.__class__.__name__ + if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): + if init_type == 'normal': + init.normal_(m.weight.data, 0.0, init_gain) + elif init_type == 'xavier': + init.xavier_normal_(m.weight.data, gain=init_gain) + elif init_type == 'kaiming': + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif init_type == 'orthogonal': + init.orthogonal_(m.weight.data, gain=init_gain) + else: + raise NotImplementedError('initialization method [%s] is not implemented' % init_type) + if hasattr(m, 'bias') and m.bias is not None: + init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies. + init.normal_(m.weight.data, 1.0, init_gain) + init.constant_(m.bias.data, 0.0) + + print('initialize network with %s' % init_type) + net.apply(init_func) # apply the initialization function + + +def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[]): + """Initialize a network: 1. register CPU/GPU device (with multi-GPU support); 2. initialize the network weights + Parameters: + net (network) -- the network to be initialized + init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal + gain (float) -- scaling factor for normal, xavier and orthogonal. + gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + + Return an initialized network. + """ + if len(gpu_ids) > 0: assert(torch.cuda.is_available()) + net.to(gpu_ids[0]) + net = torch.nn.DataParallel(net, gpu_ids) # multi-GPUs + init_weights(net, init_type, init_gain=init_gain) + return net - if which_model_netG == 'resnet_9blocks': - netG = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=9, gpu_ids=gpu_ids) - elif which_model_netG == 'resnet_6blocks': - netG = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=6, gpu_ids=gpu_ids) - elif which_model_netG == 'unet_128': - netG = UnetGenerator(input_nc, output_nc, 7, ngf, norm_layer=norm_layer, use_dropout=use_dropout, gpu_ids=gpu_ids) - elif which_model_netG == 'unet_256': - netG = UnetGenerator(input_nc, output_nc, 8, ngf, norm_layer=norm_layer, use_dropout=use_dropout, gpu_ids=gpu_ids) - else: - raise NotImplementedError('Generator model name [%s] is not recognized' % which_model_netG) - if len(gpu_ids) > 0: - netG.cuda(gpu_ids[0]) - init_weights(netG, init_type=init_type) - return netG + +def define_G(input_nc, output_nc, ngf, netG, norm='batch', use_dropout=False, init_type='normal', init_gain=0.02, gpu_ids=[]): + """Create a generator + + Parameters: + input_nc (int) -- the number of channels in input images + output_nc (int) -- the number of channels in output images + ngf (int) -- the number of filters in the last conv layer + netG (str) -- the architecture's name: resnet_9blocks | resnet_6blocks | unet_256 | unet_128 + norm (str) -- the name of normalization layers used in the network: batch | instance | none + use_dropout (bool) -- if use dropout layers. + init_type (str) -- the name of our initialization method. + init_gain (float) -- scaling factor for normal, xavier and orthogonal. 
+        gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2
+
+    Returns a generator
+
+    Our current implementation provides two types of generators:
+        U-Net: [unet_128] (for 128x128 input images) and [unet_256] (for 256x256 input images)
+        The original U-Net paper: https://arxiv.org/abs/1505.04597
+
+        Resnet-based generator: [resnet_6blocks] (with 6 Resnet blocks) and [resnet_9blocks] (with 9 Resnet blocks)
+        Resnet-based generator consists of several Resnet blocks between a few downsampling/upsampling operations.
+        We adapt Torch code from Justin Johnson's neural style transfer project (https://github.com/jcjohnson/fast-neural-style).
 
-def define_D(input_nc, ndf, which_model_netD,
-             n_layers_D=3, norm='batch', use_sigmoid=False, init_type='normal', gpu_ids=[]):
-    netD = None
-    use_gpu = len(gpu_ids) > 0
+    The generator has been initialized by <init_net>. It uses ReLU for non-linearity.
+    """
+    net = None
     norm_layer = get_norm_layer(norm_type=norm)
 
-    if use_gpu:
-        assert(torch.cuda.is_available())
-    if which_model_netD == 'basic':
-        netD = NLayerDiscriminator(input_nc, ndf, n_layers=3, norm_layer=norm_layer, use_sigmoid=use_sigmoid, gpu_ids=gpu_ids)
-    elif which_model_netD == 'n_layers':
-        netD = NLayerDiscriminator(input_nc, ndf, n_layers_D, norm_layer=norm_layer, use_sigmoid=use_sigmoid, gpu_ids=gpu_ids)
-    elif which_model_netD == 'pixel':
-        netD = PixelDiscriminator(input_nc, ndf, norm_layer=norm_layer, use_sigmoid=use_sigmoid, gpu_ids=gpu_ids)
+    if netG == 'resnet_9blocks':
+        net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=9)
+    elif netG == 'resnet_6blocks':
+        net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=6)
+    elif netG == 'unet_128':
+        net = UnetGenerator(input_nc, output_nc, 7, ngf, norm_layer=norm_layer, use_dropout=use_dropout)
+    elif netG == 'unet_256':
+        net = UnetGenerator(input_nc, output_nc, 8, ngf, norm_layer=norm_layer, use_dropout=use_dropout)
     else:
-        raise NotImplementedError('Discriminator model name [%s] is not recognized' %
-                                  which_model_netD)
-    if use_gpu:
-        netD.cuda(gpu_ids[0])
-    init_weights(netD, init_type=init_type)
-    return netD
+        raise NotImplementedError('Generator model name [%s] is not recognized' % netG)
+    return init_net(net, init_type, init_gain, gpu_ids)
+
+
+def define_D(input_nc, ndf, netD, n_layers_D=3, norm='batch', init_type='normal', init_gain=0.02, gpu_ids=[]):
+    """Create a discriminator
+
+    Parameters:
+        input_nc (int)     -- the number of channels in input images
+        ndf (int)          -- the number of filters in the first conv layer
+        netD (str)         -- the architecture's name: basic | n_layers | pixel
+        n_layers_D (int)   -- the number of conv layers in the discriminator; effective when netD=='n_layers'
+        norm (str)         -- the type of normalization layers used in the network.
+        init_type (str)    -- the name of the initialization method.
+        init_gain (float)  -- scaling factor for normal, xavier and orthogonal.
+        gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2
+
+    Returns a discriminator
+
+    Our current implementation provides three types of discriminators:
+        [basic]: 'PatchGAN' classifier described in the original pix2pix paper.
+        It can classify whether 70×70 overlapping patches are real or fake.
+        Such a patch-level discriminator architecture has fewer parameters
+        than a full-image discriminator and can work on arbitrarily-sized images
+        in a fully convolutional fashion.
-def print_network(net):
-    num_params = 0
-    for param in net.parameters():
-        num_params += param.numel()
-    print(net)
-    print('Total number of parameters: %d' % num_params)
+    [n_layers]: With this mode, you can specify the number of conv layers in the discriminator
+    with the parameter <n_layers_D> (default=3 as used in [basic] (PatchGAN).)
+
+    [pixel]: 1x1 PixelGAN discriminator can classify whether a pixel is real or not.
+    It encourages greater color diversity but has no effect on spatial statistics.
+
+    The discriminator has been initialized by <init_net>. It uses Leaky ReLU for non-linearity.
+    """
+    net = None
+    norm_layer = get_norm_layer(norm_type=norm)
+
+    if netD == 'basic':  # default PatchGAN classifier
+        net = NLayerDiscriminator(input_nc, ndf, n_layers=3, norm_layer=norm_layer)
+    elif netD == 'n_layers':  # more options
+        net = NLayerDiscriminator(input_nc, ndf, n_layers_D, norm_layer=norm_layer)
+    elif netD == 'pixel':     # classify if each pixel is real or fake
+        net = PixelDiscriminator(input_nc, ndf, norm_layer=norm_layer)
+    else:
+        raise NotImplementedError('Discriminator model name [%s] is not recognized' % netD)
+    return init_net(net, init_type, init_gain, gpu_ids)
 
 
 ##############################################################################
 # Classes
 ##############################################################################
+class GANLoss(nn.Module):
+    """Define different GAN objectives.
+
+    The GANLoss class abstracts away the need to create the target label tensor
+    that has the same size as the input.
+    """
 
-# Defines the GAN loss which uses either LSGAN or the regular GAN.
-# When LSGAN is used, it is basically same as MSELoss,
-# but it abstracts away the need to create the target label tensor
-# that has the same size as the input
-class GANLoss(nn.Module):
-    def __init__(self, use_lsgan=True, target_real_label=1.0, target_fake_label=0.0,
-                 tensor=torch.FloatTensor):
+    def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0):
+        """ Initialize the GANLoss class.
+
+        Parameters:
+            gan_mode (str) - - the type of GAN objective. It currently supports vanilla, lsgan, and wgangp.
+            target_real_label (bool) - - label for a real image
+            target_fake_label (bool) - - label of a fake image
+
+        Note: Do not use sigmoid as the last layer of Discriminator.
+        LSGAN needs no sigmoid. vanilla GANs will handle it with BCEWithLogitsLoss.
+        """
         super(GANLoss, self).__init__()
-        self.real_label = target_real_label
-        self.fake_label = target_fake_label
-        self.real_label_var = None
-        self.fake_label_var = None
-        self.Tensor = tensor
-        if use_lsgan:
+        self.register_buffer('real_label', torch.tensor(target_real_label))
+        self.register_buffer('fake_label', torch.tensor(target_fake_label))
+        self.gan_mode = gan_mode
+        if gan_mode == 'lsgan':
             self.loss = nn.MSELoss()
+        elif gan_mode == 'vanilla':
+            self.loss = nn.BCEWithLogitsLoss()
+        elif gan_mode in ['wgangp']:
+            self.loss = None
         else:
-            self.loss = nn.BCELoss()
+            raise NotImplementedError('gan mode %s not implemented' % gan_mode)
+
+    def get_target_tensor(self, prediction, target_is_real):
+        """Create label tensors with the same size as the input.
+
+        Parameters:
+            prediction (tensor) - - typically the prediction from a discriminator
+            target_is_real (bool) - - if the ground truth label is for real images or fake images
+
+        Returns:
+            A label tensor filled with ground truth label, and with the size of the input
+        """
-    def get_target_tensor(self, input, target_is_real):
-        target_tensor = None
         if target_is_real:
-            create_label = ((self.real_label_var is None) or
-                            (self.real_label_var.numel() != input.numel()))
-            if create_label:
-                real_tensor = self.Tensor(input.size()).fill_(self.real_label)
-                self.real_label_var = Variable(real_tensor, requires_grad=False)
-            target_tensor = self.real_label_var
+            target_tensor = self.real_label
         else:
-            create_label = ((self.fake_label_var is None) or
-                            (self.fake_label_var.numel() != input.numel()))
-            if create_label:
-                fake_tensor = self.Tensor(input.size()).fill_(self.fake_label)
-                self.fake_label_var = Variable(fake_tensor, requires_grad=False)
-            target_tensor = self.fake_label_var
-        return target_tensor
-
-    def __call__(self, input, target_is_real):
-        target_tensor = self.get_target_tensor(input, target_is_real)
-        return self.loss(input, target_tensor)
-
-
-# Defines the generator that consists of Resnet blocks between a few
-# downsampling/upsampling operations.
-# Code and idea originally from Justin Johnson's architecture.
-# https://github.com/jcjohnson/fast-neural-style/
+            target_tensor = self.fake_label
+        return target_tensor.expand_as(prediction)
+
+    def __call__(self, prediction, target_is_real):
+        """Calculate loss given Discriminator's output and ground truth labels.
+
+        Parameters:
+            prediction (tensor) - - typically the prediction output from a discriminator
+            target_is_real (bool) - - if the ground truth label is for real images or fake images
+
+        Returns:
+            the calculated loss.
+        """
+        if self.gan_mode in ['lsgan', 'vanilla']:
+            target_tensor = self.get_target_tensor(prediction, target_is_real)
+            loss = self.loss(prediction, target_tensor)
+        elif self.gan_mode == 'wgangp':
+            if target_is_real:
+                loss = -prediction.mean()
+            else:
+                loss = prediction.mean()
+        return loss
+
+
+def cal_gradient_penalty(netD, real_data, fake_data, device, type='mixed', constant=1.0, lambda_gp=10.0):
+    """Calculate the gradient penalty loss, used in WGAN-GP paper https://arxiv.org/abs/1704.00028
+
+    Arguments:
+        netD (network)           -- discriminator network
+        real_data (tensor array) -- real images
+        fake_data (tensor array) -- generated images from the generator
+        device (str)             -- GPU / CPU: from torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu')
+        type (str)               -- if we mix real and fake data or not [real | fake | mixed].
+        constant (float)         -- the constant used in formula (||gradient||_2 - constant)^2
+        lambda_gp (float)        -- weight for this loss
+
+    Returns the gradient penalty loss
+    """
+    if lambda_gp > 0.0:
+        if type == 'real':   # either use real images, fake images, or a linear interpolation of two.
+            interpolatesv = real_data
+        elif type == 'fake':
+            interpolatesv = fake_data
+        elif type == 'mixed':
+            alpha = torch.rand(real_data.shape[0], 1)
+            alpha = alpha.expand(real_data.shape[0], real_data.nelement() // real_data.shape[0]).contiguous().view(*real_data.shape)
+            alpha = alpha.to(device)
+            interpolatesv = alpha * real_data + ((1 - alpha) * fake_data)
+        else:
+            raise NotImplementedError('{} not implemented'.format(type))
+        interpolatesv.requires_grad_(True)
+        disc_interpolates = netD(interpolatesv)
+        gradients = torch.autograd.grad(outputs=disc_interpolates, inputs=interpolatesv,
+                                        grad_outputs=torch.ones(disc_interpolates.size()).to(device),
+                                        create_graph=True, retain_graph=True, only_inputs=True)
+        gradients = gradients[0].view(real_data.size(0), -1)  # flatten the data
+        gradient_penalty = (((gradients + 1e-16).norm(2, dim=1) - constant) ** 2).mean() * lambda_gp        # added eps
+        return gradient_penalty, gradients
+    else:
+        return 0.0, None
+
+
 class ResnetGenerator(nn.Module):
-    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, gpu_ids=[], padding_type='reflect'):
+    """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations.
+
+    We adapt Torch code and idea from Justin Johnson's neural style transfer project (https://github.com/jcjohnson/fast-neural-style)
+    """
+
+    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='reflect'):
+        """Construct a Resnet-based generator
+
+        Parameters:
+            input_nc (int)      -- the number of channels in input images
+            output_nc (int)     -- the number of channels in output images
+            ngf (int)           -- the number of filters in the last conv layer
+            norm_layer          -- normalization layer
+            use_dropout (bool)  -- if use dropout layers
+            n_blocks (int)      -- the number of ResNet blocks
+            padding_type (str)  -- the name of padding layer in conv layers: reflect | replicate | zero
+        """
         assert(n_blocks >= 0)
         super(ResnetGenerator, self).__init__()
-        self.input_nc = input_nc
-        self.output_nc = output_nc
-        self.ngf = ngf
-        self.gpu_ids = gpu_ids
         if type(norm_layer) == functools.partial:
             use_bias = norm_layer.func == nn.InstanceNorm2d
         else:
             use_bias = norm_layer == nn.InstanceNorm2d
 
         model = [nn.ReflectionPad2d(3),
-                 nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0,
-                           bias=use_bias),
+                 nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
                  norm_layer(ngf),
                  nn.ReLU(True)]
 
         n_downsampling = 2
-        for i in range(n_downsampling):
-            mult = 2**i
-            model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3,
-                                stride=2, padding=1, bias=use_bias),
+        for i in range(n_downsampling):  # add downsampling layers
+            mult = 2 ** i
+            model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias=use_bias),
                       norm_layer(ngf * mult * 2),
                       nn.ReLU(True)]
 
-        mult = 2**n_downsampling
-        for i in range(n_blocks):
+        mult = 2 ** n_downsampling
+        for i in range(n_blocks):       # add ResNet blocks
+
             model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]
 
-        for i in range(n_downsampling):
-            mult = 2**(n_downsampling - i)
+        for i in range(n_downsampling):  # add upsampling layers
+            mult = 2 ** (n_downsampling - i)
             model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
                                          kernel_size=3, stride=2,
                                          padding=1, output_padding=1,
@@ -249,19 +370,36 @@ class ResnetGenerator(nn.Module):
         self.model = nn.Sequential(*model)
 
     def forward(self, input):
-        if self.gpu_ids and isinstance(input.data, torch.cuda.FloatTensor):
-            return nn.parallel.data_parallel(self.model, input, self.gpu_ids)
-        else:
-            return self.model(input)
+        """Standard forward"""
+        return self.model(input)
 
 
-# Define a resnet block
 class ResnetBlock(nn.Module):
+    """Define a Resnet block"""
+
     def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias):
+        """Initialize the Resnet block
+
+        A resnet block is a conv block with skip connections
+        We construct a conv block with build_conv_block function,
+        and implement skip connections in <forward> function.
+        Original Resnet paper: https://arxiv.org/pdf/1512.03385.pdf
+        """
         super(ResnetBlock, self).__init__()
         self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias)
 
     def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias):
+        """Construct a convolutional block.
+
+        Parameters:
+            dim (int)           -- the number of channels in the conv layer.
+            padding_type (str)  -- the name of padding layer: reflect | replicate | zero
+            norm_layer          -- normalization layer
+            use_dropout (bool)  -- if use dropout layers.
+            use_bias (bool)     -- if the conv layer uses bias or not
+
+        Returns a conv block (with a conv layer, a normalization layer, and a non-linearity layer (ReLU))
+        """
         conv_block = []
         p = 0
         if padding_type == 'reflect':
@@ -273,9 +411,7 @@ class ResnetBlock(nn.Module):
         else:
             raise NotImplementedError('padding [%s] is not implemented' % padding_type)
 
-        conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias),
-                       norm_layer(dim),
-                       nn.ReLU(True)]
+        conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), norm_layer(dim), nn.ReLU(True)]
         if use_dropout:
             conv_block += [nn.Dropout(0.5)]
 
@@ -288,50 +424,68 @@ class ResnetBlock(nn.Module):
             p = 1
         else:
             raise NotImplementedError('padding [%s] is not implemented' % padding_type)
-        conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias),
-                       norm_layer(dim)]
+        conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), norm_layer(dim)]
 
         return nn.Sequential(*conv_block)
 
     def forward(self, x):
-        out = x + self.conv_block(x)
+        """Forward function (with skip connections)"""
+        out = x + self.conv_block(x)  # add skip connections
        return out
 
 
-# Defines the Unet generator.
-# |num_downs|: number of downsamplings in UNet. For example,
-# if |num_downs| == 7, image of size 128x128 will become of size 1x1
-# at the bottleneck
 class UnetGenerator(nn.Module):
-    def __init__(self, input_nc, output_nc, num_downs, ngf=64,
-                 norm_layer=nn.BatchNorm2d, use_dropout=False, gpu_ids=[]):
+    """Create a Unet-based generator"""
+
+    def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False):
+        """Construct a Unet generator
+        Parameters:
+            input_nc (int)  -- the number of channels in input images
+            output_nc (int) -- the number of channels in output images
+            num_downs (int) -- the number of downsamplings in UNet. For example, if |num_downs| == 7,
+                               an image of size 128x128 will become of size 1x1 at the bottleneck
+            ngf (int)       -- the number of filters in the last conv layer
+            norm_layer      -- normalization layer
+
+        We construct the U-Net from the innermost layer to the outermost layer.
+        It is a recursive process.
+        """
         super(UnetGenerator, self).__init__()
-        self.gpu_ids = gpu_ids
-
         # construct unet structure
-        unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True)
-        for i in range(num_downs - 5):
+        unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True)  # add the innermost layer
+        for i in range(num_downs - 5):          # add intermediate layers with ngf * 8 filters
             unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
+        # gradually reduce the number of filters from ngf * 8 to ngf
         unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
         unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
         unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
-        unet_block = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer)
-
-        self.model = unet_block
+        self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer)  # add the outermost layer
 
     def forward(self, input):
-        if self.gpu_ids and isinstance(input.data, torch.cuda.FloatTensor):
-            return nn.parallel.data_parallel(self.model, input, self.gpu_ids)
-        else:
-            return self.model(input)
+        """Standard forward"""
+        return self.model(input)
 
 
-# Defines the submodule with skip connection.
-# X -------------------identity----------------------
-# |-- downsampling -- |submodule| -- upsampling --|
 class UnetSkipConnectionBlock(nn.Module):
+    """Defines the Unet submodule with skip connection.
+        X -------------------identity----------------------
+        |-- downsampling -- |submodule| -- upsampling --|
+    """
+
     def __init__(self, outer_nc, inner_nc, input_nc=None,
                  submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
+        """Construct a Unet submodule with skip connections.
+
+        Parameters:
+            outer_nc (int) -- the number of filters in the outer conv layer
+            inner_nc (int) -- the number of filters in the inner conv layer
+            input_nc (int) -- the number of channels in input images/features
+            submodule (UnetSkipConnectionBlock) -- previously defined submodules
+            outermost (bool)    -- if this module is the outermost module
+            innermost (bool)    -- if this module is the innermost module
+            norm_layer          -- normalization layer
+            use_dropout (bool)  -- if use dropout layers.
+        """
         super(UnetSkipConnectionBlock, self).__init__()
         self.outermost = outermost
         if type(norm_layer) == functools.partial:
@@ -378,70 +532,74 @@ class UnetSkipConnectionBlock(nn.Module):
     def forward(self, x):
         if self.outermost:
             return self.model(x)
-        else:
+        else:   # add skip connections
             return torch.cat([x, self.model(x)], 1)
 
 
-# Defines the PatchGAN discriminator with the specified arguments.
 class NLayerDiscriminator(nn.Module):
-    def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False, gpu_ids=[]):
+    """Defines a PatchGAN discriminator"""
+
+    def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d):
+        """Construct a PatchGAN discriminator
+
+        Parameters:
+            input_nc (int)  -- the number of channels in input images
+            ndf (int)       -- the number of filters in the last conv layer
+            n_layers (int)  -- the number of conv layers in the discriminator
+            norm_layer      -- normalization layer
+        """
         super(NLayerDiscriminator, self).__init__()
-        self.gpu_ids = gpu_ids
-        if type(norm_layer) == functools.partial:
-            use_bias = norm_layer.func == nn.InstanceNorm2d
+        if type(norm_layer) == functools.partial:  # no need to use bias as BatchNorm2d has affine parameters
+            use_bias = norm_layer.func != nn.BatchNorm2d
         else:
-            use_bias = norm_layer == nn.InstanceNorm2d
+            use_bias = norm_layer != nn.BatchNorm2d
 
         kw = 4
         padw = 1
-        sequence = [
-            nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw),
-            nn.LeakyReLU(0.2, True)
-        ]
-
+        sequence = [nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)]
         nf_mult = 1
         nf_mult_prev = 1
-        for n in range(1, n_layers):
+        for n in range(1, n_layers):  # gradually increase the number of filters
             nf_mult_prev = nf_mult
-            nf_mult = min(2**n, 8)
+            nf_mult = min(2 ** n, 8)
             sequence += [
-                nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult,
-                          kernel_size=kw, stride=2, padding=padw, bias=use_bias),
+                nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, padding=padw, bias=use_bias),
                 norm_layer(ndf * nf_mult),
                 nn.LeakyReLU(0.2, True)
             ]
 
         nf_mult_prev = nf_mult
-        nf_mult = min(2**n_layers, 8)
+        nf_mult = min(2 ** n_layers, 8)
         sequence += [
-            nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult,
-                      kernel_size=kw, stride=1, padding=padw, bias=use_bias),
+            nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, padding=padw, bias=use_bias),
            norm_layer(ndf * nf_mult),
            nn.LeakyReLU(0.2, True)
         ]
 
-        sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)]
-
-        if use_sigmoid:
-            sequence += [nn.Sigmoid()]
-
+        sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)]  # output 1 channel prediction map
         self.model = nn.Sequential(*sequence)
 
     def forward(self, input):
-        if len(self.gpu_ids) and isinstance(input.data, torch.cuda.FloatTensor):
-            return nn.parallel.data_parallel(self.model, input, self.gpu_ids)
-        else:
-            return self.model(input)
+        """Standard forward."""
+        return self.model(input)
 
 
 class PixelDiscriminator(nn.Module):
-    def __init__(self, input_nc, ndf=64, norm_layer=nn.BatchNorm2d, use_sigmoid=False, gpu_ids=[]):
+    """Defines a 1x1 PatchGAN discriminator (pixelGAN)"""
+
+    def __init__(self, input_nc, ndf=64, norm_layer=nn.BatchNorm2d):
+        """Construct a 1x1 PatchGAN discriminator
+
+        Parameters:
+            input_nc (int)  -- the number of channels in input images
+            ndf (int)       -- the number of filters in the last conv layer
+            norm_layer      -- normalization layer
+        """
         super(PixelDiscriminator, self).__init__()
-        self.gpu_ids = gpu_ids
-        if type(norm_layer) == functools.partial:
-            use_bias = norm_layer.func == nn.InstanceNorm2d
+        if type(norm_layer) == functools.partial:  # no need to use bias as BatchNorm2d has affine parameters
+            use_bias = norm_layer.func != nn.BatchNorm2d
        else:
-            use_bias = norm_layer == nn.InstanceNorm2d
+            use_bias = norm_layer != nn.BatchNorm2d
 
         self.net = [
             nn.Conv2d(input_nc, ndf, kernel_size=1, stride=1,
                      padding=0),
@@ -451,13 +609,8 @@ class PixelDiscriminator(nn.Module):
             nn.LeakyReLU(0.2, True),
             nn.Conv2d(ndf * 2, 1, kernel_size=1, stride=1, padding=0, bias=use_bias)]
 
-        if use_sigmoid:
-            self.net.append(nn.Sigmoid())
-
         self.net = nn.Sequential(*self.net)
 
     def forward(self, input):
-        if len(self.gpu_ids) and isinstance(input.data, torch.cuda.FloatTensor):
-            return nn.parallel.data_parallel(self.net, input, self.gpu_ids)
-        else:
-            return self.net(input)
+        """Standard forward."""
+        return self.net(input)
diff --git a/models/unet_model.py b/models/unet_model.py
index 5990649..f5b37d9 100755
--- a/models/unet_model.py
+++ b/models/unet_model.py
@@ -29,4 +29,4 @@ class UNet(nn.Module):
         x = self.up3(x, x2)
         x = self.up4(x, x1)
         x = self.outc(x)
-        return F.sigmoid(x)
+        return torch.sigmoid(x)
diff --git a/models/unet_parts.py b/models/unet_parts.py
index b24e375..df5c8fd 100755
--- a/models/unet_parts.py
+++ b/models/unet_parts.py
@@ -45,7 +45,14 @@ class down(nn.Module):
         x = self.mpconv(x)
         return x
 
+class Upsample(nn.Module):
+    def __init__(self, scale_factor):
+        super(Upsample, self).__init__()
+        self.scale_factor = scale_factor
 
+    def forward(self, x):
+        return F.interpolate(x, scale_factor=self.scale_factor, mode='bilinear', align_corners=True)
+
 class up(nn.Module):
     def __init__(self, in_ch, out_ch, bilinear=True):
         super(up, self).__init__()
@@ -53,7 +60,7 @@ class up(nn.Module):
         # would be a nice idea if the upsampling could be learned too,
         # but my machine do not have enough memory to handle all those weights
         if bilinear:
-            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
+            self.up = Upsample(scale_factor=2)
         else:
             self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)
 
diff --git a/options/addmosaic_options.py b/options/addmosaic_options.py
index c5e2cc4..28c8032 100755
--- a/options/addmosaic_options.py
+++ b/options/addmosaic_options.py
@@ -18,7 +18,7 @@ class AddOptions():
         self.parser.add_argument('--mask_extend', type=int, default=20,help='more mosaic area')
         self.parser.add_argument('--mask_threshold', type=int, default=64,help='threshold of recognize mosaic position 0~255')
         self.parser.add_argument('--output_size', type=int, default=0,help='size of output file,if 0 -> origin')
-        self.parser.add_argument('--tempimage_type', type=str, default='jpg',help='type of temp image, png | jpg, png is better but occupy more storage space')
+        self.parser.add_argument('--tempimage_type', type=str, default='png',help='type of temp image, png | jpg, png is better but occupy more storage space')
         self.initialized = True
 
     def getparse(self):
diff --git a/options/cleanmosaic_options.py b/options/cleanmosaic_options.py
index ec9c1ca..36edd75 100755
--- a/options/cleanmosaic_options.py
+++ b/options/cleanmosaic_options.py
@@ -18,7 +18,7 @@ class CleanOptions():
                                  help='name of model use to find mosaic position')
         self.parser.add_argument('--no_feather', action='store_true', help='if true, no edge feather,but run faster')
         self.parser.add_argument('--medfilt_num', type=int, default=11,help='medfilt window of mosaic movement in the video')
-        self.parser.add_argument('--tempimage_type', type=str, default='jpg',help='type of temp image, png | jpg, png is better but occupy more storage space')
+        self.parser.add_argument('--tempimage_type', type=str, default='png',help='type of temp image, png | jpg, png is better but occupy more storage space')
         # self.parser.add_argument('--zoom_multiple', type=float, default=1.0,help='zoom video')
         self.initialized = True
 
diff --git
a/util/image_processing.py b/util/image_processing.py index f44f235..55ba738 100755 --- a/util/image_processing.py +++ b/util/image_processing.py @@ -4,7 +4,7 @@ import numpy as np def resize(img,size): h, w = img.shape[:2] - if min(h, w) ==size: + if np.min((w,h)) ==size: return img if w >= h: res = cv2.resize(img,(int(size*w/h), size)) @@ -62,7 +62,7 @@ def mergeimage(img1,img2,orgin_image): result_img = cv2.add(new_img1,new_img2) return result_img -def boundingSquare(mask,threshold,Ex_mul): +def boundingSquare(mask,Ex_mul): # thresh = mask_threshold(mask,10,threshold) area = mask_area(mask) if area == 0 : diff --git a/util/mosaic.py b/util/mosaic.py index 483c4a7..6c69e17 100755 --- a/util/mosaic.py +++ b/util/mosaic.py @@ -6,6 +6,7 @@ from .image_processing import resize,ch_one2three,mask_area def addmosaic(img,mask,n,out_size = 0,model = 'squa_avg'): + n = int(n) if out_size: img = resize(img,out_size) h, w = img.shape[:2] @@ -59,22 +60,24 @@ def random_mosaic_mod(img,mask,n): return img def random_mosaic(img,mask): - img = resize(img,512) + # img = resize(img,512) h,w = img.shape[:2] mask = cv2.resize(mask,(w,h)) + alpha = np.min((w,h))/512 #area_avg=5925*4 try: area = mask_area(mask) except: area = 0 + area = area/(alpha*alpha) if area>50000: - img_mosaic = random_mosaic_mod(img,mask,random.randint(14,26)) + img_mosaic = random_mosaic_mod(img,mask,alpha*random.uniform(16,28)) elif 20000
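
Usage sketch for the ported GANLoss/define_D API: the labels are now registered as
buffers and expanded to the prediction's size, so the old Variable-based label caching
is gone. Below is a minimal discriminator step against the patched
models/pix2pix_model.py; the random tensors, the 0.5 weighting, and the Adam settings
are illustrative placeholders, not part of this patch.

    import torch
    from models.pix2pix_model import define_D, GANLoss

    netD = define_D(input_nc=3, ndf=64, netD='basic', gpu_ids=[])  # CPU-only sketch
    criterionGAN = GANLoss('lsgan')  # or 'vanilla' (BCEWithLogitsLoss) / 'wgangp'
    optimizer_D = torch.optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))

    real = torch.randn(1, 3, 256, 256)  # stand-in for a real batch
    fake = torch.randn(1, 3, 256, 256)  # stand-in for the generator's output

    optimizer_D.zero_grad()
    loss_real = criterionGAN(netD(real), True)            # label buffer is expand_as'd to the prediction
    loss_fake = criterionGAN(netD(fake.detach()), False)  # detach so no gradient flows into G
    loss_D = 0.5 * (loss_real + loss_fake)
    loss_D.backward()
    optimizer_D.step()

Because real_label/fake_label are buffers, they move with the module on .to(device),
which is what lets the new GANLoss drop the per-call label tensor bookkeeping.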