diff --git a/.gitignore b/.gitignore index 1635c5630c085ff819456c2f2a648852e5df5978..cf5d361388641f9e888bae9a4902dbb0acc22dff 100644 --- a/.gitignore +++ b/.gitignore @@ -141,6 +141,7 @@ dataset/ test* video_tmp/ result/ +nohup.out #./ /pix2pix /pix2pixHD diff --git a/README.md b/README.md index a9616187a647f50b6c9fbf9debf84fcfe0f240e0..f45c07f6abcc9c3f79d6a995469206f81d2c8490 100755 --- a/README.md +++ b/README.md @@ -67,11 +67,11 @@ You can download pre_trained models and put them into './pretrained_models'.
#### Simple example * Add Mosaic (output media will save in './result')
```bash -python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu -1 +python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu 0 ``` * Clean Mosaic (output media will save in './result')
```bash -python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu -1 +python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu 0 ``` #### More parameters If you want to test other image or video, please refer to this file.
diff --git a/README_CN.md b/README_CN.md index bb684beae7bed10708a0daeba0a0e0d84e99a215..c7c84d525c3a8ecb9a08f0a7e59b0454f3692d4a 100644 --- a/README_CN.md +++ b/README_CN.md @@ -64,11 +64,11 @@ cd DeepMosaics #### 简单的例子 * 为视频添加马赛克,例子中认为脸是需要打码的区域 ,可以通过切换预训练模型切换自动打码区域(输出结果将储存到 './result')
```bash -python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu -1 +python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu 0 ``` * 将视频中的马赛克移除,对于不同的打码物体需要使用对应的预训练模型进行马赛克消除(输出结果将储存到 './result')
```bash -python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu -1 +python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu 0 ``` #### 更多的参数 如果想要测试其他的图片或视频,请参照以下文件输入参数.
diff --git a/cores/options.py b/cores/options.py index d0a4a778dc3398f83ee7b805f1d72aaaea769682..05188dfdb5a122f1dfa2f072af9e8788f8990b88 100644 --- a/cores/options.py +++ b/cores/options.py @@ -1,6 +1,6 @@ import argparse import os -import torch + class Options(): def __init__(self): @@ -10,7 +10,7 @@ class Options(): def initialize(self): #base - self.parser.add_argument('--use_gpu',type=int,default=0, help='if -1, do not use gpu') + self.parser.add_argument('--use_gpu',type=int,default=0, help='if -1, use cpu') # self.parser.add_argument('--use_gpu', action='store_true', help='if input it, use gpu') self.parser.add_argument('--media_path', type=str, default='./imgs/ruoruo.jpg',help='your videos or images path') self.parser.add_argument('--mode', type=str, default='auto',help='Program running mode. auto | add | clean | style') @@ -54,10 +54,12 @@ class Options(): model_name = os.path.basename(self.opt.model_path) + os.environ["CUDA_VISIBLE_DEVICES"] = str(self.opt.use_gpu) + import torch if torch.cuda.is_available() and self.opt.use_gpu > -1: - self.opt.use_gpu = True + pass else: - self.opt.use_gpu = False + self.opt.use_gpu = -1 if self.opt.mode == 'auto': if 'clean' in model_name or self.opt.traditional: diff --git a/deepmosaic.py b/deepmosaic.py index a92f9416dfe8261e822aeed9ee7151b39d44dd48..3f91413e4b44c7b946c74b6e9d4866f779d72d8f 100644 --- a/deepmosaic.py +++ b/deepmosaic.py @@ -72,6 +72,6 @@ if __name__ == '__main__': for stack in traceback.extract_tb(ex_stack): print(stack) input('Please press any key to exit.\n') - util.clean_tempfiles(tmp_init = False) + #util.clean_tempfiles(tmp_init = False) exit(0) diff --git a/docs/training_with_your_own_dataset.md b/docs/training_with_your_own_dataset.md index cc052896d90fd767b82bbcc13954a8178a510fd0..fa3693894875ea6afad8af12562167ff50def592 100644 --- a/docs/training_with_your_own_dataset.md +++ b/docs/training_with_your_own_dataset.md @@ -53,7 +53,7 @@ python make_video_dataset.py --datadir 'dir for your videos' --model_path ../pre ### Add ```bash cd train/add -python train.py --gpu_id 0 --dataset ../../datasets/draw/face --savename face --loadsize 512 --finesize 360 --batchsize 16 +python train.py --use_gpu 0 --dataset ../../datasets/draw/face --savename face --loadsize 512 --finesize 360 --batchsize 16 ``` ### Clean * For image datasets(generated by ```make_pix2pix_dataset.py```) @@ -67,7 +67,7 @@ python train.py --name face --resize_or_crop resize_and_crop --loadSize 563 --fi * For video datasets(generated by ```make_video_dataset.py```) ```bash cd train/clean -python train.py --dataset ../../datasets/video/face --savename face --savefreq 100000 --gan --hd --lr 0.0002 --lambda_gan 1 --gpu_id 0 --perload_num 8 +python train.py --dataset ../../datasets/video/face --savename face --savefreq 100000 --gan --hd --lr 0.0002 --lambda_gan 1 --use_gpu 0 ``` ## Testing Put saved network to ```./pretrained_models/mosaic/``` and rename it as ```add_face.pth``` or ```clean_face_HD.pth``` or ```clean_face_video_HD.pth``` diff --git a/make_datasets/make_pix2pix_dataset.py b/make_datasets/make_pix2pix_dataset.py index 68c39c80772d313bc16661e30e78cf206ddaa80d..58949c830a43bcb87b4846ebd0b875231d2e8a45 100644 --- a/make_datasets/make_pix2pix_dataset.py +++ b/make_datasets/make_pix2pix_dataset.py @@ -1,24 +1,24 @@ import os -import random import sys +sys.path.append("..") +from cores import Options +opt = Options() + +import random import datetime import time -import shutil -import threading import warnings warnings.filterwarnings(action='ignore') import numpy as np import cv2 +import torch -sys.path.append("..") from models import runmodel,loadmodel import util.image_processing as impro from util import util,mosaic,data -from cores import Options -opt = Options() opt.parser.add_argument('--datadir',type=str,default='../datasets/draw/face', help='') opt.parser.add_argument('--savedir',type=str,default='../datasets/pix2pix/face', help='') opt.parser.add_argument('--name',type=str,default='', help='save name') @@ -61,51 +61,16 @@ if 'drawn' in opt.mod: maskpaths.sort() if 'network' in opt.mod or 'irregular' in opt.mod: imgpaths = util.Traversal(opt.datadir) + imgpaths = util.is_imgs(imgpaths) random.shuffle (imgpaths) if 'irregular' in opt.mod: irrpaths = util.Traversal(opt.irrholedir) -#def network +#def network if 'network' in opt.mod: net = loadmodel.bisenet(opt,'roi') - -# def checksaveimage(opt,img,mask): - -# #check -# saveflag = True -# x,y,size,area = impro.boundingSquare(mask, random.uniform(1.4,1.6)) -# if area < 1000: -# saveflag = False -# else: -# if opt.square: -# if size < opt.minsize: -# saveflag = False -# else: -# img = impro.resize(img[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC) -# mask = impro.resize(mask[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC) -# if impro.Q_lapulase(img)0.5: + Q = random.randint(1,15) + img = impro.dctblur(img,Q) + img_mosaic = impro.dctblur(img_mosaic,Q) + savecnt += 1 if opt.hd: @@ -185,6 +157,6 @@ for fold in range(opt.fold): all_time = used_time/filecnt*all_length print('\r','',str(filecnt)+'/'+str(all_length)+' ', - util.get_bar(percent,30),'', + util.get_bar(percent,25),'', util.second2stamp(used_time)+'/'+util.second2stamp(all_time), 'f:'+str(savecnt),end= " ") \ No newline at end of file diff --git a/make_datasets/make_video_dataset.py b/make_datasets/make_video_dataset.py index d5658d607e8a2a5debbd13ea5e4edafbb4e67992..328f05f61a535c0c72e455737f869c4280167417 100644 --- a/make_datasets/make_video_dataset.py +++ b/make_datasets/make_video_dataset.py @@ -1,21 +1,22 @@ import os -import random import sys +sys.path.append("..") +from cores import Options +opt = Options() + +import random import datetime import time -import shutil -import threading import numpy as np import cv2 +import torch -sys.path.append("..") from models import runmodel,loadmodel import util.image_processing as impro from util import util,mosaic,data,ffmpeg -from cores import Options -opt = Options() + opt.parser.add_argument('--datadir',type=str,default='your video dir', help='') opt.parser.add_argument('--savedir',type=str,default='../datasets/video/face', help='') opt.parser.add_argument('--interval',type=int,default=30, help='interval of split video ') @@ -25,6 +26,7 @@ opt.parser.add_argument('--quality', type=int ,default= 45,help='minimal quality opt.parser.add_argument('--outsize', type=int ,default= 286,help='') opt.parser.add_argument('--startcnt', type=int ,default= 0,help='') opt.parser.add_argument('--minsize', type=int ,default= 96,help='minimal roi size') +opt.parser.add_argument('--no_sclectscene', action='store_true', help='') opt = opt.getparse() @@ -44,52 +46,31 @@ video_cnt = 1 starttime = datetime.datetime.now() for videopath in videopaths: try: - timestamps=[] - fps,endtime,height,width = ffmpeg.get_video_infos(videopath) - for cut_point in range(1,int((endtime-opt.time)/opt.interval)): - util.clean_tempfiles() - ffmpeg.video2image(videopath, './tmp/video2image/%05d.'+opt.tempimage_type,fps=1, - start_time = util.second2stamp(cut_point*opt.interval),last_time = util.second2stamp(opt.time)) - imagepaths = util.Traversal('./tmp/video2image') - cnt = 0 - for i in range(opt.time): - img = impro.imread(imagepaths[i]) - mask = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0] - if not opt.all_mosaic_area: - mask = impro.find_mostlikely_ROI(mask) - x,y,size,area = impro.boundingSquare(mask,Ex_mul=1) - if area > opt.minmaskarea and size>opt.minsize and impro.Q_lapulase(img)>opt.quality: - cnt +=1 - if cnt == opt.time: - # print(second) - timestamps.append(util.second2stamp(cut_point*opt.interval)) + if opt.no_sclectscene: + timestamps=['00:00:00'] + else: + timestamps=[] + fps,endtime,height,width = ffmpeg.get_video_infos(videopath) + for cut_point in range(1,int((endtime-opt.time)/opt.interval)): + util.clean_tempfiles() + ffmpeg.video2image(videopath, './tmp/video2image/%05d.'+opt.tempimage_type,fps=1, + start_time = util.second2stamp(cut_point*opt.interval),last_time = util.second2stamp(opt.time)) + imagepaths = util.Traversal('./tmp/video2image') + cnt = 0 + for i in range(opt.time): + img = impro.imread(imagepaths[i]) + mask = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0] + if not opt.all_mosaic_area: + mask = impro.find_mostlikely_ROI(mask) + x,y,size,area = impro.boundingSquare(mask,Ex_mul=1) + if area > opt.minmaskarea and size>opt.minsize and impro.Q_lapulase(img)>opt.quality: + cnt +=1 + if cnt == opt.time: + # print(second) + timestamps.append(util.second2stamp(cut_point*opt.interval)) util.writelog(os.path.join(opt.savedir,'opt.txt'),videopath+'\n'+str(timestamps)) #print(timestamps) - # util.clean_tempfiles() - # fps,endtime,height,width = ffmpeg.get_video_infos(videopath) - # # print(fps,endtime,height,width) - # ffmpeg.continuous_screenshot(videopath, './tmp/video2image', 1) - - # # find where to cut - # print('Find where to cut...') - # timestamps=[] - # imagepaths = util.Traversal('./tmp/video2image') - # for second in range(int(endtime)): - # if second%opt.interval==0: - # cnt = 0 - # for i in range(opt.time): - # img = impro.imread(imagepaths[second+i]) - # mask = runmodel.get_ROI_position(img,net,opt)[0] - # if not opt.all_mosaic_area: - # mask = impro.find_mostlikely_ROI(mask) - # if impro.mask_area(mask) > opt.minmaskarea and impro.Q_lapulase(img)>opt.quality: - # # print(impro.mask_area(mask)) - # cnt +=1 - # if cnt == opt.time: - # # print(second) - # timestamps.append(util.second2stamp(second)) - #generate datasets print('Generate datasets...') for timestamp in timestamps: @@ -143,3 +124,5 @@ for videopath in videopaths: util.writelog(os.path.join(opt.savedir,'opt.txt'), videopath+'\n'+str(result_cnt)+'\n'+str(e)) video_cnt +=1 + if opt.use_gpu != -1: + torch.cuda.empty_cache() diff --git a/models/loadmodel.py b/models/loadmodel.py index eedd34e3e227f06a44d2b5265e69f161f7bd4041..974064afcd1e9328b46934ecf48f76d92a4708b3 100755 --- a/models/loadmodel.py +++ b/models/loadmodel.py @@ -34,7 +34,7 @@ def pix2pix(opt): show_paramsnumber(netG,'netG') netG.load_state_dict(torch.load(opt.model_path)) netG.eval() - if opt.use_gpu: + if opt.use_gpu != -1: netG.cuda() return netG @@ -60,7 +60,7 @@ def style(opt): __patch_instance_norm_state_dict(state_dict, netG, key.split('.')) netG.load_state_dict(state_dict) - if opt.use_gpu: + if opt.use_gpu != -1: netG.cuda() return netG @@ -72,7 +72,7 @@ def video(opt): show_paramsnumber(netG,'netG') netG.load_state_dict(torch.load(opt.model_path)) netG.eval() - if opt.use_gpu: + if opt.use_gpu != -1: netG.cuda() return netG @@ -87,24 +87,6 @@ def bisenet(opt,type='roi'): elif type == 'mosaic': net.load_state_dict(torch.load(opt.mosaic_position_model_path)) net.eval() - if opt.use_gpu: + if opt.use_gpu != -1: net.cuda() return net - -# def unet_clean(opt): -# net = UNet(n_channels = 3, n_classes = 1) -# show_paramsnumber(net,'segment') -# net.load_state_dict(torch.load(opt.mosaic_position_model_path)) -# net.eval() -# if opt.use_gpu: -# net.cuda() -# return net - -# def unet(opt): -# net = UNet(n_channels = 3, n_classes = 1) -# show_paramsnumber(net,'segment') -# net.load_state_dict(torch.load(opt.model_path)) -# net.eval() -# if opt.use_gpu: -# net.cuda() -# return net diff --git a/models/pix2pixHD_model.py b/models/pix2pixHD_model.py index b20369b0780bf989ca67a49f7da1e6254df8b303..f968940667ba3a5eeca8b50025cb8873984273cc 100644 --- a/models/pix2pixHD_model.py +++ b/models/pix2pixHD_model.py @@ -48,7 +48,7 @@ def define_G(input_nc, output_nc, ngf, netG, n_downsample_global=3, n_blocks_glo def define_D(input_nc, ndf, n_layers_D, norm='instance', use_sigmoid=False, num_D=1, getIntermFeat=False, gpu_ids=[]): norm_layer = get_norm_layer(norm_type=norm) netD = MultiscaleDiscriminator(input_nc, ndf, n_layers_D, norm_layer, use_sigmoid, num_D, getIntermFeat) - print(netD) + #print(netD) if len(gpu_ids) > 0: assert(torch.cuda.is_available()) netD.cuda(gpu_ids[0]) @@ -67,6 +67,24 @@ def print_network(net): ############################################################################## # Losses ############################################################################## +class GAN_Feat_loss(nn.Module): + def __init__(self, opt): + super(GAN_Feat_loss, self).__init__() + self.num_D = opt.num_D + self.n_layers_D = opt.n_layers_D + self.lambda_feat = opt.lambda_feat + self.criterionFeat = nn.L1Loss() + + def forward(self, pred_fake, pred_real): + loss_G_GAN_Feat = 0 + feat_weights = 4.0 / (self.n_layers_D + 1) + D_weights = 1.0 / self.num_D + for i in range(self.num_D): + for j in range(len(pred_fake[i])-1): + loss_G_GAN_Feat += D_weights * feat_weights * \ + self.criterionFeat(pred_fake[i][j], pred_real[i][j].detach()) * self.lambda_feat + return loss_G_GAN_Feat + class GANLoss(nn.Module): def __init__(self, use_lsgan=True, target_real_label=1.0, target_fake_label=0.0, tensor=torch.FloatTensor): diff --git a/models/runmodel.py b/models/runmodel.py index 2ff8414b0cbf9255415fe76747fc9a42eb579a3b..2bdc88d8df80759775927159d6fb78df7f4345b0 100755 --- a/models/runmodel.py +++ b/models/runmodel.py @@ -7,7 +7,7 @@ from util import data import torch import numpy as np -def run_segment(img,net,size = 360,use_gpu = True): +def run_segment(img,net,size = 360,use_gpu = 0): img = impro.resize(img,size) img = data.im2tensor(img,use_gpu = use_gpu, bgr2rgb = False,use_transform = False , is0_1 = True) mask = net(img) diff --git a/train/add/train.py b/train/add/train.py index bb8e953dc9572d912e01185c61b08223af9b8f92..f8c23bdedcf14ec275722d688fef722412668a6a 100644 --- a/train/add/train.py +++ b/train/add/train.py @@ -1,5 +1,10 @@ -import sys import os +import sys +sys.path.append("..") +sys.path.append("../..") +from cores import Options +opt = Options() + import random import datetime import time @@ -13,9 +18,6 @@ import torch.backends.cudnn as cudnn import torch.nn as nn from torch import optim -sys.path.append("..") -sys.path.append("../..") -from cores import Options from util import mosaic,util,ffmpeg,filt,data from util import image_processing as impro from models import unet_model,BiSeNet_model @@ -24,8 +26,6 @@ from models import unet_model,BiSeNet_model ''' --------------------------Get options-------------------------- ''' -opt = Options() -opt.parser.add_argument('--gpu_id',type=int,default=0, help='') opt.parser.add_argument('--lr',type=float,default=0.001, help='') opt.parser.add_argument('--finesize',type=int,default=360, help='') opt.parser.add_argument('--loadsize',type=int,default=400, help='') @@ -51,12 +51,11 @@ dir_checkpoint = os.path.join('checkpoints/',opt.savename) util.makedirs(dir_checkpoint) util.writelog(os.path.join(dir_checkpoint,'loss.txt'), str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt)) -torch.cuda.set_device(opt.gpu_id) def Totensor(img,use_gpu=True): size=img.shape[0] img = torch.from_numpy(img).float() - if opt.use_gpu: + if opt.use_gpu != -1: img = img.cuda() return img @@ -110,7 +109,7 @@ if opt.continuetrain: f = open(os.path.join(dir_checkpoint,'epoch_log.txt'),'r') opt.startepoch = int(f.read()) f.close() -if opt.use_gpu: +if opt.use_gpu != -1: net.cuda() cudnn.benchmark = True @@ -134,7 +133,7 @@ for epoch in range(opt.startepoch,opt.maxepoch): starttime = datetime.datetime.now() util.writelog(os.path.join(dir_checkpoint,'loss.txt'),'Epoch {}/{}.'.format(epoch + 1, opt.maxepoch),True) net.train() - if opt.use_gpu: + if opt.use_gpu != -1: net.cuda() epoch_loss = 0 for i in range(int(img_num*0.8/opt.batchsize)): diff --git a/train/clean/train.py b/train/clean/train.py index 6ec5e881aeeaa6f18ace9c0433c1a858cb54bafc..b865246f59da75c66ed66184e819c8906ba181a0 100644 --- a/train/clean/train.py +++ b/train/clean/train.py @@ -1,4 +1,10 @@ import os +import sys +sys.path.append("..") +sys.path.append("../..") +from cores import Options +opt = Options() + import numpy as np import cv2 import random @@ -6,12 +12,8 @@ import torch import torch.nn as nn import time -import sys -sys.path.append("..") -sys.path.append("../..") from util import mosaic,util,ffmpeg,filt,data from util import image_processing as impro -from cores import Options from models import pix2pix_model,pix2pixHD_model,video_model,unet_model,loadmodel,videoHD_model from matplotlib import pyplot as plt import torch.backends.cudnn as cudnn @@ -19,9 +21,6 @@ import torch.backends.cudnn as cudnn ''' --------------------------Get options-------------------------- ''' - -opt = Options() -opt.parser.add_argument('--gpu_id',type=int,default=0, help='') opt.parser.add_argument('--N',type=int,default=25, help='') opt.parser.add_argument('--lr',type=float,default=0.0002, help='') opt.parser.add_argument('--beta1',type=float,default=0.5, help='') @@ -33,8 +32,11 @@ opt.parser.add_argument('--lambda_gan',type=float,default=1, help='') opt.parser.add_argument('--finesize',type=int,default=256, help='') opt.parser.add_argument('--loadsize',type=int,default=286, help='') opt.parser.add_argument('--batchsize',type=int,default=1, help='') -opt.parser.add_argument('--perload_num',type=int,default=16, help='number of images pool') +opt.parser.add_argument('--perload_num',type=int,default=64, help='number of images pool') opt.parser.add_argument('--norm',type=str,default='instance', help='') +opt.parser.add_argument('--num_D', type=int, default=2, help='number of discriminators to use') +opt.parser.add_argument('--n_layers_D', type=int, default=3, help='only used if which_model_netD==n_layers') +opt.parser.add_argument('--lambda_feat', type=float, default=10.0, help='weight for feature matching loss') opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='') opt.parser.add_argument('--maxiter',type=int,default=10000000, help='') @@ -52,11 +54,11 @@ dir_checkpoint = os.path.join('checkpoints/',opt.savename) util.makedirs(dir_checkpoint) util.writelog(os.path.join(dir_checkpoint,'loss.txt'), str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt)) -torch.cuda.set_device(opt.gpu_id) +cudnn.benchmark = True N = opt.N -loss_sum = [0.,0.,0.,0.] -loss_plot = [[],[]] +loss_sum = [0.,0.,0.,0.,0.,0] +loss_plot = [[],[],[],[]] item_plot = [] # list video dir @@ -71,22 +73,24 @@ for video in videonames: tmp.append(video) videonames = tmp video_num = len(videonames) -#def network + +#--------------------------Init network-------------------------- print('Init network...') if opt.hd: netG = videoHD_model.MosaicNet(3*N+1, 3, norm=opt.norm) else: netG = video_model.MosaicNet(3*N+1, 3, norm=opt.norm) +netG.cuda() loadmodel.show_paramsnumber(netG,'netG') if opt.gan: if opt.hd: - #netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=1) - netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=2,getIntermFeat=True) + netD = pix2pixHD_model.define_D(6, 64, opt.n_layers_D, norm = opt.norm, use_sigmoid=False, num_D=opt.num_D,getIntermFeat=True) else: netD = pix2pix_model.define_D(3*2, 64, 'basic', norm = opt.norm) - netD.train() + netD.cuda() +#--------------------------continue train-------------------------- if opt.continuetrain: if not os.path.isfile(os.path.join(dir_checkpoint,'last_G.pth')): opt.continuetrain = False @@ -99,58 +103,56 @@ if opt.continuetrain: opt.startiter = int(f.read()) f.close() +#--------------------------optimizer & loss-------------------------- optimizer_G = torch.optim.Adam(netG.parameters(), lr=opt.lr,betas=(opt.beta1, 0.999)) criterion_L1 = nn.L1Loss() criterion_L2 = nn.MSELoss() if opt.gan: optimizer_D = torch.optim.Adam(netD.parameters(), lr=opt.lr,betas=(opt.beta1, 0.999)) if opt.hd: - criterionGAN = pix2pixHD_model.GANLoss(tensor=torch.cuda.FloatTensor) + criterionGAN = pix2pixHD_model.GANLoss(tensor=torch.cuda.FloatTensor).cuda() + # criterionFeat = torch.nn.L1Loss() + criterionFeat = pix2pixHD_model.GAN_Feat_loss(opt) + criterionVGG = pix2pixHD_model.VGGLoss([opt.use_gpu]) else: criterionGAN = pix2pix_model.GANLoss(gan_mode='lsgan').cuda() -if opt.use_gpu: - netG.cuda() - if opt.gan: - netD.cuda() - criterionGAN.cuda() - cudnn.benchmark = True - ''' --------------------------preload data & data pool-------------------------- ''' -def loaddata(video_index): +# def loaddata(video_index): - videoname = videonames[video_index] - img_index = random.randint(int(N/2)+1,lengths[video_index]- int(N/2)-1) +# videoname = videonames[video_index] +# img_index = random.randint(int(N/2)+1,lengths[video_index]- int(N/2)-1) - input_img = np.zeros((opt.loadsize,opt.loadsize,3*N+1), dtype='uint8') - # this frame - this_mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index)+'.png'),'gray',loadsize=opt.loadsize) - input_img[:,:,-1] = this_mask - #print(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg')) - ground_true = impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg'),loadsize=opt.loadsize) - mosaic_size,mod,rect_rat,father = mosaic.get_random_parameter(ground_true,this_mask) - # merge other frame - for i in range(0,N): - img = impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index+i-int(N/2))+'.jpg'),loadsize=opt.loadsize) - mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index+i-int(N/2))+'.png'),'gray',loadsize=opt.loadsize) - img_mosaic = mosaic.addmosaic_base(img, mask, mosaic_size,model = mod,rect_rat=rect_rat,father=father) - input_img[:,:,i*3:(i+1)*3] = img_mosaic - # to tensor - input_img,ground_true = data.random_transform_video(input_img,ground_true,opt.finesize,N) - input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False) - ground_true = data.im2tensor(ground_true,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False) +# input_img = np.zeros((opt.loadsize,opt.loadsize,3*N+1), dtype='uint8') +# # this frame +# this_mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index)+'.png'),'gray',loadsize=opt.loadsize) +# input_img[:,:,-1] = this_mask +# #print(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg')) +# ground_true = impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg'),loadsize=opt.loadsize) +# mosaic_size,mod,rect_rat,feather = mosaic.get_random_parameter(ground_true,this_mask) +# start_pos = mosaic.get_random_startpos(num=N,bisa_p=0.3,bisa_max=mosaic_size,bisa_max_part=3) +# # merge other frame +# for i in range(0,N): +# img = impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index+i-int(N/2))+'.jpg'),loadsize=opt.loadsize) +# mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index+i-int(N/2))+'.png'),'gray',loadsize=opt.loadsize) +# img_mosaic = mosaic.addmosaic_base(img, mask, mosaic_size,model = mod,rect_rat=rect_rat,feather=feather,start_point=start_pos[i]) +# input_img[:,:,i*3:(i+1)*3] = img_mosaic +# # to tensor +# input_img,ground_true = data.random_transform_video(input_img,ground_true,opt.finesize,N) +# input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=-1,use_transform = False,is0_1=False) +# ground_true = data.im2tensor(ground_true,bgr2rgb=False,use_gpu=-1,use_transform = False,is0_1=False) - return input_img,ground_true +# return input_img,ground_true print('Preloading data, please wait...') if opt.perload_num <= opt.batchsize: opt.perload_num = opt.batchsize*2 #data pool -input_imgs = torch.rand(opt.perload_num,N*3+1,opt.finesize,opt.finesize).cuda() -ground_trues = torch.rand(opt.perload_num,3,opt.finesize,opt.finesize).cuda() +input_imgs = torch.rand(opt.perload_num,N*3+1,opt.finesize,opt.finesize) +ground_trues = torch.rand(opt.perload_num,3,opt.finesize,opt.finesize) load_cnt = 0 def preload(): @@ -158,8 +160,10 @@ def preload(): while 1: try: video_index = random.randint(0,video_num-1) - ran = random.randint(0, opt.perload_num-1) - input_imgs[ran],ground_trues[ran] = loaddata(video_index) + videoname = videonames[video_index] + img_index = random.randint(int(N/2)+1,lengths[video_index]- int(N/2)-1) + input_imgs[load_cnt%opt.perload_num],ground_trues[load_cnt%opt.perload_num] = data.load_train_video(videoname,img_index,opt) + # input_imgs[load_cnt%opt.perload_num],ground_trues[load_cnt%opt.perload_num] = loaddata(video_index) load_cnt += 1 # time.sleep(0.1) except Exception as e: @@ -172,7 +176,8 @@ time_start=time.time() while load_cnt < opt.perload_num: time.sleep(0.1) time_end=time.time() -print('load speed:',round((time_end-time_start)/opt.perload_num,3),'s/it') +util.writelog(os.path.join(dir_checkpoint,'loss.txt'), + 'load speed: '+str(round((time_end-time_start)/(opt.perload_num),3))+' s/it',True) ''' --------------------------train-------------------------- @@ -180,22 +185,24 @@ print('load speed:',round((time_end-time_start)/opt.perload_num,3),'s/it') util.copyfile('./train.py', os.path.join(dir_checkpoint,'train.py')) util.copyfile('../../models/videoHD_model.py', os.path.join(dir_checkpoint,'model.py')) netG.train() +netD.train() time_start=time.time() print("Begin training...") for iter in range(opt.startiter+1,opt.maxiter): - ran = random.randint(0, opt.perload_num-opt.batchsize-1) - inputdata = input_imgs[ran:ran+opt.batchsize].clone() - target = ground_trues[ran:ran+opt.batchsize].clone() + ran = random.randint(0, opt.perload_num-opt.batchsize) + inputdata = (input_imgs[ran:ran+opt.batchsize].clone()).cuda() + target = (ground_trues[ran:ran+opt.batchsize].clone()).cuda() if opt.gan: # compute fake images: G(A) pred = netG(inputdata) - # update D + real_A = inputdata[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:] + + # --------------------update D-------------------- pix2pix_model.set_requires_grad(netD,True) optimizer_D.zero_grad() # Fake - real_A = inputdata[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:] fake_AB = torch.cat((real_A, pred), 1) pred_fake = netD(fake_AB.detach()) loss_D_fake = criterionGAN(pred_fake, False) @@ -205,29 +212,51 @@ for iter in range(opt.startiter+1,opt.maxiter): loss_D_real = criterionGAN(pred_real, True) # combine loss and calculate gradients loss_D = (loss_D_fake + loss_D_real) * 0.5 - loss_sum[2] += loss_D_fake.item() - loss_sum[3] += loss_D_real.item() + loss_sum[4] += loss_D_fake.item() + loss_sum[5] += loss_D_real.item() # udpate D's weights loss_D.backward() optimizer_D.step() - # update G + # --------------------update G-------------------- pix2pix_model.set_requires_grad(netD,False) optimizer_G.zero_grad() + # First, G(A) should fake the discriminator - real_A = inputdata[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:] fake_AB = torch.cat((real_A, pred), 1) pred_fake = netD(fake_AB) loss_G_GAN = criterionGAN(pred_fake, True)*opt.lambda_gan - # Second, G(A) = B + # GAN feature matching loss + # if opt.hd: + # real_AB = torch.cat((real_A, target), 1) + # pred_real = netD(real_AB) + # loss_G_GAN_Feat=criterionFeat(pred_fake,pred_real) + # loss_G_GAN_Feat = 0 + # feat_weights = 4.0 / (opt.n_layers_D + 1) + # D_weights = 1.0 / opt.num_D + # for i in range(opt.num_D): + # for j in range(len(pred_fake[i])-1): + # loss_G_GAN_Feat += D_weights * feat_weights * criterionFeat(pred_fake[i][j], pred_real[i][j].detach()) * opt.lambda_feat + + # combine loss and calculate gradients if opt.l2: loss_G_L1 = (criterion_L1(pred, target)+criterion_L2(pred, target)) * opt.lambda_L1 else: loss_G_L1 = criterion_L1(pred, target) * opt.lambda_L1 - # combine loss and calculate gradients - loss_G = loss_G_GAN + loss_G_L1 + + if opt.hd: + real_AB = torch.cat((real_A, target), 1) + pred_real = netD(real_AB) + loss_G_GAN_Feat = criterionFeat(pred_fake,pred_real) + loss_VGG = criterionVGG(pred, target) * opt.lambda_feat + loss_G = loss_G_GAN + loss_G_L1 + loss_G_GAN_Feat + loss_VGG + else: + loss_G = loss_G_GAN + loss_G_L1 loss_sum[0] += loss_G_L1.item() loss_sum[1] += loss_G_GAN.item() + loss_sum[2] += loss_G_GAN_Feat.item() + loss_sum[3] += loss_VGG.item() + # udpate G's weights loss_G.backward() optimizer_G.step() @@ -244,64 +273,67 @@ for iter in range(opt.startiter+1,opt.maxiter): loss_G_L1.backward() optimizer_G.step() - if (iter+1)%100 == 0: + # save eval result + if (iter+1)%1000 == 0: + video_index = random.randint(0,video_num-1) + videoname = videonames[video_index] + img_index = random.randint(int(N/2)+1,lengths[video_index]- int(N/2)-1) + inputdata,target = data.load_train_video(videoname, img_index, opt) + + # inputdata,target = loaddata(random.randint(0,video_num-1)) + inputdata,target = inputdata.cuda(),target.cuda() + with torch.no_grad(): + pred = netG(inputdata) try: data.showresult(inputdata[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:], - target, pred,os.path.join(dir_checkpoint,'result_train.jpg')) + target, pred, os.path.join(dir_checkpoint,'result_eval.jpg')) except Exception as e: print(e) + # plot if (iter+1)%1000 == 0: time_end = time.time() if opt.gan: - savestr ='iter:{0:d} L1_loss:{1:.4f} G_loss:{2:.4f} D_f:{3:.4f} D_r:{4:.4f} time:{5:.2f}'.format( + savestr ='iter:{0:d} L1_loss:{1:.3f} GAN_loss:{2:.3f} Feat:{3:.3f} VGG:{4:.3f} time:{5:.2f}'.format( iter+1,loss_sum[0]/1000,loss_sum[1]/1000,loss_sum[2]/1000,loss_sum[3]/1000,(time_end-time_start)/1000) util.writelog(os.path.join(dir_checkpoint,'loss.txt'), savestr,True) if (iter+1)/1000 >= 10: - loss_plot[0].append(loss_sum[0]/1000) - loss_plot[1].append(loss_sum[1]/1000) - item_plot.append(iter+1) - try: - plt.plot(item_plot,loss_plot[0]) - plt.plot(item_plot,loss_plot[1]) - plt.savefig(os.path.join(dir_checkpoint,'loss.jpg')) - plt.close() - except Exception as e: - print("error:",e) - else: - savestr ='iter:{0:d} L1_loss:{1:.4f} time:{2:.2f}'.format(iter+1,loss_sum[0]/1000,(time_end-time_start)/1000) - util.writelog(os.path.join(dir_checkpoint,'loss.txt'), savestr,True) - if (iter+1)/1000 >= 10: - loss_plot[0].append(loss_sum[0]/1000) + for i in range(4):loss_plot[i].append(loss_sum[i]/1000) item_plot.append(iter+1) try: - plt.plot(item_plot,loss_plot[0]) + labels = ['L1_loss','GAN_loss','GAN_Feat_loss','VGG_loss'] + for i in range(4):plt.plot(item_plot,loss_plot[i],label=labels[i]) + plt.xlabel('iter') + plt.legend(loc=1) plt.savefig(os.path.join(dir_checkpoint,'loss.jpg')) plt.close() except Exception as e: print("error:",e) - loss_sum = [0.,0.,0.,0.] + + loss_sum = [0.,0.,0.,0.,0.,0.] time_start=time.time() # save network - if (iter+1)%opt.savefreq == 0: - if iter+1 != opt.savefreq: - os.rename(os.path.join(dir_checkpoint,'last_G.pth'),os.path.join(dir_checkpoint,str(iter+1-opt.savefreq)+'G.pth')) + if (iter+1)%(opt.savefreq//10) == 0: torch.save(netG.cpu().state_dict(),os.path.join(dir_checkpoint,'last_G.pth')) if opt.gan: - if iter+1 != opt.savefreq: - os.rename(os.path.join(dir_checkpoint,'last_D.pth'),os.path.join(dir_checkpoint,str(iter+1-opt.savefreq)+'D.pth')) torch.save(netD.cpu().state_dict(),os.path.join(dir_checkpoint,'last_D.pth')) - if opt.use_gpu: + if opt.use_gpu !=-1 : netG.cuda() if opt.gan: netD.cuda() f = open(os.path.join(dir_checkpoint,'iter'),'w+') f.write(str(iter+1)) f.close() + + if (iter+1)%opt.savefreq == 0: + os.rename(os.path.join(dir_checkpoint,'last_G.pth'),os.path.join(dir_checkpoint,str(iter+1)+'G.pth')) + if opt.gan: + os.rename(os.path.join(dir_checkpoint,'last_D.pth'),os.path.join(dir_checkpoint,str(iter+1)+'D.pth')) print('network saved.') - #test + #test + if (iter+1)%opt.savefreq == 0: if os.path.isdir('./test'): netG.eval() diff --git a/util/data.py b/util/data.py index 1ffb0e1376aa21cf1ed178db7ac0585cb7bfe1bd..c76dfa336262a0ecdd3b298c1b5f8889b3c506a4 100755 --- a/util/data.py +++ b/util/data.py @@ -1,10 +1,11 @@ import random +import os import numpy as np import torch import torchvision.transforms as transforms import cv2 -from .image_processing import color_adjust,dctblur - +from . import image_processing as impro +from . import mosaic transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean = (0.5, 0.5, 0.5), std = (0.5, 0.5, 0.5)) @@ -34,7 +35,7 @@ def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True ,is0_1 = return image_numpy.astype(imtype) -def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape = True, use_gpu = True, use_transform = True,is0_1 = True): +def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape = True, use_gpu = 0, use_transform = True,is0_1 = True): if gray: h, w = image_numpy.shape @@ -57,7 +58,7 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape = image_tensor = torch.from_numpy(image_numpy).float() if reshape: image_tensor = image_tensor.reshape(1,ch,h,w) - if use_gpu: + if use_gpu != -1: image_tensor = image_tensor.cuda() return image_tensor @@ -68,12 +69,18 @@ def shuffledata(data,target): np.random.shuffle(target) def random_transform_video(src,target,finesize,N): + #random blur + if random.random()<0.2: + h,w = src.shape[:2] + src = src[:8*(h//8),:8*(w//8)] + Q_ran = random.randint(1,15) + src[:,:,:3*N] = impro.dctblur(src[:,:,:3*N],Q_ran) + target = impro.dctblur(target,Q_ran) #random crop h,w = target.shape[:2] h_move = int((h-finesize)*random.random()) w_move = int((w-finesize)*random.random()) - # print(h,w,h_move,w_move) target = target[h_move:h_move+finesize,w_move:w_move+finesize,:] src = src[h_move:h_move+finesize,w_move:w_move+finesize,:] @@ -89,10 +96,10 @@ def random_transform_video(src,target,finesize,N): g = random.uniform(-0.05,0.05) r = random.uniform(-0.05,0.05) for i in range(N): - src[:,:,i*3:(i+1)*3] = color_adjust(src[:,:,i*3:(i+1)*3],alpha,beta,b,g,r) - target = color_adjust(target,alpha,beta,b,g,r) + src[:,:,i*3:(i+1)*3] = impro.color_adjust(src[:,:,i*3:(i+1)*3],alpha,beta,b,g,r) + target = impro.color_adjust(target,alpha,beta,b,g,r) - #random blur + #random resize blur if random.random()<0.5: interpolations = [cv2.INTER_LINEAR,cv2.INTER_CUBIC,cv2.INTER_LANCZOS4] size_ran = random.uniform(0.7,1.5) @@ -156,7 +163,7 @@ def random_transform_image(img,mask,finesize,test_flag = False): img,mask = img_crop,mask_crop #random color - img = color_adjust(img,ran=True) + img = impro.color_adjust(img,ran=True) #random flip if random.random()<0.5: @@ -169,7 +176,7 @@ def random_transform_image(img,mask,finesize,test_flag = False): #random blur if random.random()<0.5: - img = dctblur(img,random.randint(1,15)) + img = impro.dctblur(img,random.randint(1,15)) # interpolations = [cv2.INTER_LINEAR,cv2.INTER_CUBIC,cv2.INTER_LANCZOS4] # size_ran = random.uniform(0.7,1.5) @@ -183,6 +190,31 @@ def random_transform_image(img,mask,finesize,test_flag = False): print('warning! shape error.') return img,mask + +def load_train_video(videoname,img_index,opt): + N = opt.N + input_img = np.zeros((opt.loadsize,opt.loadsize,3*N+1), dtype='uint8') + # this frame + this_mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index)+'.png'),'gray',loadsize=opt.loadsize) + input_img[:,:,-1] = this_mask + #print(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg')) + ground_true = impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg'),loadsize=opt.loadsize) + mosaic_size,mod,rect_rat,feather = mosaic.get_random_parameter(ground_true,this_mask) + start_pos = mosaic.get_random_startpos(num=N,bisa_p=0.3,bisa_max=mosaic_size,bisa_max_part=3) + # merge other frame + for i in range(0,N): + img = impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index+i-int(N/2))+'.jpg'),loadsize=opt.loadsize) + mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index+i-int(N/2))+'.png'),'gray',loadsize=opt.loadsize) + img_mosaic = mosaic.addmosaic_base(img, mask, mosaic_size,model = mod,rect_rat=rect_rat,feather=feather,start_point=start_pos[i]) + input_img[:,:,i*3:(i+1)*3] = img_mosaic + # to tensor + input_img,ground_true = random_transform_video(input_img,ground_true,opt.finesize,N) + input_img = im2tensor(input_img,bgr2rgb=False,use_gpu=-1,use_transform = False,is0_1=False) + ground_true = im2tensor(ground_true,bgr2rgb=False,use_gpu=-1,use_transform = False,is0_1=False) + + return input_img,ground_true + + def showresult(img1,img2,img3,name,is0_1 = False): size = img1.shape[3] showimg=np.zeros((size,size*3,3)) diff --git a/util/image_processing.py b/util/image_processing.py index 4ba9c78b085c8c41391ff053122287a438f1b0fa..95783b2056ba5d37b69c5a1cfa92df8e1c5e0042 100755 --- a/util/image_processing.py +++ b/util/image_processing.py @@ -117,44 +117,17 @@ def makedataset(target_image,orgin_image): img[0:256,256:512] = orgin_image[0:256,int(w/2-256/2):int(w/2+256/2)] return img -def spiltimage(img,size = 128): - h, w = img.shape[:2] - # size = min(h,w) - if w >= h: - img1 = img[:,0:size] - img2 = img[:,w-size:w] - else: - img1 = img[0:size,:] - img2 = img[h-size:h,:] - - return img1,img2 - -def mergeimage(img1,img2,orgin_image,size = 128): - h, w = orgin_image.shape[:2] - new_img1 = np.zeros((h,w), dtype = "uint8") - new_img2 = np.zeros((h,w), dtype = "uint8") - - # size = min(h,w) - if w >= h: - new_img1[:,0:size]=img1 - new_img2[:,w-size:w]=img2 - else: - new_img1[0:size,:]=img1 - new_img2[h-size:h,:]=img2 - result_img = cv2.add(new_img1,new_img2) - return result_img - -def block_dct_and_idct(g,QQF): - T = cv2.dct(g) - IT = np.round(cv2.idct(np.round(np.round(16.0*T/QQF)*QQF/16))) - return IT +def block_dct_and_idct(g,QQF,QQF_16): + return cv2.idct(np.round(16.0*cv2.dct(g)/QQF)*QQF_16) def image_dct_and_idct(I,QF): h,w = I.shape QQF = DCT_Q*QF - for i in range(int(h/8)): - for j in range(int(w/8)): - I[i*8:(i+1)*8,j*8:(j+1)*8] = block_dct_and_idct(I[i*8:(i+1)*8,j*8:(j+1)*8],QQF) + QQF_16 = QQF/16.0 + for i in range(h//8): + for j in range(w//8): + I[i*8:(i+1)*8,j*8:(j+1)*8] = cv2.idct(np.round(16.0*cv2.dct(I[i*8:(i+1)*8,j*8:(j+1)*8])/QQF)*QQF_16) + #I[i*8:(i+1)*8,j*8:(j+1)*8] = block_dct_and_idct(I[i*8:(i+1)*8,j*8:(j+1)*8],QQF,QQF_16) return I def dctblur(img,Q): @@ -162,7 +135,7 @@ def dctblur(img,Q): Q: 1~20, 1->best ''' h,w = img.shape[:2] - img[:8*int(h/8),:8*int(w/8)] + img = img[:8*(h//8),:8*(w//8)] img = img.astype(np.float32) if img.ndim == 2: img = image_dct_and_idct(img, Q) @@ -250,9 +223,9 @@ def Q_lapulase(resImg): score = res.var() return score -def replace_mosaic(img_origin,img_fake,mask,x,y,size,no_father): +def replace_mosaic(img_origin,img_fake,mask,x,y,size,no_feather): img_fake = cv2.resize(img_fake,(size*2,size*2),interpolation=cv2.INTER_LANCZOS4) - if no_father: + if no_feather: img_origin[y-size:y+size,x-size:x+size]=img_fake img_result = img_origin else: diff --git a/util/mosaic.py b/util/mosaic.py index 936227b39430b49bf90013d6157d86d7de01ec6f..c76a24841e285c5d0d0b60f9f708602dd5c4b8ad 100755 --- a/util/mosaic.py +++ b/util/mosaic.py @@ -13,7 +13,7 @@ def addmosaic(img,mask,opt): img = addmosaic_base(img,mask,opt.mosaic_size,opt.output_size,model = opt.mosaic_mod) return img -def addmosaic_base(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6,father=0): +def addmosaic_base(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6,feather=0,start_point=[0,0]): ''' img: input image mask: input mask @@ -21,37 +21,50 @@ def addmosaic_base(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6,fat out_size: output size 0->original model : squa_avg squa_mid squa_random squa_avg_circle_edge rect_avg rect_rat: if model==rect_avg , mosaic w/h=rect_rat - father : father size, -1->no 0->auto + feather : feather size, -1->no 0->auto + start_point : [0,0], please not input this parameter ''' n = int(n) + + h_start = np.clip(start_point[0], 0, n) + w_start = np.clip(start_point[1], 0, n) + pix_mid_h = n//2+h_start + pix_mid_w = n//2+w_start + h, w = img.shape[:2] + h_step = (h-h_start)//n + w_step = (w-w_start)//n if out_size: img = resize(img,out_size) - h, w = img.shape[:2] - mask = cv2.resize(mask,(w,h)) - img_mosaic=img.copy() + if mask.shape[0] != h: + mask = cv2.resize(mask,(w,h)) + img_mosaic = img.copy() if model=='squa_avg': - for i in range(int(h/n)): - for j in range(int(w/n)): - if mask[int(i*n+n/2),int(j*n+n/2)] == 255: - img_mosaic[i*n:(i+1)*n,j*n:(j+1)*n,:]=img[i*n:(i+1)*n,j*n:(j+1)*n,:].mean(0).mean(0) + for i in range(h_step): + for j in range(w_step): + if mask[i*n+pix_mid_h,j*n+pix_mid_w]: + img_mosaic[i*n+h_start:(i+1)*n+h_start,j*n+w_start:(j+1)*n+w_start,:]=\ + img[i*n+h_start:(i+1)*n+h_start,j*n+w_start:(j+1)*n+w_start,:].mean(axis=(0,1)) elif model=='squa_mid': - for i in range(int(h/n)): - for j in range(int(w/n)): - if mask[int(i*n+n/2),int(j*n+n/2)] == 255: - img_mosaic[i*n:(i+1)*n,j*n:(j+1)*n,:]=img[i*n+int(n/2),j*n+int(n/2),:] + for i in range(h_step): + for j in range(w_step): + if mask[i*n+pix_mid_h,j*n+pix_mid_w]: + img_mosaic[i*n+h_start:(i+1)*n+h_start,j*n+w_start:(j+1)*n+w_start,:]=\ + img[i*n+n//2+h_start,j*n+n//2+w_start,:] elif model == 'squa_random': - for i in range(int(h/n)): - for j in range(int(w/n)): - if mask[int(i*n+n/2),int(j*n+n/2)] == 255: - img_mosaic[i*n:(i+1)*n,j*n:(j+1)*n,:]=img[int(i*n-n/2+n*random.random()),int(j*n-n/2+n*random.random()),:] + for i in range(h_step): + for j in range(w_step): + if mask[i*n+pix_mid_h,j*n+pix_mid_w]: + img_mosaic[i*n+h_start:(i+1)*n+h_start,j*n+w_start:(j+1)*n+w_start,:]=\ + img[h_start+int(i*n-n/2+n*random.random()),w_start+int(j*n-n/2+n*random.random()),:] elif model == 'squa_avg_circle_edge': - for i in range(int(h/n)): - for j in range(int(w/n)): - img_mosaic[i*n:(i+1)*n,j*n:(j+1)*n,:]=img[i*n:(i+1)*n,j*n:(j+1)*n,:].mean(0).mean(0) + for i in range(h_step): + for j in range(w_step): + img_mosaic[i*n+h_start:(i+1)*n+h_start,j*n+w_start:(j+1)*n+w_start,:]=\ + img[i*n+h_start:(i+1)*n+h_start,j*n+w_start:(j+1)*n+w_start,:].mean(axis=(0,1)) mask = cv2.threshold(mask,127,255,cv2.THRESH_BINARY)[1] _mask = ch_one2three(mask) mask_inv = cv2.bitwise_not(_mask) @@ -60,20 +73,24 @@ def addmosaic_base(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6,fat img_mosaic = cv2.add(imgroi1,imgroi2) elif model =='rect_avg': - n_h=n - n_w=int(n*rect_rat) - for i in range(int(h/n_h)): - for j in range(int(w/n_w)): - if mask[int(i*n_h+n_h/2),int(j*n_w+n_w/2)] == 255: - img_mosaic[i*n_h:(i+1)*n_h,j*n_w:(j+1)*n_w,:]=img[i*n_h:(i+1)*n_h,j*n_w:(j+1)*n_w,:].mean(0).mean(0) + n_h = n + n_w = int(n*rect_rat) + n_h_half = n_h//2+h_start + n_w_half = n_w//2+w_start + for i in range((h-h_start)//n_h): + for j in range((w-w_start)//n_w): + if mask[i*n_h+n_h_half,j*n_w+n_w_half]: + img_mosaic[i*n_h+h_start:(i+1)*n_h+h_start,j*n_w+w_start:(j+1)*n_w+w_start,:]=\ + img[i*n_h+h_start:(i+1)*n_h+h_start,j*n_w+w_start:(j+1)*n_w+w_start,:].mean(axis=(0,1)) - if father != -1: - if father==0: + if feather != -1: + if feather==0: mask = (cv2.blur(mask, (n, n))) else: - mask = (cv2.blur(mask, (father, father))) - mask = ch_one2three(mask)/255.0 - img_mosaic = (img*(1-mask)+img_mosaic*mask).astype('uint8') + mask = (cv2.blur(mask, (feather, feather))) + mask = mask/255.0 + for i in range(3):img_mosaic[:,:,i] = (img[:,:,i]*(1-mask)+img_mosaic[:,:,i]*mask) + img_mosaic = img_mosaic.astype(np.uint8) return img_mosaic @@ -108,19 +125,21 @@ def get_random_parameter(img,mask): p = np.array([0.5,0.5]) mod = np.random.choice(['normal','bounding'], p = p.ravel()) mosaic_size = get_autosize(img,mask,area_type = mod) - mosaic_size = int(mosaic_size*random.uniform(0.9,2.1)) + mosaic_size = int(mosaic_size*random.uniform(0.9,2.5)) # mosaic mod - p = np.array([0.25, 0.25, 0.1, 0.4]) - mod = np.random.choice(['squa_mid','squa_avg','squa_avg_circle_edge','rect_avg'], p = p.ravel()) + p = np.array([0.25, 0.3, 0.45]) + mod = np.random.choice(['squa_mid','squa_avg','rect_avg'], p = p.ravel()) # rect_rat for rect_avg rect_rat = random.uniform(1.1,1.6) - # father size - father = int(mosaic_size*random.uniform(0,1.5)) + # feather size + feather = -1 + if random.random()<0.7: + feather = int(mosaic_size*random.uniform(0,1.5)) - return mosaic_size,mod,rect_rat,father + return mosaic_size,mod,rect_rat,feather def addmosaic_autosize(img,mask,model,area_type = 'normal'): @@ -129,6 +148,17 @@ def addmosaic_autosize(img,mask,model,area_type = 'normal'): return img_mosaic def addmosaic_random(img,mask): - mosaic_size,mod,rect_rat,father = get_random_parameter(img,mask) - img_mosaic = addmosaic_base(img,mask,mosaic_size,model = mod,rect_rat=rect_rat,father=father) - return img_mosaic \ No newline at end of file + mosaic_size,mod,rect_rat,feather = get_random_parameter(img,mask) + img_mosaic = addmosaic_base(img,mask,mosaic_size,model = mod,rect_rat=rect_rat,feather=feather) + return img_mosaic + +def get_random_startpos(num,bisa_p,bisa_max,bisa_max_part): + pos = np.zeros((num,2), dtype=np.int64) + if random.random()