diff --git a/cores/core.py b/cores/core.py index db992177dd07d3c1a33ea9f924182c6d8c98f3e5..9fb858a91bbe271e3634ba42d7ed7e7f4be7ab30 100644 --- a/cores/core.py +++ b/cores/core.py @@ -283,7 +283,7 @@ def cleanmosaic_video_fusion(opt,netG,netM): mosaic_input[:,:,k*3:(k+1)*3] = impro.resize(img_pool[k][y-size:y+size,x-size:x+size], INPUT_SIZE) mask_input = impro.resize(mask,np.min(img_origin.shape[:2]))[y-size:y+size,x-size:x+size] mosaic_input[:,:,-1] = impro.resize(mask_input, INPUT_SIZE) - mosaic_input_tensor = data.im2tensor(mosaic_input,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1 = False) + mosaic_input_tensor = data.im2tensor(mosaic_input,bgr2rgb=False,gpu_id=opt.gpu_id,use_transform = False,is0_1 = False) unmosaic_pred = netG(mosaic_input_tensor) img_fake = data.tensor2im(unmosaic_pred,rgb2bgr = False ,is0_1 = False) img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather) diff --git a/cores/options.py b/cores/options.py index 6c972b5ef559ae9bc863b19307c56333b70d3ea4..58759f8c468c41a5cb3e5fd7e2dd94d2aad8ffca 100644 --- a/cores/options.py +++ b/cores/options.py @@ -11,7 +11,7 @@ class Options(): def initialize(self): #base - self.parser.add_argument('--use_gpu', type=str,default='0', help='if -1, use cpu') + self.parser.add_argument('--gpu_id', type=str,default='0', help='if -1, use cpu') self.parser.add_argument('--media_path', type=str, default='./imgs/ruoruo.jpg',help='your videos or images path') self.parser.add_argument('-ss', '--start_time', type=str, default='00:00:00',help='start position of video, default is the beginning of video') self.parser.add_argument('-t', '--last_time', type=str, default='00:00:00',help='duration of the video, default is the entire video') @@ -60,13 +60,13 @@ class Options(): self.opt.temp_dir = os.path.join(self.opt.temp_dir, 'DeepMosaics_temp') - if self.opt.use_gpu != '-1': - os.environ["CUDA_VISIBLE_DEVICES"] = str(self.opt.use_gpu) + if self.opt.gpu_id != '-1': + os.environ["CUDA_VISIBLE_DEVICES"] = str(self.opt.gpu_id) import torch if not torch.cuda.is_available(): - self.opt.use_gpu = '-1' + self.opt.gpu_id = '-1' # else: - # self.opt.use_gpu = '-1' + # self.opt.gpu_id = '-1' if test_flag: if not os.path.exists(self.opt.media_path): diff --git a/make_datasets/make_pix2pix_dataset.py b/make_datasets/make_pix2pix_dataset.py index 4256f6c444bbf4df182f6c295efece6f4dbc4fb7..7dcae2561f583fb5182065373e5c8d7f834956d7 100644 --- a/make_datasets/make_pix2pix_dataset.py +++ b/make_datasets/make_pix2pix_dataset.py @@ -91,7 +91,7 @@ for fold in range(opt.fold): mask = mask_irr if 'network' in opt.mod: mask_net = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0] - if opt.use_gpu != -1: + if opt.gpu_id != -1: torch.cuda.empty_cache() if not opt.all_mosaic_area: mask_net = impro.find_mostlikely_ROI(mask_net) diff --git a/make_datasets/make_video_dataset.py b/make_datasets/make_video_dataset.py index b9ce6cb86c439ccc2468580cfd231dd2a892a221..64c7fb77ff2d8dfe7346272256e7f116ccfd2894 100644 --- a/make_datasets/make_video_dataset.py +++ b/make_datasets/make_video_dataset.py @@ -56,6 +56,7 @@ for videopath in videopaths: ffmpeg.video2image(videopath, opt.temp_dir+'/video2image/%05d.'+opt.tempimage_type,fps=1, start_time = util.second2stamp(cut_point*opt.interval),last_time = util.second2stamp(opt.time)) imagepaths = util.Traversal(opt.temp_dir+'/video2image') + imagepaths = sorted(imagepaths) cnt = 0 for i in range(opt.time): img = impro.imread(imagepaths[i]) @@ -124,5 +125,5 @@ for videopath in videopaths: util.writelog(os.path.join(opt.savedir,'opt.txt'), videopath+'\n'+str(result_cnt)+'\n'+str(e)) video_cnt +=1 - if opt.use_gpu != -1: + if opt.gpu_id != '-1': torch.cuda.empty_cache() diff --git a/models/loadmodel.py b/models/loadmodel.py index 974064afcd1e9328b46934ecf48f76d92a4708b3..75ca731a8b9b6b7691f5f6d020a610669a6c2e93 100755 --- a/models/loadmodel.py +++ b/models/loadmodel.py @@ -34,7 +34,7 @@ def pix2pix(opt): show_paramsnumber(netG,'netG') netG.load_state_dict(torch.load(opt.model_path)) netG.eval() - if opt.use_gpu != -1: + if opt.gpu_id != -1: netG.cuda() return netG @@ -60,7 +60,7 @@ def style(opt): __patch_instance_norm_state_dict(state_dict, netG, key.split('.')) netG.load_state_dict(state_dict) - if opt.use_gpu != -1: + if opt.gpu_id != -1: netG.cuda() return netG @@ -72,7 +72,7 @@ def video(opt): show_paramsnumber(netG,'netG') netG.load_state_dict(torch.load(opt.model_path)) netG.eval() - if opt.use_gpu != -1: + if opt.gpu_id != -1: netG.cuda() return netG @@ -87,6 +87,6 @@ def bisenet(opt,type='roi'): elif type == 'mosaic': net.load_state_dict(torch.load(opt.mosaic_position_model_path)) net.eval() - if opt.use_gpu != -1: + if opt.gpu_id != -1: net.cuda() return net diff --git a/models/runmodel.py b/models/runmodel.py index 2bdc88d8df80759775927159d6fb78df7f4345b0..bba1fe40c13d214f9306742f6f1af599e6957bc3 100755 --- a/models/runmodel.py +++ b/models/runmodel.py @@ -7,9 +7,9 @@ from util import data import torch import numpy as np -def run_segment(img,net,size = 360,use_gpu = 0): +def run_segment(img,net,size = 360,gpu_id = 0): img = impro.resize(img,size) - img = data.im2tensor(img,use_gpu = use_gpu, bgr2rgb = False,use_transform = False , is0_1 = True) + img = data.im2tensor(img,gpu_id = gpu_id, bgr2rgb = False,use_transform = False , is0_1 = True) mask = net(img) mask = data.tensor2im(mask, gray=True,rgb2bgr = False, is0_1 = True) return mask @@ -19,7 +19,7 @@ def run_pix2pix(img,net,opt): img = impro.resize(img,512) else: img = impro.resize(img,128) - img = data.im2tensor(img,use_gpu=opt.use_gpu) + img = data.im2tensor(img,gpu_id=opt.gpu_id) img_fake = net(img) img_fake = data.tensor2im(img_fake) return img_fake @@ -53,15 +53,15 @@ def run_styletransfer(opt, net, img): img = cv2.Canny(img,opt.canny-50,opt.canny+50) if opt.only_edges: return img - img = data.im2tensor(img,use_gpu=opt.use_gpu,gray=True,use_transform = False,is0_1 = False) + img = data.im2tensor(img,gpu_id=opt.gpu_id,gray=True,use_transform = False,is0_1 = False) else: - img = data.im2tensor(img,use_gpu=opt.use_gpu,gray=False,use_transform = True) + img = data.im2tensor(img,gpu_id=opt.gpu_id,gray=False,use_transform = True) img = net(img) img = data.tensor2im(img) return img def get_ROI_position(img,net,opt,keepsize=True): - mask = run_segment(img,net,size=360,use_gpu = opt.use_gpu) + mask = run_segment(img,net,size=360,gpu_id = opt.gpu_id) mask = impro.mask_threshold(mask,opt.mask_extend,opt.mask_threshold) if keepsize: mask = impro.resize_like(mask, img) @@ -70,7 +70,7 @@ def get_ROI_position(img,net,opt,keepsize=True): def get_mosaic_position(img_origin,net_mosaic_pos,opt): h,w = img_origin.shape[:2] - mask = run_segment(img_origin,net_mosaic_pos,size=360,use_gpu = opt.use_gpu) + mask = run_segment(img_origin,net_mosaic_pos,size=360,gpu_id = opt.gpu_id) # mask_1 = mask.copy() mask = impro.mask_threshold(mask,ex_mun=int(min(h,w)/20),threshold=opt.mask_threshold) if not opt.all_mosaic_area: diff --git a/train/add/train.py b/train/add/train.py index 8ccc6ee5711290cf032be4a7cb918423678db0ca..a2f04eff39967943375e8a53234616853e170659 100644 --- a/train/add/train.py +++ b/train/add/train.py @@ -54,10 +54,10 @@ util.makedirs(dir_checkpoint) util.writelog(os.path.join(dir_checkpoint,'loss.txt'), str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt)) -def Totensor(img,use_gpu=True): +def Totensor(img,gpu_id=True): size=img.shape[0] img = torch.from_numpy(img).float() - if opt.use_gpu != -1: + if opt.gpu_id != -1: img = img.cuda() return img @@ -71,8 +71,8 @@ def loadimage(imagepaths,maskpaths,opt,test_flag = False): img,mask = data.random_transform_pair_image(img, mask, opt.finesize, test_flag) images[i] = (img.transpose((2, 0, 1))/255.0) masks[i] = (mask.reshape(1,1,opt.finesize,opt.finesize)/255.0) - images = Totensor(images,opt.use_gpu) - masks = Totensor(masks,opt.use_gpu) + images = Totensor(images,opt.gpu_id) + masks = Totensor(masks,opt.gpu_id) return images,masks @@ -111,7 +111,7 @@ if opt.continue_train: f = open(os.path.join(dir_checkpoint,'epoch_log.txt'),'r') opt.startepoch = int(f.read()) f.close() -if opt.use_gpu != -1: +if opt.gpu_id != -1: net.cuda() cudnn.benchmark = True @@ -135,7 +135,7 @@ for epoch in range(opt.startepoch,opt.maxepoch): starttime = datetime.datetime.now() util.writelog(os.path.join(dir_checkpoint,'loss.txt'),'Epoch {}/{}.'.format(epoch + 1, opt.maxepoch),True) net.train() - if opt.use_gpu != -1: + if opt.gpu_id != -1: net.cuda() epoch_loss = 0 for i in range(int(img_num*0.8/opt.batchsize)): diff --git a/train/clean/train.py b/train/clean/train.py index a944cd6b462665b42fc842b0bdb297cd4fc26c8c..c8269966033ca75553e360c53ce0d18b208ab5eb 100644 --- a/train/clean/train.py +++ b/train/clean/train.py @@ -75,6 +75,9 @@ class TrainVideoLoader(object): _ori_img = impro.imread(os.path.join(video_dir,'origin_image','%05d' % (i+1)+'.jpg'),loadsize=self.opt.loadsize,rgb=True) _mask = impro.imread(os.path.join(video_dir,'mask','%05d' % (i+1)+'.png' ),mod='gray',loadsize=self.opt.loadsize) _mosaic_img = mosaic.addmosaic_base(_ori_img, _mask, self.mosaic_size,0, self.mod,self.rect_rat,self.feather,self.startpos) + _ori_img = data.random_transform_single_image(_ori_img,opt.finesize,self.transform_params) + _mosaic_img = data.random_transform_single_image(_mosaic_img,opt.finesize,self.transform_params) + self.ori_load_pool.append(self.normalize(_ori_img)) self.mosaic_load_pool.append(self.normalize(_mosaic_img)) self.ori_load_pool = np.array(self.ori_load_pool) @@ -110,7 +113,9 @@ class TrainVideoLoader(object): _ori_img = impro.imread(os.path.join(self.video_dir,'origin_image','%05d' % (self.opt.S*self.opt.T+self.t)+'.jpg'),loadsize=self.opt.loadsize,rgb=True) _mask = impro.imread(os.path.join(self.video_dir,'mask','%05d' % (self.opt.S*self.opt.T+self.t)+'.png' ),mod='gray',loadsize=self.opt.loadsize) _mosaic_img = mosaic.addmosaic_base(_ori_img, _mask, self.mosaic_size,0, self.mod,self.rect_rat,self.feather,self.startpos) - + _ori_img = data.random_transform_single_image(_ori_img,opt.finesize,self.transform_params) + _mosaic_img = data.random_transform_single_image(_mosaic_img,opt.finesize,self.transform_params) + _ori_img,_mosaic_img = self.normalize(_ori_img),self.normalize(_mosaic_img) self.ori_load_pool [self.opt.S*self.opt.T-1] = _ori_img self.mosaic_load_pool[self.opt.S*self.opt.T-1] = _mosaic_img @@ -184,17 +189,17 @@ TBGlobalWriter = SummaryWriter(tensorboard_savedir) net = BVDNet.BVDNet(opt.N) -if opt.use_gpu != '-1' and len(opt.use_gpu) == 1: +if opt.gpu_id != '-1' and len(opt.gpu_id) == 1: torch.backends.cudnn.benchmark = True net.cuda() -elif opt.use_gpu != '-1' and len(opt.use_gpu) > 1: +elif opt.gpu_id != '-1' and len(opt.gpu_id) > 1: torch.backends.cudnn.benchmark = True net = nn.DataParallel(net) net.cuda() optimizer = torch.optim.Adam(net.parameters(), lr=opt.lr, betas=(opt.beta1, opt.beta2)) lossf_L1 = nn.L1Loss() -lossf_VGG = BVDNet.VGGLoss([opt.use_gpu]) +lossf_VGG = BVDNet.VGGLoss([opt.gpu_id]) videolist_tmp = os.listdir(opt.dataset) videolist = [] @@ -214,12 +219,12 @@ for train_iter in range(dataloader_train.n_iter): t_start = time.time() # train ori_stream,mosaic_stream,previous_frame = dataloader_train.get_data() - ori_stream = data.to_tensor(ori_stream, opt.use_gpu) - mosaic_stream = data.to_tensor(mosaic_stream, opt.use_gpu) + ori_stream = data.to_tensor(ori_stream, opt.gpu_id) + mosaic_stream = data.to_tensor(mosaic_stream, opt.gpu_id) if previous_frame is None: - previous_frame = data.to_tensor(previous_predframe_tmp, opt.use_gpu) + previous_frame = data.to_tensor(previous_predframe_tmp, opt.gpu_id) else: - previous_frame = data.to_tensor(previous_frame, opt.use_gpu) + previous_frame = data.to_tensor(previous_frame, opt.gpu_id) optimizer.zero_grad() out = net(mosaic_stream,previous_frame) loss_L1 = lossf_L1(out,ori_stream[:,:,opt.N]) @@ -232,7 +237,7 @@ for train_iter in range(dataloader_train.n_iter): # save network if train_iter%opt.save_freq == 0 and train_iter != 0: - model_util.save(net, os.path.join('checkpoints',opt.savename,str(train_iter)+'.pth'), opt.use_gpu) + model_util.save(net, os.path.join('checkpoints',opt.savename,str(train_iter)+'.pth'), opt.gpu_id) # psnr if train_iter%opt.psnr_freq ==0: @@ -253,12 +258,12 @@ for train_iter in range(dataloader_train.n_iter): # eval if (train_iter)%5 ==0: ori_stream,mosaic_stream,previous_frame = dataloader_eval.get_data() - ori_stream = data.to_tensor(ori_stream, opt.use_gpu) - mosaic_stream = data.to_tensor(mosaic_stream, opt.use_gpu) + ori_stream = data.to_tensor(ori_stream, opt.gpu_id) + mosaic_stream = data.to_tensor(mosaic_stream, opt.gpu_id) if previous_frame is None: - previous_frame = data.to_tensor(previous_predframe_tmp, opt.use_gpu) + previous_frame = data.to_tensor(previous_predframe_tmp, opt.gpu_id) else: - previous_frame = data.to_tensor(previous_frame, opt.use_gpu) + previous_frame = data.to_tensor(previous_frame, opt.gpu_id) with torch.no_grad(): out = net(mosaic_stream,previous_frame) loss_L1 = lossf_L1(out,ori_stream[:,:,opt.N]) @@ -301,8 +306,8 @@ for train_iter in range(dataloader_train.n_iter): previous = impro.imread(os.path.join(opt.dataset_test,video,'image',frames[opt.N*opt.S-1]),loadsize=opt.finesize,rgb=True) mosaic_stream = (np.array(mosaic_stream).astype(np.float32)/255.0-0.5)/0.5 mosaic_stream = mosaic_stream.reshape(1,opt.T,opt.finesize,opt.finesize,3).transpose((0,4,1,2,3)) - mosaic_stream = data.to_tensor(mosaic_stream, opt.use_gpu) - previous = data.im2tensor(previous,bgr2rgb = False, use_gpu = opt.use_gpu,use_transform = False, is0_1 = False) + mosaic_stream = data.to_tensor(mosaic_stream, opt.gpu_id) + previous = data.im2tensor(previous,bgr2rgb = False, gpu_id = opt.gpu_id,use_transform = False, is0_1 = False) with torch.no_grad(): out = net(mosaic_stream,previous) show_imgs+= [data.tensor2im(mosaic_stream[:,:,opt.N],rgb2bgr = False),data.tensor2im(out,rgb2bgr = False)] diff --git a/train/clean/train_old.py b/train/clean/train_old.py deleted file mode 100644 index 70efb416ef05edc9c8037f8cd807b9dd3b1e5671..0000000000000000000000000000000000000000 --- a/train/clean/train_old.py +++ /dev/null @@ -1,310 +0,0 @@ -import os -import sys -sys.path.append("..") -sys.path.append("../..") -from cores import Options -opt = Options() - -import numpy as np -import cv2 -import random -import torch -import torch.nn as nn -import time -from multiprocessing import Process, Queue - -from util import mosaic,util,ffmpeg,filt,data -from util import image_processing as impro -from models import pix2pix_model,pix2pixHD_model,video_model,unet_model,loadmodel,videoHD_model -import matplotlib -matplotlib.use('Agg') -from matplotlib import pyplot as plt -import torch.backends.cudnn as cudnn - -''' ---------------------------Get options-------------------------- -''' -opt.parser.add_argument('--N',type=int,default=25, help='') -opt.parser.add_argument('--lr',type=float,default=0.0002, help='') -opt.parser.add_argument('--beta1',type=float,default=0.5, help='') -opt.parser.add_argument('--gan', action='store_true', help='if specified, use gan') -opt.parser.add_argument('--l2', action='store_true', help='if specified, use L2 loss') -opt.parser.add_argument('--hd', action='store_true', help='if specified, use HD model') -opt.parser.add_argument('--lambda_L1',type=float,default=100, help='') -opt.parser.add_argument('--lambda_gan',type=float,default=1, help='') -opt.parser.add_argument('--finesize',type=int,default=256, help='') -opt.parser.add_argument('--loadsize',type=int,default=286, help='') -opt.parser.add_argument('--batchsize',type=int,default=1, help='') -opt.parser.add_argument('--norm',type=str,default='instance', help='') -opt.parser.add_argument('--num_D', type=int, default=2, help='number of discriminators to use') -opt.parser.add_argument('--n_layers_D', type=int, default=3, help='only used if which_model_netD==n_layers') -opt.parser.add_argument('--lambda_feat', type=float, default=10.0, help='weight for feature matching loss') -opt.parser.add_argument('--image_pool',type=int,default=8, help='number of image load pool') -opt.parser.add_argument('--load_process',type=int,default=4, help='number of process for loading data') - -opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='') -opt.parser.add_argument('--maxiter',type=int,default=10000000, help='') -opt.parser.add_argument('--savefreq',type=int,default=10000, help='') -opt.parser.add_argument('--startiter',type=int,default=0, help='') -opt.parser.add_argument('--continue_train', action='store_true', help='') -opt.parser.add_argument('--savename',type=str,default='face', help='') - - -''' ---------------------------Init-------------------------- -''' -opt = opt.getparse() -dir_checkpoint = os.path.join('checkpoints/',opt.savename) -util.makedirs(dir_checkpoint) -util.writelog(os.path.join(dir_checkpoint,'loss.txt'), - str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt)) -cudnn.benchmark = True - -N = opt.N -loss_sum = [0.,0.,0.,0.,0.,0] -loss_plot = [[],[],[],[]] -item_plot = [] - -# list video dir -videonames = os.listdir(opt.dataset) -videonames.sort() -lengths = [];tmp = [] -print('Check dataset...') -for video in videonames: - if video != 'opt.txt': - video_images = os.listdir(os.path.join(opt.dataset,video,'origin_image')) - lengths.append(len(video_images)) - tmp.append(video) -videonames = tmp -video_num = len(videonames) - -#--------------------------Init network-------------------------- -print('Init network...') -if opt.hd: - netG = videoHD_model.MosaicNet(3*N+1, 3, norm=opt.norm) -else: - netG = video_model.MosaicNet(3*N+1, 3, norm=opt.norm) -netG.cuda() -loadmodel.show_paramsnumber(netG,'netG') - -if opt.gan: - if opt.hd: - netD = pix2pixHD_model.define_D(6, 64, opt.n_layers_D, norm = opt.norm, use_sigmoid=False, num_D=opt.num_D,getIntermFeat=True) - else: - netD = pix2pix_model.define_D(3*2, 64, 'basic', norm = opt.norm) - netD.cuda() - netD.train() - -#--------------------------continue train-------------------------- -if opt.continue_train: - if not os.path.isfile(os.path.join(dir_checkpoint,'last_G.pth')): - opt.continue_train = False - print('can not load last_G, training on init weight.') -if opt.continue_train: - netG.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last_G.pth'))) - if opt.gan: - netD.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last_D.pth'))) - f = open(os.path.join(dir_checkpoint,'iter'),'r') - opt.startiter = int(f.read()) - f.close() - -#--------------------------optimizer & loss-------------------------- -optimizer_G = torch.optim.Adam(netG.parameters(), lr=opt.lr,betas=(opt.beta1, 0.999)) -criterion_L1 = nn.L1Loss() -criterion_L2 = nn.MSELoss() -if opt.gan: - optimizer_D = torch.optim.Adam(netD.parameters(), lr=opt.lr,betas=(opt.beta1, 0.999)) - if opt.hd: - criterionGAN = pix2pixHD_model.GANLoss(tensor=torch.cuda.FloatTensor).cuda() - criterionFeat = pix2pixHD_model.GAN_Feat_loss(opt) - criterionVGG = pix2pixHD_model.VGGLoss([opt.use_gpu]) - else: - criterionGAN = pix2pix_model.GANLoss(gan_mode='lsgan').cuda() - -''' ---------------------------preload data & data pool-------------------------- -''' -print('Preloading data, please wait...') -def preload(pool): - cnt = 0 - input_imgs = torch.rand(opt.batchsize,N*3+1,opt.finesize,opt.finesize) - ground_trues = torch.rand(opt.batchsize,3,opt.finesize,opt.finesize) - while 1: - try: - for i in range(opt.batchsize): - video_index = random.randint(0,video_num-1) - videoname = videonames[video_index] - img_index = random.randint(int(N/2)+1,lengths[video_index]- int(N/2)-1) - input_imgs[i],ground_trues[i] = data.load_train_video(videoname,img_index,opt) - cnt += 1 - pool.put([input_imgs,ground_trues]) - except Exception as e: - print("Error:",videoname,e) -pool = Queue(opt.image_pool) -for i in range(opt.load_process): - p = Process(target=preload,args=(pool,)) - p.daemon = True - p.start() - -''' ---------------------------train-------------------------- -''' -util.copyfile('./train.py', os.path.join(dir_checkpoint,'train.py')) -util.copyfile('../../models/videoHD_model.py', os.path.join(dir_checkpoint,'model.py')) -netG.train() -time_start=time.time() -print("Begin training...") -for iter in range(opt.startiter+1,opt.maxiter): - - inputdata,target = pool.get() - inputdata,target = inputdata.cuda(),target.cuda() - - if opt.gan: - # compute fake images: G(A) - pred = netG(inputdata) - real_A = inputdata[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:] - - # --------------------update D-------------------- - pix2pix_model.set_requires_grad(netD,True) - optimizer_D.zero_grad() - # Fake - fake_AB = torch.cat((real_A, pred), 1) - pred_fake = netD(fake_AB.detach()) - loss_D_fake = criterionGAN(pred_fake, False) - # Real - real_AB = torch.cat((real_A, target), 1) - pred_real = netD(real_AB) - loss_D_real = criterionGAN(pred_real, True) - # combine loss and calculate gradients - loss_D = (loss_D_fake + loss_D_real) * 0.5 - loss_sum[4] += loss_D_fake.item() - loss_sum[5] += loss_D_real.item() - # udpate D's weights - loss_D.backward() - optimizer_D.step() - - # --------------------update G-------------------- - pix2pix_model.set_requires_grad(netD,False) - optimizer_G.zero_grad() - - # First, G(A) should fake the discriminator - fake_AB = torch.cat((real_A, pred), 1) - pred_fake = netD(fake_AB) - loss_G_GAN = criterionGAN(pred_fake, True)*opt.lambda_gan - - # combine loss and calculate gradients - if opt.l2: - loss_G_L1 = (criterion_L1(pred, target)+criterion_L2(pred, target)) * opt.lambda_L1 - else: - loss_G_L1 = criterion_L1(pred, target) * opt.lambda_L1 - - if opt.hd: - real_AB = torch.cat((real_A, target), 1) - pred_real = netD(real_AB) - loss_G_GAN_Feat = criterionFeat(pred_fake,pred_real) - loss_VGG = criterionVGG(pred, target) * opt.lambda_feat - loss_G = loss_G_GAN + loss_G_L1 + loss_G_GAN_Feat + loss_VGG - else: - loss_G = loss_G_GAN + loss_G_L1 - loss_sum[0] += loss_G_L1.item() - loss_sum[1] += loss_G_GAN.item() - loss_sum[2] += loss_G_GAN_Feat.item() - loss_sum[3] += loss_VGG.item() - - # udpate G's weights - loss_G.backward() - optimizer_G.step() - - else: - pred = netG(inputdata) - if opt.l2: - loss_G_L1 = (criterion_L1(pred, target)+criterion_L2(pred, target)) * opt.lambda_L1 - else: - loss_G_L1 = criterion_L1(pred, target) * opt.lambda_L1 - loss_sum[0] += loss_G_L1.item() - - optimizer_G.zero_grad() - loss_G_L1.backward() - optimizer_G.step() - - # save train result - if (iter+1)%1000 == 0: - try: - data.showresult(inputdata[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:], - target, pred, os.path.join(dir_checkpoint,'result_train.jpg')) - except Exception as e: - print(e) - - # plot - if (iter+1)%1000 == 0: - time_end = time.time() - #if opt.gan: - savestr ='iter:{0:d} L1_loss:{1:.3f} GAN_loss:{2:.3f} Feat:{3:.3f} VGG:{4:.3f} time:{5:.2f}'.format( - iter+1,loss_sum[0]/1000,loss_sum[1]/1000,loss_sum[2]/1000,loss_sum[3]/1000,(time_end-time_start)/1000) - util.writelog(os.path.join(dir_checkpoint,'loss.txt'), savestr,True) - if (iter+1)/1000 >= 10: - for i in range(4):loss_plot[i].append(loss_sum[i]/1000) - item_plot.append(iter+1) - try: - labels = ['L1_loss','GAN_loss','GAN_Feat_loss','VGG_loss'] - for i in range(4):plt.plot(item_plot,loss_plot[i],label=labels[i]) - plt.xlabel('iter') - plt.legend(loc=1) - plt.savefig(os.path.join(dir_checkpoint,'loss.jpg')) - plt.close() - except Exception as e: - print("error:",e) - - loss_sum = [0.,0.,0.,0.,0.,0.] - time_start=time.time() - - # save network - if (iter+1)%(opt.savefreq//10) == 0: - torch.save(netG.cpu().state_dict(),os.path.join(dir_checkpoint,'last_G.pth')) - if opt.gan: - torch.save(netD.cpu().state_dict(),os.path.join(dir_checkpoint,'last_D.pth')) - if opt.use_gpu !=-1 : - netG.cuda() - if opt.gan: - netD.cuda() - f = open(os.path.join(dir_checkpoint,'iter'),'w+') - f.write(str(iter+1)) - f.close() - - if (iter+1)%opt.savefreq == 0: - os.rename(os.path.join(dir_checkpoint,'last_G.pth'),os.path.join(dir_checkpoint,str(iter+1)+'G.pth')) - if opt.gan: - os.rename(os.path.join(dir_checkpoint,'last_D.pth'),os.path.join(dir_checkpoint,str(iter+1)+'D.pth')) - print('network saved.') - - #test - if (iter+1)%opt.savefreq == 0: - if os.path.isdir('./test'): - netG.eval() - - test_names = os.listdir('./test') - test_names.sort() - result = np.zeros((opt.finesize*2,opt.finesize*len(test_names),3), dtype='uint8') - - for cnt,test_name in enumerate(test_names,0): - img_names = os.listdir(os.path.join('./test',test_name,'image')) - img_names.sort() - inputdata = np.zeros((opt.finesize,opt.finesize,3*N+1), dtype='uint8') - for i in range(0,N): - img = impro.imread(os.path.join('./test',test_name,'image',img_names[i])) - img = impro.resize(img,opt.finesize) - inputdata[:,:,i*3:(i+1)*3] = img - - mask = impro.imread(os.path.join('./test',test_name,'mask.png'),'gray') - mask = impro.resize(mask,opt.finesize) - mask = impro.mask_threshold(mask,15,128) - inputdata[:,:,-1] = mask - result[0:opt.finesize,opt.finesize*cnt:opt.finesize*(cnt+1),:] = inputdata[:,:,int((N-1)/2)*3:(int((N-1)/2)+1)*3] - inputdata = data.im2tensor(inputdata,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1 = False) - pred = netG(inputdata) - - pred = data.tensor2im(pred,rgb2bgr = False, is0_1 = False) - result[opt.finesize:opt.finesize*2,opt.finesize*cnt:opt.finesize*(cnt+1),:] = pred - - cv2.imwrite(os.path.join(dir_checkpoint,str(iter+1)+'_test.jpg'), result) - netG.train() \ No newline at end of file diff --git a/util/data.py b/util/data.py index 8884047aa0cf85acfe2525fcf29a41837a5107c5..83a14cd659dcf0cdcc3870c9dbc8685f52fb28a5 100755 --- a/util/data.py +++ b/util/data.py @@ -42,7 +42,7 @@ def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True ,is0_1 = return image_numpy.astype(imtype) -def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape = True, use_gpu = 0, use_transform = True,is0_1 = True): +def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape = True, gpu_id = 0, use_transform = True,is0_1 = True): if gray: h, w = image_numpy.shape @@ -65,7 +65,7 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape = image_tensor = torch.from_numpy(image_numpy).float() if reshape: image_tensor = image_tensor.reshape(1,ch,h,w) - if use_gpu != '-1': + if gpu_id != '-1': image_tensor = image_tensor.cuda() return image_tensor @@ -281,8 +281,8 @@ def random_transform_pair_image(img,mask,finesize,test_flag = False): # input_img[:,:,i*3:(i+1)*3] = img_mosaic # # to tensor # input_img,ground_true = random_transform_video(input_img,ground_true,opt.finesize,N) -# input_img = im2tensor(input_img,bgr2rgb=False,use_gpu=-1,use_transform = False,is0_1=False) -# ground_true = im2tensor(ground_true,bgr2rgb=False,use_gpu=-1,use_transform = False,is0_1=False) +# input_img = im2tensor(input_img,bgr2rgb=False,gpu_id=-1,use_transform = False,is0_1=False) +# ground_true = im2tensor(ground_true,bgr2rgb=False,gpu_id=-1,use_transform = False,is0_1=False) # return input_img,ground_true diff --git a/util/ffmpeg.py b/util/ffmpeg.py index 4eb668b4081acaef92a53d4ea761a2fe60bde0cf..20881422a4cbec633982d5f2ccad254e762b4881 100755 --- a/util/ffmpeg.py +++ b/util/ffmpeg.py @@ -1,5 +1,5 @@ import os,json - +import subprocess # ffmpeg 3.4.6 def args2cmd(args): @@ -32,10 +32,11 @@ def run(args,mode = 0): return sout def video2image(videopath, imagepath, fps=0, start_time='00:00:00', last_time='00:00:00'): - args = ['ffmpeg', '-i', '"'+videopath+'"'] + args = ['ffmpeg'] if last_time != '00:00:00': args += ['-ss', start_time] args += ['-t', last_time] + args += ['-i', '"'+videopath+'"'] if fps != 0: args += ['-r', str(fps)] args += ['-f', 'image2','-q:v','-0',imagepath]