From 8f4e9158d1b0ded134607e4f3f6ec68330788928 Mon Sep 17 00:00:00 2001 From: hypox64 Date: Sun, 18 Apr 2021 21:41:42 +0800 Subject: [PATCH] Fix frame leak --- deepmosaic.py | 2 +- models/BVDNet.py | 44 ++++++++++++------------ models/model_util.py | 46 ++++++++++++++++++++++++- train/clean/train.py | 82 ++++++++++++++++++++------------------------ 4 files changed, 106 insertions(+), 68 deletions(-) diff --git a/deepmosaic.py b/deepmosaic.py index cfc8717..2571d38 100644 --- a/deepmosaic.py +++ b/deepmosaic.py @@ -76,7 +76,7 @@ if __name__ == '__main__': except Exception as ex: print('--------------------ERROR--------------------') print('--------------Environment--------------') - print('DeepMosaics: 0.4.0') + print('DeepMosaics: 0.5.0') print('Python:',sys.version) import torch print('Pytorch:',torch.__version__) diff --git a/models/BVDNet.py b/models/BVDNet.py index eb26f04..ab5adeb 100644 --- a/models/BVDNet.py +++ b/models/BVDNet.py @@ -2,13 +2,13 @@ import torch import torch.nn as nn import torch.nn.functional as F from .pix2pixHD_model import * +from .model_util import * class Encoder2d(nn.Module): - def __init__(self, input_nc, ngf=64, n_downsampling=3, norm_layer=nn.BatchNorm2d): + def __init__(self, input_nc, ngf=64, n_downsampling=3, norm_layer=nn.BatchNorm2d, activation = nn.ReLU(True)): super(Encoder2d, self).__init__() - activation = nn.ReLU(True) - + model = [nn.ReflectionPad2d(3), nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0), norm_layer(ngf), activation] ### downsample for i in range(n_downsampling): @@ -22,10 +22,9 @@ class Encoder2d(nn.Module): return self.model(input) class Encoder3d(nn.Module): - def __init__(self, input_nc, ngf=64, n_downsampling=3, norm_layer=nn.BatchNorm3d): + def __init__(self, input_nc, ngf=64, n_downsampling=3, norm_layer=nn.BatchNorm3d,activation = nn.ReLU(True)): super(Encoder3d, self).__init__() - activation = nn.ReLU(True) - + model = [nn.Conv3d(input_nc, ngf, kernel_size=3, padding=1), norm_layer(ngf), activation] ### downsample for i in range(n_downsampling): @@ -39,17 +38,18 @@ class Encoder3d(nn.Module): return self.model(input) class BVDNet(nn.Module): - def __init__(self, N, n_downsampling=3, n_blocks=1, input_nc=3, output_nc=3): + def __init__(self, N, n_downsampling=3, n_blocks=1, input_nc=3, output_nc=3,norm='batch',activation=nn.LeakyReLU(0.2)): super(BVDNet, self).__init__() ngf = 64 padding_type = 'reflect' - norm_layer = nn.BatchNorm2d + norm_layer = get_norm_layer(norm,'2d') + norm_layer_3d = get_norm_layer(norm,'3d') self.N = N - # encoder - self.encoder3d = Encoder3d(input_nc,64,n_downsampling) - self.encoder2d = Encoder2d(input_nc,64,n_downsampling) + # encoder + self.encoder3d = Encoder3d(input_nc,64,n_downsampling,norm_layer_3d,activation) + self.encoder2d = Encoder2d(input_nc,64,n_downsampling,norm_layer,activation) ### resnet blocks self.blocks = [] @@ -62,31 +62,31 @@ class BVDNet(nn.Module): self.decoder = [] for i in range(n_downsampling): mult = 2**(n_downsampling - i) - # self.decoder += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, output_padding=1), - # norm_layer(int(ngf * mult / 2)), nn.ReLU(True)] - self.decoder += [ nn.Upsample(scale_factor = 2, mode='nearest'), - nn.ReflectionPad2d(1), - nn.Conv2d(ngf * mult, int(ngf * mult / 2),kernel_size=3, stride=1, padding=0), - norm_layer(int(ngf * mult / 2)), - nn.ReLU(True)] + self.decoder += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, output_padding=1), + norm_layer(int(ngf * mult / 2)), activation] + # self.decoder += [ nn.Upsample(scale_factor = 2, mode='nearest'), + # nn.ReflectionPad2d(1), + # nn.Conv2d(ngf * mult, int(ngf * mult / 2),kernel_size=3, stride=1, padding=0), + # norm_layer(int(ngf * mult / 2)), + # activation] self.decoder += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] self.decoder = nn.Sequential(*self.decoder) self.limiter = nn.Tanh() - def forward(self, stream, last): + def forward(self, stream, previous): this_shortcut = stream[:,:,self.N] stream = self.encoder3d(stream) stream = stream.reshape(stream.size(0),stream.size(1),stream.size(3),stream.size(4)) # print(stream.shape) - last = self.encoder2d(last) - x = stream + last + previous = self.encoder2d(previous) + x = stream + previous x = self.blocks(x) x = self.decoder(x) x = x+this_shortcut x = self.limiter(x) #print(x.shape) - # print(stream.shape,last.shape) + # print(stream.shape,previous.shape) return x class VGGLoss(nn.Module): diff --git a/models/model_util.py b/models/model_util.py index 8f1789f..8865d74 100644 --- a/models/model_util.py +++ b/models/model_util.py @@ -1,5 +1,8 @@ import torch import torch.nn as nn +from torch.nn import init +import functools + def save(net,path,gpu_id): if isinstance(net, nn.DataParallel): @@ -7,4 +10,45 @@ def save(net,path,gpu_id): else: torch.save(net.cpu().state_dict(),path) if gpu_id != '-1': - net.cuda() \ No newline at end of file + net.cuda() + +def get_norm_layer(norm_type='instance',mod = '2d'): + if norm_type == 'batch': + if mod == '2d': + norm_layer = functools.partial(nn.BatchNorm2d, affine=True) + elif mod == '3d': + norm_layer = functools.partial(nn.BatchNorm3d, affine=True) + elif norm_type == 'instance': + if mod == '2d': + norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=True) + elif mod =='3d': + norm_layer = functools.partial(nn.InstanceNorm3d, affine=False, track_running_stats=True) + elif norm_type == 'none': + norm_layer = None + else: + raise NotImplementedError('normalization layer [%s] is not found' % norm_type) + + return norm_layer + +def init_weights(net, init_type='normal', gain=0.02): + def init_func(m): + classname = m.__class__.__name__ + if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): + if init_type == 'normal': + init.normal_(m.weight.data, 0.0, gain) + elif init_type == 'xavier': + init.xavier_normal_(m.weight.data, gain=gain) + elif init_type == 'kaiming': + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif init_type == 'orthogonal': + init.orthogonal_(m.weight.data, gain=gain) + else: + raise NotImplementedError('initialization method [%s] is not implemented' % init_type) + if hasattr(m, 'bias') and m.bias is not None: + init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm2d') != -1: + init.normal_(m.weight.data, 1.0, gain) + init.constant_(m.bias.data, 0.0) + + print('initialize network with %s' % init_type) + net.apply(init_func) \ No newline at end of file diff --git a/train/clean/train.py b/train/clean/train.py index 49c886f..a944cd6 100644 --- a/train/clean/train.py +++ b/train/clean/train.py @@ -36,6 +36,7 @@ opt.parser.add_argument('--lambda_VGG',type=float,default=0.1, help='') opt.parser.add_argument('--load_thread',type=int,default=4, help='number of thread for loading data') opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='') +opt.parser.add_argument('--dataset_test',type=str,default='./datasets/face_test/', help='') opt.parser.add_argument('--n_epoch',type=int,default=200, help='') opt.parser.add_argument('--save_freq',type=int,default=100000, help='') opt.parser.add_argument('--continue_train', action='store_true', help='') @@ -46,9 +47,11 @@ opt.parser.add_argument('--psnr_freq',type=int,default=100, help='') class TrainVideoLoader(object): """docstring for VideoLoader + Load a single video(Converted to images) + How to use: 1.Init TrainVideoLoader as loader 2.Get data by loader.ori_stream - 3.loader.next() + 3.loader.next() to get next stream """ def __init__(self, opt, video_dir, test_flag=False): super(TrainVideoLoader, self).__init__() @@ -60,7 +63,7 @@ class TrainVideoLoader(object): self.transform_params = data.get_transform_params() self.ori_load_pool = [] self.mosaic_load_pool = [] - self.last_pred = None + self.previous_pred = None feg_ori = impro.imread(os.path.join(video_dir,'origin_image','00001.jpg'),loadsize=self.opt.loadsize,rgb=True) feg_mask = impro.imread(os.path.join(video_dir,'mask','00001.png'),mod='gray',loadsize=self.opt.loadsize) self.mosaic_size,self.mod,self.rect_rat,self.feather = mosaic.get_random_parameter(feg_ori,feg_mask) @@ -72,8 +75,6 @@ class TrainVideoLoader(object): _ori_img = impro.imread(os.path.join(video_dir,'origin_image','%05d' % (i+1)+'.jpg'),loadsize=self.opt.loadsize,rgb=True) _mask = impro.imread(os.path.join(video_dir,'mask','%05d' % (i+1)+'.png' ),mod='gray',loadsize=self.opt.loadsize) _mosaic_img = mosaic.addmosaic_base(_ori_img, _mask, self.mosaic_size,0, self.mod,self.rect_rat,self.feather,self.startpos) - # _ori_img = data.random_transform_single_image(_ori_img, opt.finesize,self.transform_params,self.test_flag) - # _mosaic_img = data.random_transform_single_image(_mosaic_img, opt.finesize,self.transform_params,self.test_flag) self.ori_load_pool.append(self.normalize(_ori_img)) self.mosaic_load_pool.append(self.normalize(_mosaic_img)) self.ori_load_pool = np.array(self.ori_load_pool) @@ -87,28 +88,29 @@ class TrainVideoLoader(object): self.mosaic_stream = self.mosaic_stream.reshape(1,self.opt.T,opt.finesize,opt.finesize,3).transpose((0,4,1,2,3)) #Init frist previous frame - self.last_pred = self.ori_load_pool[self.opt.S*self.opt.N-1].copy() + self.previous_pred = self.ori_load_pool[self.opt.S*self.opt.N-1].copy() # previous B,C,H,W - self.last_pred = self.last_pred.reshape(1,opt.finesize,opt.finesize,3).transpose((0,3,1,2)) + self.previous_pred = self.previous_pred.reshape(1,opt.finesize,opt.finesize,3).transpose((0,3,1,2)) def normalize(self,data): + ''' + normalize to -1 ~ 1 + ''' return (data.astype(np.float32)/255.0-0.5)/0.5 + + def anti_normalize(self,data): + return np.clip((data*0.5+0.5)*255,0,255).astype(np.uint8) def next(self): if self.t != 0: - self.last_pred = None + self.previous_pred = None self.ori_load_pool [:self.opt.S*self.opt.T-1] = self.ori_load_pool [1:self.opt.S*self.opt.T] self.mosaic_load_pool[:self.opt.S*self.opt.T-1] = self.mosaic_load_pool[1:self.opt.S*self.opt.T] #print(os.path.join(self.video_dir,'origin_image','%05d' % (self.opt.S*self.opt.T+self.t)+'.jpg')) _ori_img = impro.imread(os.path.join(self.video_dir,'origin_image','%05d' % (self.opt.S*self.opt.T+self.t)+'.jpg'),loadsize=self.opt.loadsize,rgb=True) _mask = impro.imread(os.path.join(self.video_dir,'mask','%05d' % (self.opt.S*self.opt.T+self.t)+'.png' ),mod='gray',loadsize=self.opt.loadsize) _mosaic_img = mosaic.addmosaic_base(_ori_img, _mask, self.mosaic_size,0, self.mod,self.rect_rat,self.feather,self.startpos) - # if np.random.random() < 0.01: - # print('1') - # cv2.imwrite(util.randomstr(10)+'.jpg', _ori_img) - # _ori_img = data.random_transform_single_image(_ori_img, opt.finesize,self.transform_params,self.test_flag) - # _mosaic_img = data.random_transform_single_image(_mosaic_img, opt.finesize,self.transform_params,self.test_flag) _ori_img,_mosaic_img = self.normalize(_ori_img),self.normalize(_mosaic_img) self.ori_load_pool [self.opt.S*self.opt.T-1] = _ori_img self.mosaic_load_pool[self.opt.S*self.opt.T-1] = _mosaic_img @@ -116,11 +118,6 @@ class TrainVideoLoader(object): self.ori_stream = self.ori_load_pool [np.linspace(0, (self.opt.T-1)*self.opt.S,self.opt.T,dtype=np.int64)].copy() self.mosaic_stream = self.mosaic_load_pool[np.linspace(0, (self.opt.T-1)*self.opt.S,self.opt.T,dtype=np.int64)].copy() - if np.random.random() < 0.01: - # print(self.ori_stream[0,0].shape) - print('1') - cv2.imwrite(util.randomstr(10)+'.jpg', self.ori_stream[0]) - # stream B,T,H,W,C -> B,C,T,H,W self.ori_stream = self.ori_stream.reshape (1,self.opt.T,opt.finesize,opt.finesize,3).transpose((0,4,1,2,3)) self.mosaic_stream = self.mosaic_stream.reshape(1,self.opt.T,opt.finesize,opt.finesize,3).transpose((0,4,1,2,3)) @@ -141,8 +138,9 @@ class DataLoader(object): self.n_iter = len(self.videolist)//self.opt.load_thread//self.opt.batchsize*self.each_video_n_iter*self.opt.load_thread self.queue = Queue(self.opt.load_thread) self.ori_stream = np.zeros((self.opt.batchsize,3,self.opt.T,self.opt.finesize,self.opt.finesize),dtype=np.float32)# B,C,T,H,W - self.mosaic_stream = self.ori_stream.copy() - self.last_pred = np.zeros((self.opt.batchsize,3,self.opt.finesize,self.opt.finesize),dtype=np.float32) + self.mosaic_stream = np.zeros((self.opt.batchsize,3,self.opt.T,self.opt.finesize,self.opt.finesize),dtype=np.float32)# B,C,T,H,W + self.previous_pred = np.zeros((self.opt.batchsize,3,self.opt.finesize,self.opt.finesize),dtype=np.float32) + self.load_init() def load(self,videolist): for load_video_iter in range(len(videolist)//self.opt.batchsize): @@ -153,12 +151,12 @@ class DataLoader(object): self.ori_stream[i] = videoloaders[i].ori_stream self.mosaic_stream[i] = videoloaders[i].mosaic_stream if each_video_iter == 0: - self.last_pred[i] = videoloaders[i].last_pred + self.previous_pred[i] = videoloaders[i].previous_pred videoloaders[i].next() if each_video_iter == 0: - self.queue.put([self.ori_stream,self.mosaic_stream,self.last_pred]) + self.queue.put([self.ori_stream.copy(),self.mosaic_stream.copy(),self.previous_pred]) else: - self.queue.put([self.ori_stream,self.mosaic_stream,None]) + self.queue.put([self.ori_stream.copy(),self.mosaic_stream.copy(),None]) def load_init(self): ptvn = len(self.videolist)//self.opt.load_thread #pre_thread_video_num @@ -209,31 +207,28 @@ videolist_train = videolist[:int(len(videolist)*0.8)].copy() videolist_eval = videolist[int(len(videolist)*0.8):].copy() dataloader_train = DataLoader(opt, videolist_train) -dataloader_train.load_init() dataloader_eval = DataLoader(opt, videolist_eval) -dataloader_eval.load_init() -previous_predframe_train = 0 -previous_predframe_eval = 0 +previous_predframe_tmp = 0 for train_iter in range(dataloader_train.n_iter): t_start = time.time() # train - ori_stream,mosaic_stream,last_frame = dataloader_train.get_data() + ori_stream,mosaic_stream,previous_frame = dataloader_train.get_data() ori_stream = data.to_tensor(ori_stream, opt.use_gpu) mosaic_stream = data.to_tensor(mosaic_stream, opt.use_gpu) - if last_frame is None: - last_frame = data.to_tensor(previous_predframe_train, opt.use_gpu) + if previous_frame is None: + previous_frame = data.to_tensor(previous_predframe_tmp, opt.use_gpu) else: - last_frame = data.to_tensor(last_frame, opt.use_gpu) + previous_frame = data.to_tensor(previous_frame, opt.use_gpu) optimizer.zero_grad() - out = net(mosaic_stream,last_frame) + out = net(mosaic_stream,previous_frame) loss_L1 = lossf_L1(out,ori_stream[:,:,opt.N]) loss_VGG = lossf_VGG(out,ori_stream[:,:,opt.N]) * opt.lambda_VGG TBGlobalWriter.add_scalars('loss/train', {'L1':loss_L1.item(),'VGG':loss_VGG.item()}, train_iter) loss = loss_L1+loss_VGG loss.backward() optimizer.step() - previous_predframe_train = out.detach().cpu().numpy() + previous_predframe_tmp = out.detach().cpu().numpy() # save network if train_iter%opt.save_freq == 0 and train_iter != 0: @@ -257,19 +252,19 @@ for train_iter in range(dataloader_train.n_iter): # eval if (train_iter)%5 ==0: - ori_stream,mosaic_stream,last_frame = dataloader_eval.get_data() + ori_stream,mosaic_stream,previous_frame = dataloader_eval.get_data() ori_stream = data.to_tensor(ori_stream, opt.use_gpu) mosaic_stream = data.to_tensor(mosaic_stream, opt.use_gpu) - if last_frame is None: - last_frame = data.to_tensor(previous_predframe_eval, opt.use_gpu) + if previous_frame is None: + previous_frame = data.to_tensor(previous_predframe_tmp, opt.use_gpu) else: - last_frame = data.to_tensor(last_frame, opt.use_gpu) + previous_frame = data.to_tensor(previous_frame, opt.use_gpu) with torch.no_grad(): - out = net(mosaic_stream,last_frame) + out = net(mosaic_stream,previous_frame) loss_L1 = lossf_L1(out,ori_stream[:,:,opt.N]) loss_VGG = lossf_VGG(out,ori_stream[:,:,opt.N]) * opt.lambda_VGG TBGlobalWriter.add_scalars('loss/eval', {'L1':loss_L1.item(),'VGG':loss_VGG.item()}, train_iter) - previous_predframe_eval = out.detach().cpu().numpy() + previous_predframe_tmp = out.detach().cpu().numpy() #psnr if (train_iter)%opt.psnr_freq ==0: @@ -292,19 +287,18 @@ for train_iter in range(dataloader_train.n_iter): t_strat = time.time() # test - test_dir = '../../datasets/video_test' - if train_iter % opt.showresult_freq == 0 and os.path.isdir(test_dir): + if train_iter % opt.showresult_freq == 0 and os.path.isdir(opt.dataset_test): show_imgs = [] - videos = os.listdir(test_dir) + videos = os.listdir(opt.dataset_test) sorted(videos) for video in videos: - frames = os.listdir(os.path.join(test_dir,video,'image')) + frames = os.listdir(os.path.join(opt.dataset_test,video,'image')) sorted(frames) mosaic_stream = [] for i in range(opt.T): - _mosaic = impro.imread(os.path.join(test_dir,video,'image',frames[i*opt.S]),loadsize=opt.finesize,rgb=True) + _mosaic = impro.imread(os.path.join(opt.dataset_test,video,'image',frames[i*opt.S]),loadsize=opt.finesize,rgb=True) mosaic_stream.append(_mosaic) - previous = impro.imread(os.path.join(test_dir,video,'image',frames[opt.N*opt.S-1]),loadsize=opt.finesize,rgb=True) + previous = impro.imread(os.path.join(opt.dataset_test,video,'image',frames[opt.N*opt.S-1]),loadsize=opt.finesize,rgb=True) mosaic_stream = (np.array(mosaic_stream).astype(np.float32)/255.0-0.5)/0.5 mosaic_stream = mosaic_stream.reshape(1,opt.T,opt.finesize,opt.finesize,3).transpose((0,4,1,2,3)) mosaic_stream = data.to_tensor(mosaic_stream, opt.use_gpu) -- GitLab