From a6994b523a84f4e178f17a4ed5d315fad8c0a0ee Mon Sep 17 00:00:00 2001
From: hypox64
Date: Thu, 16 Jan 2020 23:56:40 +0800
Subject: [PATCH] add InstanceNorm

---
 .gitignore              |  1 +
 models/pix2pix_model.py |  2 +-
 models/unet_model.py    |  2 +-
 models/video_model.py   | 26 ++++++++++++------
 train/add/train.py      |  4 +--
 train/clean/train.py    | 58 ++++++++++++++++++++---------------------
 util/data.py            |  8 +++---
 util/util.py            |  8 +++++-
 8 files changed, 62 insertions(+), 47 deletions(-)

diff --git a/.gitignore b/.gitignore
index 33e3abc..ae22b8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,6 +141,7 @@ test*/
 video_tmp/
 result/
 #./
+/pix2pix
 /pix2pixHD
 /tmp
 /to_make_show
diff --git a/models/pix2pix_model.py b/models/pix2pix_model.py
index 412844e..7db18c8 100755
--- a/models/pix2pix_model.py
+++ b/models/pix2pix_model.py
@@ -97,7 +97,7 @@ def init_weights(net, init_type='normal', init_gain=0.02):
             init.normal_(m.weight.data, 1.0, init_gain)
             init.constant_(m.bias.data, 0.0)
 
-    print('initialize network with %s' % init_type)
+    #print('initialize network with %s' % init_type)
     net.apply(init_func)  # apply the initialization function
 
diff --git a/models/unet_model.py b/models/unet_model.py
index 18804b4..a0b5870 100755
--- a/models/unet_model.py
+++ b/models/unet_model.py
@@ -31,4 +31,4 @@ class UNet(nn.Module):
         x = self.up3(x, x2)
         x = self.up4(x, x1)
         x = self.outc(x)
-        return torch.sigmoid(x)
+        return torch.tanh(x)
\ No newline at end of file
diff --git a/models/video_model.py b/models/video_model.py
index 8363aa7..33db97c 100644
--- a/models/video_model.py
+++ b/models/video_model.py
@@ -4,13 +4,23 @@ import torch.nn.functional as F
 from .unet_parts import *
 from .pix2pix_model import *
 
+Norm = 'batch'
+if Norm == 'instance':
+    NormLayer_2d = nn.InstanceNorm2d
+    NormLayer_3d = nn.InstanceNorm3d
+    use_bias = True
+else:
+    NormLayer_2d = nn.BatchNorm2d
+    NormLayer_3d = nn.BatchNorm3d
+    use_bias = False
+
 class encoder_2d(nn.Module):
     """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations.
 
     We adapt Torch code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style)
     """
 
-    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='reflect'):
+    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=NormLayer_2d, use_dropout=False, n_blocks=6, padding_type='reflect'):
         """Construct a Resnet-based generator
 
         Parameters:
@@ -55,7 +65,7 @@ class decoder_2d(nn.Module):
     We adapt Torch code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style)
     """
 
-    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='reflect'):
+    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=NormLayer_2d, use_dropout=False, n_blocks=6, padding_type='reflect'):
         """Construct a Resnet-based generator
 
         Parameters:
@@ -114,8 +124,8 @@ class conv_3d(nn.Module):
     def __init__(self,inchannel,outchannel,kernel_size=3,stride=2,padding=1):
         super(conv_3d, self).__init__()
         self.conv = nn.Sequential(
-            nn.Conv3d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=padding, bias=False),
-            nn.BatchNorm3d(outchannel),
+            nn.Conv3d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=padding, bias=use_bias),
+            NormLayer_3d(outchannel),
             nn.ReLU(inplace=True),
         )
 
@@ -128,8 +138,8 @@ class conv_2d(nn.Module):
         super(conv_2d, self).__init__()
         self.conv = nn.Sequential(
             nn.ReflectionPad2d(padding),
-            nn.Conv2d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=0, bias=False),
-            nn.BatchNorm2d(outchannel),
+            nn.Conv2d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=0, bias=use_bias),
+            NormLayer_2d(outchannel),
             nn.ReLU(inplace=True),
         )
 
@@ -145,8 +155,8 @@ class encoder_3d(nn.Module):
         self.down2 = conv_3d(64, 128, 3, 2, 1)
         self.down3 = conv_3d(128, 256, 3, 1, 1)
         self.conver2d = nn.Sequential(
-            nn.Conv2d(256*int(in_channel/4), 256, kernel_size=3, stride=1, padding=1, bias=False),
-            nn.BatchNorm2d(256),
+            nn.Conv2d(256*int(in_channel/4), 256, kernel_size=3, stride=1, padding=1, bias=use_bias),
+            NormLayer_2d(256),
             nn.ReLU(inplace=True),
         )
 
diff --git a/train/add/train.py b/train/add/train.py
index f616afb..4629e95 100644
--- a/train/add/train.py
+++ b/train/add/train.py
@@ -50,8 +50,8 @@ def Toinputshape(imgs,masks,finesize):
         # print(imgs[i].shape,masks[i].shape)
         img,mask = data.random_transform_image(imgs[i], masks[i], finesize)
         # print(img.shape,mask.shape)
-        mask = mask.reshape(1,finesize,finesize)/255.0
-        img = img.transpose((2, 0, 1))/255.0
+        mask = (mask.reshape(1,finesize,finesize)/255.0-0.5)/0.5
+        img = (img.transpose((2, 0, 1))/255.0-0.5)/0.5
         result_imgs.append(img)
         result_masks.append(mask)
     result_imgs = np.array(result_imgs)
diff --git a/train/clean/train.py b/train/clean/train.py
index b251ba9..1992a7d 100644
--- a/train/clean/train.py
+++ b/train/clean/train.py
@@ -18,21 +18,21 @@ import torch.backends.cudnn as cudnn
 
 N = 25
 ITER = 10000000
-LR = 0.001
+LR = 0.0002
 beta1 = 0.5
 use_gpu = True
 use_gan = False
 use_L2 = True
-CONTINUE = True
-lambda_L1 = 1.0#100.0
-lambda_gan = 1.0
+CONTINUE = False
+lambda_L1 = 100.0
+lambda_gan = 1
 SAVE_FRE = 10000
 start_iter = 0
 finesize = 128
 loadsize = int(finesize*1.1)
-batchsize = 8
-perload_num = 32
+batchsize = 1
+perload_num = 16
 savename = 'MosaicNet_test'
 dir_checkpoint = 'checkpoints/'+savename
 util.makedirs(dir_checkpoint)
 
@@ -45,6 +45,7 @@ opt = Options().getparse()
 videos = os.listdir('./dataset')
 videos.sort()
 lengths = []
+print('checking dataset...')
 for video in videos:
     video_images = os.listdir('./dataset/'+video+'/ori')
     lengths.append(len(video_images))
@@ -55,7 +56,8 @@ netG = video_model.MosaicNet(3*N+1, 3)
 loadmodel.show_paramsnumber(netG,'netG')
 # netG = unet_model.UNet(3*N+1, 3)
 if use_gan:
-    netD = pix2pix_model.define_D(3*2+1, 64, 'basic', n_layers_D=3, norm='instance', init_type='normal', init_gain=0.02, gpu_ids=[])
+    #netD = pix2pix_model.define_D(3*2+1, 64, 'pixel', norm='instance')
+    netD = pix2pix_model.define_D(3*2+1, 64, 'basic', norm='instance')
     #netD = pix2pix_model.define_D(3*2+1, 64, 'n_layers', n_layers_D=5, norm='instance', init_type='normal', init_gain=0.02, gpu_ids=[])
 
 if CONTINUE:
@@ -104,26 +106,19 @@ def loaddata():
     return input_img,ground_true
 
 print('preloading data, please wait 5s...')
-# input_imgs=[]
-# ground_trues=[]
-input_imgs = torch.rand(batchsize,N*3+1,finesize,finesize).cuda()
-ground_trues = torch.rand(batchsize,3,finesize,finesize).cuda()
+
+if perload_num <= batchsize:
+    perload_num = batchsize*2
+input_imgs = torch.rand(perload_num,N*3+1,finesize,finesize).cuda()
+ground_trues = torch.rand(perload_num,3,finesize,finesize).cuda()
 
 load_cnt = 0
 def preload():
     global load_cnt
     while 1:
         try:
-            # input_img,ground_true = loaddata()
-            # input_imgs.append(input_img)
-            # ground_trues.append(ground_true)
-            ran = random.randint(0, batchsize-1)
+            ran = random.randint(0, perload_num-1)
             input_imgs[ran],ground_trues[ran] = loaddata()
-
-
-            # if len(input_imgs)>perload_num:
-            #     del(input_imgs[0])
-            #     del(ground_trues[0])
             load_cnt += 1
             # time.sleep(0.1)
         except Exception as e:
@@ -133,21 +128,24 @@ import threading
 t = threading.Thread(target=preload,args=())  # t is the newly created preload thread
 t.daemon = True
 t.start()
-while load_cnt < batchsize*2:
+
+time_start=time.time()
+while load_cnt < perload_num:
     time.sleep(0.1)
+time_end=time.time()
+print('load speed:',round((time_end-time_start)/perload_num,3),'s/it')
+
+util.copyfile('./train.py', os.path.join(dir_checkpoint,'train.py'))
+util.copyfile('../../models/video_model.py', os.path.join(dir_checkpoint,'model.py'))
 
 netG.train()
 time_start=time.time()
 print("Begin training...")
 for iter in range(start_iter+1,ITER):
-    # inputdata,target = loaddata()
-    # ran = random.randint(1, perload_num-2)
-    # inputdata = inputdatas[ran]
-    # target = targets[ran]
-
-    inputdata = input_imgs.clone()
-    target = ground_trues.clone()
+    ran = random.randint(0, perload_num-batchsize)
+    inputdata = input_imgs[ran:ran+batchsize].clone()
+    target = ground_trues[ran:ran+batchsize].clone()
 
     pred = netG(inputdata)
 
@@ -262,13 +260,13 @@ for iter in range(start_iter+1,ITER):
 
         netG.eval()
         test_names = os.listdir('./test')
+        test_names.sort()
         result = np.zeros((finesize*2,finesize*len(test_names),3), dtype='uint8')
         for cnt,test_name in enumerate(test_names,0):
             img_names = os.listdir(os.path.join('./test',test_name,'image'))
             img_names.sort()
             inputdata = np.zeros((finesize,finesize,3*N+1), dtype='uint8')
-            img_names.sort()
             for i in range(0,N):
                 img = impro.imread(os.path.join('./test',test_name,'image',img_names[i]))
                 img = impro.resize(img,finesize)
@@ -286,4 +284,4 @@ for iter in range(start_iter+1,ITER):
             result[finesize:finesize*2,finesize*cnt:finesize*(cnt+1),:] = pred
 
         cv2.imwrite(os.path.join(dir_checkpoint,str(iter+1)+'_test.png'), result)
-        netG.train()
\ No newline at end of file
+        netG.train()
diff --git a/util/data.py b/util/data.py
index 4b0402d..64746f0 100755
--- a/util/data.py
+++ b/util/data.py
@@ -74,11 +74,11 @@ def random_transform_video(src,target,finesize,N):
     target = target[:,::-1,:]
 
     #random color
-    alpha = random.uniform(-0.2,0.2)
+    alpha = random.uniform(-0.3,0.3)
     beta = random.uniform(-0.2,0.2)
-    b = random.uniform(-0.1,0.1)
-    g = random.uniform(-0.1,0.1)
-    r = random.uniform(-0.1,0.1)
+    b = random.uniform(-0.05,0.05)
+    g = random.uniform(-0.05,0.05)
+    r = random.uniform(-0.05,0.05)
     for i in range(N):
         src[:,:,i*3:(i+1)*3] = color_adjust(src[:,:,i*3:(i+1)*3],alpha,beta,b,g,r)
     target = color_adjust(target,alpha,beta,b,g,r)
diff --git a/util/util.py b/util/util.py
index 3deaea3..2dc30de 100755
--- a/util/util.py
+++ b/util/util.py
@@ -79,4 +79,10 @@ def get_bar(percent,num = 25):
         else:
            bar += '-'
     bar += ']'
-    return bar+' '+str(round(percent,2))+'%'
\ No newline at end of file
+    return bar+' '+str(round(percent,2))+'%'
+
+def copyfile(src,dst):
+    try:
+        shutil.copyfile(src, dst)
+    except Exception as e:
+        print(e)
-- 
GitLab
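
The Norm switch added at the top of models/video_model.py follows the upstream pix2pix convention: BatchNorm already learns an additive shift (beta), so a convolution bias in front of it is redundant, while nn.InstanceNorm2d/3d default to affine=False and therefore need the convolution to keep its own bias. A minimal, self-contained sketch of the same pattern (get_norm_layers is an illustrative name, not a function in this repo):

    import torch
    import torch.nn as nn

    def get_norm_layers(norm='batch'):
        if norm == 'instance':
            return nn.InstanceNorm2d, nn.InstanceNorm3d, True    # conv keeps its bias
        return nn.BatchNorm2d, nn.BatchNorm3d, False             # BatchNorm's beta makes it redundant

    NormLayer_2d, NormLayer_3d, use_bias = get_norm_layers('instance')
    block = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=use_bias),
        NormLayer_2d(64),
        nn.ReLU(inplace=True),
    )
    print(block(torch.rand(1, 3, 32, 32)).shape)   # torch.Size([1, 64, 32, 32])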
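
models/unet_model.py now ends in tanh instead of sigmoid, and both training scripts normalize images with (x/255.0 - 0.5)/0.5 to match, so the network sees and produces values in [-1, 1]. A sketch of the two directions of that mapping (helper names are illustrative, not from the repo):

    import numpy as np

    def to_model_range(img_uint8):
        # [0, 255] uint8 -> [-1, 1] float, as in Toinputshape / loaddata
        return (img_uint8.astype(np.float32) / 255.0 - 0.5) / 0.5

    def to_image_range(pred):
        # [-1, 1] float -> [0, 255] uint8, for writing results with cv2.imwrite
        arr = (pred * 0.5 + 0.5) * 255.0
        return np.clip(arr, 0, 255).astype(np.uint8)

    x = np.array([0, 128, 255], dtype=np.uint8)
    assert (to_image_range(to_model_range(x)) == x).all()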
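
train/clean/train.py replaces the old batch-sized buffer with a preallocated pool of perload_num samples: a daemon thread keeps overwriting random slots via loaddata() while the training loop samples a contiguous batchsize-wide window from the pool. Since random.randint is inclusive on both ends, perload_num - batchsize is the largest window start that stays in range. A CPU-only sketch of the pattern under those assumptions (the patch keeps the pool on the GPU):

    import random
    import threading
    import time
    import torch

    perload_num, batchsize, C, H = 16, 4, 3, 8
    pool = torch.rand(perload_num, C, H, H)   # stands in for input_imgs
    load_cnt = 0

    def preload():
        # producer: overwrite a random slot forever (loaddata() in the patch)
        global load_cnt
        while True:
            pool[random.randint(0, perload_num - 1)] = torch.rand(C, H, H)
            load_cnt += 1

    threading.Thread(target=preload, daemon=True).start()
    while load_cnt < perload_num:   # wait for perload_num loads before training
        time.sleep(0.01)

    ran = random.randint(0, perload_num - batchsize)  # inclusive upper bound
    batch = pool[ran:ran + batchsize].clone()
    print(batch.shape)   # torch.Size([4, 3, 8, 8])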
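
The new util.copyfile helper snapshots train.py and the model definition into the checkpoint directory and deliberately swallows copy errors, so a failed snapshot cannot abort a long training run. It assumes shutil is imported at the top of util/util.py, which this hunk does not show; if that import is missing, the helper raises NameError instead of printing the copy error. A self-contained version:

    import shutil

    def copyfile(src, dst):
        # log-and-continue: a failed snapshot should not stop training
        try:
            shutil.copyfile(src, dst)
        except Exception as e:
            print(e)

    copyfile('./train.py', '/nonexistent/train.py')  # prints the error instead of raising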