From 9aca31da459356f6a6580e5a70fdcaf82cc91ae1 Mon Sep 17 00:00:00 2001
From: HypoX64 <hypox128@gmail.com>
Date: Tue, 14 Jan 2020 22:02:06 +0800
Subject: [PATCH] make more training data

---
 .gitignore                                    |   1 +
 README.md                                     |   2 +-
 README_CN.md                                  |   2 +-
 make_datasets/get_image_from_video.py         |  18 +++
 .../use_addmosaic_model_make_video_dataset.py |  14 +-
 .../use_irregular_holes_mask_make_dataset.py  |  35 ++--
 models/loadmodel.py                           |  15 +-
 models/video_model.py                         |   4 +-
 train/add/train.py                            | 153 ++++--------------
 train/clean/train.py                          |  65 ++------
 util/data.py                                  | 105 ++++++++++--
 util/ffmpeg.py                                |   8 +-
 util/util.py                                  |  10 ++
 13 files changed, 219 insertions(+), 213 deletions(-)
 create mode 100644 make_datasets/get_image_from_video.py
diff --git a/.gitignore b/.gitignore
index 6d0e725..33e3abc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,6 +141,7 @@ test*/
 video_tmp/
 result/
 #./
+/pix2pixHD
 /tmp
 /to_make_show
 /test_media
diff --git a/README.md b/README.md
index 9e9822b..6ac5fb3 100755
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 # <img src="./imgs/icon.jpg" width="48">DeepMosaics
 You can use it to automatically remove the mosaics in images and videos, or add mosaics to them.<br>
 This porject based on ‘semantic segmentation’ and ‘Image-to-Image Translation’.<br>
-
+The master branch is not stable. Please use the [stable version](https://github.com/HypoX64/DeepMosaics/tree/stable) instead.<br>
 * [中文版](./README_CN.md)<br>
 
 ### More example
diff --git a/README_CN.md b/README_CN.md
index aae9ca5..e6661f0 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -1,7 +1,7 @@
 ![image](./imgs/hand.gif)
 # <img src="./imgs/icon.jpg" width="48">DeepMosaics
 这是一个通过深度学习自动的为图片/视频添加马赛克,或消除马赛克的项目.<br>它基于“语义分割”以及“图像翻译”.<br>
-
+主分支并不稳定,请移步[稳定版本](https://github.com/HypoX64/DeepMosaics/tree/stable)<br>
 ### 更多例子
 原始 | 自动打码 |  自动去码  
 :-:|:-:|:-:
diff --git a/make_datasets/get_image_from_video.py b/make_datasets/get_image_from_video.py
new file mode 100644
index 0000000..b7f96be
--- /dev/null
+++ b/make_datasets/get_image_from_video.py
@@ -0,0 +1,18 @@
+import os
+import numpy as np
+import cv2
+import random
+import csv
+
+import sys
+sys.path.append("..")
+from util import util,ffmpeg
+from util import image_processing as impro
+
+files = util.Traversal('/media/hypo/Media/download')  # hard-coded source directory; presumably lists the files under it -- TODO confirm
+videos = util.is_videos(files)  # presumably keeps only the video files -- TODO confirm
+output_dir = './dataset/v2im'  # every extracted frame is written into this one directory
+FPS = 1  # images saved per second of video (handed to ffmpeg's fps filter)
+util.makedirs(output_dir)
+for video in videos:
+    ffmpeg.continuous_screenshot(video, output_dir, FPS)  # frames are named <video name>%05d.jpg
\ No newline at end of file
diff --git a/make_datasets/use_addmosaic_model_make_video_dataset.py b/make_datasets/use_addmosaic_model_make_video_dataset.py
index af5b3b3..c972fed 100644
--- a/make_datasets/use_addmosaic_model_make_video_dataset.py
+++ b/make_datasets/use_addmosaic_model_make_video_dataset.py
@@ -22,7 +22,7 @@ Area_Type  = 'normal'
 suffix = ''
 
 net = loadmodel.unet(opt)
-for path in videos:
+for video_cnt,path in enumerate(videos,1):
     try:
         path = os.path.join('./video',path)
         util.clean_tempfiles()
@@ -37,14 +37,14 @@ for path in videos:
         mask_avg = np.zeros((impro.resize(img_ori_example, 128)).shape[:2])
         for imagepath in imagepaths:
             imagepath = os.path.join('./tmp/video2image',imagepath)
-            print('Find ROI location:',imagepath)
+            #print('Find ROI location:',imagepath)
             img = impro.imread(imagepath)
-            x,y,size,mask = runmodel.get_mosaic_position(img,net,opt,threshold = 64)
+            x,y,size,mask = runmodel.get_mosaic_position(img,net,opt,threshold = 80)
             cv2.imwrite(os.path.join('./tmp/ROI_mask',
                               os.path.basename(imagepath)),mask)
             positions.append([x,y,size])
             mask_avg = mask_avg + mask
-        print('Optimize ROI locations...')
+        #print('Optimize ROI locations...')
         mask_index = filt.position_medfilt(np.array(positions), 13)
 
         mask = np.clip(mask_avg/len(imagepaths),0,255).astype('uint8')
@@ -62,7 +62,7 @@ for path in videos:
             os.makedirs(mask_path)
             os.makedirs(ori_path)
             os.makedirs(mosaic_path)
-            print('Add mosaic to images...')
+            #print('Add mosaic to images...')
             mosaic_size = mosaic.get_autosize(img_ori_example,mask,area_type = Area_Type)*random.uniform(1,2)
             models = ['squa_avg','rect_avg','squa_mid']
             mosaic_type = random.randint(0,len(models)-1)
@@ -81,4 +81,6 @@ for path in videos:
                 cv2.imwrite(os.path.join(mosaic_path,os.path.basename(imagepaths[i])),img_mosaic_crop)
                 cv2.imwrite(os.path.join(mask_path,os.path.basename(imagepaths[i])),mask_crop)
     except Exception as e:
-        print(e)
\ No newline at end of file
+        print(e)
+    # progress over videos: video_cnt is a dedicated 1-based counter because i is reused as the image index above
+    print(util.get_bar(100*video_cnt/len(videos),num=50))
\ No newline at end of file
diff --git a/make_datasets/use_irregular_holes_mask_make_dataset.py b/make_datasets/use_irregular_holes_mask_make_dataset.py
index 0d4bff7..9321aef 100644
--- a/make_datasets/use_irregular_holes_mask_make_dataset.py
+++ b/make_datasets/use_irregular_holes_mask_make_dataset.py
@@ -11,19 +11,20 @@ from util import util,mosaic
 import datetime
 
 ir_mask_path = './Irregular_Holes_mask'
-img_path ='/home/hypo/桌面/FaceRankSample' 
-output_dir = './datasets_img'
-util.makedirs(output_dir)
+img_dir ='/home/hypo/MyProject/Haystack/CV/output/all/face' 
 MOD = 'HD' #HD | pix2pix | mosaic
 MASK = False # if True, output mask,too
-BOUNDING = False # if true the mosaic size will be more big
+BOUNDING = True # if true the mosaic size will be more big
+suffix = ''
+output_dir = os.path.join('./dataset_img',MOD)
+util.makedirs(output_dir)
 
-if MOD='HD':
+if MOD == 'HD':
     train_A_path = os.path.join(output_dir,'train_A')
     train_B_path = os.path.join(output_dir,'train_B')
     util.makedirs(train_A_path)
     util.makedirs(train_B_path)
-elif MOD='pix2pix':
+elif MOD == 'pix2pix':
     train_path = os.path.join(output_dir,'train')
     util.makedirs(train_path)
 if MASK:
@@ -42,12 +43,12 @@ transform_img = transforms.Compose([
  ])
 
 mask_names = os.listdir(ir_mask_path)
-img_names = os.listdir(img_path)
+img_names = os.listdir(img_dir)
 print('Find images:',len(img_names))
 
 for i,img_name in enumerate(img_names,1):
-    try:
-        img = Image.open(os.path.join(img_path,img_name))
+    try:        
+        img = Image.open(os.path.join(img_dir,img_name))
         img = transform_img(img)
         img = np.array(img)
         img = img[...,::-1]
@@ -60,22 +61,20 @@ for i,img_name in enumerate(img_names,1):
                 mask = np.array(mask)
                 mosaic_area = impro.mask_area(mask)
             mosaic_img = mosaic.addmosaic_random(img, mask,'bounding') 
-            BOUNDING_flag = '_bound'
         else:
             mask = Image.open(os.path.join(ir_mask_path,random.choices(mask_names)[0]))
             mask = transform_mask(mask)
             mask = np.array(mask)
             mosaic_img = mosaic.addmosaic_random(img, mask)
-            BOUNDING_flag = ''    
-        
-        if HD:#[128:384,128:384,:] --->256
-            cv2.imwrite(os.path.join(train_A_path,'%05d' % i+BOUNDING_flag+'.jpg'), mosaic_img)
-            cv2.imwrite(os.path.join(train_B_path,'%05d' % i+BOUNDING_flag+'.jpg'), img)
+                 
+        if MOD == 'HD':#[128:384,128:384,:] --->256
+            cv2.imwrite(os.path.join(train_A_path,'%05d' % i+suffix+'.jpg'), mosaic_img)
+            cv2.imwrite(os.path.join(train_B_path,'%05d' % i+suffix+'.jpg'), img)
         else:
             merge_img = impro.makedataset(mosaic_img, img)
-            cv2.imwrite(os.path.join(train_path,'%05d' % i+BOUNDING_flag+'.jpg'), merge_img)
+            cv2.imwrite(os.path.join(train_path,'%05d' % i+suffix+'.jpg'), merge_img)
         if MASK:
-            cv2.imwrite(os.path.join(mask_path,'%05d' % i+BOUNDING_flag+'.png'), mask)
-        print("Processing:",img_name," ","Remain:",len(img_names)-i)
+            cv2.imwrite(os.path.join(mask_path,'%05d' % i+suffix+'.png'), mask)
+        print('\r','Proc/all:'+str(i)+'/'+str(len(img_names)),util.get_bar(100*i/len(img_names),num=40),end='')
     except Exception as e:
         print(img_name,e)
diff --git a/models/loadmodel.py b/models/loadmodel.py
index 2e91a7c..1633181 100755
--- a/models/loadmodel.py
+++ b/models/loadmodel.py
@@ -2,7 +2,13 @@ import torch
 from .pix2pix_model import define_G
 from .pix2pixHD_model import define_G as define_G_HD
 from .unet_model import UNet
-from .video_model import HypoNet
+from .video_model import MosaicNet
+
+def show_paramsnumber(net,netname='net'):
+    # Print the total parameter count of net in millions, rounded to 2 decimals.
+    millions = round(sum(p.numel() for p in net.parameters())/1e6,2)
+    print(netname+' parameters: '+str(millions)+'M')
+
 
 def pix2pix(opt):
     # print(opt.model_path,opt.netG)
@@ -10,7 +16,7 @@ def pix2pix(opt):
         netG = define_G_HD(3, 3, 64, 'global' ,4)
     else:
         netG = define_G(3, 3, 64, opt.netG, norm='batch',use_dropout=True, init_type='normal', gpu_ids=[])
-
+    show_paramsnumber(netG,'netG')
     netG.load_state_dict(torch.load(opt.model_path))
     netG.eval()
     if opt.use_gpu:
@@ -18,7 +24,8 @@ def pix2pix(opt):
     return netG
 
 def video(opt):
-    netG = HypoNet(3*25+1, 3)
+    netG = MosaicNet(3*25+1, 3)
+    show_paramsnumber(netG,'netG')
     netG.load_state_dict(torch.load(opt.model_path))
     netG.eval()
     if opt.use_gpu:
@@ -28,6 +35,7 @@ def video(opt):
 
 def unet_clean(opt):
     net = UNet(n_channels = 3, n_classes = 1)
+    show_paramsnumber(net,'segment')
     net.load_state_dict(torch.load(opt.mosaic_position_model_path))
     net.eval()
     if opt.use_gpu:
@@ -36,6 +44,7 @@ def unet_clean(opt):
 
 def unet(opt):
     net = UNet(n_channels = 3, n_classes = 1)
+    show_paramsnumber(net,'segment')
     net.load_state_dict(torch.load(opt.model_path))
     net.eval()
     if opt.use_gpu:
diff --git a/models/video_model.py b/models/video_model.py
index 0cbcab4..6802e9b 100644
--- a/models/video_model.py
+++ b/models/video_model.py
@@ -151,9 +151,9 @@ class encoder_3d(nn.Module):
 
 
 
-class HypoNet(nn.Module):
+class MosaicNet(nn.Module):
     def __init__(self, in_channel, out_channel):
-        super(HypoNet, self).__init__()
+        super(MosaicNet, self).__init__()
 
         self.encoder_2d = encoder_2d(4,-1,64,n_blocks=9)
         self.encoder_3d = encoder_3d(in_channel)
diff --git a/train/add/train.py b/train/add/train.py
index 49e0888..f616afb 100644
--- a/train/add/train.py
+++ b/train/add/train.py
@@ -11,15 +11,28 @@ import torch.backends.cudnn as cudnn
 import torch.nn as nn
 from torch import optim
 
-from unet import UNet
+import sys
+sys.path.append("..")
+sys.path.append("../..")
+from util import mosaic,util,ffmpeg,filt,data
+from util import image_processing as impro
+from models import unet_model
+from matplotlib import pyplot as plt
+import torch.backends.cudnn as cudnn
+
+LR = 0.0002
+EPOCHS = 100
+BATCHSIZE = 16
+LOADSIZE = 256
+FINESIZE = 224
+CONTINUE = False
+use_gpu = True
+SAVE_FRE = 5
+cudnn.benchmark = False
 
-def resize(img,size):
-    h, w = img.shape[:2]
-    if w >= h:
-        res = cv2.resize(img,(int(size*w/h), size))
-    else:
-        res = cv2.resize(img,(size, int(size*h/w)))
-    return res
+dir_img = './datasets/av/origin_image/'
+dir_mask = './datasets/av/mask/'
+dir_checkpoint = 'checkpoints/'
 
 
 def Totensor(img,use_gpu=True):
@@ -29,20 +42,15 @@ def Totensor(img,use_gpu=True):
         img = img.cuda()
     return img
 
-def random_color(img,random_num):
-    for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+random.randint(-random_num,random_num),0,255).astype('uint8')
-    bright = random.randint(-random_num*2,random_num*2)
-    for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+bright,0,255).astype('uint8')
-    return img
 
 def Toinputshape(imgs,masks,finesize):
     batchsize = len(imgs)
     result_imgs=[];result_masks=[]
     for i in range(batchsize):
         # print(imgs[i].shape,masks[i].shape)
-        img,mask = random_transform(imgs[i], masks[i], finesize)
+        img,mask = data.random_transform_image(imgs[i], masks[i], finesize)
         # print(img.shape,mask.shape)
-        mask = mask[:,:,0].reshape(1,finesize,finesize)/255.0
+        mask = mask.reshape(1,finesize,finesize)/255.0
         img = img.transpose((2, 0, 1))/255.0
         result_imgs.append(img)
         result_masks.append(mask)
@@ -50,65 +58,6 @@ def Toinputshape(imgs,masks,finesize):
     result_masks  = np.array(result_masks)
     return result_imgs,result_masks
 
-
-
-def random_transform(img,mask,finesize):
-
-    
-    # randomsize = int(finesize*(1.2+0.2*random.random())+2)
-
-    h,w = img.shape[:2]
-    loadsize = min((h,w))
-    a = (float(h)/float(w))*random.uniform(0.9, 1.1)
-
-    if h<w:
-        mask = cv2.resize(mask, (int(loadsize/a),loadsize))
-        img = cv2.resize(img, (int(loadsize/a),loadsize))
-    else:
-        mask = cv2.resize(mask, (loadsize,int(loadsize*a)))
-        img = cv2.resize(img, (loadsize,int(loadsize*a)))
-
-    # mask = randomsize(mask,loadsize)
-    # img = randomsize(img,loadsize)
-
-
-    #random crop
-    h,w = img.shape[:2]
-
-    h_move = int((h-finesize)*random.random())
-    w_move = int((w-finesize)*random.random())
-    # print(h,w,h_move,w_move)
-    img_crop = img[h_move:h_move+finesize,w_move:w_move+finesize]
-    mask_crop = mask[h_move:h_move+finesize,w_move:w_move+finesize]
-    
-    #random rotation
-    if random.random()<0.2:
-        h,w = img_crop.shape[:2]
-        M = cv2.getRotationMatrix2D((w/2,h/2),90*int(4*random.random()),1)
-        img = cv2.warpAffine(img_crop,M,(w,h))
-        mask = cv2.warpAffine(mask_crop,M,(w,h))
-    else:
-        img,mask = img_crop,mask_crop
-
-    #random color
-    img=random_color(img, 15)
-    
-    #random flip
-    if random.random()<0.5:
-        if random.random()<0.5:
-            img = cv2.flip(img,0)
-            mask = cv2.flip(mask,0)
-        else:
-            img = cv2.flip(img,1)
-            mask = cv2.flip(mask,1)
-    return img,mask
-
-def randomresize(img):
-    size = np.min(img.shape[:2])
-    img = resize(img, int(size*random.uniform(1,1.2)))
-    img = resize(img, size)
-    return img
-
 def batch_generator(images,masks,batchsize):
     dataset_images = []
     dataset_masks = []
@@ -125,16 +74,17 @@ def batch_generator(images,masks,batchsize):
 def loadimage(dir_img,dir_mask,loadsize,eval_p):
     t1 = datetime.datetime.now()
     imgnames = os.listdir(dir_img)
+    # imgnames = imgnames[:100]
     print('images num:',len(imgnames))
     random.shuffle(imgnames)
     imgnames = (f[:-4] for f in imgnames)
     images = []
     masks = []
     for imgname in imgnames:
-        img = cv2.imread(dir_img+imgname+'.jpg')
-        mask = cv2.imread(dir_mask+imgname+'.png')
-        img = resize(img,loadsize)
-        mask = resize(mask,loadsize)
+        img = impro.imread(dir_img+imgname+'.jpg')
+        mask = impro.imread(dir_mask+imgname+'.png',mod = 'gray')
+        img = impro.resize(img,loadsize)
+        mask = impro.resize(mask,loadsize)
         images.append(img)
         masks.append(mask)
     train_images,train_masks = images[0:int(len(masks)*(1-eval_p))],masks[0:int(len(masks)*(1-eval_p))]
@@ -143,39 +93,7 @@ def loadimage(dir_img,dir_mask,loadsize,eval_p):
     print('load data cost time:',(t2 - t1).seconds,'s')
     return train_images,train_masks,eval_images,eval_masks
 
-def showresult(img,mask,mask_pred):
-    img = (img.cpu().detach().numpy()*255)
-    mask = (mask.cpu().detach().numpy()*255)
-    mask_pred = (mask_pred.cpu().detach().numpy()*255)
-    batchsize = img.shape[0]
-    size = img.shape[3]
-    ran =int(batchsize*random.random())
-    showimg=np.zeros((size,size*3,3))
-    showimg[0:size,0:size] =img[ran].transpose((1, 2, 0))
-    showimg[0:size,size:size*2,1] = mask[ran].reshape(size,size)
-    showimg[0:size,size*2:size*3,1] = mask_pred[ran].reshape(size,size)
-
-    # cv2.imshow("", showimg.astype('uint8'))
-    # key = cv2.waitKey(1)
-    # if key == ord('q'):
-    #     exit()
-    cv2.imwrite('./result.jpg', showimg)
-
-
-
-LR = 0.001
-EPOCHS = 100
-BATCHSIZE = 12
-LOADSIZE = 144
-FINESIZE = 128
-CONTINUE = True
-use_gpu = True
-SAVE_FRE = 5
-cudnn.benchmark = False
 
-dir_img = './origin_image/'
-dir_mask = './mask/'
-dir_checkpoint = 'checkpoints/'
 
 print('loading data......')
 train_images,train_masks,eval_images,eval_masks = loadimage(dir_img,dir_mask,LOADSIZE,0.2)
@@ -183,7 +101,7 @@ dataset_eval_images,dataset_eval_masks = batch_generator(eval_images,eval_masks,
 dataset_train_images,dataset_train_masks = batch_generator(train_images,train_masks,BATCHSIZE)
 
 
-net = UNet(n_channels = 3, n_classes = 1)
+net = unet_model.UNet(n_channels = 3, n_classes = 1)
 
 
 if CONTINUE:
@@ -192,7 +110,7 @@ if use_gpu:
     net.cuda()
 
 
-optimizer = torch.optim.Adam(net.parameters(), lr=LR, betas=(0.9, 0.99))
+optimizer = torch.optim.Adam(net.parameters(), lr=LR, betas=(0.9, 0.999))
 
 criterion = nn.BCELoss()
 # criterion = nn.L1Loss()
@@ -220,8 +138,8 @@ for epoch in range(EPOCHS):
         loss.backward()
         optimizer.step()
 
-        if i%10 == 0:
-            showresult(img,mask,mask_pred)
+        if i%100 == 0:
+            data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'result.png'))
 
     # torch.cuda.empty_cache()
     # # net.eval()
@@ -243,11 +161,8 @@ for epoch in range(EPOCHS):
         epoch_loss_eval/len(dataset_eval_images),
         (endtime - starttime).seconds)),
     torch.save(net.cpu().state_dict(),dir_checkpoint+'last.pth')
-    # print('--- Epoch loss: {0:.6f}'.format(epoch_loss/i))
-    # print('Cost time: ',(endtime - starttime).seconds,'s')
+
     if (epoch+1)%SAVE_FRE == 0:
         torch.save(net.cpu().state_dict(),dir_checkpoint+'epoch'+str(epoch+1)+'.pth')
+        data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'epoch_'+str(epoch+1)+'.png'))
         print('network saved.')
-# torch.save(net.cpu().state_dict(),dir_checkpoint+'last.pth')        
-# print('network saved.')
-
diff --git a/train/clean/train.py b/train/clean/train.py
index b754aba..67bf51a 100644
--- a/train/clean/train.py
+++ b/train/clean/train.py
@@ -9,11 +9,10 @@ import time
 import sys
 sys.path.append("..")
 sys.path.append("../..")
-from models import runmodel,loadmodel
 from util import mosaic,util,ffmpeg,filt,data
 from util import image_processing as impro
 from cores import Options
-from models import pix2pix_model,video_model,unet_model
+from models import pix2pix_model,video_model,unet_model,loadmodel
 from matplotlib import pyplot as plt
 import torch.backends.cudnn as cudnn
 
@@ -32,8 +31,8 @@ SAVE_FRE = 10000
 start_iter = 0
 finesize = 128
 loadsize = int(finesize*1.1)
-
-savename = 'MosaicNet_test'
+perload_num = 32
+savename = 'MosaicNet_noL2'
 dir_checkpoint = 'checkpoints/'+savename
 util.makedirs(dir_checkpoint)
 
@@ -51,7 +50,8 @@ for video in videos:
 #unet_128
 #resnet_9blocks
 #netG = pix2pix_model.define_G(3*N+1, 3, 128, 'resnet_6blocks', norm='instance',use_dropout=True, init_type='normal', gpu_ids=[])
-netG = video_model.HypoNet(3*N+1, 3)
+netG = video_model.MosaicNet(3*N+1, 3)
+loadmodel.show_paramsnumber(netG,'netG')
 # netG = unet_model.UNet(3*N+1, 3)
 if use_gan:
     netD = pix2pix_model.define_D(3*2+1, 64, 'basic', n_layers_D=3, norm='instance', init_type='normal', init_gain=0.02, gpu_ids=[])
@@ -77,43 +77,6 @@ if use_gan:
     optimizer_D = torch.optim.Adam(netG.parameters(), lr=LR,betas=(beta1, 0.999))
     criterionGAN = pix2pix_model.GANLoss(gan_mode='lsgan').cuda()
 
-def random_transform(src,target,finesize):
-
-    #random crop
-    h,w = target.shape[:2]
-    h_move = int((h-finesize)*random.random())
-    w_move = int((w-finesize)*random.random())
-    # print(h,w,h_move,w_move)
-    target = target[h_move:h_move+finesize,w_move:w_move+finesize,:]
-    src = src[h_move:h_move+finesize,w_move:w_move+finesize,:]
-
-    #random flip
-    if random.random()<0.5:
-        src = src[:,::-1,:]
-        target = target[:,::-1,:]
-
-    #random color
-    random_num = 15
-    bright = random.randint(-random_num*2,random_num*2)
-    for i in range(N*3): src[:,:,i]=np.clip(src[:,:,i].astype('int')+bright,0,255).astype('uint8')
-    for i in range(3): target[:,:,i]=np.clip(target[:,:,i].astype('int')+bright,0,255).astype('uint8')
-
-    return src,target
-
-
-def showresult(img1,img2,img3,name):
-    img1 = (img1.cpu().detach().numpy()*255)
-    img2 = (img2.cpu().detach().numpy()*255)
-    img3 = (img3.cpu().detach().numpy()*255)
-    batchsize = img1.shape[0]
-    size = img1.shape[3]
-    ran =int(batchsize*random.random())
-    showimg=np.zeros((size,size*3,3))
-    showimg[0:size,0:size] =img1[ran].transpose((1, 2, 0))
-    showimg[0:size,size:size*2] = img2[ran].transpose((1, 2, 0))
-    showimg[0:size,size*2:size*3] = img3[ran].transpose((1, 2, 0))
-    cv2.imwrite(os.path.join(dir_checkpoint,name), showimg)
-
 
 def loaddata():
     video_index = random.randint(0,len(videos)-1)
@@ -121,7 +84,7 @@ def loaddata():
     img_index = random.randint(N,lengths[video_index]- N)
     input_img = np.zeros((loadsize,loadsize,3*N+1), dtype='uint8')
     for i in range(0,N):
-        # print('./dataset/'+video+'/mosaic/output_'+'%05d'%(img_index+i-int(N/2))+'.png')
+    
         img = cv2.imread('./dataset/'+video+'/mosaic/output_'+'%05d'%(img_index+i-int(N/2))+'.png')
         img = impro.resize(img,loadsize)
         input_img[:,:,i*3:(i+1)*3] = img
@@ -133,7 +96,7 @@ def loaddata():
     ground_true = cv2.imread('./dataset/'+video+'/ori/output_'+'%05d'%(img_index)+'.png')
     ground_true = impro.resize(ground_true,loadsize)
 
-    input_img,ground_true = random_transform(input_img,ground_true,finesize)
+    input_img,ground_true = data.random_transform_video(input_img,ground_true,finesize,N)
     input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False)
     ground_true = data.im2tensor(ground_true,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False)
     
@@ -150,7 +113,7 @@ def preload():
             input_img,ground_true = loaddata()
             input_imgs.append(input_img)
             ground_trues.append(ground_true)
-            if len(input_imgs)>10:
+            if len(input_imgs)>perload_num:
                 del(input_imgs[0])
                 del(ground_trues[0])
             load_cnt += 1
@@ -162,7 +125,7 @@ import threading
 t = threading.Thread(target=preload,args=())  #t为新创建的线程
 t.daemon = True
 t.start()
-while load_cnt < 10:
+while load_cnt < perload_num:
     time.sleep(0.1)
 
 netG.train()
@@ -171,7 +134,7 @@ print("Begin training...")
 for iter in range(start_iter+1,ITER):
 
     # input_img,ground_true = loaddata()
-    ran = random.randint(1, 8)
+    ran = random.randint(1, perload_num-2)
     input_img = input_imgs[ran]
     ground_true = ground_trues[ran]
 
@@ -231,7 +194,8 @@ for iter in range(start_iter+1,ITER):
 
     if (iter+1)%100 == 0:
         try:
-            showresult(input_img[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:], ground_true, pred,'result_train.png')
+            data.showresult(input_img[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:],
+             ground_true, pred,os.path.join(dir_checkpoint,'result_train.png'))
         except Exception as e:
             print(e)
      
@@ -266,7 +230,6 @@ for iter in range(start_iter+1,ITER):
         time_start=time.time()
 
 
-
     if (iter+1)%SAVE_FRE == 0:
         if iter+1 != SAVE_FRE:
             os.rename(os.path.join(dir_checkpoint,'last_G.pth'),os.path.join(dir_checkpoint,str(iter+1-SAVE_FRE)+'G.pth'))
@@ -282,7 +245,6 @@ for iter in range(start_iter+1,ITER):
         f = open(os.path.join(dir_checkpoint,'iter'),'w+')
         f.write(str(iter+1))
         f.close()
-        # torch.save(netG.cpu().state_dict(),dir_checkpoint+'iter'+str(iter+1)+'.pth')
         print('network saved.')
 
         #test
@@ -292,6 +254,7 @@ for iter in range(start_iter+1,ITER):
 
         for cnt,test_name in enumerate(test_names,0):
             img_names = os.listdir(os.path.join('./test',test_name,'image'))
+            img_names.sort()
             input_img = np.zeros((finesize,finesize,3*N+1), dtype='uint8')
             img_names.sort()
             for i in range(0,N):
@@ -307,7 +270,7 @@ for iter in range(start_iter+1,ITER):
             input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False)
             pred = netG(input_img)
  
-            pred = (pred.cpu().detach().numpy()*255)[0].transpose((1, 2, 0))
+            pred = data.tensor2im(pred,rgb2bgr = False, is0_1 = True)
             result[finesize:finesize*2,finesize*cnt:finesize*(cnt+1),:] = pred
 
         cv2.imwrite(os.path.join(dir_checkpoint,str(iter+1)+'_test.png'), result)
diff --git a/util/data.py b/util/data.py
index ee79e05..50aeab7 100755
--- a/util/data.py
+++ b/util/data.py
@@ -1,6 +1,8 @@
+import random
 import numpy as np
 import torch
 import torchvision.transforms as transforms
+import cv2
 
 transform = transforms.Compose([  
     transforms.ToTensor(),  
@@ -8,7 +10,7 @@ transform = transforms.Compose([
     ]  
 )  
 
-def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True):
+def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True ,is0_1 = False):
     image_tensor =image_tensor.data
     image_numpy = image_tensor[0].cpu().float().numpy()
     # if gray:
@@ -16,7 +18,12 @@ def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True):
     # else:
     if image_numpy.shape[0] == 1:
         image_numpy = np.tile(image_numpy, (3, 1, 1))
-    image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
+
+    image_numpy = image_numpy.transpose((1, 2, 0))
+
+    if not is0_1:
+        image_numpy = (image_numpy + 1)/2.0
+    image_numpy = np.clip(image_numpy * 255.0,0,255)  
     if rgb2bgr and not gray:
         image_numpy = image_numpy[...,::-1]-np.zeros_like(image_numpy)
     return image_numpy.astype(imtype)
@@ -46,12 +53,88 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape =
         image_tensor = image_tensor.cuda()
     return image_tensor
 
-# def im2tensor(image_numpy, use_gpu=False):
-#     h, w ,ch = image_numpy.shape
-#     image_numpy = image_numpy/255.0
-#     image_numpy = image_numpy.transpose((2, 0, 1))
-#     image_numpy = image_numpy.reshape(-1,ch,h,w)
-#     img_tensor = torch.from_numpy(image_numpy).float()
-#     if use_gpu:
-#         img_tensor = img_tensor.cuda()
-#     return img_tensor
\ No newline at end of file
+
+def random_transform_video(src,target,finesize,N):
+    '''Jointly crop, mirror and brightness-shift a frame stack and its target.
+    src: H x W x C array whose first N*3 channels are shifted; target: H x W x 3.
+    Returns (src,target) cropped to finesize x finesize.'''
+    # random crop: one offset shared by both arrays so they stay aligned
+    height,width = target.shape[:2]
+    top = int((height-finesize)*random.random())
+    left = int((width-finesize)*random.random())
+    rows,cols = slice(top,top+finesize),slice(left,left+finesize)
+    target = target[rows,cols,:]
+    src = src[rows,cols,:]
+
+    # random horizontal flip
+    mirror = random.random()<0.5
+    if mirror:
+        src,target = src[:,::-1,:],target[:,::-1,:]
+
+    # random brightness: one offset for every image channel, written in place
+    bright = random.randint(-30,30)
+    for arr,channels in ((src,N*3),(target,3)):
+        for c in range(channels): arr[:,:,c]=np.clip(arr[:,:,c].astype('int')+bright,0,255).astype('uint8')
+    return src,target
+
+
+def random_transform_image(img,mask,finesize):
+    '''Apply one random augmentation to an image and its mask together; returns (img,mask).'''
+    # random aspect ratio: the short side keeps its length, the long side is rescaled by roughly +-10%
+
+    h,w = img.shape[:2]
+    loadsize = min((h,w))
+    a = (float(h)/float(w))*random.uniform(0.9, 1.1)
+
+    if h<w:
+        mask = cv2.resize(mask, (int(loadsize/a),loadsize))
+        img = cv2.resize(img, (int(loadsize/a),loadsize))
+    else:
+        mask = cv2.resize(mask, (loadsize,int(loadsize*a)))
+        img = cv2.resize(img, (loadsize,int(loadsize*a)))
+
+    # mask = randomsize(mask,loadsize)
+    # img = randomsize(img,loadsize)
+
+
+    # random crop: the same finesize x finesize window is cut from image and mask
+    h,w = img.shape[:2]
+
+    h_move = int((h-finesize)*random.random())
+    w_move = int((w-finesize)*random.random())
+    # print(h,w,h_move,w_move)
+    img_crop = img[h_move:h_move+finesize,w_move:w_move+finesize]
+    mask_crop = mask[h_move:h_move+finesize,w_move:w_move+finesize]
+    
+    # random rotation (20% of samples) by a multiple of 90 degrees about the crop centre
+    if random.random()<0.2:
+        h,w = img_crop.shape[:2]
+        M = cv2.getRotationMatrix2D((w/2,h/2),90*int(4*random.random()),1)
+        img = cv2.warpAffine(img_crop,M,(w,h))
+        mask = cv2.warpAffine(mask_crop,M,(w,h))
+    else:
+        img,mask = img_crop,mask_crop
+
+    # random color: an independent shift per channel, then one common brightness shift (image only)
+    random_num = 15
+    for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+random.randint(-random_num,random_num),0,255).astype('uint8')
+    bright = random.randint(-random_num*2,random_num*2)
+    for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+bright,0,255).astype('uint8')
+
+    # random flip (50% of samples): horizontal or vertical with equal chance, applied to both arrays
+    if random.random()<0.5:
+        if random.random()<0.5:
+            img = img[:,::-1,:]
+            mask = mask[:,::-1]
+        else:
+            img = img[::-1,:,:]
+            mask = mask[::-1,:]
+    return img,mask
+
+def showresult(img1,img2,img3,name):
+    '''Write the first sample of three image batches side by side to the file name.'''
+    size = img1.shape[3]
+    canvas = np.zeros((size,size*3,3))
+    for slot,batch in enumerate((img1,img2,img3)):
+        canvas[0:size,size*slot:size*(slot+1)] = tensor2im(batch,rgb2bgr = False, is0_1 = True)
+    cv2.imwrite(name, canvas)
diff --git a/util/ffmpeg.py b/util/ffmpeg.py
index 403a1c5..76d254e 100755
--- a/util/ffmpeg.py
+++ b/util/ffmpeg.py
@@ -39,4 +39,10 @@ def cut_video(in_path,start_time,last_time,out_path,vcodec='h265'):
         os.system('ffmpeg -ss '+start_time+' -t '+last_time+' -i "'+in_path+'" -vcodec libx265 -b 12M '+out_path)
 
 def continuous_screenshot(videopath,savedir,fps):
-    os.system('ffmpeg -i '+videopath+' -vf fps='+str(fps)+' '+savedir+'/'+'%05d.jpg')
+    '''Extract frames from a video with ffmpeg as <savedir>/<video name>%05d.jpg.
+    videopath: input video path (quoted for the shell, like in_path in cut_video)
+    savedir:   directory the images are saved to
+    fps:       how many images to save per second of video
+    '''
+    videoname = os.path.splitext(os.path.basename(videopath))[0]
+    os.system('ffmpeg -i "'+videopath+'" -vf fps='+str(fps)+' "'+savedir+'/'+videoname+'%05d.jpg"')
diff --git a/util/util.py b/util/util.py
index 06bd09c..3deaea3 100755
--- a/util/util.py
+++ b/util/util.py
@@ -70,3 +70,13 @@ def file_init(opt):
         os.makedirs(opt.result_dir)
         print('makedir:',opt.result_dir)
     clean_tempfiles()
+
+def get_bar(percent,num = 25):
+    '''Return a text progress bar of num cells followed by the percentage.'''
+    cells = []
+    for index in range(num):
+        # a cell is filled while its index is below the rounded share of num
+        filled = index < round(percent/(100/num))
+        cells.append('#' if filled else '-')
+    label = str(round(percent,2))+'%'
+    return '['+''.join(cells)+'] '+label
\ No newline at end of file
-- 
GitLab