From 9aca31da459356f6a6580e5a70fdcaf82cc91ae1 Mon Sep 17 00:00:00 2001 From: HypoX64 Date: Tue, 14 Jan 2020 22:02:06 +0800 Subject: [PATCH] make more training data --- .gitignore | 1 + README.md | 2 +- README_CN.md | 2 +- make_datasets/get_image_from_video.py | 18 +++ .../use_addmosaic_model_make_video_dataset.py | 14 +- .../use_irregular_holes_mask_make_dataset.py | 35 ++-- models/loadmodel.py | 15 +- models/video_model.py | 4 +- train/add/train.py | 153 ++++-------------- train/clean/train.py | 65 ++------ util/data.py | 105 ++++++++++-- util/ffmpeg.py | 8 +- util/util.py | 10 ++ 13 files changed, 219 insertions(+), 213 deletions(-) create mode 100644 make_datasets/get_image_from_video.py diff --git a/.gitignore b/.gitignore index 6d0e725..33e3abc 100644 --- a/.gitignore +++ b/.gitignore @@ -141,6 +141,7 @@ test*/ video_tmp/ result/ #./ +/pix2pixHD /tmp /to_make_show /test_media diff --git a/README.md b/README.md index 9e9822b..6ac5fb3 100755 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # DeepMosaics You can use it to automatically remove the mosaics in images and videos, or add mosaics to them.
This porject based on ‘semantic segmentation’ and ‘Image-to-Image Translation’.
- +Master is not stable. Please use a [stable version](https://github.com/HypoX64/DeepMosaics/tree/stable)
* [中文版](./README_CN.md)
### More example diff --git a/README_CN.md b/README_CN.md index aae9ca5..e6661f0 100644 --- a/README_CN.md +++ b/README_CN.md @@ -1,7 +1,7 @@ ![image](./imgs/hand.gif) # DeepMosaics 这是一个通过深度学习自动的为图片/视频添加马赛克,或消除马赛克的项目.
它基于“语义分割”以及“图像翻译”.
- +主分支并不稳定,请移步[稳定版本](https://github.com/HypoX64/DeepMosaics/tree/stable)
### 更多例子 原始 | 自动打码 | 自动去码 :-:|:-:|:-: diff --git a/make_datasets/get_image_from_video.py b/make_datasets/get_image_from_video.py new file mode 100644 index 0000000..b7f96be --- /dev/null +++ b/make_datasets/get_image_from_video.py @@ -0,0 +1,18 @@ +import os +import numpy as np +import cv2 +import random +import csv + +import sys +sys.path.append("..") +from util import util,ffmpeg +from util import image_processing as impro + +files = util.Traversal('/media/hypo/Media/download') +videos = util.is_videos(files) +output_dir = './dataset/v2im' +FPS = 1 +util.makedirs(output_dir) +for video in videos: + ffmpeg.continuous_screenshot(video, output_dir, FPS) \ No newline at end of file diff --git a/make_datasets/use_addmosaic_model_make_video_dataset.py b/make_datasets/use_addmosaic_model_make_video_dataset.py index af5b3b3..c972fed 100644 --- a/make_datasets/use_addmosaic_model_make_video_dataset.py +++ b/make_datasets/use_addmosaic_model_make_video_dataset.py @@ -22,7 +22,7 @@ Area_Type = 'normal' suffix = '' net = loadmodel.unet(opt) -for path in videos: +for i,path in enumerate(videos,0): try: path = os.path.join('./video',path) util.clean_tempfiles() @@ -37,14 +37,14 @@ for path in videos: mask_avg = np.zeros((impro.resize(img_ori_example, 128)).shape[:2]) for imagepath in imagepaths: imagepath = os.path.join('./tmp/video2image',imagepath) - print('Find ROI location:',imagepath) + #print('Find ROI location:',imagepath) img = impro.imread(imagepath) - x,y,size,mask = runmodel.get_mosaic_position(img,net,opt,threshold = 64) + x,y,size,mask = runmodel.get_mosaic_position(img,net,opt,threshold = 80) cv2.imwrite(os.path.join('./tmp/ROI_mask', os.path.basename(imagepath)),mask) positions.append([x,y,size]) mask_avg = mask_avg + mask - print('Optimize ROI locations...') + #print('Optimize ROI locations...') mask_index = filt.position_medfilt(np.array(positions), 13) mask = np.clip(mask_avg/len(imagepaths),0,255).astype('uint8') @@ -62,7 +62,7 @@ for path in videos: os.makedirs(mask_path) os.makedirs(ori_path) os.makedirs(mosaic_path) - print('Add mosaic to images...') + #print('Add mosaic to images...') mosaic_size = mosaic.get_autosize(img_ori_example,mask,area_type = Area_Type)*random.uniform(1,2) models = ['squa_avg','rect_avg','squa_mid'] mosaic_type = random.randint(0,len(models)-1) @@ -81,4 +81,6 @@ for path in videos: cv2.imwrite(os.path.join(mosaic_path,os.path.basename(imagepaths[i])),img_mosaic_crop) cv2.imwrite(os.path.join(mask_path,os.path.basename(imagepaths[i])),mask_crop) except Exception as e: - print(e) \ No newline at end of file + print(e) + + print(util.get_bar(100*i/len(videos),num=50)) \ No newline at end of file diff --git a/make_datasets/use_irregular_holes_mask_make_dataset.py b/make_datasets/use_irregular_holes_mask_make_dataset.py index 0d4bff7..9321aef 100644 --- a/make_datasets/use_irregular_holes_mask_make_dataset.py +++ b/make_datasets/use_irregular_holes_mask_make_dataset.py @@ -11,19 +11,20 @@ from util import util,mosaic import datetime ir_mask_path = './Irregular_Holes_mask' -img_path ='/home/hypo/桌面/FaceRankSample' -output_dir = './datasets_img' -util.makedirs(output_dir) +img_dir ='/home/hypo/MyProject/Haystack/CV/output/all/face' MOD = 'HD' #HD | pix2pix | mosaic MASK = False # if True, output mask,too -BOUNDING = False # if true the mosaic size will be more big +BOUNDING = True # if true the mosaic size will be more big +suffix = '' +output_dir = os.path.join('./dataset_img',MOD) +util.makedirs(output_dir) -if MOD='HD': +if MOD == 'HD': train_A_path = os.path.join(output_dir,'train_A') train_B_path = os.path.join(output_dir,'train_B') util.makedirs(train_A_path) util.makedirs(train_B_path) -elif MOD='pix2pix': +elif MOD == 'pix2pix': train_path = os.path.join(output_dir,'train') util.makedirs(train_path) if MASK: @@ -42,12 +43,12 @@ transform_img = transforms.Compose([ ]) mask_names = os.listdir(ir_mask_path) -img_names = os.listdir(img_path) +img_names = os.listdir(img_dir) print('Find images:',len(img_names)) for i,img_name in enumerate(img_names,1): - try: - img = Image.open(os.path.join(img_path,img_name)) + try: + img = Image.open(os.path.join(img_dir,img_name)) img = transform_img(img) img = np.array(img) img = img[...,::-1] @@ -60,22 +61,20 @@ for i,img_name in enumerate(img_names,1): mask = np.array(mask) mosaic_area = impro.mask_area(mask) mosaic_img = mosaic.addmosaic_random(img, mask,'bounding') - BOUNDING_flag = '_bound' else: mask = Image.open(os.path.join(ir_mask_path,random.choices(mask_names)[0])) mask = transform_mask(mask) mask = np.array(mask) mosaic_img = mosaic.addmosaic_random(img, mask) - BOUNDING_flag = '' - - if HD:#[128:384,128:384,:] --->256 - cv2.imwrite(os.path.join(train_A_path,'%05d' % i+BOUNDING_flag+'.jpg'), mosaic_img) - cv2.imwrite(os.path.join(train_B_path,'%05d' % i+BOUNDING_flag+'.jpg'), img) + + if MOD == 'HD':#[128:384,128:384,:] --->256 + cv2.imwrite(os.path.join(train_A_path,'%05d' % i+suffix+'.jpg'), mosaic_img) + cv2.imwrite(os.path.join(train_B_path,'%05d' % i+suffix+'.jpg'), img) else: merge_img = impro.makedataset(mosaic_img, img) - cv2.imwrite(os.path.join(train_path,'%05d' % i+BOUNDING_flag+'.jpg'), merge_img) + cv2.imwrite(os.path.join(train_path,'%05d' % i+suffix+'.jpg'), merge_img) if MASK: - cv2.imwrite(os.path.join(mask_path,'%05d' % i+BOUNDING_flag+'.png'), mask) - print("Processing:",img_name," ","Remain:",len(img_names)-i) + cv2.imwrite(os.path.join(mask_path,'%05d' % i+suffix+'.png'), mask) + print('\r','Proc/all:'+str(i)+'/'+str(len(img_names)),util.get_bar(100*i/len(img_names),num=40),end='') except Exception as e: print(img_name,e) diff --git a/models/loadmodel.py b/models/loadmodel.py index 2e91a7c..1633181 100755 --- a/models/loadmodel.py +++ b/models/loadmodel.py @@ -2,7 +2,13 @@ import torch from .pix2pix_model import define_G from .pix2pixHD_model import define_G as define_G_HD from .unet_model import UNet -from .video_model import HypoNet +from .video_model import MosaicNet + +def show_paramsnumber(net,netname='net'): + parameters = sum(param.numel() for param in net.parameters()) + parameters = round(parameters/1e6,2) + print(netname+' parameters: '+str(parameters)+'M') + def pix2pix(opt): # print(opt.model_path,opt.netG) @@ -10,7 +16,7 @@ def pix2pix(opt): netG = define_G_HD(3, 3, 64, 'global' ,4) else: netG = define_G(3, 3, 64, opt.netG, norm='batch',use_dropout=True, init_type='normal', gpu_ids=[]) - + show_paramsnumber(netG,'netG') netG.load_state_dict(torch.load(opt.model_path)) netG.eval() if opt.use_gpu: @@ -18,7 +24,8 @@ def pix2pix(opt): return netG def video(opt): - netG = HypoNet(3*25+1, 3) + netG = MosaicNet(3*25+1, 3) + show_paramsnumber(netG,'netG') netG.load_state_dict(torch.load(opt.model_path)) netG.eval() if opt.use_gpu: @@ -28,6 +35,7 @@ def video(opt): def unet_clean(opt): net = UNet(n_channels = 3, n_classes = 1) + show_paramsnumber(net,'segment') net.load_state_dict(torch.load(opt.mosaic_position_model_path)) net.eval() if opt.use_gpu: @@ -36,6 +44,7 @@ def unet_clean(opt): def unet(opt): net = UNet(n_channels = 3, n_classes = 1) + show_paramsnumber(net,'segment') net.load_state_dict(torch.load(opt.model_path)) net.eval() if opt.use_gpu: diff --git a/models/video_model.py b/models/video_model.py index 0cbcab4..6802e9b 100644 --- a/models/video_model.py +++ b/models/video_model.py @@ -151,9 +151,9 @@ class encoder_3d(nn.Module): -class HypoNet(nn.Module): +class MosaicNet(nn.Module): def __init__(self, in_channel, out_channel): - super(HypoNet, self).__init__() + super(MosaicNet, self).__init__() self.encoder_2d = encoder_2d(4,-1,64,n_blocks=9) self.encoder_3d = encoder_3d(in_channel) diff --git a/train/add/train.py b/train/add/train.py index 49e0888..f616afb 100644 --- a/train/add/train.py +++ b/train/add/train.py @@ -11,15 +11,28 @@ import torch.backends.cudnn as cudnn import torch.nn as nn from torch import optim -from unet import UNet +import sys +sys.path.append("..") +sys.path.append("../..") +from util import mosaic,util,ffmpeg,filt,data +from util import image_processing as impro +from models import unet_model +from matplotlib import pyplot as plt +import torch.backends.cudnn as cudnn + +LR = 0.0002 +EPOCHS = 100 +BATCHSIZE = 16 +LOADSIZE = 256 +FINESIZE = 224 +CONTINUE = False +use_gpu = True +SAVE_FRE = 5 +cudnn.benchmark = False -def resize(img,size): - h, w = img.shape[:2] - if w >= h: - res = cv2.resize(img,(int(size*w/h), size)) - else: - res = cv2.resize(img,(size, int(size*h/w))) - return res +dir_img = './datasets/av/origin_image/' +dir_mask = './datasets/av/mask/' +dir_checkpoint = 'checkpoints/' def Totensor(img,use_gpu=True): @@ -29,20 +42,15 @@ def Totensor(img,use_gpu=True): img = img.cuda() return img -def random_color(img,random_num): - for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+random.randint(-random_num,random_num),0,255).astype('uint8') - bright = random.randint(-random_num*2,random_num*2) - for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+bright,0,255).astype('uint8') - return img def Toinputshape(imgs,masks,finesize): batchsize = len(imgs) result_imgs=[];result_masks=[] for i in range(batchsize): # print(imgs[i].shape,masks[i].shape) - img,mask = random_transform(imgs[i], masks[i], finesize) + img,mask = data.random_transform_image(imgs[i], masks[i], finesize) # print(img.shape,mask.shape) - mask = mask[:,:,0].reshape(1,finesize,finesize)/255.0 + mask = mask.reshape(1,finesize,finesize)/255.0 img = img.transpose((2, 0, 1))/255.0 result_imgs.append(img) result_masks.append(mask) @@ -50,65 +58,6 @@ def Toinputshape(imgs,masks,finesize): result_masks = np.array(result_masks) return result_imgs,result_masks - - -def random_transform(img,mask,finesize): - - - # randomsize = int(finesize*(1.2+0.2*random.random())+2) - - h,w = img.shape[:2] - loadsize = min((h,w)) - a = (float(h)/float(w))*random.uniform(0.9, 1.1) - - if h10: + if len(input_imgs)>perload_num: del(input_imgs[0]) del(ground_trues[0]) load_cnt += 1 @@ -162,7 +125,7 @@ import threading t = threading.Thread(target=preload,args=()) #t为新创建的线程 t.daemon = True t.start() -while load_cnt < 10: +while load_cnt < perload_num: time.sleep(0.1) netG.train() @@ -171,7 +134,7 @@ print("Begin training...") for iter in range(start_iter+1,ITER): # input_img,ground_true = loaddata() - ran = random.randint(1, 8) + ran = random.randint(1, perload_num-2) input_img = input_imgs[ran] ground_true = ground_trues[ran] @@ -231,7 +194,8 @@ for iter in range(start_iter+1,ITER): if (iter+1)%100 == 0: try: - showresult(input_img[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:], ground_true, pred,'result_train.png') + data.showresult(input_img[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:], + ground_true, pred,os.path.join(dir_checkpoint,'result_train.png')) except Exception as e: print(e) @@ -266,7 +230,6 @@ for iter in range(start_iter+1,ITER): time_start=time.time() - if (iter+1)%SAVE_FRE == 0: if iter+1 != SAVE_FRE: os.rename(os.path.join(dir_checkpoint,'last_G.pth'),os.path.join(dir_checkpoint,str(iter+1-SAVE_FRE)+'G.pth')) @@ -282,7 +245,6 @@ for iter in range(start_iter+1,ITER): f = open(os.path.join(dir_checkpoint,'iter'),'w+') f.write(str(iter+1)) f.close() - # torch.save(netG.cpu().state_dict(),dir_checkpoint+'iter'+str(iter+1)+'.pth') print('network saved.') #test @@ -292,6 +254,7 @@ for iter in range(start_iter+1,ITER): for cnt,test_name in enumerate(test_names,0): img_names = os.listdir(os.path.join('./test',test_name,'image')) + img_names.sort() input_img = np.zeros((finesize,finesize,3*N+1), dtype='uint8') img_names.sort() for i in range(0,N): @@ -307,7 +270,7 @@ for iter in range(start_iter+1,ITER): input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False) pred = netG(input_img) - pred = (pred.cpu().detach().numpy()*255)[0].transpose((1, 2, 0)) + pred = data.tensor2im(pred,rgb2bgr = False, is0_1 = True) result[finesize:finesize*2,finesize*cnt:finesize*(cnt+1),:] = pred cv2.imwrite(os.path.join(dir_checkpoint,str(iter+1)+'_test.png'), result) diff --git a/util/data.py b/util/data.py index ee79e05..50aeab7 100755 --- a/util/data.py +++ b/util/data.py @@ -1,6 +1,8 @@ +import random import numpy as np import torch import torchvision.transforms as transforms +import cv2 transform = transforms.Compose([ transforms.ToTensor(), @@ -8,7 +10,7 @@ transform = transforms.Compose([ ] ) -def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True): +def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True ,is0_1 = False): image_tensor =image_tensor.data image_numpy = image_tensor[0].cpu().float().numpy() # if gray: @@ -16,7 +18,12 @@ def tensor2im(image_tensor, imtype=np.uint8, gray=False, rgb2bgr = True): # else: if image_numpy.shape[0] == 1: image_numpy = np.tile(image_numpy, (3, 1, 1)) - image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 + + image_numpy = image_numpy.transpose((1, 2, 0)) + + if not is0_1: + image_numpy = (image_numpy + 1)/2.0 + image_numpy = np.clip(image_numpy * 255.0,0,255) if rgb2bgr and not gray: image_numpy = image_numpy[...,::-1]-np.zeros_like(image_numpy) return image_numpy.astype(imtype) @@ -46,12 +53,88 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape = image_tensor = image_tensor.cuda() return image_tensor -# def im2tensor(image_numpy, use_gpu=False): -# h, w ,ch = image_numpy.shape -# image_numpy = image_numpy/255.0 -# image_numpy = image_numpy.transpose((2, 0, 1)) -# image_numpy = image_numpy.reshape(-1,ch,h,w) -# img_tensor = torch.from_numpy(image_numpy).float() -# if use_gpu: -# img_tensor = img_tensor.cuda() -# return img_tensor \ No newline at end of file + +def random_transform_video(src,target,finesize,N): + + #random crop + h,w = target.shape[:2] + h_move = int((h-finesize)*random.random()) + w_move = int((w-finesize)*random.random()) + # print(h,w,h_move,w_move) + target = target[h_move:h_move+finesize,w_move:w_move+finesize,:] + src = src[h_move:h_move+finesize,w_move:w_move+finesize,:] + + #random flip + if random.random()<0.5: + src = src[:,::-1,:] + target = target[:,::-1,:] + + #random color + random_num = 15 + bright = random.randint(-random_num*2,random_num*2) + for i in range(N*3): src[:,:,i]=np.clip(src[:,:,i].astype('int')+bright,0,255).astype('uint8') + for i in range(3): target[:,:,i]=np.clip(target[:,:,i].astype('int')+bright,0,255).astype('uint8') + + return src,target + + +def random_transform_image(img,mask,finesize): + + # randomsize = int(finesize*(1.2+0.2*random.random())+2) + + h,w = img.shape[:2] + loadsize = min((h,w)) + a = (float(h)/float(w))*random.uniform(0.9, 1.1) + + if h