提交 57467a8f 编写于 作者: H hypox64

New version V0.3.0 #5 #8

上级 700c1859
...@@ -154,6 +154,7 @@ result/ ...@@ -154,6 +154,7 @@ result/
/pretrained_models_old /pretrained_models_old
/deepmosaic_window /deepmosaic_window
/sftp-config.json /sftp-config.json
/exe
#./make_datasets #./make_datasets
/make_datasets/video /make_datasets/video
/make_datasets/tmp /make_datasets/tmp
......
...@@ -6,25 +6,19 @@ This project is based on "semantic segmentation" and "Image-to-Image Translation".< ...@@ -6,25 +6,19 @@ This project is based on "semantic segmentation" and "Image-to-Image Translation".<
* [中文版README](./README_CN.md)<br> * [中文版README](./README_CN.md)<br>
### More example ### More example
origin | auto add mosaic | auto clean mosaic origin | auto add mosaic | auto clean mosaic
:-:|:-:|:-: :-:|:-:|:-:
![image](./imgs/example/lena.jpg) | ![image](./imgs/example/lena_add.jpg) | ![image](./imgs/example/lena_clean.jpg) ![image](./imgs/example/lena.jpg) | ![image](./imgs/example/lena_add.jpg) | ![image](./imgs/example/lena_clean.jpg)
![image](./imgs/example/youknow.png) | ![image](./imgs/example/youknow_add.png) | ![image](./imgs/example/youknow_clean.png) ![image](./imgs/example/youknow.png) | ![image](./imgs/example/youknow_add.png) | ![image](./imgs/example/youknow_clean.png)
* Compared with [DeepCreamPy](https://github.com/deeppomf/DeepCreamPy) * Compared with [DeepCreamPy](https://github.com/deeppomf/DeepCreamPy)
mosaic image | DeepCreamPy | ours mosaic image | DeepCreamPy | ours
:-:|:-:|:-: :-:|:-:|:-:
![image](./imgs/example/face_a_mosaic.jpg) | ![image](./imgs/example/a_dcp.png) | ![image](./imgs/example/face_a_clean.jpg) ![image](./imgs/example/face_a_mosaic.jpg) | ![image](./imgs/example/a_dcp.png) | ![image](./imgs/example/face_a_clean.jpg)
![image](./imgs/example/face_b_mosaic.jpg) | ![image](./imgs/example/b_dcp.png) | ![image](./imgs/example/face_b_clean.jpg) ![image](./imgs/example/face_b_mosaic.jpg) | ![image](./imgs/example/b_dcp.png) | ![image](./imgs/example/face_b_clean.jpg)
* Style Transfer * Style Transfer
origin | to Van Gogh | to winter origin | to Van Gogh | to winter
:-:|:-:|:-: :-:|:-:|:-:
![image](./imgs/example/SZU.jpg) | ![image](./imgs/example/SZU_vangogh.jpg) | ![image](./imgs/example/SZU_summer2winter.jpg) ![image](./imgs/example/SZU.jpg) | ![image](./imgs/example/SZU_vangogh.jpg) | ![image](./imgs/example/SZU_summer2winter.jpg)
An interesting example:[Ricardo Milos to cat](https://www.bilibili.com/video/BV1Q7411W7n6) An interesting example:[Ricardo Milos to cat](https://www.bilibili.com/video/BV1Q7411W7n6)
## Run DeepMosaics ## Run DeepMosaics
...@@ -33,6 +27,7 @@ You can either run DeepMosaics via pre-built binary package or from source.<br> ...@@ -33,6 +27,7 @@ You can either run DeepMosaics via pre-built binary package or from source.<br>
### Pre-built binary package ### Pre-built binary package
For Windows, we built a GUI version for easy testing.<br> For Windows, we built a GUI version for easy testing.<br>
Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br> Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br>
* [[How to use]](./docs/exe_help.md)<br> * [[How to use]](./docs/exe_help.md)<br>
![image](./imgs/GUI.png)<br> ![image](./imgs/GUI.png)<br>
...@@ -64,11 +59,11 @@ You can download pre_trained models and put them into './pretrained_models'.<br> ...@@ -64,11 +59,11 @@ You can download pre_trained models and put them into './pretrained_models'.<br>
[[Introduction to pre-trained models]](./docs/pre-trained_models_introduction.md)<br> [[Introduction to pre-trained models]](./docs/pre-trained_models_introduction.md)<br>
#### Simple example #### Simple example
* Add Mosaic (output video will be saved in './result')<br> * Add Mosaic (output media will be saved in './result')<br>
```bash ```bash
python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu -1 python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu -1
``` ```
* Clean Mosaic (output video will be saved in './result')<br> * Clean Mosaic (output media will be saved in './result')<br>
```bash ```bash
python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu -1 python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu -1
``` ```
...@@ -76,5 +71,9 @@ python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretra ...@@ -76,5 +71,9 @@ python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretra
If you want to test other image or video, please refer to this file.<br> If you want to test other image or video, please refer to this file.<br>
[[options_introduction.md]](./docs/options_introduction.md) <br> [[options_introduction.md]](./docs/options_introduction.md) <br>
## Training with your own dataset
If you want to train with your own dataset, please refer to [training_with_your_own_dataset.md](./docs/training_with_your_own_dataset.md)
## Acknowledgments ## Acknowledgments
This code borrows heavily from [[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet)[[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD). This code borrows heavily from [[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet) [[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD) [[BiSeNet]](https://github.com/ooooverflow/BiSeNet).
...@@ -3,25 +3,19 @@ ...@@ -3,25 +3,19 @@
这是一个通过深度学习自动的为图片/视频添加马赛克,或消除马赛克的项目.<br>它基于“语义分割”以及“图像翻译”.<br> 这是一个通过深度学习自动的为图片/视频添加马赛克,或消除马赛克的项目.<br>它基于“语义分割”以及“图像翻译”.<br>
### 更多例子 ### 更多例子
原始 | 自动打码 | 自动去码 原始 | 自动打码 | 自动去码
:-:|:-:|:-: :-:|:-:|:-:
![image](./imgs/example/lena.jpg) | ![image](./imgs/example/lena_add.jpg) | ![image](./imgs/example/lena_clean.jpg) ![image](./imgs/example/lena.jpg) | ![image](./imgs/example/lena_add.jpg) | ![image](./imgs/example/lena_clean.jpg)
![image](./imgs/example/youknow.png) | ![image](./imgs/example/youknow_add.png) | ![image](./imgs/example/youknow_clean.png) ![image](./imgs/example/youknow.png) | ![image](./imgs/example/youknow_add.png) | ![image](./imgs/example/youknow_clean.png)
* 与[DeepCreamPy](https://github.com/deeppomf/DeepCreamPy)相比较 * 与[DeepCreamPy](https://github.com/deeppomf/DeepCreamPy)相比较
马赛克图片 | DeepCreamPy | ours 马赛克图片 | DeepCreamPy | ours
:-:|:-:|:-: :-:|:-:|:-:
![image](./imgs/example/face_a_mosaic.jpg) | ![image](./imgs/example/a_dcp.png) | ![image](./imgs/example/face_a_clean.jpg) ![image](./imgs/example/face_a_mosaic.jpg) | ![image](./imgs/example/a_dcp.png) | ![image](./imgs/example/face_a_clean.jpg)
![image](./imgs/example/face_b_mosaic.jpg) | ![image](./imgs/example/b_dcp.png) | ![image](./imgs/example/face_b_clean.jpg) ![image](./imgs/example/face_b_mosaic.jpg) | ![image](./imgs/example/b_dcp.png) | ![image](./imgs/example/face_b_clean.jpg)
* 风格转换 * 风格转换
原始 | 梵高风格 | 转化为冬天 原始 | 梵高风格 | 转化为冬天
:-:|:-:|:-: :-:|:-:|:-:
![image](./imgs/example/SZU.jpg) | ![image](./imgs/example/SZU_vangogh.jpg) | ![image](./imgs/example/SZU_summer2winter.jpg) ![image](./imgs/example/SZU.jpg) | ![image](./imgs/example/SZU_vangogh.jpg) | ![image](./imgs/example/SZU_summer2winter.jpg)
一个有意思的尝试:[香蕉君♂猫](https://www.bilibili.com/video/BV1Q7411W7n6) 一个有意思的尝试:[香蕉君♂猫](https://www.bilibili.com/video/BV1Q7411W7n6)
## 如何运行 ## 如何运行
...@@ -74,5 +68,9 @@ python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretra ...@@ -74,5 +68,9 @@ python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretra
如果想要测试其他的图片或视频,请参照以下文件输入参数.<br> 如果想要测试其他的图片或视频,请参照以下文件输入参数.<br>
[[options_introduction_CN.md]](./docs/options_introduction_CN.md) <br> [[options_introduction_CN.md]](./docs/options_introduction_CN.md) <br>
## 使用自己的数据训练模型
如果需要使用自己的数据训练模型,请参照 [training_with_your_own_dataset.md](./docs/training_with_your_own_dataset.md)
## 鸣谢 ## 鸣谢
代码大量的参考了以下项目:[[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet)[[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD). 代码大量的参考了以下项目:[[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet) [[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD) [[BiSeNet]](https://github.com/ooooverflow/BiSeNet).
\ No newline at end of file
...@@ -38,7 +38,7 @@ def addmosaic_video(opt,netS): ...@@ -38,7 +38,7 @@ def addmosaic_video(opt,netS):
positions = [] positions = []
for i,imagepath in enumerate(imagepaths,1): for i,imagepath in enumerate(imagepaths,1):
img = impro.imread(os.path.join('./tmp/video2image',imagepath)) img = impro.imread(os.path.join('./tmp/video2image',imagepath))
mask,x,y,area = runmodel.get_ROI_position(img,netS,opt) mask,x,y,size,area = runmodel.get_ROI_position(img,netS,opt)
positions.append([x,y,area]) positions.append([x,y,area])
cv2.imwrite(os.path.join('./tmp/ROI_mask',imagepath),mask) cv2.imwrite(os.path.join('./tmp/ROI_mask',imagepath),mask)
print('\r','Find ROI location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='') print('\r','Find ROI location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
...@@ -110,7 +110,7 @@ def cleanmosaic_img(opt,netG,netM): ...@@ -110,7 +110,7 @@ def cleanmosaic_img(opt,netG,netM):
print('Clean Mosaic:',path) print('Clean Mosaic:',path)
img_origin = impro.imread(path) img_origin = impro.imread(path)
x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt) x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt)
#cv2.imwrite('./mask/'+os.path.basename(path), mask) cv2.imwrite('./mask/'+os.path.basename(path), mask)
img_result = img_origin.copy() img_result = img_origin.copy()
if size != 0 : if size != 0 :
img_mosaic = img_origin[y-size:y+size,x-size:x+size] img_mosaic = img_origin[y-size:y+size,x-size:x+size]
...@@ -118,7 +118,7 @@ def cleanmosaic_img(opt,netG,netM): ...@@ -118,7 +118,7 @@ def cleanmosaic_img(opt,netG,netM):
img_fake = runmodel.traditional_cleaner(img_mosaic,opt) img_fake = runmodel.traditional_cleaner(img_mosaic,opt)
else: else:
img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt) img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt)
img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather) img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather)
else: else:
print('Do not find mosaic') print('Do not find mosaic')
impro.imwrite(os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.jpg'),img_result) impro.imwrite(os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.jpg'),img_result)
...@@ -126,7 +126,7 @@ def cleanmosaic_img(opt,netG,netM): ...@@ -126,7 +126,7 @@ def cleanmosaic_img(opt,netG,netM):
def cleanmosaic_video_byframe(opt,netG,netM): def cleanmosaic_video_byframe(opt,netG,netM):
path = opt.media_path path = opt.media_path
fps,imagepaths = video_init(opt,path)[:2] fps,imagepaths = video_init(opt,path)[:2]
positions = get_mosaic_positions(opt,netM,imagepaths,savemask=False) positions = get_mosaic_positions(opt,netM,imagepaths,savemask=True)
# clean mosaic # clean mosaic
for i,imagepath in enumerate(imagepaths,0): for i,imagepath in enumerate(imagepaths,0):
x,y,size = positions[i][0],positions[i][1],positions[i][2] x,y,size = positions[i][0],positions[i][1],positions[i][2]
...@@ -138,7 +138,8 @@ def cleanmosaic_video_byframe(opt,netG,netM): ...@@ -138,7 +138,8 @@ def cleanmosaic_video_byframe(opt,netG,netM):
img_fake = runmodel.traditional_cleaner(img_mosaic,opt) img_fake = runmodel.traditional_cleaner(img_mosaic,opt)
else: else:
img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt) img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt)
img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather) mask = cv2.imread(os.path.join('./tmp/mosaic_mask',imagepath),0)
img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather)
cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result) cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result)
print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='') print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
print() print()
...@@ -178,13 +179,13 @@ def cleanmosaic_video_fusion(opt,netG,netM): ...@@ -178,13 +179,13 @@ def cleanmosaic_video_fusion(opt,netG,netM):
mosaic_input = np.zeros((INPUT_SIZE,INPUT_SIZE,3*N+1), dtype='uint8') mosaic_input = np.zeros((INPUT_SIZE,INPUT_SIZE,3*N+1), dtype='uint8')
mosaic_input[:,:,0:N*3] = impro.resize(img_pool[y-size:y+size,x-size:x+size,:], INPUT_SIZE) mosaic_input[:,:,0:N*3] = impro.resize(img_pool[y-size:y+size,x-size:x+size,:], INPUT_SIZE)
mask = impro.resize(mask,np.min(img_origin.shape[:2]))[y-size:y+size,x-size:x+size] mask_input = impro.resize(mask,np.min(img_origin.shape[:2]))[y-size:y+size,x-size:x+size]
mosaic_input[:,:,-1] = impro.resize(mask, INPUT_SIZE) mosaic_input[:,:,-1] = impro.resize(mask_input, INPUT_SIZE)
mosaic_input = data.im2tensor(mosaic_input,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1 = False) mosaic_input = data.im2tensor(mosaic_input,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1 = False)
unmosaic_pred = netG(mosaic_input) unmosaic_pred = netG(mosaic_input)
img_fake = data.tensor2im(unmosaic_pred,rgb2bgr = False ,is0_1 = False) img_fake = data.tensor2im(unmosaic_pred,rgb2bgr = False ,is0_1 = False)
img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather) img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather)
cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result) cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result)
print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='') print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
print() print()
......
...@@ -16,17 +16,17 @@ class Options(): ...@@ -16,17 +16,17 @@ class Options():
self.parser.add_argument('--mode', type=str, default='auto',help='Program running mode. auto | add | clean | style') self.parser.add_argument('--mode', type=str, default='auto',help='Program running mode. auto | add | clean | style')
self.parser.add_argument('--model_path', type=str, default='./pretrained_models/mosaic/add_face.pth',help='pretrained model path') self.parser.add_argument('--model_path', type=str, default='./pretrained_models/mosaic/add_face.pth',help='pretrained model path')
self.parser.add_argument('--result_dir', type=str, default='./result',help='output media will be saved here') self.parser.add_argument('--result_dir', type=str, default='./result',help='output media will be saved here')
self.parser.add_argument('--tempimage_type', type=str, default='png',help='type of temp image, png | jpg, png is better but occupy more storage space') self.parser.add_argument('--tempimage_type', type=str, default='jpg',help='type of temp image, png | jpg, png is better but occupy more storage space')
self.parser.add_argument('--netG', type=str, default='auto', self.parser.add_argument('--netG', type=str, default='auto',
help='select model to use for netG(Clean mosaic and Transfer style) -> auto | unet_128 | unet_256 | resnet_9blocks | HD | video') help='select model to use for netG(Clean mosaic and Transfer style) -> auto | unet_128 | unet_256 | resnet_9blocks | HD | video')
self.parser.add_argument('--fps', type=int, default=0,help='read and output fps, if 0-> origin') self.parser.add_argument('--fps', type=int, default=0,help='read and output fps, if 0-> origin')
self.parser.add_argument('--output_size', type=int, default=0,help='size of output media, if 0 -> origin') self.parser.add_argument('--output_size', type=int, default=0,help='size of output media, if 0 -> origin')
self.parser.add_argument('--mask_threshold', type=int, default=64,help='threshold of recognize clean or add mosaic position 0~255')
#AddMosaic #AddMosaic
self.parser.add_argument('--mosaic_mod', type=str, default='squa_avg',help='type of mosaic -> squa_avg | squa_random | squa_avg_circle_edge | rect_avg | random') self.parser.add_argument('--mosaic_mod', type=str, default='squa_avg',help='type of mosaic -> squa_avg | squa_random | squa_avg_circle_edge | rect_avg | random')
self.parser.add_argument('--mosaic_size', type=int, default=0,help='mosaic size,if 0 auto size') self.parser.add_argument('--mosaic_size', type=int, default=0,help='mosaic size,if 0 auto size')
self.parser.add_argument('--mask_extend', type=int, default=10,help='extend mosaic area') self.parser.add_argument('--mask_extend', type=int, default=10,help='extend mosaic area')
self.parser.add_argument('--mask_threshold', type=int, default=64,help='threshold of recognize mosaic position 0~255')
#CleanMosaic #CleanMosaic
self.parser.add_argument('--mosaic_position_model_path', type=str, default='auto',help='name of model use to find mosaic position') self.parser.add_argument('--mosaic_position_model_path', type=str, default='auto',help='name of model use to find mosaic position')
......
...@@ -15,7 +15,7 @@ def main(): ...@@ -15,7 +15,7 @@ def main():
else: else:
files = [opt.media_path] files = [opt.media_path]
if opt.mode == 'add': if opt.mode == 'add':
netS = loadmodel.unet(opt) netS = loadmodel.bisenet(opt,'roi')
for file in files: for file in files:
opt.media_path = file opt.media_path = file
if util.is_img(file): if util.is_img(file):
...@@ -26,7 +26,7 @@ def main(): ...@@ -26,7 +26,7 @@ def main():
print('This type of file is not supported') print('This type of file is not supported')
elif opt.mode == 'clean': elif opt.mode == 'clean':
netM = loadmodel.unet_clean(opt) netM = loadmodel.bisenet(opt,'mosaic')
if opt.traditional: if opt.traditional:
netG = None netG = None
elif opt.netG == 'video': elif opt.netG == 'video':
......
DeepMosaics V0.3.0
Core program building with windows10_1703_x86_64
+ python 3.6.8
+ pyinstaller 3.5
GUI building with C#
For more detail, please view on github: https://github.com/HypoX64/DeepMosaics
Releases History
V0.3.0
1. Support BiSeNet(Better recognition of mosaics).
2. New videoHD model.
3. Better feathering method.
V0.2.0
1. Add video model.
2. Now you can input Chinese paths
3. Support style transfer
4. Support fps limit
V0.1.2
1. Support pix2pixHD model
V0.1.1
1. Check path, can't input illegal path
V0.1.0
1. Initial release.
\ No newline at end of file
## DeepMosaics.exe Instructions ## DeepMosaics.exe Instructions
[[中文版]](./exe_help_CN.md) [[中文版]](./exe_help_CN.md)
This is a GUI version compiled in Windows.<br> This is a GUI version compiled in Windows.<br>
Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br> Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br>
Attention:<br> Attention:<br>
- Require Windows_x86_64, Windows10 is better.<br> - Require Windows_x86_64, Windows10 is better.<br>
- Different pre-trained models are suitable for different effects.<br> - Different pre-trained models are suitable for different effects.<br>
- Run time depends on computer performance.<br> - Run time depends on computer performance.<br>
- If output video cannot be played, you can try with [potplayer](https://daumpotplayer.com/download/).<br> - If output video cannot be played, you can try with [potplayer](https://daumpotplayer.com/download/).<br>
- The GUI version is updated later than the source code.<br> - The GUI version is updated later than the source code.<br>
### How to use ### How to use
* step 1: Choose image or video. * step 1: Choose image or video.
* step 2: Choose model(Different pre-trained models are suitable for different effects) * step 2: Choose model(Different pre-trained models are suitable for different effects)
* step 3: Run the program and wait. * step 3: Run the program and wait.
* step 4: Check the result in './result'. * step 4: Check the result in './result'.
### Introduction to pre-trained models ### Introduction to pre-trained models
* Mosaic * Mosaic
| Name | Description | | Name | Description |
| :------------------------------: | :---------------------------------------------------------: | | :------------------------------: | :---------------------------------------------------------: |
| add_face.pth | Add mosaic to all faces in images/videos. | | add_face.pth | Add mosaic to all faces in images/videos. |
| clean_face_HD.pth | Clean mosaic to all faces in images/video.<br>(RAM > 8GB). | | clean_face_HD.pth | Clean mosaic to all faces in images/video.<br>(RAM > 8GB). |
| add_youknow.pth | Add mosaic to all (FBI Warning) in images/videos. | | add_youknow.pth | Add mosaic to all (FBI Warning) in images/videos. |
| clean_youknow_resnet_9blocks.pth | Clean mosaic to all (FBI Warning) in images/videos. | | clean_youknow_resnet_9blocks.pth | Clean mosaic to all (FBI Warning) in images/videos. |
| clean_youknow_video.pth | Clean mosaic to all (FBI Warning) in videos. | | clean_youknow_video.pth | Clean mosaic to all (FBI Warning) in videos. |
| clean_youknow_video_HD.pth | Clean mosaic to all (FBI Warning) in videos.<br>(RAM > 8GB) | | clean_youknow_video_HD.pth | Clean mosaic to all (FBI Warning) in videos.<br>(RAM > 8GB) |
* Style Transfer * Style Transfer
| Name | Description | | Name | Description |
| :---------------------: | :-------------------------------------------------------: | | :---------------------: | :-------------------------------------------------------: |
| style_apple2orange.pth | Convert apples to oranges. | | style_apple2orange.pth | Convert apples to oranges. |
| style_orange2apple.pth | Convert oranges to apples | | style_orange2apple.pth | Convert oranges to apples |
| style_summer2winter.pth | Convert summer to winter. | | style_summer2winter.pth | Convert summer to winter. |
| style_winter2summer.pth | Convert winter to summer. | | style_winter2summer.pth | Convert winter to summer. |
| style_cezanne.pth | Convert photos/video to Paul Cézanne style. | | style_cezanne.pth | Convert photos/video to Paul Cézanne style. |
| style_monet.pth | Convert photos/video to Claude Monet style. | | style_monet.pth | Convert photos/video to Claude Monet style. |
| style_ukiyoe.pth | Convert photos/video to Ukiyoe style. | | style_ukiyoe.pth | Convert photos/video to Ukiyoe style. |
| style_vangogh.pth | Convert photos/video to Van Gogh style. | | style_vangogh.pth | Convert photos/video to Van Gogh style. |
### Annotation ### Annotation
![image](../imgs/GUI_Instructions.jpg)<br> ![image](../imgs/GUI_Instructions.jpg)<br>
* 1. Choose image or video. * 1. Choose image or video.
* 2. Choose model(Different pre-trained models are suitable for different effects). * 2. Choose model(Different pre-trained models are suitable for different effects).
* 3. Program running mode. (auto | add | clean | style) * 3. Program running mode. (auto | add | clean | style)
* 4. Use GPU to run deep learning model. (The current version does not support gpu, if you need to use gpu please run source). * 4. Use GPU to run deep learning model. (The current version does not support gpu, if you need to use gpu please run source).
* 5. Limit the fps of the output video(0->original fps). * 5. Limit the fps of the output video(0->original fps).
* 6. More options. * 6. More options.
* 7. More options can be input. * 7. More options can be input.
* 8. Run program. * 8. Run program.
* 9. Open help file. * 9. Open help file.
* 10. Sponsor our project. * 10. Sponsor our project.
* 11. Version information. * 11. Version information.
* 12. Open the URL on github. * 12. Open the URL on github.
### Introduction to options ### Introduction to options
If you need more effects, use '--option your-parameters' to enter what you need. If you need more effects, use '--option your-parameters' to enter what you need.
* Base * Base
| Option | Description | Default | | Option | Description | Default |
| :----------: | :----------------------------------------: | :-------------------------------------: | | :----------: | :----------------------------------------: | :-------------------------------------: |
| --use_gpu | if -1, do not use gpu | 0 | | --use_gpu | if -1, do not use gpu | 0 |
| --media_path | your videos or images path | ./imgs/ruoruo.jpg | | --media_path | your videos or images path | ./imgs/ruoruo.jpg |
| --mode | program running mode(auto/clean/add/style) | 'auto' | | --mode | program running mode(auto/clean/add/style) | 'auto' |
| --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth | | --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth |
| --result_dir | output media will be saved here | ./result | | --result_dir | output media will be saved here | ./result |
| --fps | read and output fps, if 0-> origin | 0 | | --fps | read and output fps, if 0-> origin | 0 |
* AddMosaic * AddMosaic
| Option | Description | Default | | Option | Description | Default |
| :--------------: | :----------------------------------------------------------: | :------: | | :--------------: | :----------------------------------------------------------: | :------: |
| --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | | --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
| --mosaic_size | mosaic size,if 0 -> auto size | 0 | | --mosaic_size | mosaic size,if 0 -> auto size | 0 |
| --mask_extend | extend mosaic area | 10 | | --mask_extend | extend mosaic area | 10 |
| --mask_threshold | threshold of recognize mosaic position 0~255 | 64 | | --mask_threshold | threshold of recognize mosaic position 0~255 | 64 |
* CleanMosaic * CleanMosaic
| Option | Description | Default | | Option | Description | Default |
| :-----------: | :----------------------------------------------------------: | :-----: | | :-----------: | :----------------------------------------------------------: | :-----: |
| --traditional | if specified, use traditional image processing methods to clean mosaic | | | --traditional | if specified, use traditional image processing methods to clean mosaic | |
| --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 | | --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 |
| --tr_down | downsample when using traditional method,it will affect final quality | 10 | | --tr_down | downsample when using traditional method,it will affect final quality | 10 |
| --medfilt_num | medfilt window of mosaic movement in the video | 11 | | --medfilt_num | medfilt window of mosaic movement in the video | 11 |
* Style Transfer * Style Transfer
| Option | Description | Default | | Option | Description | Default |
| :-----------: | :----------------------------------: | :-----: | | :-----------: | :----------------------------------: | :-----: |
| --output_size | size of output media, if 0 -> origin | 512 | | --output_size | size of output media, if 0 -> origin | 512 |
\ No newline at end of file
## DeepMosaics.exe 使用说明 ## DeepMosaics.exe 使用说明
下载程序以及预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br> 下载程序以及预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br>
注意事项:<br> 注意事项:<br>
- 程序的运行要求在64位Windows操作系统,我仅在Windows10运行过,其他版本暂未经过测试<br> - 程序的运行要求在64位Windows操作系统,我仅在Windows10运行过,其他版本暂未经过测试<br>
- 请根据需求选择合适的预训练模型进行测试<br> - 请根据需求选择合适的预训练模型进行测试<br>
- 运行时间取决于电脑性能,对于视频文件,我们建议使用源码以及GPU运行<br> - 运行时间取决于电脑性能,对于视频文件,我们建议使用源码以及GPU运行<br>
- 如果输出的视频无法播放,这边建议您尝试[potplayer](https://daumpotplayer.com/download/).<br> - 如果输出的视频无法播放,这边建议您尝试[potplayer](https://daumpotplayer.com/download/).<br>
- 相比于源码,该版本的更新将会延后. - 相比于源码,该版本的更新将会延后.
### 如何使用 ### 如何使用
* step 1: 选择需要处理的图片或视频 * step 1: 选择需要处理的图片或视频
* step 2: 选择预训练模型(不同的预训练模型有不同的效果) * step 2: 选择预训练模型(不同的预训练模型有不同的效果)
* step3: 运行程序并等待 * step3: 运行程序并等待
* step4: 查看结果(储存在result文件夹下) * step4: 查看结果(储存在result文件夹下)
## 预训练模型说明 ## 预训练模型说明
当前的预训练模型分为两类——添加/移除马赛克以及风格转换. 当前的预训练模型分为两类——添加/移除马赛克以及风格转换.
* 马赛克 * 马赛克
| 文件名 | 描述 | | 文件名 | 描述 |
| :------------------------------: | :-------------------------------------------: | | :------------------------------: | :-------------------------------------------: |
| add_face.pth | 对图片或视频中的脸部打码 | | add_face.pth | 对图片或视频中的脸部打码 |
| clean_face_HD.pth | 对图片或视频中的脸部去码<br>(要求内存 > 8GB). | | clean_face_HD.pth | 对图片或视频中的脸部去码<br>(要求内存 > 8GB). |
| add_youknow.pth | 对图片或视频中的十八禁内容打码 | | add_youknow.pth | 对图片或视频中的十八禁内容打码 |
| clean_youknow_resnet_9blocks.pth | 对图片或视频中的十八禁内容去码 | | clean_youknow_resnet_9blocks.pth | 对图片或视频中的十八禁内容去码 |
| clean_youknow_video.pth | 对视频中的十八禁内容去码 | | clean_youknow_video.pth | 对视频中的十八禁内容去码 |
| clean_youknow_video_HD.pth | 对视频中的十八禁内容去码<br>(要求内存 > 8GB) | | clean_youknow_video_HD.pth | 对视频中的十八禁内容去码<br>(要求内存 > 8GB) |
* 风格转换 * 风格转换
| 文件名 | 描述 | | 文件名 | 描述 |
| :---------------------: | :-------------------------------------------------------: | | :---------------------: | :-------------------------------------------------------: |
| style_apple2orange.pth | 苹果变橙子 | | style_apple2orange.pth | 苹果变橙子 |
| style_orange2apple.pth | 橙子变苹果 | | style_orange2apple.pth | 橙子变苹果 |
| style_summer2winter.pth | 夏天变冬天 | | style_summer2winter.pth | 夏天变冬天 |
| style_winter2summer.pth | 冬天变夏天 | | style_winter2summer.pth | 冬天变夏天 |
| style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 | | style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 |
| style_monet.pth | 转化为Claude Monet的绘画风格 | | style_monet.pth | 转化为Claude Monet的绘画风格 |
| style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 | | style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 |
| style_vangogh.pth | 转化为Van Gogh的绘画风格 | | style_vangogh.pth | 转化为Van Gogh的绘画风格 |
### GUI界面注释 ### GUI界面注释
![image](../imgs/GUI_Instructions.jpg)<br> ![image](../imgs/GUI_Instructions.jpg)<br>
* 1. 选择需要处理的图片或视频 * 1. 选择需要处理的图片或视频
* 2. 选择预训练模型 * 2. 选择预训练模型
* 3. 程序运行模式 (auto | add | clean | style) * 3. 程序运行模式 (auto | add | clean | style)
* 4. 使用GPU (该版本目前不支持GPU,若需要使用GPU请使用源码运行). * 4. 使用GPU (该版本目前不支持GPU,若需要使用GPU请使用源码运行).
* 5. 限制输出的视频帧率(0->原始帧率). * 5. 限制输出的视频帧率(0->原始帧率).
* 6. 更多的选项以及参数 * 6. 更多的选项以及参数
* 7. 自行输入更多参数,详见下文 * 7. 自行输入更多参数,详见下文
* 8. 运行 * 8. 运行
* 9. 打开帮助文件 * 9. 打开帮助文件
* 10. 支持我们 * 10. 支持我们
* 11. 版本信息 * 11. 版本信息
* 12. 打开项目的github页面 * 12. 打开项目的github页面
### 参数说明 ### 参数说明
如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数 如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数
* 基本 * 基本
| 选项 | 描述 | 默认 | | 选项 | 描述 | 默认 |
| :----------: | :------------------------: | :-------------------------------------: | | :----------: | :------------------------: | :-------------------------------------: |
| --use_gpu | if -1, do not use gpu | 0 | | --use_gpu | if -1, do not use gpu | 0 |
| --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg | | --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg |
| --mode | 运行模式(auto/clean/add/style) | 'auto' | | --mode | 运行模式(auto/clean/add/style) | 'auto' |
| --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth | | --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth |
| --result_dir | 保存路径 | ./result | | --result_dir | 保存路径 | ./result |
| --fps | 限制视频输出的fps,0则为默认 | 0 | | --fps | 限制视频输出的fps,0则为默认 | 0 |
* 添加马赛克 * 添加马赛克
| 选项 | 描述 | 默认 | | 选项 | 描述 | 默认 |
| :----------: | :------------------------: | :-------------------------------------: | | :----------: | :------------------------: | :-------------------------------------: |
| --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | | --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
| --mosaic_size | 马赛克大小,0则为自动 | 0 | | --mosaic_size | 马赛克大小,0则为自动 | 0 |
| --mask_extend | 拓展马赛克区域 | 10 | | --mask_extend | 拓展马赛克区域 | 10 |
| --mask_threshold | 马赛克区域识别阈值 0~255 | 64 | | --mask_threshold | 马赛克区域识别阈值 0~255 | 64 |
* 去除马赛克 * 去除马赛克
| 选项 | 描述 | 默认 | | 选项 | 描述 | 默认 |
| :----------: | :------------------------: | :-------------------------------------: | | :----------: | :------------------------: | :-------------------------------------: |
| --traditional | 如果输入这个参数则使用传统方法清除马赛克 | | | --traditional | 如果输入这个参数则使用传统方法清除马赛克 | |
| --tr_blur | 传统方法模糊尺寸 | 10 | | --tr_blur | 传统方法模糊尺寸 | 10 |
| --tr_down | 传统方法下采样尺寸 | 10 | | --tr_down | 传统方法下采样尺寸 | 10 |
| --medfilt_num | medfilt window of mosaic movement in the video | 11 | | --medfilt_num | medfilt window of mosaic movement in the video | 11 |
* 风格转换 * 风格转换
| 选项 | 描述 | 默认 | | 选项 | 描述 | 默认 |
| :----------: | :------------------------: | :-------------------------------------: | | :----------: | :------------------------: | :-------------------------------------: |
| --output_size | 输出媒体的尺寸,如果是0则为原始尺寸 |512| | --output_size | 输出媒体的尺寸,如果是0则为原始尺寸 |512|
\ No newline at end of file
### make datasets
## Introduction to options ## Introduction to options
If you need more effects, use '--option your-parameters' to enter what you need. If you need more effects, use '--option your-parameters' to enter what you need.
### Base ### Base
| Option | Description | Default | | Option | Description | Default |
| :----------: | :------------------------: | :-------------------------------------: | | :----------: | :------------------------: | :-------------------------------------: |
| --use_gpu | if -1, do not use gpu | 0 | | --use_gpu | if -1, do not use gpu | 0 |
| --media_path | your videos or images path | ./imgs/ruoruo.jpg | | --media_path | your videos or images path | ./imgs/ruoruo.jpg |
| --mode | program running mode(auto/clean/add/style) | 'auto' | | --mode | program running mode(auto/clean/add/style) | 'auto' |
| --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth | | --model_path | pretrained model path | ./pretrained_models/mosaic/add_face.pth |
| --result_dir | output media will be saved here| ./result | | --result_dir | output media will be saved here| ./result |
| --fps | read and output fps, if 0-> origin | 0 | | --fps | read and output fps, if 0-> origin | 0 |
### AddMosaic ### AddMosaic
| Option | Description | Default | | Option | Description | Default |
| :----------: | :------------------------: | :-------------------------------------: | | :----------: | :------------------------: | :-------------------------------------: |
| --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | | --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
| --mosaic_size | mosaic size,if 0 -> auto size | 0 | | --mosaic_size | mosaic size,if 0 -> auto size | 0 |
| --mask_extend | extend mosaic area | 10 | | --mask_extend | extend mosaic area | 10 |
| --mask_threshold | threshold of recognize mosaic position 0~255 | 64 | | --mask_threshold | threshold of recognize mosaic position 0~255 | 64 |
### CleanMosaic ### CleanMosaic
| Option | Description | Default | | Option | Description | Default |
| :----------: | :------------------------: | :-------------------------------------: | | :----------: | :------------------------: | :-------------------------------------: |
| --traditional | if specified, use traditional image processing methods to clean mosaic | | | --traditional | if specified, use traditional image processing methods to clean mosaic | |
| --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 | | --tr_blur | ksize of blur when using traditional method, it will affect final quality | 10 |
| --tr_down | downsample when using traditional method,it will affect final quality | 10 | | --tr_down | downsample when using traditional method,it will affect final quality | 10 |
| --medfilt_num | medfilt window of mosaic movement in the video | 11 | | --medfilt_num | medfilt window of mosaic movement in the video | 11 |
### Style Transfer ### Style Transfer
| Option | Description | Default | | Option | Description | Default |
| :----------: | :------------------------: | :-------------------------------------: | | :----------: | :------------------------: | :-------------------------------------: |
| --output_size | size of output media, if 0 -> origin |512| | --output_size | size of output media, if 0 -> origin |512|
\ No newline at end of file
## 参数说明 ## 参数说明
如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数 如果需要更多的效果, 请按照 '--option your-parameters' 输入所需要的参数
### 基本 ### 基本
| 选项 | 描述 | 默认 | | 选项 | 描述 | 默认 |
| :----------: | :------------------------: | :-------------------------------------: | | :----------: | :------------------------: | :-------------------------------------: |
| --use_gpu | if -1, do not use gpu | 0 | | --use_gpu | if -1, do not use gpu | 0 |
| --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg | | --media_path | 需要处理的视频或者照片的路径 | ./imgs/ruoruo.jpg |
| --mode | 运行模式(auto/clean/add/style) | 'auto' | | --mode | 运行模式(auto/clean/add/style) | 'auto' |
| --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth | | --model_path | 预训练模型的路径 | ./pretrained_models/mosaic/add_face.pth |
| --result_dir | 保存路径 | ./result | | --result_dir | 保存路径 | ./result |
| --fps | 限制视频输出的fps,0则为默认 | 0 | | --fps | 限制视频输出的fps,0则为默认 | 0 |
### 添加马赛克 ### 添加马赛克
| 选项 | 描述 | 默认 | | 选项 | 描述 | 默认 |
| :----------: | :------------------------: | :-------------------------------------: | | :----------: | :------------------------: | :-------------------------------------: |
| --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg | | --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
| --mosaic_size | 马赛克大小,0则为自动 | 0 | | --mosaic_size | 马赛克大小,0则为自动 | 0 |
| --mask_extend | 拓展马赛克区域 | 10 | | --mask_extend | 拓展马赛克区域 | 10 |
| --mask_threshold | 马赛克区域识别阈值 0~255 | 64 | | --mask_threshold | 马赛克区域识别阈值 0~255 | 64 |
### 去除马赛克 ### 去除马赛克
| 选项 | 描述 | 默认 | | 选项 | 描述 | 默认 |
| :----------: | :------------------------: | :-------------------------------------: | | :----------: | :------------------------: | :-------------------------------------: |
| --traditional | 如果输入这个参数则使用传统方法清除马赛克 | | | --traditional | 如果输入这个参数则使用传统方法清除马赛克 | |
| --tr_blur | 传统方法模糊尺寸 | 10 | | --tr_blur | 传统方法模糊尺寸 | 10 |
| --tr_down | 传统方法下采样尺寸 | 10 | | --tr_down | 传统方法下采样尺寸 | 10 |
| --medfilt_num | medfilt window of mosaic movement in the video | 11 | | --medfilt_num | medfilt window of mosaic movement in the video | 11 |
### 风格转换 ### 风格转换
| 选项 | 描述 | 默认 | | 选项 | 描述 | 默认 |
| :----------: | :------------------------: | :-------------------------------------: | | :----------: | :------------------------: | :-------------------------------------: |
| --output_size | 输出媒体的尺寸,如果是0则为原始尺寸 |512| | --output_size | 输出媒体的尺寸,如果是0则为原始尺寸 |512|
\ No newline at end of file
## Introduction to pre-trained models ## Introduction to pre-trained models
The current pre-trained models are divided into two categories(Add/Clean mosaic and StyleTransfer). The current pre-trained models are divided into two categories(Add/Clean mosaic and StyleTransfer).
Download pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br> Download pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br>
### Mosaic ### Mosaic
| Name | Description | | Name | Description |
| :------------------------------: | :---------------------------------------------------------: | | :------------------------------: | :-----------------------------------------------------: |
| add_face.pth | Add mosaic to all faces in images/videos. | | add_face.pth | Add mosaic to faces in images/videos. |
| clean_face_HD.pth | Clean mosaic to all faces in images/video.<br>(RAM > 8GB). | | clean_face_HD.pth | Clean mosaic to faces in images/video.<br>(RAM > 8GB). |
| add_youknow.pth | Add mosaic to all (FBI Warning) in images/videos. | | add_youknow.pth | Add mosaic to ... in images/videos. |
| clean_youknow_resnet_9blocks.pth | Clean mosaic to all (FBI Warning) in images/videos. | | clean_youknow_resnet_9blocks.pth | Clean mosaic to ... in images/videos. |
| clean_youknow_video.pth | Clean mosaic to all (FBI Warning) in videos. | | clean_youknow_video.pth | Clean mosaic to ... in videos. |
| clean_youknow_video_HD.pth | Clean mosaic to all (FBI Warning) in videos.<br>(RAM > 8GB) | | clean_youknow_video_HD.pth | Clean mosaic to ... in videos.<br>(RAM > 8GB) |
### Style Transfer ### Style Transfer
| Name | Description | | Name | Description |
| :---------------------: | :-------------------------------------------------------: | | :---------------------: | :-------------------------------------------------------: |
| style_apple2orange.pth | Convert apples to oranges. | | style_apple2orange.pth | Convert apples to oranges. |
| style_orange2apple.pth | Convert oranges to apples | | style_orange2apple.pth | Convert oranges to apples |
| style_summer2winter.pth | Convert summer to winter. | | style_summer2winter.pth | Convert summer to winter. |
| style_winter2summer.pth | Convert winter to summer. | | style_winter2summer.pth | Convert winter to summer. |
| style_cezanne.pth | Convert photos/video to Paul Cézanne style. | | style_cezanne.pth | Convert photos/video to Paul Cézanne style. |
| style_monet.pth | Convert photos/video to Claude Monet style. | | style_monet.pth | Convert photos/video to Claude Monet style. |
| style_ukiyoe.pth | Convert photos/video to Ukiyoe style. | | style_ukiyoe.pth | Convert photos/video to Ukiyoe style. |
| style_vangogh.pth | Convert photos/video to Van Gogh style. | | style_vangogh.pth | Convert photos/video to Van Gogh style. |
## 预训练模型说明 ## 预训练模型说明
当前的预训练模型分为两类——添加/移除马赛克以及风格转换. 当前的预训练模型分为两类——添加/移除马赛克以及风格转换.
可以通过以下方式下载预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br> 可以通过以下方式下载预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs) [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br>
### 马赛克 ### 马赛克
| 文件名 | 描述 | | 文件名 | 描述 |
| :------------------------------: | :-------------------------------------------: | | :------------------------------: | :-------------------------------------------: |
| add_face.pth | 对图片或视频中的脸部打码 | | add_face.pth | 对图片或视频中的脸部打码 |
| clean_face_HD.pth | 对图片或视频中的脸部去码<br>(要求内存 > 8GB). | | clean_face_HD.pth | 对图片或视频中的脸部去码<br>(要求内存 > 8GB). |
| add_youknow.pth | 对图片或视频中的十八禁内容打码 | | add_youknow.pth | 对图片或视频中的...内容打码 |
| clean_youknow_resnet_9blocks.pth | 对图片或视频中的十八禁内容去码 | | clean_youknow_resnet_9blocks.pth | 对图片或视频中的...内容去码 |
| clean_youknow_video.pth | 对视频中的十八禁内容去码 | | clean_youknow_video.pth | 对视频中的...内容去码 |
| clean_youknow_video_HD.pth | 对视频中的十八禁内容去码<br>(要求内存 > 8GB) | | clean_youknow_video_HD.pth | 对视频中的...内容去码<br>(要求内存 > 8GB) |
### 风格转换 ### 风格转换
| 文件名 | 描述 | | 文件名 | 描述 |
| :---------------------: | :-------------------------------------------------------: | | :---------------------: | :-------------------------------------------------------: |
| style_apple2orange.pth | 苹果变橙子 | | style_apple2orange.pth | 苹果变橙子 |
| style_orange2apple.pth | 橙子变苹果 | | style_orange2apple.pth | 橙子变苹果 |
| style_summer2winter.pth | 夏天变冬天 | | style_summer2winter.pth | 夏天变冬天 |
| style_winter2summer.pth | 冬天变夏天 | | style_winter2summer.pth | 冬天变夏天 |
| style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 | | style_cezanne.pth | 转化为Paul Cézanne 的绘画风格 |
| style_monet.pth | 转化为Claude Monet的绘画风格 | | style_monet.pth | 转化为Claude Monet的绘画风格 |
| style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 | | style_ukiyoe.pth | 转化为Ukiyoe的绘画风格 |
| style_vangogh.pth | 转化为Van Gogh的绘画风格 | | style_vangogh.pth | 转化为Van Gogh的绘画风格 |
# Training with your own dataset
Training with your own dataset requires a GPU with at least 6 GB of memory (GTX 1060 or better).<br>
We will use "face" as an example. If you don't have any pictures, you can download [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) or [WIDER](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/WiderFace_Results.html).
## Getting Started
#### Prerequisites
- Linux, Mac OS, Windows
- Python 3.6+
- [ffmpeg 3.4.6](http://ffmpeg.org/)
- [Pytorch 1.0+](https://pytorch.org/)
- NVIDIA GPU(with more than 6G memory) + CUDA CuDNN<br>
#### Dependencies
This code depends on opencv-python, torchvision, and matplotlib, all of which can be installed with pip.
#### Clone this repo
```bash
git clone https://github.com/HypoX64/DeepMosaics
cd DeepMosaics
```
## Make training datasets
```bash
cd make_datasets
```
### Add mosaic dataset
Please generate masks for the images to which you want to add mosaic (you should have more than 1000 images). Then put the images in ```face/origin_image``` and the masks in ```face/mask```.<br>
* You can use ```draw_mask.py``` to generate them.
```bash
python draw_mask.py --datadir 'dir for your pictures' --savedir ../datasets/draw/face
#Press the left mouse button to draw the mask . Press 'S' to save mask, 'A' to reduce brush size, 'D' to increase brush size, 'W' to cancel drawing.
```
* If you want to get images from videos, you can use ```get_image_from_video.py```
```bash
python get_image_from_video.py --datadir 'dir for your videos' --savedir ../datasets/video2image --fps 1
```
### Clean mosaic dataset
We provide several methods for generating clean mosaic datasets. However, for better results, we recommend first training an addmosaic model on a small dataset and then using it to automatically generate a large dataset (recommended: Method 2 for images and Method 4 for videos).
* Method 1: Use drawn masks to make pix2pix(HD) datasets (requires ```origin_image``` and ```mask```)
```bash
python make_pix2pix_dataset.py --datadir ../datasets/draw/face --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod drawn --minsize 128 --square
```
* Method 2: Use addmosaic model to make pix2pix(HD) datasets(Require addmosaic pre-trained model)
```bash
python make_pix2pix_dataset.py --datadir 'dir for your pictures' --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod network --model_path ../pretrained_models/mosaic/add_face.pth --minsize 128 --square --mask_threshold 128
```
* Method 3: Use Irregular Masks to make pix2pix(HD) datasets(Require [Irregular Masks](https://nv-adlr.github.io/publication/partialconv-inpainting))
```bash
python make_pix2pix_dataset.py --datadir 'dir for your pictures' --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod irregular --irrholedir ../datasets/Irregular_Holes_mask --square
```
* Method 4: Use addmosaic model to make video datasets(Require addmosaic pre-trained model. This is better for processing video mosaics)
```bash
python make_video_dataset.py --datadir 'dir for your videos' --model_path ../pretrained_models/mosaic/add_face.pth --mask_threshold 96 --savedir ../datasets/video/face
```
## Training
### Add
```bash
cd train/add
python train.py --gpu_id 0 --dataset ../../datasets/draw/face --savename face --loadsize 512 --finesize 360 --batchsize 16
```
### Clean
* For image datasets(generated by ```make_pix2pix_dataset.py```)
We use [pix2pix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) or [pix2pixHD](https://github.com/NVIDIA/pix2pixHD) to train the model. Here we take pix2pixHD as an example.
```bash
git clone https://github.com/NVIDIA/pix2pixHD
cd pix2pixHD
pip install dominate
python train.py --name face --resize_or_crop resize_and_crop --loadSize 563 --fineSize 512 --label_nc 0 --no_instance --dataroot ../datasets/pix2pix/face
```
* For video datasets(generated by ```make_video_dataset.py```)
```bash
cd train/clean
python train.py --dataset ../../datasets/video/face --savename face --savefreq 100000 --gan --hd --lr 0.0002 --lambda_gan 1 --gpu_id 0 --perload_num 8
```
## Testing
Put the saved network in ```./pretrained_models/mosaic/``` and rename it to ```add_face.pth```, ```clean_face_HD.pth``` or ```clean_face_video_HD.pth```.
010412_249-1pon-whole1_hd.avi,00:12:00,00:13:33,00:14:26,00:15:06,00:19:35,00:24:30,00:25:53,00:29:29,00:29:55,00:30:30,00:31:43,00:32:54,00:33:39,00:35:55,00:38:30,00:38:49,00:39:47,00:41:15,00:42:35,00:43:15,00:43:50,00:45:30,00:46:33,00:47:35,00:49:10,00:49:20,00:51:04,00:51:20,00:53:10,00:55:05
011013_511-1pon-whole1_hd.avi,00:16:09,00:16:43,00:19:12,00:19:54,00:24:52,00:26:23,00:29:20,00:31:40,00:32:16,00:36:45,00:37:15,00:37:35,00:38:00,00:38:40,00:41:40,00:46:09,00:57:50,00:58:10
012514_744-1pon-whole1_hd.mp4,00:08:12,00:12:00,00:12:30,00:17:40,00:19:35,00:20:50,00:21:50,00:24:35,00:29:10,00:30:25,00:33:10,00:39:35,00:40:35,00:42:25,00:42:35,00:57:05,00:58:25,00:59:15
020916_242-1pon-1080p.mp4,00:13:35,00:15:10,00:18:20,00:26:50,00:31:25,00:33:15,00:34:55,00:37:15,00:38:25,00:39:35,00:41:05,00:41:55,00:42:10,00:43:10,00:43:20,00:45:15,00:45:20,00:46:10,00:47:50,00:49:10,00:50:00,00:50:20,00:52:10,00:56:55,00:57:05,00:57:35,00:59:15,00:59:30
031516_262-1pon-1080p.mp4,00:09:30,00:13:00,00:13:50,00:14:50,00:16:00,00:20:55,00:25:50,00:26:35,00:30:30,00:32:40,00:38:20,00:38:30,00:39:55,00:42:10,00:43:45,00:45:40,00:46:20,00:47:50,00:48:05,00:49:50,00:51:45,00:51:50,00:57:00
031716_001-1pon-1080p.mp4,00:02:30,00:02:40,00:02:55,00:04:00,00:04:20,00:05:40,00:06:05,00:06:50,00:08:10,00:08:20,00:08:30,00:08:47,00:10:00,00:10:05,00:10:20,00:10:30,00:11:50,00:12:00,00:12:35,00:13:20,00:14:20,00:15:35
032113_554-1pon-whole1_hd.avi,00:13:20,00:21:20,00:23:15,00:23:35,00:24:00,00:25:10,0:25:30,00:25:50,00:26:35,00:26:50,00:31:40,00:35:15,00:35:25,00:37:10,00:45:35,00:46:05,00:48:00,00:49:50,00:50:30,00:51:50,00:52:30,00:52:40,00:52:50,00:58:20,00:58:30,00:59:30,00:59:45,01:01:45,01:02:00,01:03:50,01:04:05,01:04:20,01:04:30,01:05:35,01:07:40
032313_556-1pon-whole1_hd.avi,00:04:05,00:05:00,00:06:40,00:06:50,00:07:50,00:09:10,00:10:30,00:13:15,00:16:05,00:17:35,00:18:20,00:20:25,00:20:30,00:22:30,00:26:50,00:27:30,00:35:30,00:42:40,00:44:09,00:50:00,00:52:50,00:53:40,00:54:15,00:58:00,00:58:25,01:04:05,01:05:05,01:06:15,01:06:50,01:07:51,01:08:10
032715_001-1pon-1080p.mp4,00:09:20,00:10:35,00:10:45,00:13:25,00:21:20,00:24:50,00:28:10,00:29:26,00:29:52,00:30:55,00:31:10,00:31:55,00:32:20,00:32:40,00:33:10,00:34:30,00:35:40,00:35:50,00:48:30,00:48:50,00:49:45,00:50:15,00:53:55,00:57:13,00:57:20,00:59:00,00:59:55
032715_004-1pon-1080p.mp4,00:22:30,00:22:55,00:24:44,00:26:15,00:28:00,00:28:40,00:30:40,00:35:40,00:38:20,00:38:50,00:39:50,00:41:30,00:42:10,00:42:30,00:43:40,00:44:05,00:44:35,00:45:17,00:45:36,00:46:23,00:46:55,00:47:20,00:47:40,00:48:05,00:48:30,00:50:50,00:52:00,00:53:30,00:53:45,00:54:25,00:54:45,00:57:40,00:58:00,00:58:40,00:58:50
040111_063-1pon-whole1_hd.avi,00:08:25,00:08:45,00:09:00,00:10:55,00:16:40,00:17:05,00:17:35,00:19:10,00:27:00,00:28:05,00:29:05,00:31:40,00:36:00,00:37:50,00:45:30,00:46:15,00:47:45,00:50:15,00:52:50,00:53:47,00:53:58,00:55:05,00:56:15,00:58:40,00:59:00,00:59:20,00:59:45
040814_786-1pon-whole1_hd.avi,00:04:40,00:05:00,00:06:50,00:10:20,00:21:00,00:23:35,00:24:10,00:26:40,00:28:35,00:29:15,00:29:20,00:31:15,00:32:50,00:36:10,00:39:40,00:42:00,00:42:50,00:44:00,00:44:15,00:44:36,00:45:00,00:45:20,00:47:20,00:48:10,00:48:30,00:53:50,00:54:43,00:55:20,00:59:15,00:59:30
050915_077-1pon-1080p,00:11:00,00:12:30,00:19:20,00:19:50,00:21:00,00:22:00,00:23:40,00:24:30,00:28:20,00:33:50,00:36:00,00:37:30,00:38:50,00:39:30,00:41:50,00:44:20,00:48:45,00:49:25,00:50:45,00:51:00,00:53:05,00:54:00,00:54:27,00:57:30,00:59:10,01:00:30,01:04:10,01:04:20,01:04:30,01:04:50,01:05:20
052215_084-1pon-1080p.mp4,00:26:50,00:27:15,00:30:20,00:33:20,00:34:00,00:37:00,00:41:00,00:43:00,00:44:30,00:47:40,00:50:35,00:50:40,00:51:40,00:55:20,00:55:50,00:55:55,00:56:20,00:57:30,00:57:40,00:59:10,00:59:15,01:00:05
062015_101-1pon-1080p.mp4,00:11:00,00:12:47,00:13:10,00:14:20,00:15:20,00:16:20,00:17:10,00:17:25,00:19:45,00:21:05,00:23:40,00:27:40,00:28:10,00:37:15,00:41:30,00:43:20,00:44:25,00:46:51,00:47:20,00:49:00,00:50:40,00:51:50,00:52:50,00:55:00,00:56:20,00:58:10,00:59:00,01:00:00
062715_105-1pon-1080p.mp4,00:11:30,00:11:55,00:12:00,00:12:30,00:13:45,00:16:50,00:18:25,00:19:20,00:20:40,00:25:15,00:36:20,00:36:40,00:37:25,00:39:05,00:39:50,00:40:55,00:41:55,00:45:40,00:43:30,00:44:15,00:45:30,00:47:40,00:50:05,00:50:10,00:50:20,00:50:30,00:55:10,00:56:35,00:58:40,01:00:15,01:05:05,01:05:15,01:05:30,01:05:50
1pondo_070315_108_1080p.mp4,00:11:10,00:11:50,00:13:50,00:14:20,00:14:35,00:15:50,00:17:20,00:18:35,00:20:45,00:24:35,00:25:05,00:29:15,00:30:40,00:31:55,00:35:20,00:42:55,00:43:05,00:46:15,00:48:00,00:51:45,00:52:33,00:54:20,00:59:25,00:59:40,01:00:05
071114_842-1pon-whole1_hd.mp4,00:09:50,00:11:25,00:16:35,00:18:20,00:22:10,00:25:25,00:26:35,00:33:50,00:35:40,00:43:10
071715_116-1pon-1080p.mp4,00:10:50,00:11:30,00:12:50,00:15:10,00:16:45,00:17:05,00:25:20,00:26:45,00:28:30,00:30:20,00:32:55,00:34:30,00:37:40,00:38:40,00:40:20,00:41:20,00:44:10,00:47:15,00:55:00,00:59:40,00:59:50
071815_117-1pon-1080p.mp4,00:14:50,00:15:10,00:18:05,00:14:50,00:25:55,00:26:25,00:32:45,00:33:40,00:43:15,00:45:05,00:45:45,00:48:40,00:48:50,00:55:45,01:00:20,01:00:35,01:01:00,01:01:10
080815_130-1pon-1080p,00:14:50,00:17:15,00:17:20,00:23:55,00:25:30,00:25:55,00:28:20,00:28:30,00:30:10,00:31:00,00:33:25,00:33:35,00:33:45,00:33:50,00:39:25,00:39:50,00:40:25,00:44:05,00:45:00,00:45:40,00:45:50,00:46:55,00:49:15,00:49:25,00:46:40,00:50:10,00:50:15,00:51:25,00:51:50,00:53:14,00:53:20,00:54:15,00:56:15,00:56:25,00:56:45,00:57:45,00:57:30,00:58:00,00:56:45,00:56:55,01:00:00,01:00:05,01:00:25,01:00:30
081514_863-1pon-whole1_hd.avi,00:10:30,00:26:00,00:30:00,00:38:21,00:40:15,00:40:30,00:49:10,00:50:05,00:57:10,00:59:00
090614_877-1pon-whole1_hd.mp4,00:04:45,00:05:15,00:12:25,00:12:40,00:15:00,00:15:15,00:16:25,00:20:50,00:21:45,00:26:10,00:33:35,00:35:55,00:37:50,00:37:55,00:38:12,00:39:55,00:41:50,00:44:27,00:44:37,00:46:30,00:47:35,00:47:40,00:48:20,00:59:50
091215_152-1pon-1080p.mp4,00:05:30,00:06:10,00:06:20,00:08:15,00:10:10,00:11:15,00:12:15,00:12:55,0:15:15,00:15:35,00:18:00,00:24:45,00:25:45,00:33:45,00:35:32,00:37:35,00:37:55,00:38:50,00:42:15,00:45:00,00:47:55,00:48:20,00:48:35,00:48:42,00:49:43,00:50:15,00:51:10,00:55:35,00:57:00,00:57:55,01:03:30,01:05:00
092813_670-1pon-whole1_hd.avi,00:16:32,00:19:00,00:22:10,00:23:20,00:23:40,00:30:20,00:32:00,00:35:00,00:36:50,00:41:40,00:44:50,00:52:45,00:54:00
103015_180-1pon-1080p.mp4,00:24:50,00:31:25,00:41:20,00:48:10,00:48:50,00:49:20,00:50:15,00:52:45,00:53:30,01:02:40,01:03:35,01:09:50,01:15:05,01:16:50
110615_185-1pon-1080p.mp4,00:15:00,00:15:40,00:34:15,00:34:50,00:35:30,00:37:05,00:39:35,00:40:30,00:41:40,00:47:35,00:50:15,00:51:01,00:51:35,00:54:15,00:55:40,00:55:50,00:57:20,00:59:35,01:00:00,01:00:25
120310_979-1pon-whole1_hd.avi,00:15:10,00:14:25,00:14:30,00:14:50,00:15:45,00:16:35,00:16:55,00:17:25,00:19:25,00:20:45,00:27:05,00:30:17,00:32:00,00:33:50,00:35:45,00:38:55,00:40:25,00:40:40,00:41:10,00:42:50,00:44:35,00:45:15,00:46:15,00:48:00,00:49:10,00:50:10,00:54:00,00:55:23,00:55:30,00:55:50
021315-806-carib-1080p.mp4,00:13:30,00:15:20,00:17:40,00:21:50,00:22:25,00:24:35,00:28:50,00:28:52,00:31:00,00:37:25,00:37:35,00:38:20,00:38:45,00:43:30,00:48:35,00:51:30,00:51:50,00:52:19,00:56:20,00:58:35
021715-809-carib-1080p.mp4,00:17:30,00:20:35,00:21:00,00:22:00,00:23:55,00:24:15,00:28:40,00:37:20,00:39:05,00:40:05,00:40:50,00:42:45,00:45:00,00:46:40,00:48:00,00:48:20,00:51:30,00:52:10,00:53:35,00:54:10,00:54:20,00:56:45,00:56:55,00:59:10,00:59:35,00:59:55
022715-817-carib-1080p.mp4,00:57:52,00:08:50,00:10:00,00:12:50,00:14:05,00:18:25,00:20:45,00:20:57,00:22:15,00:23:30,00:23:55,00:24:18,00:24:50,00:25:25,00:26:30,00:26:55,00:28:50,00:31:55,00:34:00,00:34:35,00:42:45,00:44:33
030914-558-carib-high_1.mp4,00:10:45,00:12:45,00:14:40,00:16:33,00:19:40,00:21:35,00:21:55,00:23:05,00:26:15,00:27:30,00:29:55,00:31:10,00:31:40,00:36:40,00:41:40,00:42:40,00:44:50,00:49:50,00:52:25,00:53:50,00:54:30,00:55:20,00:55:10,00:57:05,00:57:25,00:59:05,01:00:15,01:02:11,01:03:55,01:05:10
031815-830-carib-1080p.mp4,00:13:15,00:13:25,00:13:55,00:14:40,00:15:40,00:17:30,00:18:20,00:19:10,00:21:00,00:22:10,00:22:25,00:23:25,00:27:10,00:28:33,00:35:05,00:35:40,00:37:50,00:38:00,00:39:35,00:41:35,00:42:40,00:47:40,00:50:33,00:55:50,01:02:10,01:05:20,01:05:30
032016-121-carib-1080p.mp4,00:27:20,00:28:40,00:28:55,00:30:35,00:36:10,00:39:10,00:40:30,00:43:00,00:46:05,00:50:00,00:56:05,00:56:20,00:59:20
032913-301-carib-whole_hd1.wmv,00:06:00,00:09:40,00:11:00,00:13:00,00:15:05,00:16:40,00:18:05,00:20:00,00:39:31,00:34:35,00:44:50,00:47:25,00:49:50,00:51:20,00:54:58,00:56:55,00:59:50,01:00:50
032914-571-carib-high_1.mp4,00:13:30,00:13:55,00:16:40,00:15:25,00:20:40,00:26:45,00:32:05,00:33:15,00:36:40,00:38:55,00:39:00,00:39:25,00:47:30,00:49:20
042514-588-carib-high_1.mp4,00:10:30,00:11:15,00:19:15,00:20:00,00:20:30,00:22:05,00:22:45,00:22:53,00:24:15,00:30:50,00:32:25,00:34:15,00:34:45,00:34:55,0:36:05,00:37:20,00:37:40,00:38:30,00:39:35,00:41:00,00:43:30,00:43:40
052315-884-carib-1080p.mp4,00:09:35,00:14:10,00:14:30,00:14:40,00:17:10,00:17:50,00:19:00,00:20:20,01:21:55,00:22:40,00:23:05,00:24:00,00:26:00,00:27:15,00:30:25,00:32:50,00:37:55,0:39:35,00:40:10,00:41:40,00:43:15,00:43:40,00:47:55,00:49:30,00:49:55,00:58:55,01:00:40
053114-612-carib-high_1.mp4,00:08:35,00:13:35,00:15:25,00:16:40,00:20:35,00:22:25,00:26:10,00:29:10,00:32:55,00:34:10,00:37:05,00:37:40,00:39:40,00:40:52,00:42:08,00:42:15
062615-908-carib-1080p.mp4,00:13:45,00:14:40,00:15:45,00:16:11,00:17:00,00:22:10,00:23:40,00:26:10,00:27:15,00:27:50,00:31:30,00:35:00,00:40:20,00:43:10,00:44:35,00:47:17,00:50:25,00:51:15,00:52:20,00:54:10,00:55:30,01:00:20
\ No newline at end of file
...@@ -6,18 +6,25 @@ import random ...@@ -6,18 +6,25 @@ import random
import sys import sys
sys.path.append("..") sys.path.append("..")
from cores import Options
from util import util from util import util
from util import image_processing as impro from util import image_processing as impro
image_dir = './datasets_img/v2im'
mask_dir = './datasets_img/v2im_mask'
util.makedirs(mask_dir)
files = os.listdir(image_dir) opt = Options()
files_new =files.copy() opt.parser.add_argument('--datadir',type=str,default=' ', help='your images dir')
print('find image:',len(files)) opt.parser.add_argument('--savedir',type=str,default='../datasets/draw/face', help='')
masks = os.listdir(mask_dir) opt = opt.getparse()
print('mask:',len(masks))
mask_savedir = os.path.join(opt.savedir,'mask')
img_savedir = os.path.join(opt.savedir,'origin_image')
util.makedirs(mask_savedir)
util.makedirs(img_savedir)
filepaths = util.Traversal(opt.datadir)
filepaths = util.is_imgs(filepaths)
random.shuffle(filepaths)
print('find image:',len(filepaths))
# mouse callback function # mouse callback function
drawing = False # true if mouse is pressed drawing = False # true if mouse is pressed
...@@ -32,68 +39,58 @@ def draw_circle(event,x,y,flags,param): ...@@ -32,68 +39,58 @@ def draw_circle(event,x,y,flags,param):
elif event == cv2.EVENT_MOUSEMOVE: elif event == cv2.EVENT_MOUSEMOVE:
if drawing == True: if drawing == True:
cv2.circle(img,(x,y),brushsize,(0,255,0),-1) cv2.circle(img_drawn,(x,y),brushsize,(0,255,0),-1)
elif event == cv2.EVENT_LBUTTONUP: elif event == cv2.EVENT_LBUTTONUP:
drawing = False drawing = False
cv2.circle(img,(x,y),brushsize,(0,255,0),-1) cv2.circle(img_drawn,(x,y),brushsize,(0,255,0),-1)
def makemask(img_drawn):
    """Build a white-on-black mask from the pixels painted with the brush colour.

    A pixel counts as painted when its three channels are exactly
    (0, 255, 0), the colour the mouse callback draws with. Painted pixels
    become 255 in every channel of the mask; all other pixels stay 0.

    Args:
        img_drawn: image array indexed as [row, col, channel] with at
            least three channels.

    Returns:
        A uint8 array with the same shape as ``img_drawn``.
    """
    # Compare whole channel planes at once instead of visiting every pixel in
    # a Python double loop; the result is the same but it runs in native code.
    painted = (
        (img_drawn[:, :, 0] == 0)
        & (img_drawn[:, :, 1] == 255)
        & (img_drawn[:, :, 2] == 0)
    )
    mask = np.zeros(img_drawn.shape, np.uint8)
    mask[painted] = 255
    return mask
for i in range(len(masks)):
masks[i]=masks[i].replace('.png','.jpg')
for file in files:
if file in masks:
files_new.remove(file)
files = files_new
# files = list(set(files)) #Distinct
print('remain:',len(files))
random.shuffle(files)
# files.sort()
cnt = 0 cnt = 0
for file in filepaths:
try:
cnt += 1
img = impro.imread(file,loadsize=512)
img_drawn = img.copy()
cv2.namedWindow('image')
cv2.setMouseCallback('image',draw_circle) #MouseCallback
while(1):
for file in files: cv2.imshow('image',img_drawn)
cnt += 1 k = cv2.waitKey(1) & 0xFF
img = cv2.imread(os.path.join(image_dir,file)) if k == ord('s'):
img = impro.resize(img,512)
cv2.namedWindow('image') img_drawn = impro.resize(img_drawn,256)
cv2.setMouseCallback('image',draw_circle) #MouseCallback mask = makemask(img_drawn)
while(1): cv2.imwrite(os.path.join(mask_savedir,os.path.splitext(os.path.basename(file))[0]+'.png'),mask)
cv2.imwrite(os.path.join(img_savedir,os.path.basename(file)),img)
cv2.imshow('image',img) print('Saved:',os.path.join(mask_savedir,os.path.splitext(os.path.basename(file))[0]+'.png'),mask)
k = cv2.waitKey(1) & 0xFF # cv2.destroyAllWindows()
if k == ord(' '): print('remain:',len(filepaths)-cnt)
img = impro.resize(img,256) brushsize = 20
mask = makemask(img) break
cv2.imwrite(os.path.join(mask_dir,os.path.splitext(file)[0]+'.png'),mask) elif k == ord('a'):
print(os.path.join(mask_dir,os.path.splitext(file)[0]+'.png')) brushsize -= 5
# cv2.destroyAllWindows() if brushsize<5:
print('remain:',len(files)-cnt) brushsize = 5
brushsize = 20 print('brushsize:',brushsize)
break elif k == ord('d'):
elif k == ord('a'): brushsize += 5
brushsize -= 5 print('brushsize:',brushsize)
if brushsize<5: elif k == ord('w'):
brushsize = 5 print('remain:',len(filepaths)-cnt)
print('brushsize:',brushsize) break
elif k == ord('d'): except Exception as e:
brushsize += 5 print(file,e)
print('brushsize:',brushsize)
elif k == ord('w'):
print('remain:',len(files)-cnt)
break
# cv2.destroyAllWindows()
\ No newline at end of file
import os import os
import numpy as np
import cv2
import random
import csv
import sys import sys
sys.path.append("..") sys.path.append("..")
from cores import Options
from util import util,ffmpeg from util import util,ffmpeg
from util import image_processing as impro
files = util.Traversal('./videos') opt = Options()
opt.parser.add_argument('--datadir',type=str,default='', help='your video dir')
opt.parser.add_argument('--savedir',type=str,default='../datasets/video2image', help='')
opt = opt.getparse()
files = util.Traversal(opt.datadir)
videos = util.is_videos(files) videos = util.is_videos(files)
output_dir = './datasets_img/v2im'
util.makedirs(output_dir) util.makedirs(opt.savedir)
FPS = 1
util.makedirs(output_dir)
for video in videos: for video in videos:
ffmpeg.continuous_screenshot(video, output_dir, FPS) ffmpeg.continuous_screenshot(video, opt.savedir, opt.fps)
\ No newline at end of file \ No newline at end of file
import os
import random
import sys
import datetime
import time
import shutil
import threading
import warnings
warnings.filterwarnings(action='ignore')
import numpy as np
import cv2
sys.path.append("..")
from models import runmodel,loadmodel
import util.image_processing as impro
from util import util,mosaic,data
from cores import Options
# Script options: the shared project parser plus the dataset-specific flags below.
opt = Options()
opt.parser.add_argument('--datadir',type=str,default='../datasets/draw/face', help='')
opt.parser.add_argument('--savedir',type=str,default='../datasets/pix2pix/face', help='')
opt.parser.add_argument('--name',type=str,default='', help='save name')
opt.parser.add_argument('--mod',type=str,default='drawn', help='drawn | network | irregular | drawn,irregular | network,irregular')
opt.parser.add_argument('--square', action='store_true', help='if specified, crop to square')
opt.parser.add_argument('--irrholedir',type=str,default='../datasets/Irregular_Holes_mask', help='')
opt.parser.add_argument('--hd', action='store_true', help='if false make dataset for pix2pix, if Ture for pix2pix_HD')
opt.parser.add_argument('--savemask', action='store_true', help='if specified,save mask')
opt.parser.add_argument('--outsize', type=int ,default= 512,help='')
opt.parser.add_argument('--fold', type=int ,default= 1,help='')
opt.parser.add_argument('--start', type=int ,default= 0,help='')
opt.parser.add_argument('--minsize', type=int ,default= 128,help='when [square], minimal roi size')
opt.parser.add_argument('--quality', type=int ,default= 40,help='when [square], minimal quality')
opt = opt.getparse()

# Record the options used for this run next to the generated data.
util.makedirs(opt.savedir)
util.writelog(os.path.join(opt.savedir,'opt.txt'),
              str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
# '--mod' is a comma separated list, e.g. 'drawn,irregular'.
opt.mod = (opt.mod).split(',')

#save dir
if opt.hd:
    # pix2pixHD layout: mosaic images in train_A, clean images in train_B.
    train_A_path = os.path.join(opt.savedir,'train_A')
    train_B_path = os.path.join(opt.savedir,'train_B')
    util.makedirs(train_A_path)
    util.makedirs(train_B_path)
else:
    # pix2pix layout: one merged image per sample.
    train_path = os.path.join(opt.savedir,'train')
    util.makedirs(train_path)
if opt.savemask:
    mask_save_path = os.path.join(opt.savedir,'mask')
    util.makedirs(mask_save_path)

#read dir
if 'drawn' in opt.mod:
    # Images and hand-drawn masks are paired by sorted order, so this list
    # must not be replaced or shuffled afterwards.
    imgpaths = util.Traversal(os.path.join(opt.datadir,'origin_image'))
    imgpaths.sort()
    maskpaths = util.Traversal(os.path.join(opt.datadir,'mask'))
    maskpaths.sort()
elif 'network' in opt.mod or 'irregular' in opt.mod:
    # `elif` (not `if`): in 'drawn,irregular' mode the sorted image list above
    # has to survive, otherwise imgpaths[i] no longer matches maskpaths[i].
    imgpaths = util.Traversal(opt.datadir)
    random.shuffle (imgpaths)
if 'irregular' in opt.mod:
    irrpaths = util.Traversal(opt.irrholedir)

#def network
if 'network' in opt.mod:
    net = loadmodel.bisenet(opt,'roi')
# def checksaveimage(opt,img,mask):
# #check
# saveflag = True
# x,y,size,area = impro.boundingSquare(mask, random.uniform(1.4,1.6))
# if area < 1000:
# saveflag = False
# else:
# if opt.square:
# if size < opt.minsize:
# saveflag = False
# else:
# img = impro.resize(img[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
# mask = impro.resize(mask[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
# if impro.Q_lapulase(img)<opt.quality:
# saveflag = False
# else:
# img = impro.resize(img,opt.outsize,interpolation=cv2.INTER_CUBIC)
# if saveflag:
# # add mosaic
# img_mosaic = mosaic.addmosaic_random(img, mask)
# global savecnt
# savecnt += 1
# if opt.hd:
# cv2.imwrite(os.path.join(train_A_path,opt.name+'%06d' % savecnt+'.jpg'), img_mosaic)
# cv2.imwrite(os.path.join(train_B_path,opt.name+'%06d' % savecnt+'.jpg'), img)
# else:
# merge_img = impro.makedataset(img_mosaic, img)
# cv2.imwrite(os.path.join(train_path,opt.name+'%06d' % savecnt+'.jpg'), merge_img)
# if opt.savemask:
# cv2.imwrite(os.path.join(mask_save_path,opt.name+'%06d' % savecnt+'.png'), mask)
print('Find images:',len(imgpaths))
starttime = datetime.datetime.now()
filecnt = 0            # images visited so far (all folds)
savecnt = opt.start    # running index used for output file names
all_length = len(imgpaths)*opt.fold

for fold in range(opt.fold):
    for i, imgpath in enumerate(imgpaths):
        filecnt += 1
        try:
            # load image and get mask
            img = impro.imread(imgpath)
            if 'drawn' in opt.mod:
                mask_drawn = impro.imread(maskpaths[i],'gray')
                mask_drawn = impro.resize_like(mask_drawn, img)
                mask = mask_drawn
            if 'irregular' in opt.mod:
                # Index by the number of masks actually found instead of a
                # hard-coded 12000, which raised IndexError on smaller sets
                # and ignored the tail of larger ones.
                mask_irr = impro.imread(irrpaths[random.randint(0,len(irrpaths)-1)],'gray')
                mask_irr = data.random_transform_single(mask_irr, (img.shape[0],img.shape[1]))
                mask = mask_irr
            if 'network' in opt.mod:
                mask_net = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0]
                if not opt.all_mosaic_area:
                    mask_net = impro.find_mostlikely_ROI(mask_net)
                mask = mask_net
            # Combined modes: keep only the area covered by both masks.
            if opt.mod == ['drawn','irregular']:
                mask = cv2.bitwise_and(mask_irr, mask_drawn)
            if opt.mod == ['network','irregular']:
                mask = cv2.bitwise_and(mask_irr, mask_net)

            #checkandsave
            saveflag = True
            x,y,size,area = impro.boundingSquare(mask, random.uniform(1.4,1.6))
            if area < 1000:
                # mask too small to be worth a training sample
                saveflag = False
            else:
                if opt.square:
                    if size < opt.minsize:
                        saveflag = False
                    else:
                        img = impro.resize(img[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
                        mask = impro.resize(mask[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
                        if impro.Q_lapulase(img)<opt.quality:
                            saveflag = False
                else:
                    img = impro.resize(img,opt.outsize,interpolation=cv2.INTER_CUBIC)

            if saveflag:
                # add mosaic
                img_mosaic = mosaic.addmosaic_random(img, mask)
                savecnt += 1
                savename = opt.name+'%06d' % savecnt
                if opt.hd:
                    cv2.imwrite(os.path.join(train_A_path,savename+'.jpg'), img_mosaic)
                    cv2.imwrite(os.path.join(train_B_path,savename+'.jpg'), img)
                else:
                    merge_img = impro.makedataset(img_mosaic, img)
                    cv2.imwrite(os.path.join(train_path,savename+'.jpg'), merge_img)
                if opt.savemask:
                    cv2.imwrite(os.path.join(mask_save_path,savename+'.png'), mask)
        except Exception as e:
            # Best effort: report the failing image and continue with the next.
            print(imgpath,e)

        if filecnt%10==0:
            endtime = datetime.datetime.now()
            # total_seconds() instead of .seconds: .seconds drops whole days,
            # so the timer wrapped around on runs longer than 24 hours.
            used_time = int((endtime-starttime).total_seconds())
            percent = round(100*filecnt/all_length,1)
            all_time = used_time/filecnt*all_length
            print('\r','',str(filecnt)+'/'+str(all_length)+' ',
                util.get_bar(percent,30),'',
                util.second2stamp(used_time)+'/'+util.second2stamp(all_time),
                'f:'+str(savecnt),end= " ")
\ No newline at end of file
import os
import random
import sys
import datetime
import time
import shutil
import threading
import numpy as np
import cv2
sys.path.append("..")
from models import runmodel,loadmodel
import util.image_processing as impro
from util import util,mosaic,data,ffmpeg
from cores import Options
# Extra command-line flags of this script, registered on the shared parser.
opt = Options()
_EXTRA_ARGS = (
    ('--datadir',     dict(type=str, default='your video dir', help='')),
    ('--savedir',     dict(type=str, default='../datasets/video/face', help='')),
    ('--interval',    dict(type=int, default=30, help='interval of split video ')),
    ('--time',        dict(type=int, default=5, help='split video time')),
    ('--minmaskarea', dict(type=int, default=2000, help='')),
    ('--quality',     dict(type=int, default=45, help='minimal quality')),
    ('--outsize',     dict(type=int, default=286, help='')),
    ('--startcnt',    dict(type=int, default=0, help='')),
    ('--minsize',     dict(type=int, default=96, help='minimal roi size')),
)
for _flag, _kwargs in _EXTRA_ARGS:
    opt.parser.add_argument(_flag, **_kwargs)
opt = opt.getparse()

# Create the output directory and log the options of this run into it.
util.makedirs(opt.savedir)
_run_header = str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt)
util.writelog(os.path.join(opt.savedir,'opt.txt'), _run_header)

# Collect the input videos and process them in random order.
videopaths = util.is_videos(util.Traversal(opt.datadir))
random.shuffle(videopaths)

# def network
net = loadmodel.bisenet(opt,'roi')

# Counters used by the processing loop and its progress display.
result_cnt = opt.startcnt
video_cnt = 1
starttime = datetime.datetime.now()
# For every video: (1) probe short segments at regular intervals and remember
# the start times where every probed frame passes the mask/quality checks,
# (2) export each accepted segment as cropped origin_image/mask frame pairs.
for videopath in videopaths:
    try:
        timestamps=[]
        fps,video_length,height,width = ffmpeg.get_video_infos(videopath)
        for cut_point in range(1,int((video_length-opt.time)/opt.interval)):
            util.clean_tempfiles()
            # one frame per second for the probed segment
            ffmpeg.video2image(videopath, './tmp/video2image/%05d.'+opt.tempimage_type,fps=1,
                start_time = util.second2stamp(cut_point*opt.interval),last_time = util.second2stamp(opt.time))
            imagepaths = util.Traversal('./tmp/video2image')
            cnt = 0
            # Slice instead of indexing range(opt.time): a segment that yields
            # fewer frames is just rejected instead of raising IndexError and
            # throwing away the whole video.
            for imagepath in imagepaths[:opt.time]:
                img = impro.imread(imagepath)
                mask = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0]
                if not opt.all_mosaic_area:
                    mask = impro.find_mostlikely_ROI(mask)
                x,y,size,area = impro.boundingSquare(mask,Ex_mul=1)
                if area > opt.minmaskarea and size>opt.minsize and impro.Q_lapulase(img)>opt.quality:
                    cnt +=1
            if cnt == opt.time:
                # every probed frame passed -> keep this segment
                timestamps.append(util.second2stamp(cut_point*opt.interval))
        util.writelog(os.path.join(opt.savedir,'opt.txt'),videopath+'\n'+str(timestamps))

        #generate datasets
        print('Generate datasets...')
        for timestamp in timestamps:
            savecnt = '%05d' % result_cnt
            origindir = os.path.join(opt.savedir,savecnt,'origin_image')
            maskdir = os.path.join(opt.savedir,savecnt,'mask')
            util.makedirs(origindir)
            util.makedirs(maskdir)

            util.clean_tempfiles()
            ffmpeg.video2image(videopath, './tmp/video2image/%05d.'+opt.tempimage_type,
                start_time = timestamp,last_time = util.second2stamp(opt.time))

            # Progress line. total_seconds() is used because .seconds drops
            # whole days and wraps on very long runs.
            elapsed = int((datetime.datetime.now()-starttime).total_seconds())
            print(str(video_cnt)+'/'+str(len(videopaths))+' ',
                util.get_bar(100*video_cnt/len(videopaths),35),'',
                util.second2stamp(elapsed)+'/'+util.second2stamp(elapsed/video_cnt*len(videopaths)))

            imagepaths = util.Traversal('./tmp/video2image')
            imagepaths = sorted(imagepaths)
            imgs=[];masks=[]
            mask_sum = None
            for imagepath in imagepaths:
                img = impro.imread(imagepath)
                mask = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0]
                imgs.append(img)
                masks.append(mask)
                if mask_sum is None:
                    mask_sum = mask.astype(np.float64)
                else:
                    mask_sum += mask.astype(np.float64)

            # One crop window per segment, taken from the averaged mask so
            # that all frames of the segment share the same region.
            mask_avg = np.clip(mask_sum/len(imagepaths),0,255).astype('uint8')
            mask_avg = impro.mask_threshold(mask_avg,20,64)
            if not opt.all_mosaic_area:
                mask_avg = impro.find_mostlikely_ROI(mask_avg)
            x,y,size,area = impro.boundingSquare(mask_avg,Ex_mul=random.uniform(1.1,1.5))

            for i in range(len(imagepaths)):
                img = impro.resize(imgs[i][y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
                mask = impro.resize(masks[i][y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
                impro.imwrite(os.path.join(origindir,'%05d'%(i+1)+'.jpg'), img)
                impro.imwrite(os.path.join(maskdir,'%05d'%(i+1)+'.png'), mask)

            result_cnt+=1

    except Exception as e:
        # Best effort: log the failure and go on with the next video.
        # video_cnt is NOT incremented here; the statement below already
        # counts this video (it used to be counted twice on failure).
        util.writelog(os.path.join(opt.savedir,'opt.txt'),
            videopath+'\n'+str(result_cnt)+'\n'+str(e))
    video_cnt +=1
import sys
import os
import random
import datetime
import numpy as np
import cv2
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
from torch import optim
from unet import UNet
from mosaic import random_mosaic
import image_processing as impro
def runmodel(img,net):
    """Run `net` on one 128x128 image on the GPU and return an 8-bit map.

    Args:
        img: image accepted by `impro.image2folat(img, 3)`; the converted
            array must hold exactly 3*128*128 values
            (presumably a 128x128 BGR image -- TODO confirm).
        net: network already moved to CUDA; its output must hold 128*128
            values.

    Returns:
        uint8 numpy array of shape (128, 128): the network output times 255.
    """
    img = impro.image2folat(img,3)
    img = img.reshape(1,3,128,128)
    img = torch.from_numpy(img).cuda()
    # Inference only: skip building the autograd graph to save GPU memory.
    with torch.no_grad():
        pred = net(img)
    pred = pred.cpu().detach().numpy()*255
    return pred.reshape(128,128).astype('uint8')
# Input / output locations (relative to the working directory).
dir_img = './origin_image/'
dir_mosaic = './mosaic/'
dir_mask = './mask/'
dir_dataset = './dataset/'
dir_checkpoint = 'checkpoints/'

# Mosaic-position network, used in evaluation mode on the GPU.
net = UNet(n_channels = 3, n_classes = 1)
net.load_state_dict(torch.load(dir_checkpoint+'mosaic_position.pth'))
net.cuda()
net.eval()
# cudnn.benchmark = True

# For every mosaic image: predict the mosaic mask, take a square around it and
# save the (mosaic crop, original crop) pair as one dataset image.
files = os.listdir(dir_mosaic)
for i,file in enumerate(files,1):
    orgin_image = cv2.imread(dir_img+file)
    mosaic_image = cv2.imread(dir_mosaic+file)

    mask = None
    if orgin_image is None or mosaic_image is None:
        # cv2.imread returns None for missing/unreadable files; skip the pair
        # instead of crashing the whole run on the first bad file.
        print('Skip unreadable image pair:',file)
    else:
        img = impro.resize(mosaic_image,128)
        # The network is run on two parts of the resized image and the two
        # predictions are merged back into one mask.
        img1,img2 = impro.spiltimage(img)
        mask1 =runmodel(img1,net)
        mask2 =runmodel(img2,net)
        mask = impro.mergeimage(mask1,mask2,img)
        mask = impro.mask_threshold(mask,blur=5,threshold=128)

    if mask is not None and impro.mask_area(mask) > 1:
        h,w = orgin_image.shape[:2]
        mosaic_image = cv2.resize(mosaic_image,(w,h))
        x,y,size,area = impro.boundingSquare(mask,Ex_mul=1.5)
        # Scale the square from the 128-pixel working size to the original.
        rat = min(orgin_image.shape[:2])/128.0
        x,y,size = int(rat*x),int(rat*y),int(rat*size)
        orgin_crop = orgin_image[y-size:y+size,x-size:x+size]
        mosaic_crop = mosaic_image[y-size:y+size,x-size:x+size]
        result = impro.makedataset(mosaic_crop,orgin_crop)
        cv2.imwrite(dir_dataset+file,result)

    if i%1000==0:
        print(i,'image finished.')
import os
import numpy as np
import cv2
import random
import sys
sys.path.append("..")
from models import runmodel,loadmodel
from util import mosaic,util,ffmpeg,filt
from util import image_processing as impro
from cores import options
opt = options.Options().getparse()
util.file_init(opt)

videos = os.listdir('./video')
videos.sort()

# Fixed settings of this script.
opt.model_path = '../pretrained_models/add_youknow_128.pth'
opt.use_gpu = True
Ex = 1.4                # enlargement factor of the crop square
Area_Type = 'normal'
suffix = ''             # appended to each video's dataset folder name

net = loadmodel.unet(opt)

# One dataset folder (mask / ori / mosaic) per input video.
# The index starts at 1 so the progress bar reaches 100% on the last video.
for i,path in enumerate(videos,1):
    try:
        path = os.path.join('./video',path)
        util.clean_tempfiles()
        ffmpeg.video2voice(path,'./tmp/voice_tmp.mp3')
        ffmpeg.video2image(path,'./tmp/video2image/output_%05d.'+opt.tempimage_type)
        imagepaths=os.listdir('./tmp/video2image')
        imagepaths.sort()

        # get position
        positions = []
        img_ori_example = impro.imread(os.path.join('./tmp/video2image',imagepaths[0]))
        mask_avg = np.zeros((impro.resize(img_ori_example, 128)).shape[:2])
        for imagepath in imagepaths:
            imagepath = os.path.join('./tmp/video2image',imagepath)
            img = impro.imread(imagepath)
            x,y,size,mask = runmodel.get_mosaic_position(img,net,opt,threshold = 80)
            cv2.imwrite(os.path.join('./tmp/ROI_mask',
                os.path.basename(imagepath)),mask)
            positions.append([x,y,size])
            mask_avg = mask_avg + mask

        # Median-filter the per-frame positions; the result is used below as
        # the index of the mask file to load for each frame.
        mask_index = filt.position_medfilt(np.array(positions), 13)

        # A single crop square for the whole video, from the averaged mask.
        mask = np.clip(mask_avg/len(imagepaths),0,255).astype('uint8')
        mask = impro.mask_threshold(mask,20,32)
        x,y,size,area = impro.boundingSquare(mask,Ex_mul=Ex)
        rat = min(img_ori_example.shape[:2])/128.0
        x,y,size = int(rat*x),int(rat*y),int(rat*size)
        cv2.imwrite(os.path.join('./tmp/ROI_mask_check',
            'test_show.png'),mask)

        if size !=0 :
            sample_root = './dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix
            mask_path = sample_root+'/mask'
            ori_path = sample_root+'/ori'
            mosaic_path = sample_root+'/mosaic'
            # os.makedirs raises if a folder already exists; the except below
            # then reports it and this video is skipped, as before.
            os.makedirs(sample_root)
            os.makedirs(mask_path)
            os.makedirs(ori_path)
            os.makedirs(mosaic_path)

            # One randomly chosen mosaic style per video.
            mosaic_size = mosaic.get_autosize(img_ori_example,mask,area_type = Area_Type)*random.uniform(1,2)
            models = ['squa_avg','rect_avg','squa_mid']
            mosaic_type = random.randint(0,len(models)-1)
            rect_rat = random.uniform(1.2,1.6)

            # Own loop variable: the original reused `i` here, overwriting the
            # video index that the progress bar below depends on.
            for frame_idx in range(len(imagepaths)):
                mask = impro.imread(os.path.join('./tmp/ROI_mask',imagepaths[mask_index[frame_idx]]),mod = 'gray')
                img_ori = impro.imread(os.path.join('./tmp/video2image',imagepaths[frame_idx]))
                img_mosaic = mosaic.addmosaic_normal(img_ori,mask,mosaic_size,model = models[mosaic_type],rect_rat=rect_rat)
                mask = impro.resize(mask, min(img_ori.shape[:2]))

                img_ori_crop = impro.resize(img_ori[y-size:y+size,x-size:x+size],256)
                img_mosaic_crop = impro.resize(img_mosaic[y-size:y+size,x-size:x+size],256)
                mask_crop = impro.resize(mask[y-size:y+size,x-size:x+size],256)

                cv2.imwrite(os.path.join(ori_path,os.path.basename(imagepaths[frame_idx])),img_ori_crop)
                cv2.imwrite(os.path.join(mosaic_path,os.path.basename(imagepaths[frame_idx])),img_mosaic_crop)
                cv2.imwrite(os.path.join(mask_path,os.path.basename(imagepaths[frame_idx])),mask_crop)
    except Exception as e:
        # Best effort: report the error and continue with the next video.
        print(e)

    print(util.get_bar(100*i/len(videos),num=50))
\ No newline at end of file
import numpy as np
import cv2
import os
from torchvision import transforms
from PIL import Image
import random
import sys
sys.path.append("..")
import util.image_processing as impro
from util import util,mosaic
import datetime
import shutil
# ---- configuration ---------------------------------------------------------
mask_dir = '/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/mask'
img_dir ='/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/origin_image'
output_dir = './datasets_img'
util.makedirs(output_dir)
HD = True # if false make dataset for pix2pix, if True for pix2pix_HD
MASK = True # if True, output mask,too
OUT_SIZE = 256
FOLD_NUM = 2
Bounding = False

# ---- output folders --------------------------------------------------------
if not HD:
    train_path = os.path.join(output_dir,'train')
    util.makedirs(train_path)
else:
    train_A_path = os.path.join(output_dir,'train_A')
    train_B_path = os.path.join(output_dir,'train_B')
    for _folder in (train_A_path, train_B_path):
        util.makedirs(_folder)
if MASK:
    mask_path = os.path.join(output_dir,'mask')
    util.makedirs(mask_path)

# ---- inputs: images and masks are paired by sorted file name order ---------
mask_names = os.listdir(mask_dir)
img_names = os.listdir(img_dir)
mask_names.sort()
img_names.sort()
print('Find images:',len(img_names))

# ---- generation ------------------------------------------------------------
# Every image is visited FOLD_NUM times; `cnt` numbers the visits and is also
# the output file index, so skipped/failed visits leave gaps in the numbering.
cnt = 0
for fold in range(FOLD_NUM):
    for img_name,mask_name in zip(img_names,mask_names):
        try:
            img = impro.imread(os.path.join(img_dir,img_name))
            mask = impro.resize_like(
                impro.imread(os.path.join(mask_dir,mask_name),'gray'), img)
            x,y,size,area = impro.boundingSquare(mask, 1.5)
            if area > 100:
                if Bounding:
                    # crop image and mask to the square around the mask
                    top, bottom = y-size, y+size
                    left, right = x-size, x+size
                    img = impro.resize(img[top:bottom,left:right],OUT_SIZE)
                    mask = impro.resize(mask[top:bottom,left:right],OUT_SIZE)
                img_mosaic = mosaic.addmosaic_random(img, mask)
                stem = '%05d' % cnt
                if not HD:
                    merge_img = impro.makedataset(img_mosaic, img)
                    cv2.imwrite(os.path.join(train_path,stem+'.jpg'), merge_img)
                else:
                    cv2.imwrite(os.path.join(train_A_path,stem+'.jpg'), img_mosaic)
                    cv2.imwrite(os.path.join(train_B_path,stem+'.jpg'), img)
                if MASK:
                    cv2.imwrite(os.path.join(mask_path,stem+'.png'), mask)
            print("Processing:",img_name," ","Remain:",len(img_names)*FOLD_NUM-cnt)
        except Exception as e:
            print(img_name,e)
        cnt += 1
import numpy as np
import cv2
import os
from torchvision import transforms
from PIL import Image
import random
import sys
sys.path.append("..")
import util.image_processing as impro
from util import util,mosaic
import datetime
# ---- configuration ---------------------------------------------------------
ir_mask_path = './Irregular_Holes_mask'
img_dir ='/media/hypo/Hypoyun/Datasets/other/face512'
MOD = 'mosaic' #HD | pix2pix | mosaic
MASK = False # if True, output mask,too
BOUNDING = True # if true the mosaic size will be more big
suffix = '_1'
output_dir = os.path.join('./datasets_img',MOD)
util.makedirs(output_dir)

# ---- output folders (depend on the dataset layout selected by MOD) ----------
if MOD == 'HD':
    train_A_path = os.path.join(output_dir,'train_A')
    train_B_path = os.path.join(output_dir,'train_B')
    util.makedirs(train_A_path)
    util.makedirs(train_B_path)
elif MOD == 'pix2pix':
    train_path = os.path.join(output_dir,'train')
    util.makedirs(train_path)
elif MOD == 'mosaic':
    ori_path = os.path.join(output_dir,'ori')
    mosaic_path = os.path.join(output_dir,'mosaic')
    mask_path = os.path.join(output_dir,'mask')
    util.makedirs(ori_path)
    util.makedirs(mosaic_path)
    util.makedirs(mask_path)
if MASK:
    mask_path = os.path.join(output_dir,'mask')
    util.makedirs(mask_path)

# ---- random augmentation of masks, fixed 512 crop for images ----------------
transform_mask = transforms.Compose([
    transforms.RandomResizedCrop(size=512, scale=(0.5,1)),
    transforms.RandomHorizontalFlip(),
    ])
transform_img = transforms.Compose([
    transforms.Resize(512),
    transforms.RandomCrop(512)
    ])

mask_names = os.listdir(ir_mask_path)
img_paths = util.is_imgs(util.Traversal(img_dir))
print('Find images:',len(img_paths))


def _random_mask():
    # Load one randomly chosen hole mask and apply the random mask transform.
    picked = random.choices(mask_names)[0]
    return np.array(transform_mask(Image.open(os.path.join(ir_mask_path,picked))))


# ---- generation ------------------------------------------------------------
for i,img_path in enumerate(img_paths,1):
    try:
        # PIL image -> numpy, channel order reversed for the cv2 writers
        img = np.array(transform_img(Image.open(img_path)))[...,::-1]

        if not BOUNDING:
            mask = _random_mask()
            mosaic_img = mosaic.addmosaic_random(img, mask)
        else:
            # redraw until the mask covers at least 16384 pixels
            mosaic_area = 0
            while mosaic_area < 16384:
                mask = _random_mask()
                mosaic_area = impro.mask_area(mask)
            mosaic_img = mosaic.addmosaic_random(img, mask,'bounding')

        stem = '%05d' % i+suffix
        if MOD == 'HD':#[128:384,128:384,:] --->256
            cv2.imwrite(os.path.join(train_A_path,stem+'.jpg'), mosaic_img)
            cv2.imwrite(os.path.join(train_B_path,stem+'.jpg'), img)
            if MASK:
                cv2.imwrite(os.path.join(mask_path,stem+'.png'), mask)
        elif MOD == 'pix2pix':
            merge_img = impro.makedataset(mosaic_img, img)
            cv2.imwrite(os.path.join(train_path,stem+'.jpg'), merge_img)
        elif MOD == 'mosaic':
            cv2.imwrite(os.path.join(mosaic_path,stem+'.jpg'), mosaic_img)
            cv2.imwrite(os.path.join(ori_path,stem+'.jpg'), img)
            cv2.imwrite(os.path.join(mask_path,stem+'.png'), mask)

        print('\r','Proc/all:'+str(i)+'/'+str(len(img_paths)),util.get_bar(100*i/len(img_paths),num=40),end='')
    except Exception as e:
        print(img_path,e)
# This code is cloned from https://github.com/ooooverflow/BiSeNet
import torch.nn as nn
import torch
import torch.nn.functional as F
from . import components
import warnings
warnings.filterwarnings(action='ignore')
def flatten(tensor):
    """Bring the channel axis to the front and collapse all other axes.

    Shape example:
        (N, C, D, H, W) -> (C, N * D * H * W)
    """
    channels = tensor.size(1)
    # Swapping axes 0 and 1 gives (C, N, ...); the remaining axes keep their
    # order. contiguous() is required before view() on the transposed tensor.
    channel_first = tensor.transpose(0, 1).contiguous()
    return channel_first.view(channels, -1)
class DiceLoss(nn.Module):
    """Soft Dice loss over channel-wise softmax probabilities.

    For each channel c the Dice coefficient is
        dice_c = (2 * sum(p_c * t_c) + eps) / (sum(p_c) + sum(t_c) + eps)
    where the sums run over the batch and all spatial positions. The loss is
    1 - mean_c(dice_c): 0 for a perfect prediction, close to 1 for a
    completely wrong one.
    """

    def __init__(self):
        super().__init__()
        # Keeps the ratio defined when a channel is empty in both the
        # prediction and the target (0/0 would otherwise yield NaN).
        self.epsilon = 1e-5

    def forward(self, output, target):
        """Return the scalar Dice loss.

        Args:
            output: raw scores of shape (N, C, ...); softmax is applied over
                dim 1 here.
            target: tensor of the same shape as `output`.

        Raises:
            AssertionError: if the two shapes differ.
        """
        assert output.size() == target.size(), "'input' and 'target' must have the same shape"
        output = F.softmax(output, dim=1)
        # Reduce over every axis except the channel axis (dim 1).
        reduce_dims = (0,) + tuple(range(2, output.dim()))
        intersect = (output * target).sum(reduce_dims)
        denominator = (output + target).sum(reduce_dims)
        # The factor 2 was missing, so a perfect prediction gave dice = 0.5
        # and the loss could never go below 0.5.
        dice = (2. * intersect + self.epsilon) / (denominator + self.epsilon)
        return 1 - torch.mean(dice)
class resnet18(torch.nn.Module):
    """Context-path backbone built from `components.resnet18`.

    The layers of the wrapped network are re-exposed as attributes of this
    module; `forward` returns two of the deepest feature maps plus a globally
    averaged "tail".
    """

    def __init__(self, pretrained=True):
        super().__init__()
        backbone = components.resnet18(pretrained=pretrained)
        self.features = backbone
        self.conv1, self.bn1, self.relu = backbone.conv1, backbone.bn1, backbone.relu
        self.maxpool1 = backbone.maxpool
        for stage in ('layer1', 'layer2', 'layer3', 'layer4'):
            setattr(self, stage, getattr(backbone, stage))

    def forward(self, input):
        """Return (feature3, feature4, tail) for `input`."""
        stem = self.maxpool1(self.relu(self.bn1(self.conv1(input))))
        feature2 = self.layer2(self.layer1(stem))  # 1 / 4 then 1 / 8
        feature3 = self.layer3(feature2)           # 1 / 16
        feature4 = self.layer4(feature3)           # 1 / 32
        # global average pooling to build tail (width first, then height)
        tail = feature4.mean(dim=3, keepdim=True).mean(dim=2, keepdim=True)
        return feature3, feature4, tail
class resnet101(torch.nn.Module):
    """Context-path backbone built from `components.resnet101`.

    The layers of the wrapped network are re-exposed as attributes of this
    module; `forward` returns two of the deepest feature maps plus a globally
    averaged "tail".
    """

    def __init__(self, pretrained=True):
        super().__init__()
        backbone = components.resnet101(pretrained=pretrained)
        self.features = backbone
        self.conv1, self.bn1, self.relu = backbone.conv1, backbone.bn1, backbone.relu
        self.maxpool1 = backbone.maxpool
        for stage in ('layer1', 'layer2', 'layer3', 'layer4'):
            setattr(self, stage, getattr(backbone, stage))

    def forward(self, input):
        """Return (feature3, feature4, tail) for `input`."""
        stem = self.maxpool1(self.relu(self.bn1(self.conv1(input))))
        feature2 = self.layer2(self.layer1(stem))  # 1 / 4 then 1 / 8
        feature3 = self.layer3(feature2)           # 1 / 16
        feature4 = self.layer4(feature3)           # 1 / 32
        # global average pooling to build tail (width first, then height)
        tail = feature4.mean(dim=3, keepdim=True).mean(dim=2, keepdim=True)
        return feature3, feature4, tail
def build_contextpath(name,pretrained):
    """Build the context-path backbone called `name`.

    Args:
        name: 'resnet18' or 'resnet101'.
        pretrained: forwarded to the backbone constructor.

    Raises:
        KeyError: if `name` is not a supported backbone.
    """
    # Map names to constructors and instantiate only the requested one. The
    # previous dict literal built BOTH networks on every call (including
    # both sets of pretrained weights) just to return one of them.
    builders = {
        'resnet18': resnet18,
        'resnet101': resnet101,
    }
    return builders[name](pretrained=pretrained)
class ConvBlock(torch.nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d and ReLU.

    With the defaults (3x3 kernel, stride 2, padding 1) the block halves the
    spatial size of its input.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=2,padding=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size,
                               stride, padding, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, input):
        """Apply conv -> batch norm -> ReLU."""
        normalized = self.bn(self.conv1(input))
        return self.relu(normalized)
class Spatial_path(torch.nn.Module):
    """Spatial path: three ConvBlocks in sequence (3 -> 64 -> 128 -> 256 channels).

    Each block uses the ConvBlock defaults, so the spatial size is halved
    three times (1/8 of the input resolution).
    """

    def __init__(self):
        super().__init__()
        widths = (3, 64, 128, 256)
        for index in range(1, len(widths)):
            setattr(self, 'convblock%d' % index,
                    ConvBlock(in_channels=widths[index - 1], out_channels=widths[index]))

    def forward(self, input):
        """Run the three blocks one after another."""
        return self.convblock3(self.convblock2(self.convblock1(input)))
class AttentionRefinementModule(torch.nn.Module):
    """Channel attention: scale the input by a gate computed from its global average.

    gate = sigmoid(conv1x1(global_avg_pool(input))); output = input * gate.
    A BatchNorm2d layer is created as `self.bn` but is not applied in
    `forward`.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        self.bn = nn.BatchNorm2d(out_channels)
        self.sigmoid = nn.Sigmoid()
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.in_channels = in_channels

    def forward(self, input):
        """Return `input` multiplied by its per-channel attention gate.

        Raises:
            AssertionError: if the channel count of `input` differs from
                `in_channels`.
        """
        # global average pooling -> one value per channel
        pooled = self.avgpool(input)
        assert self.in_channels == pooled.size(1), 'in_channels and out_channels should all be {}'.format(pooled.size(1))
        gate = self.sigmoid(self.conv(pooled))
        # the gate is broadcast over the spatial axes of the input
        return torch.mul(input, gate)
class FeatureFusionModule(torch.nn.Module):
    """Fuse two feature maps: concatenate, convolve, then re-weight channels.

    The concatenation goes through a stride-1 ConvBlock; a gate computed from
    the global average of that feature (conv1 -> ReLU -> conv2 -> sigmoid)
    scales it, and the unscaled feature is added back.
    """

    def __init__(self, num_classes, in_channels):
        super().__init__()
        # in_channels = channels of input_1 + channels of input_2
        # resnet101 3328 = 256(from context path) + 1024(from spatial path) + 2048(from spatial path)
        # resnet18  1024 = 256(from context path) + 256(from spatial path) + 512(from spatial path)
        self.in_channels = in_channels
        self.convblock = ConvBlock(in_channels=in_channels, out_channels=num_classes, stride=1)
        self.conv1 = nn.Conv2d(num_classes, num_classes, kernel_size=1)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(num_classes, num_classes, kernel_size=1)
        self.sigmoid = nn.Sigmoid()
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

    def forward(self, input_1, input_2):
        """Return the fused feature map of `input_1` and `input_2`.

        Raises:
            AssertionError: if the concatenated channel count differs from
                `in_channels`.
        """
        stacked = torch.cat((input_1, input_2), dim=1)
        assert self.in_channels == stacked.size(1), 'in_channels of ConvBlock should be {}'.format(stacked.size(1))
        feature = self.convblock(stacked)
        gate = self.relu(self.conv1(self.avgpool(feature)))
        gate = self.sigmoid(self.conv2(gate))
        weighted = torch.mul(feature, gate)
        return torch.add(weighted, feature)
class BiSeNet(torch.nn.Module):
    """BiSeNet segmentation network: spatial path + ResNet context path.

    Args:
        num_classes: number of output channels.
        context_path: 'resnet18' or 'resnet101'.
        train_flag: when True the context path is built with pretrained=True,
            otherwise with pretrained=False.

    In training mode `forward` returns three sigmoid maps (main output and two
    auxiliary supervision outputs); in eval mode only the main output.

    Note: the attribute name `saptial_path` (sic) is kept on purpose, because
    it is part of the state_dict keys of existing checkpoints.
    """

    def __init__(self, num_classes, context_path, train_flag=True):
        super().__init__()
        # build spatial path
        self.saptial_path = Spatial_path()
        self.sigmoid = nn.Sigmoid()

        # build context path (pretrained weights only when training)
        self.context_path = build_contextpath(name=context_path,pretrained=bool(train_flag))

        # channel widths of (feature3, feature4) delivered by the context path
        if context_path == 'resnet101':
            mid_channels, deep_channels, fusion_channels = 1024, 2048, 3328
        elif context_path == 'resnet18':
            mid_channels, deep_channels, fusion_channels = 256, 512, 1024
        else:
            # Previously this only printed a message and construction then
            # failed later with an unrelated AttributeError.
            raise ValueError('Error: unspport context_path network: {}'.format(context_path))

        # build attention refinement modules
        self.attention_refinement_module1 = AttentionRefinementModule(mid_channels, mid_channels)
        self.attention_refinement_module2 = AttentionRefinementModule(deep_channels, deep_channels)
        # supervision block
        self.supervision1 = nn.Conv2d(in_channels=mid_channels, out_channels=num_classes, kernel_size=1)
        self.supervision2 = nn.Conv2d(in_channels=deep_channels, out_channels=num_classes, kernel_size=1)
        # build feature fusion module
        self.feature_fusion_module = FeatureFusionModule(num_classes, fusion_channels)

        # build final convolution
        self.conv = nn.Conv2d(in_channels=num_classes, out_channels=num_classes, kernel_size=1)

        self.init_weight()

        # Every module except the context path, collected in a plain list
        # (presumably for a separate learning rate -- TODO confirm in the
        # training script).
        self.mul_lr = [
            self.saptial_path,
            self.attention_refinement_module1,
            self.attention_refinement_module2,
            self.supervision1,
            self.supervision2,
            self.feature_fusion_module,
            self.conv,
        ]

    def init_weight(self):
        """Initialise all Conv2d/BatchNorm2d layers outside the context path."""
        for name, m in self.named_modules():
            if 'context_path' in name:
                continue
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                m.eps = 1e-5
                m.momentum = 0.1
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, input):
        """Segment `input`.

        Returns:
            eval mode: sigmoid(main output).
            training mode: (sigmoid(main), sigmoid(aux1), sigmoid(aux2)).
            All maps have the spatial size of `input`.
        """
        interpolate = torch.nn.functional.interpolate
        input_size = input.size()[-2:]

        # output of spatial path
        sx = self.saptial_path(input)

        # output of context path
        cx1, cx2, tail = self.context_path(input)
        cx1 = self.attention_refinement_module1(cx1)
        cx2 = self.attention_refinement_module2(cx2)
        cx2 = torch.mul(cx2, tail)

        # upsample both context features to the spatial-path resolution
        # (align_corners=False is the default behaviour, stated explicitly)
        cx1 = interpolate(cx1, size=sx.size()[-2:], mode='bilinear', align_corners=False)
        cx2 = interpolate(cx2, size=sx.size()[-2:], mode='bilinear', align_corners=False)
        cx = torch.cat((cx1, cx2), dim=1)

        if self.training:
            # auxiliary supervision heads, only needed for the training loss
            cx1_sup = interpolate(self.supervision1(cx1), size=input_size, mode='bilinear', align_corners=False)
            cx2_sup = interpolate(self.supervision2(cx2), size=input_size, mode='bilinear', align_corners=False)

        # output of feature fusion module
        result = self.feature_fusion_module(sx, cx)

        # Upsample to the input size. This equals the former scale_factor=8
        # whenever the input size is a multiple of 8, and additionally keeps
        # the main output aligned with the auxiliary heads for other sizes.
        result = interpolate(result, size=input_size, mode='bilinear', align_corners=False)
        result = self.conv(result)

        if self.training:
            return self.sigmoid(result), self.sigmoid(cx1_sup), self.sigmoid(cx2_sup)
        return self.sigmoid(result)
\ No newline at end of file
from .pix2pix_model import *
from .unet_model import UNet
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
# Names exported by `from <this module> import *`.
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
# Checkpoint URLs (.pth files on download.pytorch.org), keyed by architecture
# name. Presumably fetched through `model_zoo` when pretrained weights are
# requested -- the code doing so is outside this view.
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 Conv2d with padding 1 and the given stride."""
    layer = nn.Conv2d(in_planes, out_planes, 3, stride, 1, bias=False)
    return layer
def conv1x1(in_planes, out_planes, stride=1):
    """Return a bias-free 1x1 Conv2d with the given stride (no padding)."""
    layer = nn.Conv2d(in_planes, out_planes, 1, stride, bias=False)
    return layer
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Args:
        inplanes: input channels.
        planes: output channels.
        stride: stride of the first convolution.
        downsample: optional callable applied to the input for the shortcut.
        norm_layer: normalisation layer factory; nn.BatchNorm2d when None.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = make_norm(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = make_norm(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return relu(main_path(x) + shortcut(x))."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # the shortcut is the input itself unless a downsample is configured
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """Residual block: 1x1 conv, 3x3 conv, then 1x1 conv widening by ``expansion``.

    Args:
        inplanes: number of input channels.
        planes: channel width of the first two convolutions; the block
            outputs ``planes * expansion`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input before it is added
            back to the convolution branch.
        norm_layer: callable building the normalisation layer; falls back to
            ``nn.BatchNorm2d`` when None.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
        super(Bottleneck, self).__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        widened = planes * self.expansion
        # When stride != 1 both self.conv2 and self.downsample reduce the
        # spatial size of the input.
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = make_norm(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = make_norm(planes)
        self.conv3 = conv1x1(planes, widened)
        self.bn3 = make_norm(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three-convolution branch, add the shortcut, then apply ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # The shortcut is the raw input unless a projection module was given.
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """ResNet classifier: conv stem, four residual stages, pooling and a linear head.

    Args:
        block: residual block class (``BasicBlock`` or ``Bottleneck``); must
            expose an ``expansion`` class attribute.
        layers: sequence of four ints, the number of blocks in each stage.
        num_classes: size of the final linear layer's output.
        zero_init_residual: if True, zero the weight of the last norm layer
            in every residual branch.
        norm_layer: callable building the normalisation layer; falls back to
            ``nn.BatchNorm2d`` when None.
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, norm_layer=None):
        super(ResNet, self).__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer

        # Stem.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages layer1..layer4, registered in order as (planes, stride).
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for index, (planes, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, layers[index],
                                     stride=stride, norm_layer=norm_layer)
            setattr(self, 'layer%d' % (index + 1), stage)

        # Classification head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                continue
            if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        # Zero-initialize the last BN in each residual branch, so that the
        # residual branch starts with zeros and each residual block behaves
        # like an identity. This improves the model by 0.2~0.3% according to
        # https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for module in self.modules():
                if isinstance(module, Bottleneck):
                    nn.init.constant_(module.bn3.weight, 0)
                elif isinstance(module, BasicBlock):
                    nn.init.constant_(module.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, norm_layer=None):
        """Assemble one residual stage of ``blocks`` blocks and update ``self.inplanes``."""
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        out_planes = planes * block.expansion

        # A projection shortcut is needed whenever the first block changes
        # the resolution or the channel count.
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, out_planes, stride),
                norm_layer(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample, norm_layer)]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes, norm_layer=norm_layer)
                     for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the ``fc`` output for the input batch ``x``."""
        stem = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        features = self.layer4(self.layer3(self.layer2(self.layer1(stem))))
        pooled = self.avgpool(features)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)
def resnet18(pretrained=False, **kwargs):
    """Construct a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded to the ``ResNet`` constructor.
    """
    net = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet18'])
    net.load_state_dict(weights)
    return net
def resnet34(pretrained=False, **kwargs):
    """Construct a ResNet-34 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded to the ``ResNet`` constructor.
    """
    net = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet34'])
    net.load_state_dict(weights)
    return net
def resnet50(pretrained=False, **kwargs):
    """Construct a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded to the ``ResNet`` constructor.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet50'])
    net.load_state_dict(weights)
    return net
def resnet101(pretrained=False, **kwargs):
    """Construct a ResNet-101 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded to the ``ResNet`` constructor.
    """
    net = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet101'])
    net.load_state_dict(weights)
    return net
def resnet152(pretrained=False, **kwargs):
    """Construct a ResNet-152 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded to the ``ResNet`` constructor.
    """
    net = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet152'])
    net.load_state_dict(weights)
    return net
\ No newline at end of file
...@@ -4,6 +4,7 @@ from .pix2pixHD_model import define_G as define_G_HD ...@@ -4,6 +4,7 @@ from .pix2pixHD_model import define_G as define_G_HD
from .unet_model import UNet from .unet_model import UNet
from .video_model import MosaicNet from .video_model import MosaicNet
from .videoHD_model import MosaicNet as MosaicNet_HD from .videoHD_model import MosaicNet as MosaicNet_HD
from .BiSeNet_model import BiSeNet
def show_paramsnumber(net,netname='net'): def show_paramsnumber(net,netname='net'):
parameters = sum(param.numel() for param in net.parameters()) parameters = sum(param.numel() for param in net.parameters())
...@@ -75,21 +76,35 @@ def video(opt): ...@@ -75,21 +76,35 @@ def video(opt):
netG.cuda() netG.cuda()
return netG return netG
def bisenet(opt,type='roi'):
def unet_clean(opt): '''
net = UNet(n_channels = 3, n_classes = 1) type: roi or mosaic
'''
net = BiSeNet(num_classes=1, context_path='resnet18',train_flag=False)
show_paramsnumber(net,'segment') show_paramsnumber(net,'segment')
net.load_state_dict(torch.load(opt.mosaic_position_model_path)) if type == 'roi':
net.load_state_dict(torch.load(opt.model_path))
elif type == 'mosaic':
net.load_state_dict(torch.load(opt.mosaic_position_model_path))
net.eval() net.eval()
if opt.use_gpu: if opt.use_gpu:
net.cuda() net.cuda()
return net return net
def unet(opt): # def unet_clean(opt):
net = UNet(n_channels = 3, n_classes = 1) # net = UNet(n_channels = 3, n_classes = 1)
show_paramsnumber(net,'segment') # show_paramsnumber(net,'segment')
net.load_state_dict(torch.load(opt.model_path)) # net.load_state_dict(torch.load(opt.mosaic_position_model_path))
net.eval() # net.eval()
if opt.use_gpu: # if opt.use_gpu:
net.cuda() # net.cuda()
return net # return net
# def unet(opt):
# net = UNet(n_channels = 3, n_classes = 1)
# show_paramsnumber(net,'segment')
# net.load_state_dict(torch.load(opt.model_path))
# net.eval()
# if opt.use_gpu:
# net.cuda()
# return net
...@@ -7,7 +7,7 @@ from util import data ...@@ -7,7 +7,7 @@ from util import data
import torch import torch
import numpy as np import numpy as np
def run_unet(img,net,size = 224,use_gpu = True): def run_segment(img,net,size = 360,use_gpu = True):
img = impro.resize(img,size) img = impro.resize(img,size)
img = data.im2tensor(img,use_gpu = use_gpu, bgr2rgb = False,use_transform = False , is0_1 = True) img = data.im2tensor(img,use_gpu = use_gpu, bgr2rgb = False,use_transform = False , is0_1 = True)
mask = net(img) mask = net(img)
...@@ -60,18 +60,26 @@ def run_styletransfer(opt, net, img): ...@@ -60,18 +60,26 @@ def run_styletransfer(opt, net, img):
img = data.tensor2im(img) img = data.tensor2im(img)
return img return img
def get_ROI_position(img,net,opt): def get_ROI_position(img,net,opt,keepsize=True):
mask = run_unet(img,net,size=224,use_gpu = opt.use_gpu) mask = run_segment(img,net,size=360,use_gpu = opt.use_gpu)
mask = impro.mask_threshold(mask,opt.mask_extend,opt.mask_threshold) mask = impro.mask_threshold(mask,opt.mask_extend,opt.mask_threshold)
if keepsize:
mask = impro.resize_like(mask, img)
x,y,halfsize,area = impro.boundingSquare(mask, 1) x,y,halfsize,area = impro.boundingSquare(mask, 1)
return mask,x,y,area return mask,x,y,halfsize,area
def get_mosaic_position(img_origin,net_mosaic_pos,opt,threshold = 128 ): def get_mosaic_position(img_origin,net_mosaic_pos,opt):
mask = run_unet(img_origin,net_mosaic_pos,size=224,use_gpu = opt.use_gpu) h,w = img_origin.shape[:2]
mask = impro.mask_threshold(mask,30,threshold) mask = run_segment(img_origin,net_mosaic_pos,size=360,use_gpu = opt.use_gpu)
# mask_1 = mask.copy()
mask = impro.mask_threshold(mask,ex_mun=int(min(h,w)/20),threshold=opt.mask_threshold)
if not opt.all_mosaic_area: if not opt.all_mosaic_area:
mask = impro.find_mostlikely_ROI(mask) mask = impro.find_mostlikely_ROI(mask)
x,y,size,area = impro.boundingSquare(mask,Ex_mul=opt.ex_mult) x,y,size,area = impro.boundingSquare(mask,Ex_mul=opt.ex_mult)
rat = min(img_origin.shape[:2])/224.0 #Location fix
rat = min(h,w)/360.0
x,y,size = int(rat*x),int(rat*y),int(rat*size) x,y,size = int(rat*x),int(rat*y),int(rat*size)
x,y = np.clip(x, 0, w),np.clip(y, 0, h)
size = np.clip(size, 0, min(w-x,h-y))
# print(x,y,size)
return x,y,size,mask return x,y,size,mask
\ No newline at end of file
# This code clone from https://github.com/milesial/Pytorch-UNet # This code clone from https://github.com/milesial/Pytorch-UNet
# LICENSE file : https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE # LICENSE file : https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE
# full assembly of the sub-parts to form the complete net import torch
import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from .unet_parts import *
class double_conv(nn.Module):
    """Two consecutive (3x3 conv => BatchNorm => ReLU) stages.

    Args:
        in_ch: channels of the input tensor.
        out_ch: channels produced by both convolutions.
    """

    def __init__(self, in_ch, out_ch):
        super(double_conv, self).__init__()
        stages = []
        # The first stage maps in_ch -> out_ch, the second keeps out_ch.
        for source_ch in (in_ch, out_ch):
            stages.append(nn.Conv2d(source_ch, out_ch, 3, padding=1))
            stages.append(nn.BatchNorm2d(out_ch))
            stages.append(nn.ReLU(inplace=True))
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both conv stages to ``x``."""
        return self.conv(x)
class inconv(nn.Module):
    """Input stage of the U-Net: a single ``double_conv`` from ``in_ch`` to ``out_ch``."""

    def __init__(self, in_ch, out_ch):
        super(inconv, self).__init__()
        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x):
        """Pass ``x`` through the wrapped double convolution."""
        return self.conv(x)
class down(nn.Module):
    """Encoder step: 2x2 max-pooling followed by a ``double_conv``."""

    def __init__(self, in_ch, out_ch):
        super(down, self).__init__()
        pool = nn.MaxPool2d(2)
        convs = double_conv(in_ch, out_ch)
        self.mpconv = nn.Sequential(pool, convs)

    def forward(self, x):
        """Pool ``x``, then convolve the pooled map."""
        return self.mpconv(x)
class Upsample(nn.Module):
    """Module wrapper around bilinear ``F.interpolate`` with ``align_corners=True``.

    Args:
        scale_factor: multiplier passed to ``F.interpolate``.
    """

    def __init__(self, scale_factor):
        super(Upsample, self).__init__()
        self.scale_factor = scale_factor

    def forward(self, x):
        """Return ``x`` resized by ``self.scale_factor``."""
        resized = F.interpolate(
            x,
            scale_factor=self.scale_factor,
            mode='bilinear',
            align_corners=True,
        )
        return resized
class up(nn.Module):
    """Decoder step: upsample ``x1``, pad it to ``x2``'s size, concatenate, convolve.

    Args:
        in_ch: channel count of the concatenated tensor fed to the convolutions.
        out_ch: channel count produced by the convolutions.
        bilinear: if True use fixed bilinear upsampling, otherwise a
            ``ConvTranspose2d`` over ``in_ch // 2`` channels.
    """

    def __init__(self, in_ch, out_ch, bilinear=True):
        super(up, self).__init__()
        # Learned upsampling would be a nice idea, but the original author's
        # machine did not have enough memory to handle all those weights.
        if not bilinear:
            self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)
        else:
            self.up = Upsample(scale_factor=2)
        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x1, x2):
        """Fuse the upsampled ``x1`` with the skip tensor ``x2``."""
        x1 = self.up(x1)

        # Tensors are indexed (N, C, H, W): dim 2 is height, dim 3 is width.
        gap_h = x2.size(2) - x1.size(2)
        gap_w = x2.size(3) - x1.size(3)
        pad_left = gap_w // 2
        pad_top = gap_h // 2
        x1 = F.pad(x1, (pad_left, gap_w - pad_left,
                        pad_top, gap_h - pad_top))

        # for padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        merged = torch.cat([x2, x1], dim=1)
        return self.conv(merged)
class outconv(nn.Module):
    """Output head: 1x1 convolution followed by a sigmoid.

    Args:
        in_ch: channels of the incoming feature map.
        out_ch: channels of the produced map.
    """

    def __init__(self, in_ch, out_ch):
        super(outconv, self).__init__()
        projection = nn.Conv2d(in_ch, out_ch, 1)
        squash = nn.Sigmoid()
        self.conv = nn.Sequential(projection, squash)

    def forward(self, x):
        """Project ``x`` to ``out_ch`` channels and squash the values into (0, 1)."""
        return self.conv(x)
class UNet(nn.Module): class UNet(nn.Module):
def __init__(self, n_channels, n_classes): def __init__(self, n_channels, n_classes):
......
# This code clone from https://github.com/milesial/Pytorch-UNet
# LICENSE file : https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE
# sub-parts of the U-Net model
import torch
import torch.nn as nn
import torch.nn.functional as F
class double_conv(nn.Module):
    """Two consecutive (3x3 conv => BatchNorm => ReLU) stages.

    Args:
        in_ch: channels of the input tensor.
        out_ch: channels produced by both convolutions.
    """

    def __init__(self, in_ch, out_ch):
        super(double_conv, self).__init__()
        stages = []
        # The first stage maps in_ch -> out_ch, the second keeps out_ch.
        for source_ch in (in_ch, out_ch):
            stages.append(nn.Conv2d(source_ch, out_ch, 3, padding=1))
            stages.append(nn.BatchNorm2d(out_ch))
            stages.append(nn.ReLU(inplace=True))
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both conv stages to ``x``."""
        return self.conv(x)
class inconv(nn.Module):
    """Input stage of the U-Net: a single ``double_conv`` from ``in_ch`` to ``out_ch``."""

    def __init__(self, in_ch, out_ch):
        super(inconv, self).__init__()
        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x):
        """Pass ``x`` through the wrapped double convolution."""
        return self.conv(x)
class down(nn.Module):
    """Encoder step: 2x2 max-pooling followed by a ``double_conv``."""

    def __init__(self, in_ch, out_ch):
        super(down, self).__init__()
        pool = nn.MaxPool2d(2)
        convs = double_conv(in_ch, out_ch)
        self.mpconv = nn.Sequential(pool, convs)

    def forward(self, x):
        """Pool ``x``, then convolve the pooled map."""
        return self.mpconv(x)
class Upsample(nn.Module):
    """Module wrapper around bilinear ``F.interpolate`` with ``align_corners=True``.

    Args:
        scale_factor: multiplier passed to ``F.interpolate``.
    """

    def __init__(self, scale_factor):
        super(Upsample, self).__init__()
        self.scale_factor = scale_factor

    def forward(self, x):
        """Return ``x`` resized by ``self.scale_factor``."""
        resized = F.interpolate(
            x,
            scale_factor=self.scale_factor,
            mode='bilinear',
            align_corners=True,
        )
        return resized
class up(nn.Module):
    """Decoder step: upsample ``x1``, pad it to ``x2``'s size, concatenate, convolve.

    Args:
        in_ch: channel count of the concatenated tensor fed to the convolutions.
        out_ch: channel count produced by the convolutions.
        bilinear: if True use fixed bilinear upsampling, otherwise a
            ``ConvTranspose2d`` over ``in_ch // 2`` channels.
    """

    def __init__(self, in_ch, out_ch, bilinear=True):
        super(up, self).__init__()
        # Learned upsampling would be a nice idea, but the original author's
        # machine did not have enough memory to handle all those weights.
        if not bilinear:
            self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)
        else:
            self.up = Upsample(scale_factor=2)
        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x1, x2):
        """Fuse the upsampled ``x1`` with the skip tensor ``x2``."""
        x1 = self.up(x1)

        # Tensors are indexed (N, C, H, W): dim 2 is height, dim 3 is width.
        gap_h = x2.size(2) - x1.size(2)
        gap_w = x2.size(3) - x1.size(3)
        pad_left = gap_w // 2
        pad_top = gap_h // 2
        x1 = F.pad(x1, (pad_left, gap_w - pad_left,
                        pad_top, gap_h - pad_top))

        # for padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        merged = torch.cat([x2, x1], dim=1)
        return self.conv(merged)
class outconv(nn.Module):
    """Output head: 1x1 convolution followed by a sigmoid.

    Args:
        in_ch: channels of the incoming feature map.
        out_ch: channels of the produced map.
    """

    def __init__(self, in_ch, out_ch):
        super(outconv, self).__init__()
        projection = nn.Conv2d(in_ch, out_ch, 1)
        squash = nn.Sigmoid()
        self.conv = nn.Sequential(projection, squash)

    def forward(self, x):
        """Project ``x`` to ``out_ch`` channels and squash the values into (0, 1)."""
        return self.conv(x)
...@@ -15,7 +15,7 @@ class encoder_2d(nn.Module): ...@@ -15,7 +15,7 @@ class encoder_2d(nn.Module):
### downsample ### downsample
for i in range(n_downsampling): for i in range(n_downsampling):
mult = 2**i mult = 2**i
model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1), model += [nn.ReflectionPad2d(1),nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=0),
norm_layer(ngf * mult * 2), activation] norm_layer(ngf * mult * 2), activation]
self.model = nn.Sequential(*model) self.model = nn.Sequential(*model)
...@@ -39,16 +39,6 @@ class decoder_2d(nn.Module): ...@@ -39,16 +39,6 @@ class decoder_2d(nn.Module):
### upsample ### upsample
for i in range(n_downsampling): for i in range(n_downsampling):
mult = 2**(n_downsampling - i) mult = 2**(n_downsampling - i)
# if i%2 ==0:
# model += [ nn.Upsample(scale_factor = 2, mode='nearest'),
# nn.ReflectionPad2d(1),
# nn.Conv2d(ngf * mult, int(ngf * mult / 2),kernel_size=3, stride=1, padding=0),
# norm_layer(int(ngf * mult / 2)),
# nn.ReLU(True)]
# else:
# model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, output_padding=1),
# norm_layer(int(ngf * mult / 2)), activation]
# model += [ nn.Upsample(scale_factor = 2, mode='nearest'), # model += [ nn.Upsample(scale_factor = 2, mode='nearest'),
# nn.ReflectionPad2d(1), # nn.ReflectionPad2d(1),
......
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from .unet_parts import *
from .pix2pix_model import * from .pix2pix_model import *
......
...@@ -2,8 +2,10 @@ import sys ...@@ -2,8 +2,10 @@ import sys
import os import os
import random import random
import datetime import datetime
import time
import numpy as np import numpy as np
from matplotlib import pyplot as plt
import cv2 import cv2
import torch import torch
...@@ -11,137 +13,144 @@ import torch.backends.cudnn as cudnn ...@@ -11,137 +13,144 @@ import torch.backends.cudnn as cudnn
import torch.nn as nn import torch.nn as nn
from torch import optim from torch import optim
import sys
sys.path.append("..") sys.path.append("..")
sys.path.append("../..") sys.path.append("../..")
from cores import Options
from util import mosaic,util,ffmpeg,filt,data from util import mosaic,util,ffmpeg,filt,data
from util import image_processing as impro from util import image_processing as impro
from models import unet_model from models import unet_model,BiSeNet_model
from matplotlib import pyplot as plt
import torch.backends.cudnn as cudnn
'''
LR = 0.0002 --------------------------Get options--------------------------
EPOCHS = 100 '''
BATCHSIZE = 16 opt = Options()
LOADSIZE = 256 opt.parser.add_argument('--gpu_id',type=int,default=0, help='')
FINESIZE = 224 opt.parser.add_argument('--lr',type=float,default=0.001, help='')
CONTINUE = True opt.parser.add_argument('--finesize',type=int,default=360, help='')
use_gpu = True opt.parser.add_argument('--loadsize',type=int,default=400, help='')
SAVE_FRE = 1 opt.parser.add_argument('--batchsize',type=int,default=8, help='')
MAX_LOAD = 30000 opt.parser.add_argument('--model',type=str,default='BiSeNet', help='BiSeNet or UNet')
opt.parser.add_argument('--maxepoch',type=int,default=100, help='')
opt.parser.add_argument('--savefreq',type=int,default=5, help='')
dir_img = './datasets/face/origin_image/' opt.parser.add_argument('--maxload',type=int,default=1000000, help='')
dir_mask = './datasets/face/mask/' opt.parser.add_argument('--continuetrain', action='store_true', help='')
dir_checkpoint = 'checkpoints/face/' opt.parser.add_argument('--startepoch',type=int,default=0, help='')
opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='')
opt.parser.add_argument('--savename',type=str,default='face', help='')
'''
--------------------------Init--------------------------
'''
opt = opt.getparse()
dir_img = os.path.join(opt.dataset,'origin_image')
dir_mask = os.path.join(opt.dataset,'mask')
dir_checkpoint = os.path.join('checkpoints/',opt.savename)
util.makedirs(dir_checkpoint)
util.writelog(os.path.join(dir_checkpoint,'loss.txt'),
str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
torch.cuda.set_device(opt.gpu_id)
def Totensor(img,use_gpu=True): def Totensor(img,use_gpu=True):
size=img.shape[0] size=img.shape[0]
img = torch.from_numpy(img).float() img = torch.from_numpy(img).float()
if use_gpu: if opt.use_gpu:
img = img.cuda() img = img.cuda()
return img return img
def loadimage(imagepaths,maskpaths,opt,test_flag = False):
def Toinputshape(imgs,masks,finesize,test_flag = False): batchsize = len(imagepaths)
batchsize = len(imgs) images = np.zeros((batchsize,3,opt.finesize,opt.finesize), dtype=np.float32)
result_imgs=[];result_masks=[] masks = np.zeros((batchsize,1,opt.finesize,opt.finesize), dtype=np.float32)
for i in range(batchsize): for i in range(len(imagepaths)):
# print(imgs[i].shape,masks[i].shape) img = impro.resize(impro.imread(imagepaths[i]),opt.loadsize)
img,mask = data.random_transform_image(imgs[i], masks[i], finesize, test_flag) mask = impro.resize(impro.imread(maskpaths[i],mod = 'gray'),opt.loadsize)
# print(img.shape,mask.shape) img,mask = data.random_transform_image(img, mask, opt.finesize, test_flag)
mask = (mask.reshape(1,finesize,finesize)/255.0) images[i] = (img.transpose((2, 0, 1))/255.0)
img = (img.transpose((2, 0, 1))/255.0) masks[i] = (mask.reshape(1,1,opt.finesize,opt.finesize)/255.0)
result_imgs.append(img) images = Totensor(images,opt.use_gpu)
result_masks.append(mask) masks = Totensor(masks,opt.use_gpu)
result_imgs = np.array(result_imgs)
result_masks = np.array(result_masks) return images,masks
return result_imgs,result_masks
def batch_generator(images,masks,batchsize): '''
dataset_images = [] --------------------------checking dataset--------------------------
dataset_masks = [] '''
print('checking dataset...')
for i in range(int(len(images)/batchsize)): imagepaths = sorted(util.Traversal(dir_img))[:opt.maxload]
dataset_images.append(images[i*batchsize:(i+1)*batchsize]) maskpaths = sorted(util.Traversal(dir_mask))[:opt.maxload]
dataset_masks.append(masks[i*batchsize:(i+1)*batchsize]) data.shuffledata(imagepaths, maskpaths)
if len(images)%batchsize != 0: if len(imagepaths) != len(maskpaths) :
dataset_images.append(images[len(images)-len(images)%batchsize:]) print('dataset error!')
dataset_masks.append(masks[len(images)-len(images)%batchsize:]) exit(0)
img_num = len(imagepaths)
return dataset_images,dataset_masks print('find images:',img_num)
imagepaths_train = (imagepaths[0:int(img_num*0.8)]).copy()
def loadimage(dir_img,dir_mask,loadsize,eval_p): maskpaths_train = (maskpaths[0:int(img_num*0.8)]).copy()
t1 = datetime.datetime.now() imagepaths_eval = (imagepaths[int(img_num*0.8):]).copy()
imgnames = os.listdir(dir_img) maskpaths_eval = (maskpaths[int(img_num*0.8):]).copy()
# imgnames = imgnames[:100]
random.shuffle(imgnames) '''
imgnames = imgnames[:MAX_LOAD] --------------------------def network--------------------------
print('load images:',len(imgnames)) '''
imgnames = (f[:-4] for f in imgnames) if opt.model =='UNet':
images = [] net = unet_model.UNet(n_channels = 3, n_classes = 1)
masks = [] elif opt.model =='BiSeNet':
for imgname in imgnames: net = BiSeNet_model.BiSeNet(num_classes=1, context_path='resnet18')
img = impro.imread(dir_img+imgname+'.jpg')
mask = impro.imread(dir_mask+imgname+'.png',mod = 'gray') if opt.continuetrain:
img = impro.resize(img,loadsize)
mask = impro.resize(mask,loadsize)
images.append(img)
masks.append(mask)
train_images,train_masks = images[0:int(len(masks)*(1-eval_p))],masks[0:int(len(masks)*(1-eval_p))]
eval_images,eval_masks = images[int(len(masks)*(1-eval_p)):len(masks)],masks[int(len(masks)*(1-eval_p)):len(masks)]
t2 = datetime.datetime.now()
print('load data cost time:',(t2 - t1).seconds,'s')
return train_images,train_masks,eval_images,eval_masks
util.makedirs(dir_checkpoint)
print('loading data......')
train_images,train_masks,eval_images,eval_masks = loadimage(dir_img,dir_mask,LOADSIZE,0.2)
dataset_eval_images,dataset_eval_masks = batch_generator(eval_images,eval_masks,BATCHSIZE)
dataset_train_images,dataset_train_masks = batch_generator(train_images,train_masks,BATCHSIZE)
net = unet_model.UNet(n_channels = 3, n_classes = 1)
if CONTINUE:
if not os.path.isfile(os.path.join(dir_checkpoint,'last.pth')): if not os.path.isfile(os.path.join(dir_checkpoint,'last.pth')):
CONTINUE = False opt.continuetrain = False
print('can not load last.pth, training on init weight.') print('can not load last.pth, training on init weight.')
if CONTINUE: if opt.continuetrain:
net.load_state_dict(torch.load(dir_checkpoint+'last.pth')) net.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last.pth')))
if use_gpu: f = open(os.path.join(dir_checkpoint,'epoch_log.txt'),'r')
opt.startepoch = int(f.read())
f.close()
if opt.use_gpu:
net.cuda() net.cuda()
cudnn.benchmark = True cudnn.benchmark = True
optimizer = torch.optim.Adam(net.parameters(), lr=opt.lr)
optimizer = torch.optim.Adam(net.parameters(), lr=LR, betas=(0.9, 0.999)) if opt.model =='UNet':
criterion = nn.BCELoss()
criterion = nn.BCELoss() elif opt.model =='BiSeNet':
# criterion = nn.L1Loss() criterion = nn.BCELoss()
# criterion = BiSeNet_model.DiceLoss()
'''
--------------------------train--------------------------
'''
loss_plot = {'train':[],'eval':[]}
print('begin training......') print('begin training......')
for epoch in range(EPOCHS): for epoch in range(opt.startepoch,opt.maxepoch):
random_save = random.randint(0, len(dataset_train_images)) random_save = random.randint(0, int(img_num*0.8/opt.batchsize))
data.shuffledata(imagepaths_train, maskpaths_train)
starttime = datetime.datetime.now() starttime = datetime.datetime.now()
print('Epoch {}/{}.'.format(epoch + 1, EPOCHS)) util.writelog(os.path.join(dir_checkpoint,'loss.txt'),'Epoch {}/{}.'.format(epoch + 1, opt.maxepoch),True)
net.train() net.train()
if use_gpu: if opt.use_gpu:
net.cuda() net.cuda()
epoch_loss = 0 epoch_loss = 0
for i,(img,mask) in enumerate(zip(dataset_train_images,dataset_train_masks)): for i in range(int(img_num*0.8/opt.batchsize)):
# print(epoch,i,img.shape,mask.shape) img,mask = loadimage(imagepaths_train[i*opt.batchsize:(i+1)*opt.batchsize], maskpaths_train[i*opt.batchsize:(i+1)*opt.batchsize], opt)
img,mask = Toinputshape(img, mask, FINESIZE)
img = Totensor(img,use_gpu)
mask = Totensor(mask,use_gpu)
mask_pred = net(img) if opt.model =='UNet':
loss = criterion(mask_pred, mask) mask_pred = net(img)
epoch_loss += loss.item() loss = criterion(mask_pred, mask)
epoch_loss += loss.item()
elif opt.model =='BiSeNet':
mask_pred, mask_pred_sup1, mask_pred_sup2 = net(img)
loss1 = criterion(mask_pred, mask)
loss2 = criterion(mask_pred_sup1, mask)
loss3 = criterion(mask_pred_sup2, mask)
loss = loss1 + loss2 + loss3
epoch_loss += loss1.item()
optimizer.zero_grad() optimizer.zero_grad()
loss.backward() loss.backward()
...@@ -151,30 +160,47 @@ for epoch in range(EPOCHS): ...@@ -151,30 +160,47 @@ for epoch in range(EPOCHS):
data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'result.png'),True) data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'result.png'),True)
if i == random_save: if i == random_save:
data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'epoch_'+str(epoch+1)+'.png'),True) data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'epoch_'+str(epoch+1)+'.png'),True)
epoch_loss = epoch_loss/int(img_num*0.8/opt.batchsize)
loss_plot['train'].append(epoch_loss)
# torch.cuda.empty_cache() #val
# # net.eval()
epoch_loss_eval = 0 epoch_loss_eval = 0
with torch.no_grad(): with torch.no_grad():
#net.eval() # net.eval()
for i,(img,mask) in enumerate(zip(dataset_eval_images,dataset_eval_masks)): for i in range(int(img_num*0.2/opt.batchsize)):
# print(epoch,i,img.shape,mask.shape) img,mask = loadimage(imagepaths_eval[i*opt.batchsize:(i+1)*opt.batchsize], maskpaths_eval[i*opt.batchsize:(i+1)*opt.batchsize], opt,test_flag=True)
img,mask = Toinputshape(img, mask, FINESIZE,test_flag=True) if opt.model =='UNet':
img = Totensor(img,use_gpu) mask_pred = net(img)
mask = Totensor(mask,use_gpu) elif opt.model =='BiSeNet':
mask_pred = net(img) mask_pred, _, _ = net(img)
loss = criterion(mask_pred, mask) # mask_pred = net(img)
loss= criterion(mask_pred, mask)
epoch_loss_eval += loss.item() epoch_loss_eval += loss.item()
epoch_loss_eval = epoch_loss_eval/int(img_num*0.2/opt.batchsize)
loss_plot['eval'].append(epoch_loss_eval)
# torch.cuda.empty_cache() # torch.cuda.empty_cache()
#savelog
endtime = datetime.datetime.now() endtime = datetime.datetime.now()
print('--- Epoch train_loss: {0:.6f} eval_loss: {1:.6f} Cost time: {2:} s'.format( util.writelog(os.path.join(dir_checkpoint,'loss.txt'),
epoch_loss/len(dataset_train_images), '--- Epoch train_loss: {0:.6f} eval_loss: {1:.6f} Cost time: {2:} s'.format(
epoch_loss_eval/len(dataset_eval_images), epoch_loss,
(endtime - starttime).seconds)), epoch_loss_eval,
torch.save(net.cpu().state_dict(),dir_checkpoint+'last.pth') (endtime - starttime).seconds),
True)
if (epoch+1)%SAVE_FRE == 0: #plot
torch.save(net.cpu().state_dict(),dir_checkpoint+'epoch'+str(epoch+1)+'.pth') plt.plot(np.linspace(opt.startepoch+1,epoch+1,epoch+1-opt.startepoch),loss_plot['train'],label='train')
plt.plot(np.linspace(opt.startepoch+1,epoch+1,epoch+1-opt.startepoch),loss_plot['eval'],label='eval')
plt.xlabel('Epoch')
plt.ylabel('BCELoss')
plt.legend(loc=1)
plt.savefig(os.path.join(dir_checkpoint,'loss.jpg'))
plt.close()
#save network
torch.save(net.cpu().state_dict(),os.path.join(dir_checkpoint,'last.pth'))
f = open(os.path.join(dir_checkpoint,'epoch_log.txt'),'w+')
f.write(str(epoch+1))
f.close()
if (epoch+1)%opt.savefreq == 0:
torch.save(net.cpu().state_dict(),os.path.join(dir_checkpoint,'epoch'+str(epoch+1)+'.pth'))
print('network saved.') print('network saved.')
...@@ -21,6 +21,7 @@ import torch.backends.cudnn as cudnn ...@@ -21,6 +21,7 @@ import torch.backends.cudnn as cudnn
''' '''
opt = Options() opt = Options()
opt.parser.add_argument('--gpu_id',type=int,default=0, help='')
opt.parser.add_argument('--N',type=int,default=25, help='') opt.parser.add_argument('--N',type=int,default=25, help='')
opt.parser.add_argument('--lr',type=float,default=0.0002, help='') opt.parser.add_argument('--lr',type=float,default=0.0002, help='')
opt.parser.add_argument('--beta1',type=float,default=0.5, help='') opt.parser.add_argument('--beta1',type=float,default=0.5, help='')
...@@ -32,14 +33,15 @@ opt.parser.add_argument('--lambda_gan',type=float,default=1, help='') ...@@ -32,14 +33,15 @@ opt.parser.add_argument('--lambda_gan',type=float,default=1, help='')
opt.parser.add_argument('--finesize',type=int,default=256, help='') opt.parser.add_argument('--finesize',type=int,default=256, help='')
opt.parser.add_argument('--loadsize',type=int,default=286, help='') opt.parser.add_argument('--loadsize',type=int,default=286, help='')
opt.parser.add_argument('--batchsize',type=int,default=1, help='') opt.parser.add_argument('--batchsize',type=int,default=1, help='')
opt.parser.add_argument('--perload_num',type=int,default=16, help='') opt.parser.add_argument('--perload_num',type=int,default=16, help='number of images pool')
opt.parser.add_argument('--norm',type=str,default='instance', help='') opt.parser.add_argument('--norm',type=str,default='instance', help='')
opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='')
opt.parser.add_argument('--maxiter',type=int,default=10000000, help='') opt.parser.add_argument('--maxiter',type=int,default=10000000, help='')
opt.parser.add_argument('--savefreq',type=int,default=10000, help='') opt.parser.add_argument('--savefreq',type=int,default=10000, help='')
opt.parser.add_argument('--startiter',type=int,default=0, help='') opt.parser.add_argument('--startiter',type=int,default=0, help='')
opt.parser.add_argument('--continuetrain', action='store_true', help='') opt.parser.add_argument('--continuetrain', action='store_true', help='')
opt.parser.add_argument('--savename',type=str,default='MosaicNet', help='') opt.parser.add_argument('--savename',type=str,default='face', help='')
''' '''
...@@ -50,19 +52,27 @@ dir_checkpoint = os.path.join('checkpoints/',opt.savename) ...@@ -50,19 +52,27 @@ dir_checkpoint = os.path.join('checkpoints/',opt.savename)
util.makedirs(dir_checkpoint) util.makedirs(dir_checkpoint)
util.writelog(os.path.join(dir_checkpoint,'loss.txt'), util.writelog(os.path.join(dir_checkpoint,'loss.txt'),
str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt)) str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
torch.cuda.set_device(opt.gpu_id)
N = opt.N N = opt.N
loss_sum = [0.,0.,0.,0.] loss_sum = [0.,0.,0.,0.]
loss_plot = [[],[]] loss_plot = [[],[]]
item_plot = [] item_plot = []
videos = os.listdir('./dataset') # list video dir
videos.sort() videonames = os.listdir(opt.dataset)
lengths = [] videonames.sort()
print('check dataset...') lengths = [];tmp = []
for video in videos: print('Check dataset...')
video_images = os.listdir('./dataset/'+video+'/ori') for video in videonames:
lengths.append(len(video_images)) if video != 'opt.txt':
video_images = os.listdir(os.path.join(opt.dataset,video,'origin_image'))
lengths.append(len(video_images))
tmp.append(video)
videonames = tmp
video_num = len(videonames)
#def network
print('Init network...')
if opt.hd: if opt.hd:
netG = videoHD_model.MosaicNet(3*N+1, 3, norm=opt.norm) netG = videoHD_model.MosaicNet(3*N+1, 3, norm=opt.norm)
else: else:
...@@ -71,7 +81,8 @@ loadmodel.show_paramsnumber(netG,'netG') ...@@ -71,7 +81,8 @@ loadmodel.show_paramsnumber(netG,'netG')
if opt.gan: if opt.gan:
if opt.hd: if opt.hd:
netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=2) #netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=1)
netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=2,getIntermFeat=True)
else: else:
netD = pix2pix_model.define_D(3*2, 64, 'basic', norm = opt.norm) netD = pix2pix_model.define_D(3*2, 64, 'basic', norm = opt.norm)
netD.train() netD.train()
...@@ -106,36 +117,38 @@ if opt.use_gpu: ...@@ -106,36 +117,38 @@ if opt.use_gpu:
cudnn.benchmark = True cudnn.benchmark = True
''' '''
--------------------------preload data-------------------------- --------------------------preload data & data pool--------------------------
''' '''
def loaddata(): def loaddata(video_index):
video_index = random.randint(0,len(videos)-1)
video = videos[video_index] videoname = videonames[video_index]
img_index = random.randint(int(N/2)+1,lengths[video_index]- int(N/2)-1) img_index = random.randint(int(N/2)+1,lengths[video_index]- int(N/2)-1)
input_img = np.zeros((opt.loadsize,opt.loadsize,3*N+1), dtype='uint8') input_img = np.zeros((opt.loadsize,opt.loadsize,3*N+1), dtype='uint8')
# this frame
this_mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index)+'.png'),'gray',loadsize=opt.loadsize)
input_img[:,:,-1] = this_mask
#print(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg'))
ground_true = impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg'),loadsize=opt.loadsize)
mosaic_size,mod,rect_rat,father = mosaic.get_random_parameter(ground_true,this_mask)
# merge other frame
for i in range(0,N): for i in range(0,N):
img = impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index+i-int(N/2))+'.jpg'),loadsize=opt.loadsize)
img = cv2.imread('./dataset/'+video+'/mosaic/output_'+'%05d'%(img_index+i-int(N/2))+'.png') mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index+i-int(N/2))+'.png'),'gray',loadsize=opt.loadsize)
img = impro.resize(img,opt.loadsize) img_mosaic = mosaic.addmosaic_base(img, mask, mosaic_size,model = mod,rect_rat=rect_rat,father=father)
input_img[:,:,i*3:(i+1)*3] = img input_img[:,:,i*3:(i+1)*3] = img_mosaic
mask = cv2.imread('./dataset/'+video+'/mask/output_'+'%05d'%(img_index)+'.png',0) # to tensor
mask = impro.resize(mask,opt.loadsize)
mask = impro.mask_threshold(mask,15,128)
input_img[:,:,-1] = mask
ground_true = cv2.imread('./dataset/'+video+'/ori/output_'+'%05d'%(img_index)+'.png')
ground_true = impro.resize(ground_true,opt.loadsize)
input_img,ground_true = data.random_transform_video(input_img,ground_true,opt.finesize,N) input_img,ground_true = data.random_transform_video(input_img,ground_true,opt.finesize,N)
input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False) input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False)
ground_true = data.im2tensor(ground_true,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False) ground_true = data.im2tensor(ground_true,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False)
return input_img,ground_true return input_img,ground_true
print('preloading data, please wait 5s...') print('Preloading data, please wait...')
if opt.perload_num <= opt.batchsize: if opt.perload_num <= opt.batchsize:
opt.perload_num = opt.batchsize*2 opt.perload_num = opt.batchsize*2
#data pool
input_imgs = torch.rand(opt.perload_num,N*3+1,opt.finesize,opt.finesize).cuda() input_imgs = torch.rand(opt.perload_num,N*3+1,opt.finesize,opt.finesize).cuda()
ground_trues = torch.rand(opt.perload_num,3,opt.finesize,opt.finesize).cuda() ground_trues = torch.rand(opt.perload_num,3,opt.finesize,opt.finesize).cuda()
load_cnt = 0 load_cnt = 0
...@@ -144,14 +157,15 @@ def preload(): ...@@ -144,14 +157,15 @@ def preload():
global load_cnt global load_cnt
while 1: while 1:
try: try:
video_index = random.randint(0,video_num-1)
ran = random.randint(0, opt.perload_num-1) ran = random.randint(0, opt.perload_num-1)
input_imgs[ran],ground_trues[ran] = loaddata() input_imgs[ran],ground_trues[ran] = loaddata(video_index)
load_cnt += 1 load_cnt += 1
# time.sleep(0.1) # time.sleep(0.1)
except Exception as e: except Exception as e:
print("error:",e) print("error:",e)
import threading import threading
t = threading.Thread(target=preload,args=()) #t为新创建的线程 t = threading.Thread(target=preload,args=())
t.daemon = True t.daemon = True
t.start() t.start()
time_start=time.time() time_start=time.time()
......
...@@ -3,7 +3,7 @@ import numpy as np ...@@ -3,7 +3,7 @@ import numpy as np
import torch import torch
import torchvision.transforms as transforms import torchvision.transforms as transforms
import cv2 import cv2
from .image_processing import color_adjust from .image_processing import color_adjust,dctblur
transform = transforms.Compose([ transform = transforms.Compose([
transforms.ToTensor(), transforms.ToTensor(),
...@@ -61,6 +61,11 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape = ...@@ -61,6 +61,11 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape =
image_tensor = image_tensor.cuda() image_tensor = image_tensor.cuda()
return image_tensor return image_tensor
def shuffledata(data,target):
state = np.random.get_state()
np.random.shuffle(data)
np.random.set_state(state)
np.random.shuffle(target)
def random_transform_video(src,target,finesize,N): def random_transform_video(src,target,finesize,N):
...@@ -78,8 +83,8 @@ def random_transform_video(src,target,finesize,N): ...@@ -78,8 +83,8 @@ def random_transform_video(src,target,finesize,N):
target = target[:,::-1,:] target = target[:,::-1,:]
#random color #random color
alpha = random.uniform(-0.3,0.3) alpha = random.uniform(-0.1,0.1)
beta = random.uniform(-0.2,0.2) beta = random.uniform(-0.1,0.1)
b = random.uniform(-0.05,0.05) b = random.uniform(-0.05,0.05)
g = random.uniform(-0.05,0.05) g = random.uniform(-0.05,0.05)
r = random.uniform(-0.05,0.05) r = random.uniform(-0.05,0.05)
...@@ -87,39 +92,54 @@ def random_transform_video(src,target,finesize,N): ...@@ -87,39 +92,54 @@ def random_transform_video(src,target,finesize,N):
src[:,:,i*3:(i+1)*3] = color_adjust(src[:,:,i*3:(i+1)*3],alpha,beta,b,g,r) src[:,:,i*3:(i+1)*3] = color_adjust(src[:,:,i*3:(i+1)*3],alpha,beta,b,g,r)
target = color_adjust(target,alpha,beta,b,g,r) target = color_adjust(target,alpha,beta,b,g,r)
# random_num = 15 #random blur
# bright = random.randint(-random_num*2,random_num*2) if random.random()<0.5:
# for i in range(N*3): src[:,:,i]=np.clip(src[:,:,i].astype('int')+bright,0,255).astype('uint8') interpolations = [cv2.INTER_LINEAR,cv2.INTER_CUBIC,cv2.INTER_LANCZOS4]
# for i in range(3): target[:,:,i]=np.clip(target[:,:,i].astype('int')+bright,0,255).astype('uint8') size_ran = random.uniform(0.7,1.5)
interpolation_up = interpolations[random.randint(0,2)]
return src,target interpolation_down =interpolations[random.randint(0,2)]
tmp = cv2.resize(src[:,:,:3*N], (int(finesize*size_ran),int(finesize*size_ran)),interpolation=interpolation_up)
src[:,:,:3*N] = cv2.resize(tmp, (finesize,finesize),interpolation=interpolation_down)
def random_transform_image(img,mask,finesize,test_flag = False): tmp = cv2.resize(target, (int(finesize*size_ran),int(finesize*size_ran)),interpolation=interpolation_up)
target = cv2.resize(tmp, (finesize,finesize),interpolation=interpolation_down)
# randomsize = int(finesize*(1.2+0.2*random.random())+2) return src,target
def random_transform_single(img,out_shape):
out_h,out_w = out_shape
img = cv2.resize(img,(int(out_w*random.uniform(1.1, 1.5)),int(out_h*random.uniform(1.1, 1.5))))
h,w = img.shape[:2] h,w = img.shape[:2]
loadsize = min((h,w)) h_move = int((h-out_h)*random.random())
a = (float(h)/float(w))*random.uniform(0.9, 1.1) w_move = int((w-out_w)*random.random())
img = img[h_move:h_move+out_h,w_move:w_move+out_w]
if h<w: if random.random()<0.5:
mask = cv2.resize(mask, (int(loadsize/a),loadsize)) if random.random()<0.5:
img = cv2.resize(img, (int(loadsize/a),loadsize)) img = img[:,::-1]
else: else:
mask = cv2.resize(mask, (loadsize,int(loadsize*a))) img = img[::-1,:]
img = cv2.resize(img, (loadsize,int(loadsize*a))) if img.shape[0] != out_h or img.shape[1]!= out_w :
img = cv2.resize(img,(out_w,out_h))
# mask = randomsize(mask,loadsize) return img
# img = randomsize(img,loadsize)
def random_transform_image(img,mask,finesize,test_flag = False):
#random scale
if random.random()<0.5:
h,w = img.shape[:2]
loadsize = min((h,w))
a = (float(h)/float(w))*random.uniform(0.9, 1.1)
if h<w:
mask = cv2.resize(mask, (int(loadsize/a),loadsize))
img = cv2.resize(img, (int(loadsize/a),loadsize))
else:
mask = cv2.resize(mask, (loadsize,int(loadsize*a)))
img = cv2.resize(img, (loadsize,int(loadsize*a)))
#random crop #random crop
h,w = img.shape[:2] h,w = img.shape[:2]
h_move = int((h-finesize)*random.random()) h_move = int((h-finesize)*random.random())
w_move = int((w-finesize)*random.random()) w_move = int((w-finesize)*random.random())
# print(h,w,h_move,w_move)
img_crop = img[h_move:h_move+finesize,w_move:w_move+finesize] img_crop = img[h_move:h_move+finesize,w_move:w_move+finesize]
mask_crop = mask[h_move:h_move+finesize,w_move:w_move+finesize] mask_crop = mask[h_move:h_move+finesize,w_move:w_move+finesize]
...@@ -137,10 +157,6 @@ def random_transform_image(img,mask,finesize,test_flag = False): ...@@ -137,10 +157,6 @@ def random_transform_image(img,mask,finesize,test_flag = False):
#random color #random color
img = color_adjust(img,ran=True) img = color_adjust(img,ran=True)
# random_num = 15
# for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+random.randint(-random_num,random_num),0,255).astype('uint8')
# bright = random.randint(-random_num*2,random_num*2)
# for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+bright,0,255).astype('uint8')
#random flip #random flip
if random.random()<0.5: if random.random()<0.5:
...@@ -152,11 +168,19 @@ def random_transform_image(img,mask,finesize,test_flag = False): ...@@ -152,11 +168,19 @@ def random_transform_image(img,mask,finesize,test_flag = False):
mask = mask[::-1,:] mask = mask[::-1,:]
#random blur #random blur
if random.random()>0.5: if random.random()<0.5:
size_ran = random.uniform(0.5,1.5) img = dctblur(img,random.randint(1,15))
img = cv2.resize(img, (int(finesize*size_ran),int(finesize*size_ran)))
img = cv2.resize(img, (finesize,finesize)) # interpolations = [cv2.INTER_LINEAR,cv2.INTER_CUBIC,cv2.INTER_LANCZOS4]
#img = cv2.blur(img, (random.randint(1,3), random.randint(1,3))) # size_ran = random.uniform(0.7,1.5)
# img = cv2.resize(img, (int(finesize*size_ran),int(finesize*size_ran)),interpolation=interpolations[random.randint(0,2)])
# img = cv2.resize(img, (finesize,finesize),interpolation=interpolations[random.randint(0,2)])
#check shape
if img.shape[0]!= finesize or img.shape[1]!= finesize or mask.shape[0]!= finesize or mask.shape[1]!= finesize:
img = cv2.resize(img,(finesize,finesize))
mask = cv2.resize(mask,(finesize,finesize))
print('warning! shape error.')
return img,mask return img,mask
def showresult(img1,img2,img3,name,is0_1 = False): def showresult(img1,img2,img3,name,is0_1 = False):
......
...@@ -2,11 +2,18 @@ import os,json ...@@ -2,11 +2,18 @@ import os,json
# ffmpeg 3.4.6 # ffmpeg 3.4.6
def video2image(videopath,imagepath,fps=0): def video2image(videopath,imagepath,fps=0,start_time=0,last_time=0):
if fps == 0: if start_time == 0:
os.system('ffmpeg -i "'+videopath+'" -f image2 '+imagepath) if fps == 0:
os.system('ffmpeg -i "'+videopath+'" -f image2 '+'-q:v -0 '+imagepath)
else:
os.system('ffmpeg -i "'+videopath+'" -r '+str(fps)+' -f image2 '+'-q:v -0 '+imagepath)
else: else:
os.system('ffmpeg -i "'+videopath+'" -r '+str(fps)+' -f image2 '+imagepath) if fps == 0:
os.system('ffmpeg -ss '+start_time+' -t '+last_time+' -i "'+videopath+'" -f image2 '+'-q:v -0 '+imagepath)
else:
os.system('ffmpeg -ss '+start_time+' -t '+last_time+' -i "'+videopath+'" -r '+str(fps)+' -f image2 '+'-q:v -0 '+imagepath)
def video2voice(videopath,voicepath): def video2voice(videopath,voicepath):
os.system('ffmpeg -i "'+videopath+'" -f mp3 '+voicepath) os.system('ffmpeg -i "'+videopath+'" -f mp3 '+voicepath)
...@@ -53,4 +60,4 @@ def continuous_screenshot(videopath,savedir,fps): ...@@ -53,4 +60,4 @@ def continuous_screenshot(videopath,savedir,fps):
fps: save how many images per second fps: save how many images per second
''' '''
videoname = os.path.splitext(os.path.basename(videopath))[0] videoname = os.path.splitext(os.path.basename(videopath))[0]
os.system('ffmpeg -i "'+videopath+'" -vf fps='+str(fps)+' '+savedir+'/'+videoname+'_%05d.jpg') os.system('ffmpeg -i "'+videopath+'" -vf fps='+str(fps)+' -q:v -0 '+savedir+'/'+videoname+'_%06d.jpg')
...@@ -3,13 +3,24 @@ import numpy as np ...@@ -3,13 +3,24 @@ import numpy as np
import random import random
import platform import platform
system_type = 'Linux' system_type = 'Linux'
if 'Windows' in platform.platform(): if 'Windows' in platform.platform():
system_type = 'Windows' system_type = 'Windows'
def imread(file_path,mod = 'normal'): DCT_Q = np.array([[8,16,19,22,26,27,29,34],
[16,16,22,24,27,29,34,37],
[19,22,26,27,29,34,34,38],
[22,22,26,27,29,34,37,40],
[22,26,27,29,32,35,40,48],
[26,27,29,32,35,40,48,58],
[26,27,29,34,38,46,56,59],
[27,29,35,38,46,56,69,83]])
def imread(file_path,mod = 'normal',loadsize = 0):
''' '''
mod = 'normal' | 'gray' | 'all' mod: 'normal' | 'gray' | 'all'
loadsize: 0->original
''' '''
if system_type == 'Linux': if system_type == 'Linux':
if mod == 'normal': if mod == 'normal':
...@@ -26,6 +37,9 @@ def imread(file_path,mod = 'normal'): ...@@ -26,6 +37,9 @@ def imread(file_path,mod = 'normal'):
img = cv2.imdecode(np.fromfile(file_path,dtype=np.uint8),0) img = cv2.imdecode(np.fromfile(file_path,dtype=np.uint8),0)
else: else:
img = cv2.imdecode(np.fromfile(file_path,dtype=np.uint8),-1) img = cv2.imdecode(np.fromfile(file_path,dtype=np.uint8),-1)
if loadsize != 0:
img = resize(img, loadsize, interpolation=cv2.INTER_CUBIC)
return img return img
...@@ -40,6 +54,13 @@ def imwrite(file_path,img): ...@@ -40,6 +54,13 @@ def imwrite(file_path,img):
cv2.imencode('.jpg', img)[1].tofile(file_path) cv2.imencode('.jpg', img)[1].tofile(file_path)
def resize(img,size,interpolation=cv2.INTER_LINEAR): def resize(img,size,interpolation=cv2.INTER_LINEAR):
'''
cv2.INTER_NEAREST      最邻近插值点法
cv2.INTER_LINEAR        双线性插值法
cv2.INTER_AREA         邻域像素再取样插补
cv2.INTER_CUBIC        双立方插补,4*4大小的补点
cv2.INTER_LANCZOS4 8x8像素邻域的Lanczos插值
'''
h, w = img.shape[:2] h, w = img.shape[:2]
if np.min((w,h)) ==size: if np.min((w,h)) ==size:
return img return img
...@@ -55,8 +76,6 @@ def resize_like(img,img_like): ...@@ -55,8 +76,6 @@ def resize_like(img,img_like):
return img return img
def ch_one2three(img): def ch_one2three(img):
#zeros = np.zeros(img.shape[:2], dtype = "uint8")
# ret,thresh = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
res = cv2.merge([img, img, img]) res = cv2.merge([img, img, img])
return res return res
...@@ -78,11 +97,11 @@ def color_adjust(img,alpha=1,beta=0,b=0,g=0,r=0,ran = False): ...@@ -78,11 +97,11 @@ def color_adjust(img,alpha=1,beta=0,b=0,g=0,r=0,ran = False):
''' '''
img = img.astype('float') img = img.astype('float')
if ran: if ran:
alpha = random.uniform(-0.2,0.2) alpha = random.uniform(-0.1,0.1)
beta = random.uniform(-0.2,0.2) beta = random.uniform(-0.1,0.1)
b = random.uniform(-0.1,0.1) b = random.uniform(-0.05,0.05)
g = random.uniform(-0.1,0.1) g = random.uniform(-0.05,0.05)
r = random.uniform(-0.1,0.1) r = random.uniform(-0.05,0.05)
img = (1+alpha)*img+255.0*beta img = (1+alpha)*img+255.0*beta
bgr = [b*255.0,g*255.0,r*255.0] bgr = [b*255.0,g*255.0,r*255.0]
for i in range(3): img[:,:,i]=img[:,:,i]+bgr[i] for i in range(3): img[:,:,i]=img[:,:,i]+bgr[i]
...@@ -98,14 +117,6 @@ def makedataset(target_image,orgin_image): ...@@ -98,14 +117,6 @@ def makedataset(target_image,orgin_image):
img[0:256,256:512] = orgin_image[0:256,int(w/2-256/2):int(w/2+256/2)] img[0:256,256:512] = orgin_image[0:256,int(w/2-256/2):int(w/2+256/2)]
return img return img
def image2folat(img,ch):
size=img.shape[0]
if ch == 1:
img = (img[:,:,0].reshape(1,size,size)/255.0).astype(np.float32)
else:
img = (img.transpose((2, 0, 1))/255.0).astype(np.float32)
return img
def spiltimage(img,size = 128): def spiltimage(img,size = 128):
h, w = img.shape[:2] h, w = img.shape[:2]
# size = min(h,w) # size = min(h,w)
...@@ -133,6 +144,34 @@ def mergeimage(img1,img2,orgin_image,size = 128): ...@@ -133,6 +144,34 @@ def mergeimage(img1,img2,orgin_image,size = 128):
result_img = cv2.add(new_img1,new_img2) result_img = cv2.add(new_img1,new_img2)
return result_img return result_img
def block_dct_and_idct(g,QQF):
T = cv2.dct(g)
IT = np.round(cv2.idct(np.round(np.round(16.0*T/QQF)*QQF/16)))
return IT
def image_dct_and_idct(I,QF):
h,w = I.shape
QQF = DCT_Q*QF
for i in range(int(h/8)):
for j in range(int(w/8)):
I[i*8:(i+1)*8,j*8:(j+1)*8] = block_dct_and_idct(I[i*8:(i+1)*8,j*8:(j+1)*8],QQF)
return I
def dctblur(img,Q):
'''
Q: 1~20, 1->best
'''
h,w = img.shape[:2]
img[:8*int(h/8),:8*int(w/8)]
img = img.astype(np.float32)
if img.ndim == 2:
img = image_dct_and_idct(img, Q)
if img.ndim == 3:
h,w,ch = img.shape
for i in range(ch):
img[:,:,i] = image_dct_and_idct(img[:,:,i], Q)
return (np.clip(img,0,255)).astype(np.uint8)
def find_mostlikely_ROI(mask): def find_mostlikely_ROI(mask):
contours,hierarchy=cv2.findContours(mask, cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE) contours,hierarchy=cv2.findContours(mask, cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
if len(contours)>0: if len(contours)>0:
...@@ -199,8 +238,20 @@ def mask_area(mask): ...@@ -199,8 +238,20 @@ def mask_area(mask):
return area return area
def replace_mosaic(img_origin,img_fake,x,y,size,no_father): def Q_lapulase(resImg):
img_fake = resize(img_fake,size*2,interpolation=cv2.INTER_LANCZOS4) '''
Evaluate image quality
score > 20 normal
score > 50 clear
'''
img2gray = cv2.cvtColor(resImg, cv2.COLOR_BGR2GRAY)
img2gray = resize(img2gray,512)
res = cv2.Laplacian(img2gray, cv2.CV_64F)
score = res.var()
return score
def replace_mosaic(img_origin,img_fake,mask,x,y,size,no_father):
img_fake = cv2.resize(img_fake,(size*2,size*2),interpolation=cv2.INTER_LANCZOS4)
if no_father: if no_father:
img_origin[y-size:y+size,x-size:x+size]=img_fake img_origin[y-size:y+size,x-size:x+size]=img_fake
img_result = img_origin img_result = img_origin
...@@ -212,13 +263,20 @@ def replace_mosaic(img_origin,img_fake,x,y,size,no_father): ...@@ -212,13 +263,20 @@ def replace_mosaic(img_origin,img_fake,x,y,size,no_father):
#eclosion #eclosion
eclosion_num = int(size/5) eclosion_num = int(size/5)
entad = int(eclosion_num/2+2) entad = int(eclosion_num/2+2)
mask = np.zeros(img_origin.shape, dtype='uint8')
mask = cv2.rectangle(mask,(x-size+entad,y-size+entad),(x+size-entad,y+size-entad),(255,255,255),-1) # mask = np.zeros(img_origin.shape, dtype='uint8')
# mask = cv2.rectangle(mask,(x-size+entad,y-size+entad),(x+size-entad,y+size-entad),(255,255,255),-1)
mask = cv2.resize(mask,(img_origin.shape[1],img_origin.shape[0]))
mask = ch_one2three(mask)
mask = (cv2.blur(mask, (eclosion_num, eclosion_num))) mask = (cv2.blur(mask, (eclosion_num, eclosion_num)))
mask = mask/255.0 mask_tmp = np.zeros_like(mask)
mask_tmp[y-size:y+size,x-size:x+size] = mask[y-size:y+size,x-size:x+size]# Fix edge overflow
mask = mask_tmp/255.0
img_tmp = np.zeros(img_origin.shape) img_tmp = np.zeros(img_origin.shape)
img_tmp[y-size:y+size,x-size:x+size]=img_fake img_tmp[y-size:y+size,x-size:x+size]=img_fake
img_result = img_origin.copy() img_result = img_origin.copy()
img_result = (img_origin*(1-mask)+img_tmp*mask).astype('uint8') img_result = (img_origin*(1-mask)+img_tmp*mask).astype('uint8')
return img_result return img_result
\ No newline at end of file
...@@ -10,10 +10,19 @@ def addmosaic(img,mask,opt): ...@@ -10,10 +10,19 @@ def addmosaic(img,mask,opt):
elif opt.mosaic_size == 0: elif opt.mosaic_size == 0:
img = addmosaic_autosize(img, mask, opt.mosaic_mod) img = addmosaic_autosize(img, mask, opt.mosaic_mod)
else: else:
img = addmosaic_normal(img,mask,opt.mosaic_size,opt.output_size,model = opt.mosaic_mod) img = addmosaic_base(img,mask,opt.mosaic_size,opt.output_size,model = opt.mosaic_mod)
return img return img
def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6): def addmosaic_base(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6,father=0):
'''
img: input image
mask: input mask
n: mosaic size
out_size: output size 0->original
model : squa_avg squa_mid squa_random squa_avg_circle_edge rect_avg
rect_rat: if model==rect_avg , mosaic w/h=rect_rat
father : father size, -1->no 0->auto
'''
n = int(n) n = int(n)
if out_size: if out_size:
img = resize(img,out_size) img = resize(img,out_size)
...@@ -44,9 +53,9 @@ def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6): ...@@ -44,9 +53,9 @@ def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6):
for j in range(int(w/n)): for j in range(int(w/n)):
img_mosaic[i*n:(i+1)*n,j*n:(j+1)*n,:]=img[i*n:(i+1)*n,j*n:(j+1)*n,:].mean(0).mean(0) img_mosaic[i*n:(i+1)*n,j*n:(j+1)*n,:]=img[i*n:(i+1)*n,j*n:(j+1)*n,:].mean(0).mean(0)
mask = cv2.threshold(mask,127,255,cv2.THRESH_BINARY)[1] mask = cv2.threshold(mask,127,255,cv2.THRESH_BINARY)[1]
mask = ch_one2three(mask) _mask = ch_one2three(mask)
mask_inv = cv2.bitwise_not(mask) mask_inv = cv2.bitwise_not(_mask)
imgroi1 = cv2.bitwise_and(mask,img_mosaic) imgroi1 = cv2.bitwise_and(_mask,img_mosaic)
imgroi2 = cv2.bitwise_and(mask_inv,img) imgroi2 = cv2.bitwise_and(mask_inv,img)
img_mosaic = cv2.add(imgroi1,imgroi2) img_mosaic = cv2.add(imgroi1,imgroi2)
...@@ -58,12 +67,21 @@ def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6): ...@@ -58,12 +67,21 @@ def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6):
if mask[int(i*n_h+n_h/2),int(j*n_w+n_w/2)] == 255: if mask[int(i*n_h+n_h/2),int(j*n_w+n_w/2)] == 255:
img_mosaic[i*n_h:(i+1)*n_h,j*n_w:(j+1)*n_w,:]=img[i*n_h:(i+1)*n_h,j*n_w:(j+1)*n_w,:].mean(0).mean(0) img_mosaic[i*n_h:(i+1)*n_h,j*n_w:(j+1)*n_w,:]=img[i*n_h:(i+1)*n_h,j*n_w:(j+1)*n_w,:].mean(0).mean(0)
if father != -1:
if father==0:
mask = (cv2.blur(mask, (n, n)))
else:
mask = (cv2.blur(mask, (father, father)))
mask = ch_one2three(mask)/255.0
img_mosaic = (img*(1-mask)+img_mosaic*mask).astype('uint8')
return img_mosaic return img_mosaic
def get_autosize(img,mask,area_type = 'normal'): def get_autosize(img,mask,area_type = 'normal'):
h,w = img.shape[:2] h,w = img.shape[:2]
mask = cv2.resize(mask,(w,h)) size = np.min([h,w])
alpha = np.min((w,h))/512 mask = resize(mask,size)
alpha = size/512
try: try:
if area_type == 'normal': if area_type == 'normal':
area = mask_area(mask) area = mask_area(mask)
...@@ -85,66 +103,32 @@ def get_autosize(img,mask,area_type = 'normal'): ...@@ -85,66 +103,32 @@ def get_autosize(img,mask,area_type = 'normal'):
pass pass
return size return size
def addmosaic_autosize(img,mask,model,area_type = 'normal'): def get_random_parameter(img,mask):
h,w = img.shape[:2] # mosaic size
mask = cv2.resize(mask,(w,h)) p = np.array([0.5,0.5])
alpha = np.min((w,h))/512 mod = np.random.choice(['normal','bounding'], p = p.ravel())
try: mosaic_size = get_autosize(img,mask,area_type = mod)
if area_type == 'normal': mosaic_size = int(mosaic_size*random.uniform(0.9,2.1))
area = mask_area(mask)
elif area_type == 'bounding':
w,h = cv2.boundingRect(mask)[2:]
area = w*h
except:
area = 0
area = area/(alpha*alpha)
if area>50000:
img_mosaic = addmosaic_normal(img,mask,alpha*((area-50000)/50000+12),model = model)
elif 20000<area<=50000:
img_mosaic = addmosaic_normal(img,mask,alpha*((area-20000)/30000+8),model = model)
elif 5000<area<=20000:
img_mosaic = addmosaic_normal(img,mask,alpha*((area-5000)/20000+7),model = model)
elif 0<=area<=5000:
img_mosaic = addmosaic_normal(img,mask,alpha*((area-0)/5000+6),model = model)
else:
pass
return img_mosaic
def addmosaic_random(img,mask,area_type = 'normal'): # mosaic mod
# img = resize(img,512) p = np.array([0.25, 0.25, 0.1, 0.4])
h,w = img.shape[:2] mod = np.random.choice(['squa_mid','squa_avg','squa_avg_circle_edge','rect_avg'], p = p.ravel())
mask = cv2.resize(mask,(w,h))
alpha = np.min((w,h))/512 # rect_rat for rect_avg
#area_avg=5925*4 rect_rat = random.uniform(1.1,1.6)
try:
if area_type == 'normal': # father size
area = mask_area(mask) father = int(mosaic_size*random.uniform(0,1.5))
elif area_type == 'bounding':
w,h = cv2.boundingRect(mask)[2:] return mosaic_size,mod,rect_rat,father
area = w*h
except:
area = 0 def addmosaic_autosize(img,mask,model,area_type = 'normal'):
area = area/(alpha*alpha) mosaic_size = get_autosize(img,mask,area_type = 'normal')
if area>50000: img_mosaic = addmosaic_base(img,mask,mosaic_size,model = model)
img_mosaic = random_mod(img,mask,alpha*random.uniform(8,30)) #16,30
elif 20000<area<=50000:
img_mosaic = random_mod(img,mask,alpha*random.uniform(8,20)) #12,20
elif 5000<area<=20000:
img_mosaic = random_mod(img,mask,alpha*random.uniform(8,15))
elif 0<=area<=5000:
img_mosaic = random_mod(img,mask,alpha*random.uniform(4,10))
else:
pass
return img_mosaic return img_mosaic
def random_mod(img,mask,n): def addmosaic_random(img,mask):
ran=random.random() mosaic_size,mod,rect_rat,father = get_random_parameter(img,mask)
if ran < 0.3: img_mosaic = addmosaic_base(img,mask,mosaic_size,model = mod,rect_rat=rect_rat,father=father)
img = addmosaic_normal(img,mask,n,model = 'squa_mid') return img_mosaic
if 0.3 <= ran < 0.5: \ No newline at end of file
img = addmosaic_normal(img,mask,n,model = 'squa_avg')
elif 0.5 <= ran <0.6:
img = addmosaic_normal(img,mask,n,model = 'squa_avg_circle_edge')
else:
img = addmosaic_normal(img,mask,n,model = 'rect_avg')
return img
\ No newline at end of file
...@@ -40,6 +40,13 @@ def is_videos(paths): ...@@ -40,6 +40,13 @@ def is_videos(paths):
tmp.append(path) tmp.append(path)
return tmp return tmp
def is_dirs(paths):
tmp = []
for path in paths:
if os.path.isdir(path):
tmp.append(path)
return tmp
def writelog(path,log,isprint=False): def writelog(path,log,isprint=False):
f = open(path,'a+') f = open(path,'a+')
f.write(log+'\n') f.write(log+'\n')
...@@ -72,7 +79,15 @@ def file_init(opt): ...@@ -72,7 +79,15 @@ def file_init(opt):
if not os.path.isdir(opt.result_dir): if not os.path.isdir(opt.result_dir):
os.makedirs(opt.result_dir) os.makedirs(opt.result_dir)
print('makedir:',opt.result_dir) print('makedir:',opt.result_dir)
clean_tempfiles() clean_tempfiles(True)
def second2stamp(s):
h = int(s/3600)
s = int(s%3600)
m = int(s/60)
s = int(s%60)
return "%02d:%02d:%02d" % (h, m, s)
def get_bar(percent,num = 25): def get_bar(percent,num = 25):
bar = '[' bar = '['
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册