New version V0.3.0 #5 #8

57467a8f · hypox64 · 700c1859 · 57467a8f · 57467a8f · 57467a8f
40 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -154,6 +154,7 @@ result/
 /pretrained_models_old
 /deepmosaic_window
 /sftp-config.json
+/exe
 #./make_datasets
 /make_datasets/video
 /make_datasets/tmp

--- a/README.md
+++ b/README.md
@@ -6,25 +6,19 @@ This porject based on "semantic segmentation" and "Image-to-Image Translation".<
 * [中文版README](./README_CN.md)<br>
 ### More example
 origin | auto add mosaic |  auto clean mosaic  
 :-:|:-:|:-:
 ![image](./imgs/example/lena.jpg) | ![image](./imgs/example/lena_add.jpg) | ![image](./imgs/example/lena_clean.jpg) 
 ![image](./imgs/example/youknow.png)  | ![image](./imgs/example/youknow_add.png) | ![image](./imgs/example/youknow_clean.png) 
 * Compared with [DeepCreamPy](https://github.com/deeppomf/DeepCreamPy)
 mosaic image | DeepCreamPy | ours  
 :-:|:-:|:-:
 ![image](./imgs/example/face_a_mosaic.jpg) | ![image](./imgs/example/a_dcp.png) | ![image](./imgs/example/face_a_clean.jpg) 
 ![image](./imgs/example/face_b_mosaic.jpg) | ![image](./imgs/example/b_dcp.png) | ![image](./imgs/example/face_b_clean.jpg) 
 * Style Transfer
 origin | to Van Gogh | to winter
 :-:|:-:|:-:
 ![image](./imgs/example/SZU.jpg) | ![image](./imgs/example/SZU_vangogh.jpg) | ![image](./imgs/example/SZU_summer2winter.jpg) 
 An interesting example:[Ricardo Milos to cat](https://www.bilibili.com/video/BV1Q7411W7n6)
 ## Run DeepMosaics
@@ -33,6 +27,7 @@ You can either run DeepMosaics via pre-built binary package or from source.<br>
 ### Pre-built binary package
 For Windows, we build a GUI version for easy testing.<br>
 Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs)  [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br>
 * [[How to use]](./docs/exe_help.md)<br>
 ![image](./imgs/GUI.png)<br>
@@ -64,11 +59,11 @@ You can download pre_trained models and put them into './pretrained_models'.<br>
 [[Introduction to pre-trained models]](./docs/pre-trained_models_introduction.md)<br>
 #### Simple example
-* Add Mosaic (output video will save in './result')<br>
+* Add Mosaic (output media will be saved in './result')<br>
 ```bash
 python3 deepmosaic.py --media_path ./imgs/ruoruo.jpg --model_path ./pretrained_models/mosaic/add_face.pth --use_gpu -1
 ```
-* Clean Mosaic (output video will save in './result')<br>
+* Clean Mosaic (output media will be saved in './result')<br>
 ```bash
 python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretrained_models/mosaic/clean_face_HD.pth --use_gpu -1
 ```
@@ -76,5 +71,9 @@ python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretra
 If you want to test other images or videos, please refer to this file.<br>
 [[options_introduction.md]](./docs/options_introduction.md) <br>
+## Training with your own dataset
+If you want to train with your own dataset, please refer to [training_with_your_own_dataset.md](./docs/training_with_your_own_dataset.md)
 ## Acknowledgments
-This code borrows heavily from [[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet)[[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD).
+This code borrows heavily from [[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet) [[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD) [[BiSeNet]](https://github.com/ooooverflow/BiSeNet).
--- a/README_CN.md
+++ b/README_CN.md
@@ -3,25 +3,19 @@
 这是一个通过深度学习自动的为图片/视频添加马赛克,或消除马赛克的项目.<br>它基于“语义分割”以及“图像翻译”.<br>
 ### 更多例子
 原始 | 自动打码 |  自动去码  
 :-:|:-:|:-:
 ![image](./imgs/example/lena.jpg) | ![image](./imgs/example/lena_add.jpg) | ![image](./imgs/example/lena_clean.jpg) 
 ![image](./imgs/example/youknow.png)  | ![image](./imgs/example/youknow_add.png) | ![image](./imgs/example/youknow_clean.png) 
 * 与 [DeepCreamPy](https://github.com/deeppomf/DeepCreamPy)相比较
 马赛克图片 | DeepCreamPy | ours  
 :-:|:-:|:-:
 ![image](./imgs/example/face_a_mosaic.jpg) | ![image](./imgs/example/a_dcp.png) | ![image](./imgs/example/face_a_clean.jpg) 
 ![image](./imgs/example/face_b_mosaic.jpg) | ![image](./imgs/example/b_dcp.png) | ![image](./imgs/example/face_b_clean.jpg) 
 * 风格转换
 原始 | 梵高风格 | 转化为冬天
 :-:|:-:|:-:
 ![image](./imgs/example/SZU.jpg) | ![image](./imgs/example/SZU_vangogh.jpg) | ![image](./imgs/example/SZU_summer2winter.jpg) 
 一个有意思的尝试:[香蕉君♂猫](https://www.bilibili.com/video/BV1Q7411W7n6)
 ## 如何运行
@@ -74,5 +68,9 @@ python3 deepmosaic.py --media_path ./result/ruoruo_add.jpg --model_path ./pretra
 如果想要测试其他的图片或视频,请参照以下文件输入参数.<br>
 [[options_introduction_CN.md]](./docs/options_introduction_CN.md) <br>
+## 使用自己的数据训练模型
+如果需要使用自己的数据训练模型，请参照 [training_with_your_own_dataset.md](./docs/training_with_your_own_dataset.md)
 ## 鸣谢
-代码大量的参考了以下项目:[[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet)[[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD).
+代码大量的参考了以下项目:[[pytorch-CycleGAN-and-pix2pix]](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) [[Pytorch-UNet]](https://github.com/milesial/Pytorch-UNet) [[pix2pixHD]](https://github.com/NVIDIA/pix2pixHD) [[BiSeNet]](https://github.com/ooooverflow/BiSeNet).
\ No newline at end of file
--- a/cores/core.py
+++ b/cores/core.py
@@ -38,7 +38,7 @@ def addmosaic_video(opt,netS):
    positions = []
    for i,imagepath in enumerate(imagepaths,1):
        img = impro.imread(os.path.join('./tmp/video2image',imagepath))
-        mask,x,y,area = runmodel.get_ROI_position(img,netS,opt)
+        mask,x,y,size,area = runmodel.get_ROI_position(img,netS,opt)
        positions.append([x,y,area])      
        cv2.imwrite(os.path.join('./tmp/ROI_mask',imagepath),mask)
        print('\r','Find ROI location:'+str(i)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
@@ -110,7 +110,7 @@ def cleanmosaic_img(opt,netG,netM):
    print('Clean Mosaic:',path)
    img_origin = impro.imread(path)
    x,y,size,mask = runmodel.get_mosaic_position(img_origin,netM,opt)
-    #cv2.imwrite('./mask/'+os.path.basename(path), mask)
+    cv2.imwrite('./mask/'+os.path.basename(path), mask)
    img_result = img_origin.copy()
    if size != 0 :
        img_mosaic = img_origin[y-size:y+size,x-size:x+size]
@@ -118,7 +118,7 @@ def cleanmosaic_img(opt,netG,netM):
            img_fake = runmodel.traditional_cleaner(img_mosaic,opt)
        else:
            img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt)
-        img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather)
+        img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather)
    else:
        print('Do not find mosaic')
    impro.imwrite(os.path.join(opt.result_dir,os.path.splitext(os.path.basename(path))[0]+'_clean.jpg'),img_result)
@@ -126,7 +126,7 @@ def cleanmosaic_img(opt,netG,netM):
 def cleanmosaic_video_byframe(opt,netG,netM):
    path = opt.media_path
    fps,imagepaths = video_init(opt,path)[:2]
-    positions = get_mosaic_positions(opt,netM,imagepaths,savemask=False)
+    positions = get_mosaic_positions(opt,netM,imagepaths,savemask=True)
    # clean mosaic
    for i,imagepath in enumerate(imagepaths,0):
        x,y,size = positions[i][0],positions[i][1],positions[i][2]
@@ -138,7 +138,8 @@ def cleanmosaic_video_byframe(opt,netG,netM):
                img_fake = runmodel.traditional_cleaner(img_mosaic,opt)
            else:
                img_fake = runmodel.run_pix2pix(img_mosaic,netG,opt)
-        img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather)
+        mask = cv2.imread(os.path.join('./tmp/mosaic_mask',imagepath),0)
+        img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather)
        cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result)
        print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
    print()
@@ -178,13 +179,13 @@ def cleanmosaic_video_fusion(opt,netG,netM):
            mosaic_input = np.zeros((INPUT_SIZE,INPUT_SIZE,3*N+1), dtype='uint8')
            mosaic_input[:,:,0:N*3] = impro.resize(img_pool[y-size:y+size,x-size:x+size,:], INPUT_SIZE)
-            mask = impro.resize(mask,np.min(img_origin.shape[:2]))[y-size:y+size,x-size:x+size]
+            mask_input = impro.resize(mask,np.min(img_origin.shape[:2]))[y-size:y+size,x-size:x+size]
-            mosaic_input[:,:,-1] = impro.resize(mask, INPUT_SIZE)
+            mosaic_input[:,:,-1] = impro.resize(mask_input, INPUT_SIZE)
            mosaic_input = data.im2tensor(mosaic_input,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1 = False)
            unmosaic_pred = netG(mosaic_input)
            img_fake = data.tensor2im(unmosaic_pred,rgb2bgr = False ,is0_1 = False)
-            img_result = impro.replace_mosaic(img_origin,img_fake,x,y,size,opt.no_feather)
+            img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather)
            cv2.imwrite(os.path.join('./tmp/replace_mosaic',imagepath),img_result)
        print('\r','Clean Mosaic:'+str(i+1)+'/'+str(len(imagepaths)),util.get_bar(100*i/len(imagepaths),num=35),end='')
    print()

--- a/cores/options.py
+++ b/cores/options.py
@@ -16,17 +16,17 @@ class Options():
        self.parser.add_argument('--mode', type=str, default='auto',help='Program running mode. auto | add | clean | style')
        self.parser.add_argument('--model_path', type=str, default='./pretrained_models/mosaic/add_face.pth',help='pretrained model path')
        self.parser.add_argument('--result_dir', type=str, default='./result',help='output media will be saved here')
-        self.parser.add_argument('--tempimage_type', type=str, default='png',help='type of temp image, png | jpg, png is better but occupy more storage space')
+        self.parser.add_argument('--tempimage_type', type=str, default='jpg',help='type of temp image, png | jpg, png is better but occupy more storage space')
        self.parser.add_argument('--netG', type=str, default='auto',
            help='select model to use for netG(Clean mosaic and Transfer style) -> auto | unet_128 | unet_256 | resnet_9blocks | HD | video')
        self.parser.add_argument('--fps', type=int, default=0,help='read and output fps, if 0-> origin')
        self.parser.add_argument('--output_size', type=int, default=0,help='size of output media, if 0 -> origin')
+        self.parser.add_argument('--mask_threshold', type=int, default=64,help='threshold of recognize clean or add mosaic position 0~255')
        #AddMosaic
        self.parser.add_argument('--mosaic_mod', type=str, default='squa_avg',help='type of mosaic -> squa_avg | squa_random | squa_avg_circle_edge | rect_avg | random')
        self.parser.add_argument('--mosaic_size', type=int, default=0,help='mosaic size,if 0 auto size')
        self.parser.add_argument('--mask_extend', type=int, default=10,help='extend mosaic area')
-        self.parser.add_argument('--mask_threshold', type=int, default=64,help='threshold of recognize mosaic position 0~255')
        #CleanMosaic     
        self.parser.add_argument('--mosaic_position_model_path', type=str, default='auto',help='name of model use to find mosaic position')

--- a/deepmosaic.py
+++ b/deepmosaic.py
@@ -15,7 +15,7 @@ def main():
    else:
        files = [opt.media_path]        
    if opt.mode == 'add':
-        netS = loadmodel.unet(opt)
+        netS = loadmodel.bisenet(opt,'roi')
        for file in files:
            opt.media_path = file
            if util.is_img(file):
@@ -26,7 +26,7 @@ def main():
                print('This type of file is not supported')
    elif opt.mode == 'clean':
-        netM = loadmodel.unet_clean(opt)
+        netM = loadmodel.bisenet(opt,'mosaic')
        if opt.traditional:
            netG = None
        elif opt.netG == 'video':

--- a/docs/Release_notes.txt
+++ b/docs/Release_notes.txt
+DeepMosaics V0.3.0
+Core program building with windows10_1703_x86_64 
+ + python 3.6.8  
+ + pyinstaller 3.5
+GUI building with C#
+For more detail, please view on github: https://github.com/HypoX64/DeepMosaics
+Releases History
+ V0.3.0
+   1. Support BiSeNet(Better recognition of mosaics).
+   2. New videoHD model.
+   3. Better feathering method.
+ V0.2.0
+   1. Add video model.
+   2. Now you can input Chinese paths.
+   3. Support style transfer
+   4. Support fps limit
+ V0.1.2
+   1. Support pix2pixHD model
+ V0.1.1
+   1. Validate the input path; illegal paths are rejected.
+ V0.1.0
+   1. Initial release.
\ No newline at end of file
--- a/docs/exe_help.md
+++ b/docs/exe_help.md
 ## DeepMosaics.exe  Instructions
 [[中文版]](./exe_help_CN.md)
 This is a GUI version compiled in Windows.<br>
 Download this version and pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs)  [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br>
 Attentions:<br>
  - Require Windows_x86_64, Windows10 is better.<br>
  - Different pre-trained models are suitable for different effects.<br>
  - Run time depends on computer performance.<br>
  - If output video cannot be played, you can try with [potplayer](https://daumpotplayer.com/download/).<br>
  - The GUI version is updated less frequently than the source.<br>
 ### How to use
 * step 1: Choose image or video.
 * step 2: Choose model(Different pre-trained models are suitable for different effects)
 * step3:  Run program and wait.
 * step4:  Check result in './result'.
 ### Introduction to pre-trained models
 * Mosaic
 |               Name               |                         Description                         |
 | :------------------------------: | :---------------------------------------------------------: |
 |           add_face.pth           |          Add mosaic to all faces in images/videos.          |
 |        clean_face_HD.pth         | Clean mosaic to all faces in images/video.<br>(RAM > 8GB).  |
 |         add_youknow.pth          |      Add mosaic to all (FBI Warning) in images/videos.      |
 | clean_youknow_resnet_9blocks.pth |     Clean mosaic to all (FBI Warning) in images/videos.     |
 |     clean_youknow_video.pth      |        Clean mosaic to all (FBI Warning) in videos.         |
 |    clean_youknow_video_HD.pth    | Clean mosaic to all (FBI Warning) in videos.<br>(RAM > 8GB) |
 *  Style Transfer
 |          Name           |                        Description                        |
 | :---------------------: | :-------------------------------------------------------: |
 | style_apple2orange.pth  | Convert apples to oranges. |
 | style_orange2apple.pth  | Convert oranges to apples |
 | style_summer2winter.pth |     Convert summer to winter.     |
 | style_winter2summer.pth | Convert winter to summer. |
 |    style_cezanne.pth    |            Convert photos/video to Paul Cézanne style.            |
 |     style_monet.pth     | Convert photos/video to Claude Monet style. |
 |     style_ukiyoe.pth     | Convert photos/video to Ukiyoe style. |
 |     style_vangogh.pth     | Convert photos/video to Van Gogh style. |
 ### Annotation
 ![image](../imgs/GUI_Instructions.jpg)<br>
 * 1. Choose image or video.
 * 2. Choose model(Different pre-trained models are suitable for different effects).
 * 3. Program running mode.   (auto | add | clean | style)
 * 4. Use GPU to run deep learning model. (The current version does not support gpu, if you need to use gpu please run source).
 * 5. Limit the fps of the output video(0->original fps).
 * 6. More options.
 * 7. More options can be input.
 * 8. Run program.
 * 9. Open help file.
 * 10.  Sponsor our project.
 * 11.  Version information.
 * 12. Open the URL on github.
 ### Introduction to options
 If you need more effects,  use '--option your-parameters' to enter what you need.
 * Base
 |    Option    |                Description                 |                 Default                 |
 | :----------: | :----------------------------------------: | :-------------------------------------: |
 |  --use_gpu   |           if -1, do not use gpu            |                    0                    |
 | --media_path |         your videos or images path         |            ./imgs/ruoruo.jpg            |
 |    --mode    | program running mode(auto/clean/add/style) |                 'auto'                  |
 | --model_path |           pretrained model path            | ./pretrained_models/mosaic/add_face.pth |
 | --result_dir |      output media will be saved here       |                ./result                 |
 |    --fps     |     read and output fps, if 0-> origin     |                    0                    |
 * AddMosaic
 |      Option      |                         Description                          | Default  |
 | :--------------: | :----------------------------------------------------------: | :------: |
 |   --mosaic_mod   | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random | squa_avg |
 |  --mosaic_size   |                mosaic size,if 0 -> auto size                 |    0     |
 |  --mask_extend   |                      extend mosaic area                      |    10    |
 | --mask_threshold |         threshold of recognize mosaic position 0~255         |    64    |
 * CleanMosaic
 |    Option     |                         Description                          | Default |
 | :-----------: | :----------------------------------------------------------: | :-----: |
 | --traditional | if specified, use traditional image processing methods to clean mosaic |         |
 |   --tr_blur   | ksize of blur when using traditional method, it will affect final quality |   10    |
 |   --tr_down   | downsample when using traditional method,it will affect final quality |   10    |
 | --medfilt_num |        medfilt window of mosaic movement in the video        |   11    |
 * Style Transfer
 |    Option     |             Description              | Default |
 | :-----------: | :----------------------------------: | :-----: |
 | --output_size | size of output media, if 0 -> origin |   512   |
\ No newline at end of file
--- a/docs/exe_help_CN.md
+++ b/docs/exe_help_CN.md
 ## DeepMosaics.exe  使用说明
 下载程序以及预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs)  [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br>
 注意事项:<br>
  - 程序的运行要求在64位Windows操作系统,我仅在Windows10运行过,其他版本暂未经过测试<br>
  - 请根据需求选择合适的预训练模型进行测试<br>
  - 运行时间取决于电脑性能,对于视频文件,我们建议使用源码以及GPU运行<br>
  - 如果输出的视频无法播放,这边建议您尝试[potplayer](https://daumpotplayer.com/download/).<br>
  - 相比于源码,该版本的更新将会延后.
 ### 如何使用
 * step 1: 选择需要处理的图片或视频
 * step 2: 选择预训练模型(不同的预训练模型有不同的效果)
 * step3:  运行程序并等待
 * step4:  查看结果(储存在result文件夹下)
 ## 预训练模型说明
 当前的预训练模型分为两类——添加/移除马赛克以及风格转换.
 * 马赛克
 |              文件名              |                     描述                      |
 | :------------------------------: | :-------------------------------------------: |
 |           add_face.pth           |           对图片或视频中的脸部打码            |
 |        clean_face_HD.pth         | 对图片或视频中的脸部去码<br>(要求内存 > 8GB). |
 |         add_youknow.pth          |        对图片或视频中的十八禁内容打码         |
 | clean_youknow_resnet_9blocks.pth |        对图片或视频中的十八禁内容去码         |
 |     clean_youknow_video.pth      |           对视频中的十八禁内容去码            |
 |    clean_youknow_video_HD.pth    | 对视频中的十八禁内容去码<br>(要求内存 > 8GB)  |
 * 风格转换
 |          文件名        |                        描述                        |
 | :---------------------: | :-------------------------------------------------------: |
 | style_apple2orange.pth  | 苹果变橙子 |
 | style_orange2apple.pth  | 橙子变苹果 |
 | style_summer2winter.pth |     夏天变冬天     |
 | style_winter2summer.pth | 冬天变夏天 |
 |    style_cezanne.pth    |            转化为Paul Cézanne 的绘画风格            |
 |     style_monet.pth     | 转化为Claude Monet的绘画风格 |
 |     style_ukiyoe.pth     | 转化为Ukiyoe的绘画风格 |
 |     style_vangogh.pth     | 转化为Van Gogh的绘画风格 |
 ### GUI界面注释
 ![image](../imgs/GUI_Instructions.jpg)<br>
 * 1. 选择需要处理的图片或视频
 * 2. 选择预训练模型
 * 3. 程序运行模式  (auto | add | clean | style)
 * 4. 使用GPU (该版本目前不支持GPU,若需要使用GPU请使用源码运行).
 * 5. 限制输出的视频帧率(0->原始帧率).
 * 6. 更多的选项以及参数
 * 7. 自行输入更多参数，详见下文
 * 8. 运行
 * 9. 打开帮助文件
 * 10.  支持我们
 * 11.  版本信息
 * 12. 打开项目的github页面
 ### 参数说明
 如果需要更多的效果,  请按照 '--option your-parameters' 输入所需要的参数
 * 基本
 |    选项    |        描述         |                 默认                 |
 | :----------: | :------------------------: | :-------------------------------------: |
 |  --use_gpu   |   if -1, do not use gpu    |                    0                    |
 | --media_path | 需要处理的视频或者照片的路径 |            ./imgs/ruoruo.jpg            |
 |    --mode    |    运行模式(auto/clean/add/style)    |                 'auto'                  |
 | --model_path |   预训练模型的路径    | ./pretrained_models/mosaic/add_face.pth |
 | --result_dir | 保存路径 |                 ./result          |
 |    --fps    |    限制视频输出的fps，0则为默认    |                 0                  |
 *  添加马赛克
 |    选项    |        描述       |                 默认                 |
 | :----------: | :------------------------: | :-------------------------------------: |
 | --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random |                    squa_avg                    |
 | --mosaic_size | 马赛克大小，０则为自动 |            0            |
 |    --mask_extend    |    拓展马赛克区域    |         10  |
 | --mask_threshold | 马赛克区域识别阈值 0~255 | 64 |
 * 去除马赛克
 |    选项    |        描述       |                 默认                 |
 | :----------: | :------------------------: | :-------------------------------------: |
 | --traditional | 如果输入这个参数则使用传统方法清除马赛克 |                                        |
 | --tr_blur | 传统方法模糊尺寸 |            10            |
 |    --tr_down    |    传统方法下采样尺寸    |         10  |
 | --medfilt_num | medfilt window of mosaic movement in the video | 11 |
 * 风格转换
 |    选项    |        描述       |                 默认                 |
 | :----------: | :------------------------: | :-------------------------------------: |
 | --output_size | 输出媒体的尺寸，如果是０则为原始尺寸 |512|
\ No newline at end of file
--- a/docs/how_to_train.md
+++ b/docs/how_to_train.md
+### make datasets
--- a/docs/options_introduction.md
+++ b/docs/options_introduction.md
 ## Introduction to options
 If you need more effects,  use '--option your-parameters' to enter what you need.
 ### Base
 |    Option    |        Description         |                 Default                 |
 | :----------: | :------------------------: | :-------------------------------------: |
 |  --use_gpu   |   if -1, do not use gpu    |                    0                    |
 | --media_path | your videos or images path |            ./imgs/ruoruo.jpg            |
 |    --mode    |    program running mode(auto/clean/add/style)    |                 'auto'                  |
 | --model_path |   pretrained model path    | ./pretrained_models/mosaic/add_face.pth |
 | --result_dir |  output media will be saved here|                 ./result          |
 |    --fps    |    read and output fps, if 0-> origin    |                 0                  |
 ### AddMosaic
 |    Option    |        Description         |                 Default                 |
 | :----------: | :------------------------: | :-------------------------------------: |
 | --mosaic_mod | type of mosaic -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random |                    squa_avg                    |
 | --mosaic_size | mosaic size,if 0 -> auto size |            0            |
 |    --mask_extend    |    extend mosaic area    |         10  |
 | --mask_threshold | threshold of recognize mosaic position 0~255 | 64 |
 ### CleanMosaic
 |    Option    |        Description         |                 Default                 |
 | :----------: | :------------------------: | :-------------------------------------: |
 | --traditional | if specified, use traditional image processing methods to clean mosaic |                                        |
 | --tr_blur | ksize of blur when using traditional method, it will affect final quality |            10            |
 |    --tr_down    |    downsample when using traditional method,it will affect final quality    |         10  |
 | --medfilt_num | medfilt window of mosaic movement in the video | 11 |
 ### Style Transfer
 |    Option    |        Description         |                 Default                 |
 | :----------: | :------------------------: | :-------------------------------------: |
 | --output_size | size of output media, if 0 -> origin |512|
\ No newline at end of file
--- a/docs/options_introduction_CN.md
+++ b/docs/options_introduction_CN.md
 ## 参数说明
 如果需要更多的效果,  请按照 '--option your-parameters' 输入所需要的参数
 ### 基本
 |    选项    |        描述         |                 默认                 |
 | :----------: | :------------------------: | :-------------------------------------: |
 |  --use_gpu   |   if -1, do not use gpu    |                    0                    |
 | --media_path | 需要处理的视频或者照片的路径 |            ./imgs/ruoruo.jpg            |
 |    --mode    |    运行模式(auto/clean/add/style)    |                 'auto'                  |
 | --model_path |   预训练模型的路径    | ./pretrained_models/mosaic/add_face.pth |
 | --result_dir | 保存路径 |                 ./result          |
 |    --fps    |    限制视频输出的fps，0则为默认    |                 0                  |
 ### 添加马赛克
 |    选项    |        描述       |                 默认                 |
 | :----------: | :------------------------: | :-------------------------------------: |
 | --mosaic_mod | 马赛克类型 -> squa_avg/ squa_random/ squa_avg_circle_edge/ rect_avg/random |                    squa_avg                    |
 | --mosaic_size | 马赛克大小，０则为自动 |            0            |
 |    --mask_extend    |    拓展马赛克区域    |         10  |
 | --mask_threshold | 马赛克区域识别阈值 0~255 | 64 |
 ### 去除马赛克
 |    选项    |        描述       |                 默认                 |
 | :----------: | :------------------------: | :-------------------------------------: |
 | --traditional | 如果输入这个参数则使用传统方法清除马赛克 |                                        |
 | --tr_blur | 传统方法模糊尺寸 |            10            |
 |    --tr_down    |    传统方法下采样尺寸    |         10  |
 | --medfilt_num | medfilt window of mosaic movement in the video | 11 |
 ### 风格转换
 |    选项    |        描述       |                 默认                 |
 | :----------: | :------------------------: | :-------------------------------------: |
 | --output_size | 输出媒体的尺寸，如果是０则为原始尺寸 |512|
\ No newline at end of file
--- a/docs/pre-trained_models_introduction.md
+++ b/docs/pre-trained_models_introduction.md
 ## Introduction to pre-trained models
 The current pre-trained models are divided into two categories(Add/Clean mosaic and StyleTransfer).
 Download  pre-trained model via [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs)  [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br>
 ### Mosaic
-|               Name               |                         Description                         |
+|               Name               |                       Description                       |
-| :------------------------------: | :---------------------------------------------------------: |
+| :------------------------------: | :-----------------------------------------------------: |
-|           add_face.pth           |          Add mosaic to all faces in images/videos.          |
+|           add_face.pth           |         Add mosaic to  faces in images/videos.          |
-|        clean_face_HD.pth         | Clean mosaic to all faces in images/video.<br>(RAM > 8GB).  |
+|        clean_face_HD.pth         | Clean mosaic to  faces in images/video.<br>(RAM > 8GB). |
-|         add_youknow.pth          |      Add mosaic to all (FBI Warning) in images/videos.      |
+|         add_youknow.pth          |          Add mosaic to  ... in images/videos.           |
-| clean_youknow_resnet_9blocks.pth |     Clean mosaic to all (FBI Warning) in images/videos.     |
+| clean_youknow_resnet_9blocks.pth |         Clean mosaic to  ... in images/videos.          |
-|     clean_youknow_video.pth      |        Clean mosaic to all (FBI Warning) in videos.         |
+|     clean_youknow_video.pth      |             Clean mosaic to  ... in videos.             |
-|    clean_youknow_video_HD.pth    | Clean mosaic to all (FBI Warning) in videos.<br>(RAM > 8GB) |
+|    clean_youknow_video_HD.pth    |     Clean mosaic to  ... in videos.<br>(RAM > 8GB)      |
 ### Style Transfer
 |          Name           |                        Description                        |
 | :---------------------: | :-------------------------------------------------------: |
 | style_apple2orange.pth  | Convert apples to oranges. |
 | style_orange2apple.pth  | Convert oranges to apples |
 | style_summer2winter.pth |     Convert summer to winter.     |
 | style_winter2summer.pth | Convert winter to summer. |
 |    style_cezanne.pth    |            Convert photos/video to Paul Cézanne style.            |
 |     style_monet.pth     | Convert photos/video to Claude Monet style. |
 |     style_ukiyoe.pth     | Convert photos/video to Ukiyoe style. |
 |     style_vangogh.pth     | Convert photos/video to Van Gogh style. |
--- a/docs/pre-trained_models_introduction_CN.md
+++ b/docs/pre-trained_models_introduction_CN.md
 ## 预训练模型说明
 当前的预训练模型分为两类——添加/移除马赛克以及风格转换.
 可以通过以下方式下载预训练模型 [[Google Drive]](https://drive.google.com/open?id=1LTERcN33McoiztYEwBxMuRjjgxh4DEPs)  [[百度云,提取码1x0a]](https://pan.baidu.com/s/10rN3U3zd5TmfGpO_PEShqQ) <br>
 ### 马赛克
 |              文件名              |                     描述                      |
 | :------------------------------: | :-------------------------------------------: |
 |           add_face.pth           |           对图片或视频中的脸部打码            |
 |        clean_face_HD.pth         | 对图片或视频中的脸部去码<br>(要求内存 > 8GB). |
-|         add_youknow.pth          |        对图片或视频中的十八禁内容打码         |
+|         add_youknow.pth          |          对图片或视频中的...内容打码          |
-| clean_youknow_resnet_9blocks.pth |        对图片或视频中的十八禁内容去码         |
+| clean_youknow_resnet_9blocks.pth |          对图片或视频中的...内容去码          |
-|     clean_youknow_video.pth      |           对视频中的十八禁内容去码            |
+|     clean_youknow_video.pth      |             对视频中的...内容去码             |
-|    clean_youknow_video_HD.pth    | 对视频中的十八禁内容去码<br>(要求内存 > 8GB)  |
+|    clean_youknow_video_HD.pth    |   对视频中的...内容去码<br>(要求内存 > 8GB)   |
 ### 风格转换
 |          文件名        |                        描述                        |
 | :---------------------: | :-------------------------------------------------------: |
 | style_apple2orange.pth  | 苹果变橙子 |
 | style_orange2apple.pth  | 橙子变苹果 |
 | style_summer2winter.pth |     夏天变冬天     |
 | style_winter2summer.pth | 冬天变夏天 |
 |    style_cezanne.pth    |            转化为Paul Cézanne 的绘画风格            |
 |     style_monet.pth     | 转化为Claude Monet的绘画风格 |
 |     style_ukiyoe.pth     | 转化为Ukiyoe的绘画风格 |
 |     style_vangogh.pth     | 转化为Van Gogh的绘画风格 |
--- a/docs/training_with_your_own_dataset.md
+++ b/docs/training_with_your_own_dataset.md
+# Training with your own dataset
+Training with your own dataset requires a GPU with 6G memory (above GTX1060).<br>
+We will use "face" as an example. If you don't have any pictures, you can download [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) or [WIDER](http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/WiderFace_Results.html).
+## Getting Started
+#### Prerequisites
+  - Linux, Mac OS, Windows
+  - Python 3.6+
+  - [ffmpeg 3.4.6](http://ffmpeg.org/)
+  - [Pytorch 1.0+](https://pytorch.org/)
+  - NVIDIA GPU(with more than 6G memory) + CUDA CuDNN<br>
+#### Dependencies
+This code depends on opencv-python, torchvision, matplotlib available via pip install.
+#### Clone this repo
+```bash
+git clone https://github.com/HypoX64/DeepMosaics
+cd DeepMosaics
+```
+## Make training datasets
+```bash
+cd make_datasets
+```
+### Add mosaic dataset
+Please generate masks for the images to which you want to add mosaic (you should have more than 1000 images). Then put the images in ```face/origin_image``` and the masks in ```face/mask```.<br>
+* You can use ```draw_mask.py``` to generate them.
+```bash
+python draw_mask.py --datadir 'dir for your pictures' --savedir ../datasets/draw/face
+# Press the left mouse button to draw the mask. Press 'S' to save the mask, 'A' to reduce the brush size, 'D' to increase the brush size, 'W' to cancel drawing.
+```
+* If you want to get images from videos, you can use ```get_image_from_video.py```
+```bash
+python get_image_from_video.py --datadir 'dir for your videos' --savedir ../datasets/video2image --fps 1
+```
+### Clean mosaic dataset
+We provide several methods for generating clean-mosaic datasets. However, for better results, we recommend first training an addmosaic model on a small dataset and then using it to automatically generate datasets from a larger one. (Recommended: Method 2 for images and Method 4 for videos.)
+* Method 1: Use drawn masks to make pix2pix(HD) datasets (requires ```origin_image``` and ```mask```)
+```bash
+python make_pix2pix_dataset.py --datadir ../datasets/draw/face --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod drawn --minsize 128 --square
+```
+* Method 2: Use an addmosaic model to make pix2pix(HD) datasets (requires a pre-trained addmosaic model)
+```bash
+python make_pix2pix_dataset.py --datadir 'dir for your pictures' --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod network --model_path ../pretrained_models/mosaic/add_face.pth --minsize 128 --square --mask_threshold 128
+```
+* Method 3: Use irregular masks to make pix2pix(HD) datasets (requires [Irregular Masks](https://nv-adlr.github.io/publication/partialconv-inpainting))
+```bash
+python make_pix2pix_dataset.py --datadir 'dir for your pictures' --hd --outsize 512 --fold 1 --name face --savedir ../datasets/pix2pix/face --mod irregular --irrholedir ../datasets/Irregular_Holes_mask --square
+```
+* Method 4: Use an addmosaic model to make video datasets (requires a pre-trained addmosaic model; this method is better for processing video mosaics)
+```bash
+python make_video_dataset.py --datadir 'dir for your videos' --model_path ../pretrained_models/mosaic/add_face.pth --mask_threshold 96 --savedir ../datasets/video/face
+```
+## Training
+### Add
+```bash
+cd train/add
+python train.py --gpu_id 0 --dataset ../../datasets/draw/face --savename face --loadsize 512 --finesize 360 --batchsize 16
+```
+### Clean
+* For image datasets(generated by ```make_pix2pix_dataset.py```)
+We use [pix2pix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) or [pix2pixHD](https://github.com/NVIDIA/pix2pixHD) to train the model. Here we take pix2pixHD as an example.
+```bash
+git clone https://github.com/NVIDIA/pix2pixHD
+cd pix2pixHD
+pip install dominate
+python train.py --name face --resize_or_crop resize_and_crop --loadSize 563 --fineSize 512 --label_nc 0 --no_instance --dataroot ../datasets/pix2pix/face
+```
+* For video datasets(generated by ```make_video_dataset.py```)
+```bash
+cd train/clean
+python train.py --dataset ../../datasets/video/face --savename face --savefreq 100000 --gan --hd --lr 0.0002 --lambda_gan 1 --gpu_id 0 --perload_num 8
+```
+## Testing
+Put the saved network into ```./pretrained_models/mosaic/``` and rename it to ```add_face.pth```, ```clean_face_HD.pth```, or ```clean_face_video_HD.pth```.
--- a/make_datasets/csv/video_used_time.csv
+++ b/make_datasets/csv/video_used_time.csv
-010412_249-1pon-whole1_hd.avi,00:12:00,00:13:33,00:14:26,00:15:06,00:19:35,00:24:30,00:25:53,00:29:29,00:29:55,00:30:30,00:31:43,00:32:54,00:33:39,00:35:55,00:38:30,00:38:49,00:39:47,00:41:15,00:42:35,00:43:15,00:43:50,00:45:30,00:46:33,00:47:35,00:49:10,00:49:20,00:51:04,00:51:20,00:53:10,00:55:05
-011013_511-1pon-whole1_hd.avi,00:16:09,00:16:43,00:19:12,00:19:54,00:24:52,00:26:23,00:29:20,00:31:40,00:32:16,00:36:45,00:37:15,00:37:35,00:38:00,00:38:40,00:41:40,00:46:09,00:57:50,00:58:10
-012514_744-1pon-whole1_hd.mp4,00:08:12,00:12:00,00:12:30,00:17:40,00:19:35,00:20:50,00:21:50,00:24:35,00:29:10,00:30:25,00:33:10,00:39:35,00:40:35,00:42:25,00:42:35,00:57:05,00:58:25,00:59:15
-020916_242-1pon-1080p.mp4,00:13:35,00:15:10,00:18:20,00:26:50,00:31:25,00:33:15,00:34:55,00:37:15,00:38:25,00:39:35,00:41:05,00:41:55,00:42:10,00:43:10,00:43:20,00:45:15,00:45:20,00:46:10,00:47:50,00:49:10,00:50:00,00:50:20,00:52:10,00:56:55,00:57:05,00:57:35,00:59:15,00:59:30
-031516_262-1pon-1080p.mp4,00:09:30,00:13:00,00:13:50,00:14:50,00:16:00,00:20:55,00:25:50,00:26:35,00:30:30,00:32:40,00:38:20,00:38:30,00:39:55,00:42:10,00:43:45,00:45:40,00:46:20,00:47:50,00:48:05,00:49:50,00:51:45,00:51:50,00:57:00
-031716_001-1pon-1080p.mp4,00:02:30,00:02:40,00:02:55,00:04:00,00:04:20,00:05:40,00:06:05,00:06:50,00:08:10,00:08:20,00:08:30,00:08:47,00:10:00,00:10:05,00:10:20,00:10:30,00:11:50,00:12:00,00:12:35,00:13:20,00:14:20,00:15:35
-032113_554-1pon-whole1_hd.avi,00:13:20,00:21:20,00:23:15,00:23:35,00:24:00,00:25:10,0:25:30,00:25:50,00:26:35,00:26:50,00:31:40,00:35:15,00:35:25,00:37:10,00:45:35,00:46:05,00:48:00,00:49:50,00:50:30,00:51:50,00:52:30,00:52:40,00:52:50,00:58:20,00:58:30,00:59:30,00:59:45,01:01:45,01:02:00,01:03:50,01:04:05,01:04:20,01:04:30,01:05:35,01:07:40
-032313_556-1pon-whole1_hd.avi,00:04:05,00:05:00,00:06:40,00:06:50,00:07:50,00:09:10,00:10:30,00:13:15,00:16:05,00:17:35,00:18:20,00:20:25,00:20:30,00:22:30,00:26:50,00:27:30,00:35:30,00:42:40,00:44:09,00:50:00,00:52:50,00:53:40,00:54:15,00:58:00,00:58:25,01:04:05,01:05:05,01:06:15,01:06:50,01:07:51,01:08:10
-032715_001-1pon-1080p.mp4,00:09:20,00:10:35,00:10:45,00:13:25,00:21:20,00:24:50,00:28:10,00:29:26,00:29:52,00:30:55,00:31:10,00:31:55,00:32:20,00:32:40,00:33:10,00:34:30,00:35:40,00:35:50,00:48:30,00:48:50,00:49:45,00:50:15,00:53:55,00:57:13,00:57:20,00:59:00,00:59:55
-032715_004-1pon-1080p.mp4,00:22:30,00:22:55,00:24:44,00:26:15,00:28:00,00:28:40,00:30:40,00:35:40,00:38:20,00:38:50,00:39:50,00:41:30,00:42:10,00:42:30,00:43:40,00:44:05,00:44:35,00:45:17,00:45:36,00:46:23,00:46:55,00:47:20,00:47:40,00:48:05,00:48:30,00:50:50,00:52:00,00:53:30,00:53:45,00:54:25,00:54:45,00:57:40,00:58:00,00:58:40,00:58:50
-040111_063-1pon-whole1_hd.avi,00:08:25,00:08:45,00:09:00,00:10:55,00:16:40,00:17:05,00:17:35,00:19:10,00:27:00,00:28:05,00:29:05,00:31:40,00:36:00,00:37:50,00:45:30,00:46:15,00:47:45,00:50:15,00:52:50,00:53:47,00:53:58,00:55:05,00:56:15,00:58:40,00:59:00,00:59:20,00:59:45
-040814_786-1pon-whole1_hd.avi,00:04:40,00:05:00,00:06:50,00:10:20,00:21:00,00:23:35,00:24:10,00:26:40,00:28:35,00:29:15,00:29:20,00:31:15,00:32:50,00:36:10,00:39:40,00:42:00,00:42:50,00:44:00,00:44:15,00:44:36,00:45:00,00:45:20,00:47:20,00:48:10,00:48:30,00:53:50,00:54:43,00:55:20,00:59:15,00:59:30
-050915_077-1pon-1080p,00:11:00,00:12:30,00:19:20,00:19:50,00:21:00,00:22:00,00:23:40,00:24:30,00:28:20,00:33:50,00:36:00,00:37:30,00:38:50,00:39:30,00:41:50,00:44:20,00:48:45,00:49:25,00:50:45,00:51:00,00:53:05,00:54:00,00:54:27,00:57:30,00:59:10,01:00:30,01:04:10,01:04:20,01:04:30,01:04:50,01:05:20
-052215_084-1pon-1080p.mp4,00:26:50,00:27:15,00:30:20,00:33:20,00:34:00,00:37:00,00:41:00,00:43:00,00:44:30,00:47:40,00:50:35,00:50:40,00:51:40,00:55:20,00:55:50,00:55:55,00:56:20,00:57:30,00:57:40,00:59:10,00:59:15,01:00:05
-062015_101-1pon-1080p.mp4,00:11:00,00:12:47,00:13:10,00:14:20,00:15:20,00:16:20,00:17:10,00:17:25,00:19:45,00:21:05,00:23:40,00:27:40,00:28:10,00:37:15,00:41:30,00:43:20,00:44:25,00:46:51,00:47:20,00:49:00,00:50:40,00:51:50,00:52:50,00:55:00,00:56:20,00:58:10,00:59:00,10:00:00
-062715_105-1pon-1080p.mp4,00:11:30,00:11:55,00:12:00,00:12:30,00:13:45,00:16:50,00:18:25,00:19:20,00:20:40,00:25:15,00:36:20,00:36:40,00:37:25,00:39:05,00:39:50,00:40:55,00:41:55,00:45:40,00:43:30,00:44:15,00:45:30,00:47:40,00:50:05,00:50:10,00:50:20,00:50:30,00:55:10,00:56:35,00:58:40,01:00:15,01:05:05,01:05:15,01:05:30,01:05:50
-1pondo_070315_108_1080p.mp4,00:11:10,00:11:50,00:13:50,00:14:20,00:14:35,00:15:50,00:17:20,00:18:35,00:20:45,00:24:35,00:25:05,00:29:15,00:30:40,00:31:55,00:35:20,00:42:55,00:43:05,00:46:15,00:48:00,00:51:45,00:52:33,00:54:20,00:59:25,00:59:40,01:00:05
-071114_842-1pon-whole1_hd.mp4,00:09:50,00:11:25,00:16:35,00:18:20,00:22:10,00:25:25,00:26:35,00:33:50,00:35:40,00:43:10
-071715_116-1pon-1080p.mp4,00:10:50,00:11:30,00:12:50,00:15:10,00:16:45,00:17:05,00:25:20,00:26:45,00:28:30,00:30:20,00:32:55,00:34:30,00:37:40,00:38:40,00:40:20,00:41:20,00:44:10,00:47:15,00:55:00,00:59:40,00:59:50
-071815_117-1pon-1080p.mp4,00:14:50,00:15:10,00:18:05,00:14:50,00:25:55,00:26:25,00:32:45,00:33:40,00:43:15,00:45:05,00:45:45,00:48:40,00:48:50,00:55:45,10:00:20,01:00:35,01:01:00,01:01:10
-080815_130-1pon-1080p,00:14:50,00:17:15,00:17:20,00:23:55,00:25:30,00:25:55,00:28:20,00:28:30,00:30:10,00:31:00,00:33:25,00:33:35,00:33:45,00:33:50,00:39:25,00:39:50,00:40:25,00:44:05,00:45:00,00:45:40,00:45:50,00:46:55,00:49:15,00:49:25,00:46:40,00:50:10,00:50:15,00:51:25,00:51:50,00:53:14,00:53:20,00:54:15,00:56:15,00:56:25,00:56:45,00:57:45,00:57:30,00:58:00,00:56:45,00:56:55,01:00:00,01:00:05,01:00:25,01:00:30
-081514_863-1pon-whole1_hd.avi,00:10:30,00:26:00,00:30:00,00:38:21,00:40:15,00:40:30,00:49:10,00:50:05,00:57:10,00:59:00
-090614_877-1pon-whole1_hd.mp4,00:04:45,00:05:15,00:12:25,00:12:40,00:15:00,00:15:15,00:16:25,00:20:50,00:21:45,00:26:10,00:33:35,00:35:55,00:37:50,00:37:55,00:38:12,00:39:55,00:41:50,00:44:27,00:44:37,00:46:30,00:47:35,00:47:40,00:48:20,00:59:50
-091215_152-1pon-1080p.mp4,00:05:30,00:06:10,00:06:20,00:08:15,00:10:10,00:11:15,00:12:15,00:12:55,0:15:15,00:15:35,00:18:00,00:24:45,00:25:45,00:33:45,00:35:32,00:37:35,00:37:55,00:38:50,00:42:15,00:45:00,00:47:55,00:48:20,00:48:35,00:48:42,00:49:43,00:50:15,00:51:10,00:55:35,00:57:00,00:57:55,01:03:30,01:05:00
-092813_670-1pon-whole1_hd.avi,00:16:32,00:19:00,00:22:10,00:23:20,00:23:40,00:30:20,00:32:00,00:35:00,00:36:50,00:41:40,00:44:50,00:52:45,00:54:00
-103015_180-1pon-1080p.mp4,00:24:50,00:31:25,00:41:20,00:48:10,00:48:50,00:49:20,00:50:15,00:52:45,00:53:30,01:02:40,01:03:35,01:09:50,01:15:05,01:16:50
-110615_185-1pon-1080p.mp4,00:15:00,00:15:40,00:34:15,00:34:50,00:35:30,00:37:05,00:39:35,00:40:30,00:41:40,00:47:35,00:50:15,00:51:01,00:51:35,00:54:15,00:55:40,00:55:50,00:57:20,00:59:35,01:00:00,01:00:25
-120310_979-1pon-whole1_hd.avi,00:15:10,00:14:25,00:14:30,00:14:50,00:15:45,00:16:35,00:16:55,00:17:25,00:19:25,00:20:45,00:27:05,00:30:17,00:32:00,00:33:50,00:35:45,00:38:55,00:40:25,00:40:40,00:41:10,00:42:50,00:44:35,00:45:15,00:46:15,00:48:00,00:49:10,00:50:10,00:54:00,00:55:23,00:55:30,00:55:50
-021315-806-carib-1080p.mp4,00:13:30,00:15:20,00:17:40,00:21:50,00:22:25,00:24:35,00:28:50,00:28:52,00:31:00,00:37:25,00:37:35,00:38:20,00:38:45,00:43:30,00:48:35,00:51:30,00:51:50,00:52:19,00:56:20,00:58:35
-021715-809-carib-1080p.mp4,00:17:30,00:20:35,00:21:00,00:22:00,00:23:55,00:24:15,00:28:40,00:37:20,00:39:05,00:40:05,00:40:50,00:42:45,00:45:00,00:46:40,00:48:00,00:48:20,00:51:30,00:52:10,00:53:35,00:54:10,00:54:20,00:56:45,00:56:55,00:59:10,00:59:35,00:59:55
-022715-817-carib-1080p.mp4,00:57:52,00:08:50,00:10:00,00:12:50,00:14:05,00:18:25,00:20:45,00:20:57,00:22:15,00:23:30,00:23:55,00:24:18,00:24:50,00:25:25,00:26:30,00:26:55,00:28:50,00:31:55,00:34:00,00:34:35,00:42:45,00:44:33
-030914-558-carib-high_1.mp4,00:10:45,00:12:45,00:14:40,00:16:33,00:19:40,00:21:35,00:21:55,00:23:05,00:26:15,00:27:30,00:29:55,00:31:10,00:31:40,00:36:40,00:41:40,00:42:40,00:44:50,00:49:50,00:52:25,00:53:50,00:54:30,00:55:20,00:55:10,00:57:05,00:57:25,00:59:05,01:00:15,01:02:11,01:03:55,01:05:10
-031815-830-carib-1080p.mp4,00:13:15,00:13:25,00:13:55,00:14:40,00:15:40,00:17:30,00:18:20,00:19:10,00:21:00,00:22:10,00:22:25,00:23:25,00:27:10,00:28:33,00:35:05,00:35:40,00:37:50,00:38:00,00:39:35,00:41:35,00:42:40,00:47:40,00:50:33,00:55:50,01:02:10,01:05:20,01:05:30
-032016-121-carib-1080p.mp4,00:27:20,00:28:40,00:28:55,00:30:35,00:36:10,00:39:10,00:40:30,00:43:00,00:46:05,00:50:00,00:56:05,00:56:20,00:59:20
-032913-301-carib-whole_hd1.wmv,00:06:00,00:09:40,00:11:00,00:13:00,00:15:05,00:16:40,00:18:05,00:20:00,00:39:31,00:34:35,00:44:50,00:47:25,00:49:50,00:51:20,00:54:58,00:56:55,00:59:50,01:00:50
-032914-571-carib-high_1.mp4,00:13:30,00:13:55,00:16:40,00:15:25,00:20:40,00:26:45,00:32:05,00:33:15,00:36:40,00:38:55,00:39:00,00:39:25,00:47:30,00:49:20
-042514-588-carib-high_1.mp4,00:10:30,00:11:15,00:19:15,00:20:00,00:20:30,00:22:05,00:22:45,00:22:53,00:24:15,00:30:50,00:32:25,00:34:15,00:34:45,00:34:55,0:36:05,00:37:20,00:37:40,00:38:30,00:39:35,00:41:00,00:43:30,00:43:40
-052315-884-carib-1080p.mp4,00:09:35,00:14:10,00:14:30,00:14:40,00:17:10,00:17:50,00:19:00,00:20:20,01:21:55,00:22:40,00:23:05,00:24:00,00:26:00,00:27:15,00:30:25,00:32:50,00:37:55,0:39:35,00:40:10,00:41:40,00:43:15,00:43:40,00:47:55,00:49:30,00:49:55,00:58:55,01:00:40
-053114-612-carib-high_1.mp4,00:08:35,00:13:35,00:15:25,00:16:40,00:20:35,00:22:25,00:26:10,00:29:10,00:32:55,00:34:10,00:37:05,00:37:40,00:39:40,00:40:52,00:42:08,00:42:15
-062615-908-carib-1080p.mp4,00:13:45,00:14:40,00:15:45,00:16:11,00:17:00,00:22:10,00:23:40,00:26:10,00:27:15,00:27:50,00:31:30,00:35:00,00:40:20,00:43:10,00:44:35,00:47:17,00:50:25,00:51:15,00:52:20,00:54:10,00:55:30,01:00:20
\ No newline at end of file
--- a/make_datasets/draw_mask.py
+++ b/make_datasets/draw_mask.py
@@ -6,18 +6,25 @@ import random
 import sys
 sys.path.append("..")
+from cores import Options
 from util import util
 from util import image_processing as impro
-image_dir = './datasets_img/v2im'
-mask_dir = './datasets_img/v2im_mask'
-util.makedirs(mask_dir)
-files = os.listdir(image_dir)
+opt = Options()
-files_new =files.copy()
+opt.parser.add_argument('--datadir',type=str,default=' ', help='your images dir')
-print('find image:',len(files))
+opt.parser.add_argument('--savedir',type=str,default='../datasets/draw/face', help='')
-masks = os.listdir(mask_dir)
+opt = opt.getparse()
-print('mask:',len(masks))
+mask_savedir = os.path.join(opt.savedir,'mask')
+img_savedir = os.path.join(opt.savedir,'origin_image')
+util.makedirs(mask_savedir)
+util.makedirs(img_savedir)
+filepaths = util.Traversal(opt.datadir)
+filepaths = util.is_imgs(filepaths)
+random.shuffle(filepaths)
+print('find image:',len(filepaths))
 # mouse callback function
 drawing = False # true if mouse is pressed
@@ -32,68 +39,58 @@ def draw_circle(event,x,y,flags,param):
    elif event == cv2.EVENT_MOUSEMOVE:
        if drawing == True:
-            cv2.circle(img,(x,y),brushsize,(0,255,0),-1)
+            cv2.circle(img_drawn,(x,y),brushsize,(0,255,0),-1)
    elif event == cv2.EVENT_LBUTTONUP:
        drawing = False
-        cv2.circle(img,(x,y),brushsize,(0,255,0),-1)
+        cv2.circle(img_drawn,(x,y),brushsize,(0,255,0),-1)
-def makemask(img):
+def makemask(img_drawn):
    # starttime = datetime.datetime.now()
-    mask = np.zeros(img.shape, np.uint8)
+    mask = np.zeros(img_drawn.shape, np.uint8)
-    for row in range(img.shape[0]):
+    for row in range(img_drawn.shape[0]):
-        for col in range(img.shape[1]):
+        for col in range(img_drawn.shape[1]):
-            # if (img[row,col,:] == [0,255,0]).all(): #too slow
+            # if (img_drawn[row,col,:] == [0,255,0]).all(): #too slow
-            if img[row,col,0] == 0:
+            if img_drawn[row,col,0] == 0:
-                if img[row,col,1] == 255:
+                if img_drawn[row,col,1] == 255:
-                    if img[row,col,2] == 0:
+                    if img_drawn[row,col,2] == 0:
                        mask[row,col,:] = [255,255,255]
-    # endtime = datetime.datetime.now()
-    # print('Cost time:',(endtime-starttime))
    return mask
-for i in range(len(masks)):
-    masks[i]=masks[i].replace('.png','.jpg')
-for file in files:
-    if file  in masks:
-        files_new.remove(file)
-files = files_new
-# files = list(set(files)) #Distinct 
-print('remain:',len(files))
-random.shuffle(files)
-# files.sort()
 cnt = 0
+for file in filepaths:
+    try:
+        cnt += 1
+        img = impro.imread(file,loadsize=512)
+        img_drawn = img.copy()
+        cv2.namedWindow('image')
+        cv2.setMouseCallback('image',draw_circle) #MouseCallback
+        while(1):
-for file in files:
+            cv2.imshow('image',img_drawn)
-    cnt += 1
+            k = cv2.waitKey(1) & 0xFF
-    img = cv2.imread(os.path.join(image_dir,file))
+            if k == ord('s'):
-    img = impro.resize(img,512)
-    cv2.namedWindow('image')
+                img_drawn = impro.resize(img_drawn,256)
-    cv2.setMouseCallback('image',draw_circle) #MouseCallback
+                mask = makemask(img_drawn)
-    while(1):
+                cv2.imwrite(os.path.join(mask_savedir,os.path.splitext(os.path.basename(file))[0]+'.png'),mask)
+                cv2.imwrite(os.path.join(img_savedir,os.path.basename(file)),img)   
-        cv2.imshow('image',img)
+                print('Saved:',os.path.join(mask_savedir,os.path.splitext(os.path.basename(file))[0]+'.png'),mask)
-        k = cv2.waitKey(1) & 0xFF
+                # cv2.destroyAllWindows()
-        if k == ord(' '):
+                print('remain:',len(filepaths)-cnt)
-            img = impro.resize(img,256)
+                brushsize = 20
-            mask = makemask(img)
+                break
-            cv2.imwrite(os.path.join(mask_dir,os.path.splitext(file)[0]+'.png'),mask)
+            elif k == ord('a'):
-            print(os.path.join(mask_dir,os.path.splitext(file)[0]+'.png'))
+                brushsize -= 5
-            # cv2.destroyAllWindows()
+                if brushsize<5:
-            print('remain:',len(files)-cnt)
+                    brushsize = 5
-            brushsize = 20
+                print('brushsize:',brushsize)
-            break
+            elif k == ord('d'):
-        elif k == ord('a'):
+                brushsize += 5
-            brushsize -= 5
+                print('brushsize:',brushsize)
-            if brushsize<5:
+            elif k == ord('w'):
-                brushsize = 5
+                print('remain:',len(filepaths)-cnt)
-            print('brushsize:',brushsize)
+                break
-        elif k == ord('d'):
+    except Exception as e:
-            brushsize += 5
+        print(file,e)
-            print('brushsize:',brushsize)
-        elif k == ord('w'):
-            print('remain:',len(files)-cnt)
-            break
-# cv2.destroyAllWindows()
\ No newline at end of file
--- a/make_datasets/get_image_from_video.py
+++ b/make_datasets/get_image_from_video.py
 import os
-import numpy as np
-import cv2
-import random
-import csv
 import sys
 sys.path.append("..")
+from cores import Options
 from util import util,ffmpeg
-from util import image_processing as impro
-files = util.Traversal('./videos')
+opt = Options()
+opt.parser.add_argument('--datadir',type=str,default='', help='your video dir')
+opt.parser.add_argument('--savedir',type=str,default='../datasets/video2image', help='')
+opt = opt.getparse()
+files = util.Traversal(opt.datadir)
 videos = util.is_videos(files)
-output_dir = './datasets_img/v2im'
-util.makedirs(output_dir)
+util.makedirs(opt.savedir)
-FPS = 1
-util.makedirs(output_dir)
 for video in videos:
-    ffmpeg.continuous_screenshot(video, output_dir, FPS)
+    ffmpeg.continuous_screenshot(video, opt.savedir, opt.fps)
\ No newline at end of file
--- a/make_datasets/make_pix2pix_dataset.py
+++ b/make_datasets/make_pix2pix_dataset.py
+import os
+import random
+import sys
+import datetime
+import time
+import shutil
+import threading
+import warnings
+warnings.filterwarnings(action='ignore')
+import numpy as np
+import cv2
+sys.path.append("..")
+from models import runmodel,loadmodel
+import util.image_processing as impro
+from util import util,mosaic,data
+from cores import Options
+opt = Options()
+opt.parser.add_argument('--datadir',type=str,default='../datasets/draw/face', help='')
+opt.parser.add_argument('--savedir',type=str,default='../datasets/pix2pix/face', help='')
+opt.parser.add_argument('--name',type=str,default='', help='save name')
+opt.parser.add_argument('--mod',type=str,default='drawn', help='drawn | network | irregular | drawn,irregular | network,irregular')
+opt.parser.add_argument('--square', action='store_true', help='if specified, crop to square')
+opt.parser.add_argument('--irrholedir',type=str,default='../datasets/Irregular_Holes_mask', help='')  
+opt.parser.add_argument('--hd', action='store_true', help='if false make dataset for pix2pix, if Ture for pix2pix_HD')
+opt.parser.add_argument('--savemask', action='store_true', help='if specified,save mask')
+opt.parser.add_argument('--outsize', type=int ,default= 512,help='')
+opt.parser.add_argument('--fold', type=int ,default= 1,help='')
+opt.parser.add_argument('--start', type=int ,default= 0,help='')
+opt.parser.add_argument('--minsize', type=int ,default= 128,help='when [square], minimal roi size')
+opt.parser.add_argument('--quality', type=int ,default= 40,help='when [square], minimal quality')
+opt = opt.getparse()
+util.makedirs(opt.savedir)
+util.writelog(os.path.join(opt.savedir,'opt.txt'), 
+              str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
+opt.mod = (opt.mod).split(',')
+#save dir
+if opt.hd:
+    train_A_path = os.path.join(opt.savedir,'train_A')
+    train_B_path = os.path.join(opt.savedir,'train_B')
+    util.makedirs(train_A_path)
+    util.makedirs(train_B_path)
+else:
+    train_path = os.path.join(opt.savedir,'train')
+    util.makedirs(train_path)
+if opt.savemask:
+    mask_save_path = os.path.join(opt.savedir,'mask')
+    util.makedirs(mask_save_path)
+#read dir
+if 'drawn' in opt.mod:
+    imgpaths = util.Traversal(os.path.join(opt.datadir,'origin_image'))
+    imgpaths.sort()
+    maskpaths = util.Traversal(os.path.join(opt.datadir,'mask'))
+    maskpaths.sort()
+if 'network' in opt.mod or 'irregular' in opt.mod:
+    imgpaths = util.Traversal(opt.datadir)
+    random.shuffle (imgpaths)
+if 'irregular' in opt.mod:
+    irrpaths = util.Traversal(opt.irrholedir)
+#def network
+if 'network' in opt.mod:
+    net = loadmodel.bisenet(opt,'roi')
+# def checksaveimage(opt,img,mask):
+#     #check
+#     saveflag = True
+#     x,y,size,area = impro.boundingSquare(mask, random.uniform(1.4,1.6))
+#     if area < 1000:
+#         saveflag = False
+#     else:
+#         if opt.square:
+#             if size < opt.minsize:
+#                 saveflag = False
+#             else:
+#                 img = impro.resize(img[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
+#                 mask =  impro.resize(mask[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
+#                 if impro.Q_lapulase(img)<opt.quality:
+#                     saveflag = False         
+#         else:
+#             img = impro.resize(img,opt.outsize,interpolation=cv2.INTER_CUBIC)
+#     if saveflag:
+#         # add mosaic
+#         img_mosaic = mosaic.addmosaic_random(img, mask)
+#         global savecnt
+#         savecnt += 1
+#         if opt.hd:
+#             cv2.imwrite(os.path.join(train_A_path,opt.name+'%06d' % savecnt+'.jpg'), img_mosaic)
+#             cv2.imwrite(os.path.join(train_B_path,opt.name+'%06d' % savecnt+'.jpg'), img)
+#         else:
+#             merge_img = impro.makedataset(img_mosaic, img)
+#             cv2.imwrite(os.path.join(train_path,opt.name+'%06d' % savecnt+'.jpg'), merge_img)
+#         if opt.savemask:
+#             cv2.imwrite(os.path.join(mask_save_path,opt.name+'%06d' % savecnt+'.png'), mask)
+print('Find images:',len(imgpaths))
+starttime = datetime.datetime.now()
+filecnt = 0
+savecnt = opt.start
+for fold in range(opt.fold):
+    for i in range(len(imgpaths)):
+        filecnt += 1
+        try:
+            # load image and get mask
+            img = impro.imread(imgpaths[i])
+            if 'drawn' in opt.mod:
+                mask_drawn = impro.imread(maskpaths[i],'gray')
+                mask_drawn = impro.resize_like(mask_drawn, img)
+                mask = mask_drawn
+            if 'irregular' in opt.mod:
+                mask_irr = impro.imread(irrpaths[random.randint(0,12000-1)],'gray')
+                mask_irr = data.random_transform_single(mask_irr, (img.shape[0],img.shape[1]))
+                mask = mask_irr
+            if 'network' in opt.mod:
+                mask_net = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0]
+                if not opt.all_mosaic_area:
+                    mask_net = impro.find_mostlikely_ROI(mask_net)
+                mask = mask_net
+            if opt.mod == ['drawn','irregular']:
+                mask = cv2.bitwise_and(mask_irr, mask_drawn)
+            if opt.mod == ['network','irregular']:
+                mask = cv2.bitwise_and(mask_irr, mask_net)
+                #checkandsave
+                # t=threading.Thread(target=checksaveimage,args=(opt,img,mask,))
+                # t.start()
+                saveflag = True
+                x,y,size,area = impro.boundingSquare(mask, random.uniform(1.4,1.6))
+                if area < 1000:
+                    saveflag = False
+                else:
+                    if opt.square:
+                        if size < opt.minsize:
+                            saveflag = False
+                        else:
+                            img = impro.resize(img[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
+                            mask =  impro.resize(mask[y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
+                            if impro.Q_lapulase(img)<opt.quality:
+                                saveflag = False         
+                    else:
+                        img = impro.resize(img,opt.outsize,interpolation=cv2.INTER_CUBIC)
+                if saveflag:
+                    # add mosaic
+                    img_mosaic = mosaic.addmosaic_random(img, mask)
+                    # global savecnt
+                    savecnt += 1
+                    if opt.hd:
+                        cv2.imwrite(os.path.join(train_A_path,opt.name+'%06d' % savecnt+'.jpg'), img_mosaic)
+                        cv2.imwrite(os.path.join(train_B_path,opt.name+'%06d' % savecnt+'.jpg'), img)
+                    else:
+                        merge_img = impro.makedataset(img_mosaic, img)
+                        cv2.imwrite(os.path.join(train_path,opt.name+'%06d' % savecnt+'.jpg'), merge_img)
+                    if opt.savemask:
+                        cv2.imwrite(os.path.join(mask_save_path,opt.name+'%06d' % savecnt+'.png'), mask)
+                # print("Processing:",imgpaths[i]," ","Remain:",len(imgpaths)*opt.fold-filecnt)
+                # cv2.namedWindow('image', cv2.WINDOW_NORMAL)
+                # cv2.imshow('image',img_mosaic)
+                # cv2.waitKey(0)
+                # cv2.destroyAllWindows()   
+        except Exception as e:
+            print(imgpaths[i],e)
+        if filecnt%10==0:
+            endtime = datetime.datetime.now()
+            # used_time = (endtime-starttime).seconds
+            used_time = (endtime-starttime).seconds
+            all_length = len(imgpaths)*opt.fold 
+            percent = round(100*filecnt/all_length,1)
+            all_time = used_time/filecnt*all_length
+            print('\r','',str(filecnt)+'/'+str(all_length)+' ',
+                util.get_bar(percent,30),'',
+                util.second2stamp(used_time)+'/'+util.second2stamp(all_time),
+                'f:'+str(savecnt),end= " ")
\ No newline at end of file
--- a/make_datasets/make_video_dataset.py
+++ b/make_datasets/make_video_dataset.py
+import os
+import random
+import sys
+import datetime
+import time
+import shutil
+import threading
+import numpy as np
+import cv2
+sys.path.append("..")
+from models import runmodel,loadmodel
+import util.image_processing as impro
+from util import util,mosaic,data,ffmpeg
+from cores import Options
+opt = Options()
+opt.parser.add_argument('--datadir',type=str,default='your video dir', help='')
+opt.parser.add_argument('--savedir',type=str,default='../datasets/video/face', help='')
+opt.parser.add_argument('--interval',type=int,default=30, help='interval of split video ')
+opt.parser.add_argument('--time',type=int,default=5, help='split video time')
+opt.parser.add_argument('--minmaskarea',type=int,default=2000, help='')
+opt.parser.add_argument('--quality', type=int ,default= 45,help='minimal quality')
+opt.parser.add_argument('--outsize', type=int ,default= 286,help='')
+opt.parser.add_argument('--startcnt', type=int ,default= 0,help='')
+opt.parser.add_argument('--minsize', type=int ,default= 96,help='minimal roi size')
+opt = opt.getparse()
+util.makedirs(opt.savedir)
+util.writelog(os.path.join(opt.savedir,'opt.txt'), 
+              str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
+videopaths = util.Traversal(opt.datadir)
+videopaths = util.is_videos(videopaths)
+random.shuffle(videopaths)
+# def network
+net = loadmodel.bisenet(opt,'roi')
+result_cnt = opt.startcnt
+video_cnt = 1
+starttime = datetime.datetime.now()
+for videopath in videopaths:
+    try:
+        timestamps=[]
+        fps,endtime,height,width = ffmpeg.get_video_infos(videopath)
+        for cut_point in range(1,int((endtime-opt.time)/opt.interval)):
+            util.clean_tempfiles()
+            ffmpeg.video2image(videopath, './tmp/video2image/%05d.'+opt.tempimage_type,fps=1,
+                start_time = util.second2stamp(cut_point*opt.interval),last_time = util.second2stamp(opt.time))
+            imagepaths = util.Traversal('./tmp/video2image')
+            cnt = 0 
+            for i in range(opt.time):
+                img = impro.imread(imagepaths[i])
+                mask = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0]
+                if not opt.all_mosaic_area:
+                    mask = impro.find_mostlikely_ROI(mask)
+                x,y,size,area = impro.boundingSquare(mask,Ex_mul=1)
+                if area > opt.minmaskarea and size>opt.minsize and impro.Q_lapulase(img)>opt.quality:
+                    cnt +=1
+            if cnt == opt.time:
+                # print(second)
+                timestamps.append(util.second2stamp(cut_point*opt.interval))
+        util.writelog(os.path.join(opt.savedir,'opt.txt'),videopath+'\n'+str(timestamps))
+        #print(timestamps)
+        # util.clean_tempfiles()
+        # fps,endtime,height,width = ffmpeg.get_video_infos(videopath)
+        # # print(fps,endtime,height,width)
+        # ffmpeg.continuous_screenshot(videopath, './tmp/video2image', 1)
+        # # find where to cut
+        # print('Find where to cut...')
+        # timestamps=[]
+        # imagepaths = util.Traversal('./tmp/video2image')
+        # for second in range(int(endtime)):
+        #     if second%opt.interval==0:
+        #         cnt = 0 
+        #         for i in range(opt.time):
+        #             img = impro.imread(imagepaths[second+i])
+        #             mask = runmodel.get_ROI_position(img,net,opt)[0]
+        #             if not opt.all_mosaic_area:
+        #                 mask = impro.find_mostlikely_ROI(mask)
+        #             if impro.mask_area(mask) > opt.minmaskarea and impro.Q_lapulase(img)>opt.quality:
+        #                 # print(impro.mask_area(mask))
+        #                 cnt +=1
+        #         if cnt == opt.time:
+        #             # print(second)
+        #             timestamps.append(util.second2stamp(second))
+        #generate datasets
+        print('Generate datasets...')
+        for timestamp in timestamps:
+            savecnt = '%05d' % result_cnt
+            origindir = os.path.join(opt.savedir,savecnt,'origin_image')
+            maskdir = os.path.join(opt.savedir,savecnt,'mask')
+            util.makedirs(origindir)
+            util.makedirs(maskdir)
+            util.clean_tempfiles()
+            ffmpeg.video2image(videopath, './tmp/video2image/%05d.'+opt.tempimage_type,
+                start_time = timestamp,last_time = util.second2stamp(opt.time))
+            endtime = datetime.datetime.now()
+            print(str(video_cnt)+'/'+str(len(videopaths))+' ',
+                util.get_bar(100*video_cnt/len(videopaths),35),'',
+                util.second2stamp((endtime-starttime).seconds)+'/'+util.second2stamp((endtime-starttime).seconds/video_cnt*len(videopaths)))
+            imagepaths = util.Traversal('./tmp/video2image')
+            imagepaths = sorted(imagepaths)
+            imgs=[];masks=[]
+            mask_flag = False
+            for imagepath in imagepaths:
+                img = impro.imread(imagepath)
+                mask = runmodel.get_ROI_position(img,net,opt,keepsize=True)[0]
+                imgs.append(img)
+                masks.append(mask)
+                if not mask_flag:
+                    mask_avg = mask.astype(np.float64)
+                    mask_flag = True
+                else:
+                    mask_avg += mask.astype(np.float64)
+            mask_avg = np.clip(mask_avg/len(imagepaths),0,255).astype('uint8')
+            mask_avg = impro.mask_threshold(mask_avg,20,64)
+            if not opt.all_mosaic_area:
+                mask_avg = impro.find_mostlikely_ROI(mask_avg)
+            x,y,size,area = impro.boundingSquare(mask_avg,Ex_mul=random.uniform(1.1,1.5))
+            for i in range(len(imagepaths)):
+                img = impro.resize(imgs[i][y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC) 
+                mask = impro.resize(masks[i][y-size:y+size,x-size:x+size],opt.outsize,interpolation=cv2.INTER_CUBIC)
+                impro.imwrite(os.path.join(origindir,'%05d'%(i+1)+'.jpg'), img)
+                impro.imwrite(os.path.join(maskdir,'%05d'%(i+1)+'.png'), mask)
+            result_cnt+=1
+    except Exception as e:
+        video_cnt +=1
+        util.writelog(os.path.join(opt.savedir,'opt.txt'), 
+              videopath+'\n'+str(result_cnt)+'\n'+str(e))
+    video_cnt +=1
--- a/make_datasets/use_addmosaic_model_make_dataset.py
+++ b/make_datasets/use_addmosaic_model_make_dataset.py
-import sys
-import os
-import random
-import datetime
-import numpy as np
-import cv2
-import torch
-import torch.backends.cudnn as cudnn
-import torch.nn as nn
-from torch import optim
-from unet import UNet
-from mosaic import random_mosaic
-import image_processing as impro
-def runmodel(img,net):
-    img=impro.image2folat(img,3)
-    img=img.reshape(1,3,128,128)
-    img = torch.from_numpy(img)
-    img=img.cuda()
-    pred = net(img)
-    pred = (pred.cpu().detach().numpy()*255)
-    pred = pred.reshape(128,128).astype('uint8')
-    return pred
-dir_img = './origin_image/'
-dir_mosaic = './mosaic/'
-dir_mask = './mask/'
-dir_dataset = './dataset/'
-dir_checkpoint = 'checkpoints/'
-net = UNet(n_channels = 3, n_classes = 1)
-net.load_state_dict(torch.load(dir_checkpoint+'mosaic_position.pth'))
-net.cuda()
-net.eval()
-# cudnn.benchmark = True
-files = os.listdir(dir_mosaic)
-for i,file in enumerate(files,1):
-    orgin_image = cv2.imread(dir_img+file)
-    mosaic_image = cv2.imread(dir_mosaic+file)
-    img = impro.resize(mosaic_image,128)
-    img1,img2 = impro.spiltimage(img)
-    mask1 =runmodel(img1,net)
-    mask2 =runmodel(img2,net)
-    mask = impro.mergeimage(mask1,mask2,img)
-    # test_mask = mask.copy()
-    mask = impro.mask_threshold(mask,blur=5,threshold=128)
-    if impro.mask_area(mask) > 1:
-        h,w = orgin_image.shape[:2]
-        mosaic_image = cv2.resize(mosaic_image,(w,h))
-        # test_mask  = cv2.resize(test_mask,(w,h))
-        # test_mask  = impro.ch_one2three(test_mask)
-        x,y,size,area = impro.boundingSquare(mask,Ex_mul=1.5)
-        rat = min(orgin_image.shape[:2])/128.0
-        x,y,size = int(rat*x),int(rat*y),int(rat*size)
-        orgin_crop = orgin_image[y-size:y+size,x-size:x+size]
-        mosaic_crop = mosaic_image[y-size:y+size,x-size:x+size]
-        # mosaic_crop = test_mask[y-size:y+size,x-size:x+size]
-        result = impro.makedataset(mosaic_crop,orgin_crop)
-        cv2.imwrite(dir_dataset+file,result)
-    if i%1000==0:
-        print(i,'image finished.')
--- a/make_datasets/use_addmosaic_model_make_video_dataset.py
+++ b/make_datasets/use_addmosaic_model_make_video_dataset.py
-import os
-import numpy as np
-import cv2
-import random
-import sys
-sys.path.append("..")
-from models import runmodel,loadmodel
-from util import mosaic,util,ffmpeg,filt
-from util import image_processing as impro
-from cores import options
-opt = options.Options().getparse()
-util.file_init(opt)
-videos = os.listdir('./video')
-videos.sort()
-opt.model_path = '../pretrained_models/add_youknow_128.pth'
-opt.use_gpu = True
-Ex = 1.4
-Area_Type  = 'normal'
-suffix = ''
-net = loadmodel.unet(opt)
-for i,path in enumerate(videos,0):
-    try:
-        path = os.path.join('./video',path)
-        util.clean_tempfiles()
-        ffmpeg.video2voice(path,'./tmp/voice_tmp.mp3')
-        ffmpeg.video2image(path,'./tmp/video2image/output_%05d.'+opt.tempimage_type)
-        imagepaths=os.listdir('./tmp/video2image')
-        imagepaths.sort()
-        # get position
-        positions = []
-        img_ori_example = impro.imread(os.path.join('./tmp/video2image',imagepaths[0]))
-        mask_avg = np.zeros((impro.resize(img_ori_example, 128)).shape[:2])
-        for imagepath in imagepaths:
-            imagepath = os.path.join('./tmp/video2image',imagepath)
-            #print('Find ROI location:',imagepath)
-            img = impro.imread(imagepath)
-            x,y,size,mask = runmodel.get_mosaic_position(img,net,opt,threshold = 80)
-            cv2.imwrite(os.path.join('./tmp/ROI_mask',
-                              os.path.basename(imagepath)),mask)
-            positions.append([x,y,size])
-            mask_avg = mask_avg + mask
-        #print('Optimize ROI locations...')
-        mask_index = filt.position_medfilt(np.array(positions), 13)
-        mask = np.clip(mask_avg/len(imagepaths),0,255).astype('uint8')
-        mask = impro.mask_threshold(mask,20,32)
-        x,y,size,area = impro.boundingSquare(mask,Ex_mul=Ex)
-        rat = min(img_ori_example.shape[:2])/128.0
-        x,y,size = int(rat*x),int(rat*y),int(rat*size)
-        cv2.imwrite(os.path.join('./tmp/ROI_mask_check',
-                                'test_show.png'),mask)
-        if size !=0 :
-            mask_path = './dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix+'/mask'
-            ori_path = './dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix+'/ori'
-            mosaic_path = './dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix+'/mosaic'
-            os.makedirs('./dataset/'+os.path.splitext(os.path.basename(path))[0]+suffix)
-            os.makedirs(mask_path)
-            os.makedirs(ori_path)
-            os.makedirs(mosaic_path)
-            #print('Add mosaic to images...')
-            mosaic_size = mosaic.get_autosize(img_ori_example,mask,area_type = Area_Type)*random.uniform(1,2)
-            models = ['squa_avg','rect_avg','squa_mid']
-            mosaic_type = random.randint(0,len(models)-1)
-            rect_rat = random.uniform(1.2,1.6)
-            for i in range(len(imagepaths)):
-                mask = impro.imread(os.path.join('./tmp/ROI_mask',imagepaths[mask_index[i]]),mod = 'gray')
-                img_ori = impro.imread(os.path.join('./tmp/video2image',imagepaths[i]))
-                img_mosaic = mosaic.addmosaic_normal(img_ori,mask,mosaic_size,model = models[mosaic_type],rect_rat=rect_rat)
-                mask = impro.resize(mask, min(img_ori.shape[:2]))
-                img_ori_crop = impro.resize(img_ori[y-size:y+size,x-size:x+size],256)
-                img_mosaic_crop = impro.resize(img_mosaic[y-size:y+size,x-size:x+size],256)
-                mask_crop = impro.resize(mask[y-size:y+size,x-size:x+size],256)
-                cv2.imwrite(os.path.join(ori_path,os.path.basename(imagepaths[i])),img_ori_crop)
-                cv2.imwrite(os.path.join(mosaic_path,os.path.basename(imagepaths[i])),img_mosaic_crop)
-                cv2.imwrite(os.path.join(mask_path,os.path.basename(imagepaths[i])),mask_crop)
-    except Exception as e:
-        print(e)
-    print(util.get_bar(100*i/len(videos),num=50))
\ No newline at end of file
--- a/make_datasets/use_drawn_mask_make_dataset.py
+++ b/make_datasets/use_drawn_mask_make_dataset.py
-import numpy as np
-import cv2
-import os
-from torchvision import transforms
-from PIL import Image
-import random
-import sys
-sys.path.append("..")
-import util.image_processing as impro
-from util import util,mosaic
-import datetime
-import shutil
-mask_dir = '/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/mask'
-img_dir ='/media/hypo/Project/MyProject/DeepMosaics/DeepMosaics/train/add/datasets/av/origin_image'
-output_dir = './datasets_img'
-util.makedirs(output_dir)
-HD = True # if false make dataset for pix2pix, if Ture for pix2pix_HD
-MASK = True # if True, output mask,too
-OUT_SIZE = 256
-FOLD_NUM = 2
-Bounding = False
-if HD:
-    train_A_path = os.path.join(output_dir,'train_A')
-    train_B_path = os.path.join(output_dir,'train_B')
-    util.makedirs(train_A_path)
-    util.makedirs(train_B_path)
-else:
-    train_path = os.path.join(output_dir,'train')
-    util.makedirs(train_path)
-if MASK:
-    mask_path = os.path.join(output_dir,'mask')
-    util.makedirs(mask_path)
-mask_names = os.listdir(mask_dir)
-img_names = os.listdir(img_dir)
-mask_names.sort()
-img_names.sort()
-print('Find images:',len(img_names))
-cnt = 0
-for fold in range(FOLD_NUM):
-    for img_name,mask_name in zip(img_names,mask_names):
-        try:
-            img = impro.imread(os.path.join(img_dir,img_name))
-            mask = impro.imread(os.path.join(mask_dir,mask_name),'gray')
-            mask = impro.resize_like(mask, img)
-            x,y,size,area = impro.boundingSquare(mask, 1.5)
-            if area > 100:
-                if Bounding:
-                    img = impro.resize(img[y-size:y+size,x-size:x+size],OUT_SIZE) 
-                    mask =  impro.resize(mask[y-size:y+size,x-size:x+size],OUT_SIZE)
-                img_mosaic = mosaic.addmosaic_random(img, mask)
-                if HD:
-                    cv2.imwrite(os.path.join(train_A_path,'%05d' % cnt+'.jpg'), img_mosaic)
-                    cv2.imwrite(os.path.join(train_B_path,'%05d' % cnt+'.jpg'), img)
-                else:
-                    merge_img = impro.makedataset(img_mosaic, img)
-                    cv2.imwrite(os.path.join(train_path,'%05d' % cnt+'.jpg'), merge_img)
-                if MASK:
-                    cv2.imwrite(os.path.join(mask_path,'%05d' % cnt+'.png'), mask)
-                print("Processing:",img_name," ","Remain:",len(img_names)*FOLD_NUM-cnt)
-        except Exception as e:
-            print(img_name,e)
-        cnt += 1
--- a/make_datasets/use_irregular_holes_make_dataset.py
+++ b/make_datasets/use_irregular_holes_make_dataset.py
-import numpy as np
-import cv2
-import os
-from torchvision import transforms
-from PIL import Image
-import random
-import sys
-sys.path.append("..")
-import util.image_processing as impro
-from util import util,mosaic
-import datetime
-ir_mask_path = './Irregular_Holes_mask'
-img_dir ='/media/hypo/Hypoyun/Datasets/other/face512' 
-MOD = 'mosaic' #HD | pix2pix | mosaic
-MASK = False # if True, output mask,too
-BOUNDING = True # if true the mosaic size will be more big
-suffix = '_1'
-output_dir = os.path.join('./datasets_img',MOD)
-util.makedirs(output_dir)
-if MOD == 'HD':
-    train_A_path = os.path.join(output_dir,'train_A')
-    train_B_path = os.path.join(output_dir,'train_B')
-    util.makedirs(train_A_path)
-    util.makedirs(train_B_path)
-elif MOD == 'pix2pix':
-    train_path = os.path.join(output_dir,'train')
-    util.makedirs(train_path)
-elif MOD == 'mosaic':
-    ori_path = os.path.join(output_dir,'ori')
-    mosaic_path = os.path.join(output_dir,'mosaic')
-    mask_path = os.path.join(output_dir,'mask')
-    util.makedirs(ori_path)
-    util.makedirs(mosaic_path)
-    util.makedirs(mask_path)
-if MASK:
-    mask_path = os.path.join(output_dir,'mask')
-    util.makedirs(mask_path)
-transform_mask = transforms.Compose([
-     transforms.RandomResizedCrop(size=512, scale=(0.5,1)),
-     transforms.RandomHorizontalFlip(),
- ])
-transform_img = transforms.Compose([
-     transforms.Resize(512),
-     transforms.RandomCrop(512)
- ])
-mask_names = os.listdir(ir_mask_path)
-img_paths = util.Traversal(img_dir)
-img_paths = util.is_imgs(img_paths)
-print('Find images:',len(img_paths))
-for i,img_path in enumerate(img_paths,1):
-    try:        
-        img = Image.open(img_path)
-        img = transform_img(img)
-        img = np.array(img)
-        img = img[...,::-1]
-        if BOUNDING:
-            mosaic_area = 0
-            while mosaic_area < 16384:
-                mask = Image.open(os.path.join(ir_mask_path,random.choices(mask_names)[0]))
-                mask = transform_mask(mask)
-                mask = np.array(mask)
-                mosaic_area = impro.mask_area(mask)
-            mosaic_img = mosaic.addmosaic_random(img, mask,'bounding') 
-        else:
-            mask = Image.open(os.path.join(ir_mask_path,random.choices(mask_names)[0]))
-            mask = transform_mask(mask)
-            mask = np.array(mask)
-            mosaic_img = mosaic.addmosaic_random(img, mask)
-        if MOD == 'HD':#[128:384,128:384,:] --->256
-            cv2.imwrite(os.path.join(train_A_path,'%05d' % i+suffix+'.jpg'), mosaic_img)
-            cv2.imwrite(os.path.join(train_B_path,'%05d' % i+suffix+'.jpg'), img)
-            if MASK:
-                cv2.imwrite(os.path.join(mask_path,'%05d' % i+suffix+'.png'), mask)
-        elif MOD == 'pix2pix':
-            merge_img = impro.makedataset(mosaic_img, img)
-            cv2.imwrite(os.path.join(train_path,'%05d' % i+suffix+'.jpg'), merge_img)
-        elif MOD == 'mosaic':
-            cv2.imwrite(os.path.join(mosaic_path,'%05d' % i+suffix+'.jpg'), mosaic_img)
-            cv2.imwrite(os.path.join(ori_path,'%05d' % i+suffix+'.jpg'), img)
-            cv2.imwrite(os.path.join(mask_path,'%05d' % i+suffix+'.png'), mask)
-        print('\r','Proc/all:'+str(i)+'/'+str(len(img_paths)),util.get_bar(100*i/len(img_paths),num=40),end='')
-    except Exception as e:
-        print(img_path,e)
--- a/models/BiSeNet_model.py
+++ b/models/BiSeNet_model.py
+# This code is cloned from https://github.com/ooooverflow/BiSeNet
+import torch.nn as nn
+import torch
+import torch.nn.functional as F
+from . import components
+import warnings
+warnings.filterwarnings(action='ignore')
+def flatten(tensor):
+    """Move the channel axis to the front and merge every other axis.
+    Shape example:
+       (N, C, D, H, W) -> (C, N * D * H * W)
+    """
+    num_channels = tensor.size(1)
+    # Swap axes 0 and 1; the trailing axes keep their original order.
+    trailing_axes = list(range(2, tensor.dim()))
+    channel_first = tensor.permute(1, 0, *trailing_axes)
+    # permute() yields a non-contiguous view, so copy before calling view().
+    return channel_first.contiguous().view(num_channels, -1)
+class DiceLoss(nn.Module):
+    """Soft Dice loss, averaged over channels.
+    For every channel c the Dice coefficient is
+        dice_c = (2 * sum(p_c * t_c) + eps) / (sum(p_c) + sum(t_c) + eps)
+    where p is the channel-wise softmax of the network output and t is the
+    target. The loss is 1 - mean_c(dice_c), so a perfect match gives 0.
+    """
+    def __init__(self):
+        super().__init__()
+        # Smoothing term added to numerator and denominator of the Dice ratio.
+        self.epsilon = 1e-5
+    def forward(self, output, target):
+        """Return the scalar Dice loss.
+        Args:
+            output: raw scores; softmax is applied over dim 1 here.
+            target: tensor with exactly the same shape as ``output``.
+        Raises:
+            AssertionError: if the two shapes differ.
+        """
+        assert output.size() == target.size(), "'input' and 'target' must have the same shape"
+        output = F.softmax(output, dim=1)
+        # Both become (C, everything else) so the sums below are per channel.
+        output = flatten(output)
+        target = flatten(target)
+        intersect = (output * target).sum(-1)
+        denominator = (output + target).sum(-1)
+        # The factor 2 makes a perfect overlap score 1; without it the ratio
+        # tops out at 0.5 and the loss could never go below 0.5.
+        dice = (2. * intersect + self.epsilon) / (denominator + self.epsilon)
+        return 1 - torch.mean(dice)
+class resnet18(torch.nn.Module):
+    """Context-path backbone built on components.resnet18.
+    The stem and the four residual stages of the backbone are re-exposed as
+    attributes of this module; forward() returns the outputs of the last two
+    stages together with a spatially averaged copy of the last one.
+    """
+    def __init__(self, pretrained=True):
+        super().__init__()
+        backbone = components.resnet18(pretrained=pretrained)
+        self.features = backbone
+        self.conv1, self.bn1, self.relu = backbone.conv1, backbone.bn1, backbone.relu
+        self.maxpool1 = backbone.maxpool
+        for stage_name in ('layer1', 'layer2', 'layer3', 'layer4'):
+            setattr(self, stage_name, getattr(backbone, stage_name))
+    def forward(self, input):
+        """Return (feature3, feature4, tail) for a batch of images."""
+        stem = self.maxpool1(self.relu(self.bn1(self.conv1(input))))
+        stage1 = self.layer1(stem)        # 1 / 4 of the input resolution
+        stage2 = self.layer2(stage1)      # 1 / 8
+        feature3 = self.layer3(stage2)    # 1 / 16
+        feature4 = self.layer4(feature3)  # 1 / 32
+        # Global average pooling: collapse width (dim 3), then height (dim 2).
+        tail = torch.mean(torch.mean(feature4, 3, keepdim=True), 2, keepdim=True)
+        return feature3, feature4, tail
+class resnet101(torch.nn.Module):
+    """Context-path backbone built on components.resnet101.
+    The stem and the four residual stages of the backbone are re-exposed as
+    attributes of this module; forward() returns the outputs of the last two
+    stages together with a spatially averaged copy of the last one.
+    """
+    def __init__(self, pretrained=True):
+        super().__init__()
+        backbone = components.resnet101(pretrained=pretrained)
+        self.features = backbone
+        self.conv1, self.bn1, self.relu = backbone.conv1, backbone.bn1, backbone.relu
+        self.maxpool1 = backbone.maxpool
+        for stage_name in ('layer1', 'layer2', 'layer3', 'layer4'):
+            setattr(self, stage_name, getattr(backbone, stage_name))
+    def forward(self, input):
+        """Return (feature3, feature4, tail) for a batch of images."""
+        stem = self.maxpool1(self.relu(self.bn1(self.conv1(input))))
+        stage1 = self.layer1(stem)        # 1 / 4 of the input resolution
+        stage2 = self.layer2(stage1)      # 1 / 8
+        feature3 = self.layer3(stage2)    # 1 / 16
+        feature4 = self.layer4(feature3)  # 1 / 32
+        # Global average pooling: collapse width (dim 3), then height (dim 2).
+        tail = torch.mean(torch.mean(feature4, 3, keepdim=True), 2, keepdim=True)
+        return feature3, feature4, tail
+def build_contextpath(name,pretrained):
+    """Instantiate the context-path backbone called ``name``.
+    Only the requested backbone is constructed, so asking for 'resnet18'
+    no longer builds a ResNet-101 (and loads its pretrained checkpoint)
+    as a side effect.
+    Args:
+        name: 'resnet18' or 'resnet101'.
+        pretrained: forwarded to the backbone constructor.
+    Raises:
+        KeyError: if ``name`` is not one of the supported backbones.
+    """
+    backbones = {
+        'resnet18': resnet18,
+        'resnet101': resnet101,
+    }
+    return backbones[name](pretrained=pretrained)
+class ConvBlock(torch.nn.Module):
+    """Bias-free Conv2d followed by BatchNorm2d and ReLU.
+    With the default kernel_size=3, stride=2, padding=1 the block halves the
+    spatial resolution of its input.
+    """
+    def __init__(self, in_channels, out_channels, kernel_size=3, stride=2,padding=1):
+        super().__init__()
+        conv_options = dict(kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
+        self.conv1 = nn.Conv2d(in_channels, out_channels, **conv_options)
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU()
+    def forward(self, input):
+        normalized = self.bn(self.conv1(input))
+        return self.relu(normalized)
+class Spatial_path(torch.nn.Module):
+    """Three stacked ConvBlocks mapping 3 -> 64 -> 128 -> 256 channels.
+    Each ConvBlock uses its default stride of 2.
+    """
+    def __init__(self):
+        super().__init__()
+        widths = (3, 64, 128, 256)
+        # Registers convblock1, convblock2, convblock3 in that order.
+        for index, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
+            setattr(self, 'convblock%d' % index, ConvBlock(in_channels=c_in, out_channels=c_out))
+    def forward(self, input):
+        return self.convblock3(self.convblock2(self.convblock1(input)))
+class AttentionRefinementModule(torch.nn.Module):
+    """Re-weight the channels of a feature map with a learned gate.
+    The gate is sigmoid(conv1x1(global_average_pool(input))) and is
+    multiplied back onto the input, so the conv output must be broadcastable
+    against the input channels.
+    """
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
+        # Created but not applied in forward(); it still contributes
+        # parameters and buffers to the module's state_dict.
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.sigmoid = nn.Sigmoid()
+        self.in_channels = in_channels
+        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
+    def forward(self, input):
+        # Squeeze every channel to a single value.
+        pooled = self.avgpool(input)
+        assert self.in_channels == pooled.size(1), 'in_channels and out_channels should all be {}'.format(pooled.size(1))
+        gate = self.sigmoid(self.conv(pooled))
+        # Broadcast the (N, C, 1, 1) gate over the spatial dimensions.
+        return torch.mul(input, gate)
+class FeatureFusionModule(torch.nn.Module):
+    """Fuse two feature maps and re-weight the result channel-wise.
+    The inputs are concatenated along the channel axis, reduced to
+    ``num_classes`` channels by a stride-1 ConvBlock, and the result is
+    scaled by a learned per-channel gate and added back to itself.
+    Expected ``in_channels`` as used by BiSeNet in this file:
+        resnet101: 3328 = 256 (spatial path) + 1024 + 2048 (context path)
+        resnet18:  1024 = 256 (spatial path) + 256 + 512 (context path)
+    """
+    def __init__(self, num_classes, in_channels):
+        super().__init__()
+        self.in_channels = in_channels
+        self.convblock = ConvBlock(in_channels=self.in_channels, out_channels=num_classes, stride=1)
+        self.conv1 = nn.Conv2d(num_classes, num_classes, kernel_size=1)
+        self.relu = nn.ReLU()
+        self.conv2 = nn.Conv2d(num_classes, num_classes, kernel_size=1)
+        self.sigmoid = nn.Sigmoid()
+        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
+    def forward(self, input_1, input_2):
+        fused = torch.cat((input_1, input_2), dim=1)
+        assert self.in_channels == fused.size(1), 'in_channels of ConvBlock should be {}'.format(fused.size(1))
+        feature = self.convblock(fused)
+        # Per-channel gate computed from the globally pooled feature.
+        attention = self.avgpool(feature)
+        attention = self.relu(self.conv1(attention))
+        attention = self.sigmoid(self.conv2(attention))
+        # feature * attention + feature
+        return torch.add(torch.mul(feature, attention), feature)
+class BiSeNet(torch.nn.Module):
+    """Two-branch segmentation network (spatial path + context path).
+    Args:
+        num_classes: number of output channels.
+        context_path: backbone name, 'resnet18' or 'resnet101'.
+        train_flag: when truthy the backbone is built with pretrained=True.
+    forward() returns sigmoid(result) in eval mode; in training mode it
+    returns a 3-tuple that additionally holds the sigmoid of the two
+    auxiliary supervision heads.
+    """
+    def __init__(self, num_classes, context_path, train_flag=True):
+        super().__init__()
+        # backbone name -> (feature3 channels, feature4 channels, fusion input channels)
+        channel_table = {
+            'resnet101': (1024, 2048, 3328),
+            'resnet18': (256, 512, 1024),
+        }
+        # Spatial path
+        self.saptial_path = Spatial_path()
+        self.sigmoid = nn.Sigmoid()
+        # Context path
+        self.context_path = build_contextpath(name=context_path,pretrained=bool(train_flag))
+        if context_path in channel_table:
+            mid_channels, deep_channels, fused_channels = channel_table[context_path]
+            # Attention refinement on the two backbone outputs
+            self.attention_refinement_module1 = AttentionRefinementModule(mid_channels, mid_channels)
+            self.attention_refinement_module2 = AttentionRefinementModule(deep_channels, deep_channels)
+            # Auxiliary supervision heads
+            self.supervision1 = nn.Conv2d(in_channels=mid_channels, out_channels=num_classes, kernel_size=1)
+            self.supervision2 = nn.Conv2d(in_channels=deep_channels, out_channels=num_classes, kernel_size=1)
+            # Fusion of spatial and context features
+            self.feature_fusion_module = FeatureFusionModule(num_classes, fused_channels)
+        else:
+            print('Error: unspport context_path network \n')
+        # Final 1x1 convolution
+        self.conv = nn.Conv2d(in_channels=num_classes, out_channels=num_classes, kernel_size=1)
+        self.init_weight()
+        # Every sub-module except the context-path backbone.
+        self.mul_lr = [
+            self.saptial_path,
+            self.attention_refinement_module1,
+            self.attention_refinement_module2,
+            self.supervision1,
+            self.supervision2,
+            self.feature_fusion_module,
+            self.conv,
+        ]
+    def init_weight(self):
+        """Re-initialise every Conv2d/BatchNorm2d outside the context path."""
+        for name, module in self.named_modules():
+            if 'context_path' in name:
+                continue
+            if isinstance(module, nn.Conv2d):
+                nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
+            elif isinstance(module, nn.BatchNorm2d):
+                module.eps = 1e-5
+                module.momentum = 0.1
+                nn.init.constant_(module.weight, 1)
+                nn.init.constant_(module.bias, 0)
+    def forward(self, input):
+        # Spatial branch
+        sx = self.saptial_path(input)
+        spatial_size = sx.size()[-2:]
+        # Context branch: refine both backbone outputs, scale the deeper one by the pooled tail
+        cx1, cx2, tail = self.context_path(input)
+        cx1 = self.attention_refinement_module1(cx1)
+        cx2 = torch.mul(self.attention_refinement_module2(cx2), tail)
+        # Bring both context features to the spatial branch resolution
+        cx1 = F.interpolate(cx1, size=spatial_size, mode='bilinear')
+        cx2 = F.interpolate(cx2, size=spatial_size, mode='bilinear')
+        cx = torch.cat((cx1, cx2), dim=1)
+        # Fuse, upsample by 8 and apply the final convolution
+        result = self.feature_fusion_module(sx, cx)
+        result = F.interpolate(result, scale_factor=8, mode='bilinear')
+        result = self.conv(result)
+        if not self.training:
+            return self.sigmoid(result)
+        # Auxiliary heads are only evaluated in training mode.
+        input_size = input.size()[-2:]
+        cx1_sup = F.interpolate(self.supervision1(cx1), size=input_size, mode='bilinear')
+        cx2_sup = F.interpolate(self.supervision2(cx2), size=input_size, mode='bilinear')
+        return self.sigmoid(result), self.sigmoid(cx1_sup), self.sigmoid(cx2_sup)
\ No newline at end of file
--- a/models/__init__.py
+++ b/models/__init__.py
-from .pix2pix_model import *
-from .unet_model import UNet
--- a/models/components.py
+++ b/models/components.py
+import torch.nn as nn
+import torch.utils.model_zoo as model_zoo
+# Names exported by `from components import *`.
+__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
+           'resnet152']
+# Checkpoint URL for each architecture; read by the resnetXX() factory
+# functions in this module when they are called with pretrained=True.
+model_urls = {
+    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+}
+def conv3x3(in_planes, out_planes, stride=1):
+    """Return a bias-free 3x3 Conv2d with padding 1."""
+    return nn.Conv2d(
+        in_channels=in_planes,
+        out_channels=out_planes,
+        kernel_size=3,
+        stride=stride,
+        padding=1,
+        bias=False,
+    )
+def conv1x1(in_planes, out_planes, stride=1):
+    """Return a bias-free 1x1 Conv2d."""
+    return nn.Conv2d(
+        in_channels=in_planes,
+        out_channels=out_planes,
+        kernel_size=1,
+        stride=stride,
+        bias=False,
+    )
+class BasicBlock(nn.Module):
+    """Residual block made of two 3x3 convolutions.
+    Args:
+        inplanes: channels of the block input.
+        planes: channels produced by both convolutions.
+        stride: stride of the first convolution.
+        downsample: optional module applied to the input before the
+            residual addition; when None the input is added unchanged.
+        norm_layer: normalisation layer factory, nn.BatchNorm2d if None.
+    """
+    expansion = 1
+    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
+        super(BasicBlock, self).__init__()
+        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
+        # When stride != 1, conv1 (and the downsample module, if given) reduce the resolution.
+        self.conv1 = conv3x3(inplanes, planes, stride)
+        self.bn1 = make_norm(planes)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = make_norm(planes)
+        self.downsample = downsample
+        self.stride = stride
+    def forward(self, x):
+        out = self.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+        shortcut = x if self.downsample is None else self.downsample(x)
+        out += shortcut
+        return self.relu(out)
+class Bottleneck(nn.Module):
+    """Residual block: 1x1 -> 3x3 -> 1x1 convolutions.
+    The last convolution outputs ``planes * expansion`` channels.
+    Args:
+        inplanes: channels of the block input.
+        planes: channels of the two inner convolutions.
+        stride: stride of the 3x3 convolution.
+        downsample: optional module applied to the input before the
+            residual addition; when None the input is added unchanged.
+        norm_layer: normalisation layer factory, nn.BatchNorm2d if None.
+    """
+    expansion = 4
+    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
+        super(Bottleneck, self).__init__()
+        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
+        out_planes = planes * self.expansion
+        # When stride != 1, conv2 (and the downsample module, if given) reduce the resolution.
+        self.conv1 = conv1x1(inplanes, planes)
+        self.bn1 = make_norm(planes)
+        self.conv2 = conv3x3(planes, planes, stride)
+        self.bn2 = make_norm(planes)
+        self.conv3 = conv1x1(planes, out_planes)
+        self.bn3 = make_norm(out_planes)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.stride = stride
+    def forward(self, x):
+        out = self.relu(self.bn1(self.conv1(x)))
+        out = self.relu(self.bn2(self.conv2(out)))
+        out = self.bn3(self.conv3(out))
+        shortcut = x if self.downsample is None else self.downsample(x)
+        out += shortcut
+        return self.relu(out)
+class ResNet(nn.Module):
+    """ResNet classifier: 7x7 stem, four residual stages, pooling and a linear head.
+    Args:
+        block: residual block class (BasicBlock or Bottleneck).
+        layers: sequence with the number of blocks in each of the four stages.
+        num_classes: output size of the final linear layer.
+        zero_init_residual: zero the weight of the last norm layer of every block.
+        norm_layer: normalisation layer factory, nn.BatchNorm2d if None.
+    """
+    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, norm_layer=None):
+        super(ResNet, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        self.inplanes = 64
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
+                               bias=False)
+        self.bn1 = norm_layer(64)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        # (planes, stride) of layer1 .. layer4
+        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
+        for index, (planes, stride) in enumerate(stage_specs):
+            stage = self._make_layer(block, planes, layers[index], stride=stride, norm_layer=norm_layer)
+            setattr(self, 'layer%d' % (index + 1), stage)
+        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+        self.fc = nn.Linear(512 * block.expansion, num_classes)
+        for module in self.modules():
+            if isinstance(module, nn.Conv2d):
+                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(module.weight, 1)
+                nn.init.constant_(module.bias, 0)
+        # Zero-initialize the last BN in each residual branch,
+        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+        if zero_init_residual:
+            for module in self.modules():
+                if isinstance(module, Bottleneck):
+                    nn.init.constant_(module.bn3.weight, 0)
+                elif isinstance(module, BasicBlock):
+                    nn.init.constant_(module.bn2.weight, 0)
+    def _make_layer(self, block, planes, blocks, stride=1, norm_layer=None):
+        """Build one stage of ``blocks`` residual blocks and update self.inplanes."""
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        out_planes = planes * block.expansion
+        # The first block needs a projection shortcut whenever it changes
+        # the resolution or the channel count.
+        if stride != 1 or self.inplanes != out_planes:
+            downsample = nn.Sequential(
+                conv1x1(self.inplanes, out_planes, stride),
+                norm_layer(out_planes),
+            )
+        else:
+            downsample = None
+        stage = [block(self.inplanes, planes, stride, downsample, norm_layer)]
+        self.inplanes = out_planes
+        for _ in range(1, blocks):
+            stage.append(block(self.inplanes, planes, norm_layer=norm_layer))
+        return nn.Sequential(*stage)
+    def forward(self, x):
+        pipeline = (
+            self.conv1, self.bn1, self.relu, self.maxpool,
+            self.layer1, self.layer2, self.layer3, self.layer4,
+            self.avgpool,
+        )
+        for step in pipeline:
+            x = step(x)
+        # Flatten everything but the batch axis for the linear head.
+        x = x.view(x.size(0), -1)
+        return self.fc(x)
+def resnet18(pretrained=False, **kwargs):
+    """Build a ResNet-18 (BasicBlock, stage depths [2, 2, 2, 2]).
+    Args:
+        pretrained (bool): If True, load the checkpoint at model_urls['resnet18'] into the model
+        **kwargs: forwarded to the ResNet constructor
+    """
+    net = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
+    if not pretrained:
+        return net
+    state_dict = model_zoo.load_url(model_urls['resnet18'])
+    net.load_state_dict(state_dict)
+    return net
+def resnet34(pretrained=False, **kwargs):
+    """Build a ResNet-34 (BasicBlock, stage depths [3, 4, 6, 3]).
+    Args:
+        pretrained (bool): If True, load the checkpoint at model_urls['resnet34'] into the model
+        **kwargs: forwarded to the ResNet constructor
+    """
+    net = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
+    if not pretrained:
+        return net
+    state_dict = model_zoo.load_url(model_urls['resnet34'])
+    net.load_state_dict(state_dict)
+    return net
+def resnet50(pretrained=False, **kwargs):
+    """Build a ResNet-50 (Bottleneck, stage depths [3, 4, 6, 3]).
+    Args:
+        pretrained (bool): If True, load the checkpoint at model_urls['resnet50'] into the model
+        **kwargs: forwarded to the ResNet constructor
+    """
+    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
+    if not pretrained:
+        return net
+    state_dict = model_zoo.load_url(model_urls['resnet50'])
+    net.load_state_dict(state_dict)
+    return net
+def resnet101(pretrained=False, **kwargs):
+    """Build a ResNet-101 (Bottleneck, stage depths [3, 4, 23, 3]).
+    Args:
+        pretrained (bool): If True, load the checkpoint at model_urls['resnet101'] into the model
+        **kwargs: forwarded to the ResNet constructor
+    """
+    net = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
+    if not pretrained:
+        return net
+    state_dict = model_zoo.load_url(model_urls['resnet101'])
+    net.load_state_dict(state_dict)
+    return net
+def resnet152(pretrained=False, **kwargs):
+    """Build a ResNet-152 (Bottleneck, stage depths [3, 8, 36, 3]).
+    Args:
+        pretrained (bool): If True, load the checkpoint at model_urls['resnet152'] into the model
+        **kwargs: forwarded to the ResNet constructor
+    """
+    net = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
+    if not pretrained:
+        return net
+    state_dict = model_zoo.load_url(model_urls['resnet152'])
+    net.load_state_dict(state_dict)
+    return net
\ No newline at end of file
--- a/models/loadmodel.py
+++ b/models/loadmodel.py
@@ -4,6 +4,7 @@ from .pix2pixHD_model import define_G as define_G_HD
 from .unet_model import UNet
 from .video_model import MosaicNet
 from .videoHD_model import MosaicNet as MosaicNet_HD
+from .BiSeNet_model import BiSeNet
 def show_paramsnumber(net,netname='net'):
    parameters = sum(param.numel() for param in net.parameters())
@@ -75,21 +76,35 @@ def video(opt):
        netG.cuda()
    return netG
+def bisenet(opt,type='roi'):
-def unet_clean(opt):
+    '''
-    net = UNet(n_channels = 3, n_classes = 1)
+    Build a single-class BiSeNet (resnet18 context path, train_flag=False), load
+    its weights, put it in eval mode and move it to the GPU when opt.use_gpu is set.
+    type: which checkpoint to load
+        'roi'    -> opt.model_path
+        'mosaic' -> opt.mosaic_position_model_path
+    Raises ValueError for any other type, so that a network with freshly
+    initialised (untrained) weights is never returned silently.
+    '''
+    net = BiSeNet(num_classes=1, context_path='resnet18',train_flag=False)
    show_paramsnumber(net,'segment')
-    net.load_state_dict(torch.load(opt.mosaic_position_model_path))
+    if type == 'roi':
+        net.load_state_dict(torch.load(opt.model_path))
+    elif type == 'mosaic':
+        net.load_state_dict(torch.load(opt.mosaic_position_model_path))
+    else:
+        # Previously an unknown type skipped both branches and no weights were loaded.
+        raise ValueError("bisenet: type must be 'roi' or 'mosaic', got %r" % (type,))
    net.eval()
    if opt.use_gpu:
        net.cuda()
    return net
-def unet(opt):
+# def unet_clean(opt):
-    net = UNet(n_channels = 3, n_classes = 1)
+#     net = UNet(n_channels = 3, n_classes = 1)
-    show_paramsnumber(net,'segment')
+#     show_paramsnumber(net,'segment')
-    net.load_state_dict(torch.load(opt.model_path))
+#     net.load_state_dict(torch.load(opt.mosaic_position_model_path))
-    net.eval()
+#     net.eval()
-    if opt.use_gpu:
+#     if opt.use_gpu:
-        net.cuda()
+#         net.cuda()
-    return net
+#     return net
+# def unet(opt):
+#     net = UNet(n_channels = 3, n_classes = 1)
+#     show_paramsnumber(net,'segment')
+#     net.load_state_dict(torch.load(opt.model_path))
+#     net.eval()
+#     if opt.use_gpu:
+#         net.cuda()
+#     return net
--- a/models/runmodel.py
+++ b/models/runmodel.py
@@ -7,7 +7,7 @@ from util import data
 import torch
 import numpy as np
-def run_unet(img,net,size = 224,use_gpu = True):
+def run_segment(img,net,size = 360,use_gpu = True):
    img = impro.resize(img,size)
    img = data.im2tensor(img,use_gpu = use_gpu,  bgr2rgb = False,use_transform = False , is0_1 = True)
    mask = net(img)
@@ -60,18 +60,26 @@ def run_styletransfer(opt, net, img):
    img = data.tensor2im(img)
    return img
-def get_ROI_position(img,net,opt):
+def get_ROI_position(img,net,opt,keepsize=True):
+    # Segment the region of interest in img with net and measure its bounding square.
+    # Returns (mask, x, y, halfsize, area); x, y, halfsize and area are the values
+    # impro.boundingSquare reports for the thresholded mask.
+    # NOTE(review): the previous version returned (mask, x, y, area); callers that
+    # unpack four values must be updated - confirm all call sites.
-    mask = run_unet(img,net,size=224,use_gpu = opt.use_gpu)
+    mask = run_segment(img,net,size=360,use_gpu = opt.use_gpu)
    mask = impro.mask_threshold(mask,opt.mask_extend,opt.mask_threshold)
+    if keepsize:
+        # Resize the mask with img as the reference before measuring it, so the
+        # returned values are presumably in img pixels - TODO confirm against
+        # impro.resize_like.
+        mask = impro.resize_like(mask, img)
    x,y,halfsize,area = impro.boundingSquare(mask, 1)
-    return mask,x,y,area
+    return mask,x,y,halfsize,area
-def get_mosaic_position(img_origin,net_mosaic_pos,opt,threshold = 128 ):
+def get_mosaic_position(img_origin,net_mosaic_pos,opt):
+    # Locate the mosaic region of img_origin using net_mosaic_pos.
+    # Returns (x, y, size, mask): x, y and size are rescaled and clipped against the
+    # dimensions of img_origin; mask is returned as produced by the threshold / ROI
+    # steps (it is not rescaled here).
+    # The threshold now comes from opt.mask_threshold instead of a `threshold` argument.
-    mask = run_unet(img_origin,net_mosaic_pos,size=224,use_gpu = opt.use_gpu)
+    h,w = img_origin.shape[:2]
-    mask = impro.mask_threshold(mask,30,threshold)
+    mask = run_segment(img_origin,net_mosaic_pos,size=360,use_gpu = opt.use_gpu)
+    # mask_1 = mask.copy()
+    # ex_mun is derived from the shorter side of the original image (1/20 of it).
+    mask = impro.mask_threshold(mask,ex_mun=int(min(h,w)/20),threshold=opt.mask_threshold)
    if not opt.all_mosaic_area:
        mask = impro.find_mostlikely_ROI(mask)
    x,y,size,area = impro.boundingSquare(mask,Ex_mul=opt.ex_mult)
-    rat = min(img_origin.shape[:2])/224.0
+    # Location fix: scale the square from the segmentation size (360) back to the
+    # original image. Assumes run_segment resizes the shorter side to 360 - TODO confirm.
+    rat = min(h,w)/360.0
    x,y,size = int(rat*x),int(rat*y),int(rat*size)
+    # Keep x, y inside the image and limit size so x+size <= w and y+size <= h.
+    x,y = np.clip(x, 0, w),np.clip(y, 0, h)
+    size = np.clip(size, 0, min(w-x,h-y))
+    # print(x,y,size)
    return x,y,size,mask
\ No newline at end of file
--- a/models/unet_model.py
+++ b/models/unet_model.py
 # This code clone from https://github.com/milesial/Pytorch-UNet
 # LICENSE file : https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE
-# full assembly of the sub-parts to form the complete net
+import torch
+import torch.nn as nn
 import torch.nn.functional as F
-from .unet_parts import *
+class double_conv(nn.Module):
+    '''Two stacked (3x3 conv => BatchNorm => ReLU) stages.
+    The first stage maps in_ch to out_ch channels, the second keeps out_ch.
+    '''
+    def __init__(self, in_ch, out_ch):
+        super().__init__()
+        stages = []
+        # Stage one consumes in_ch channels, stage two consumes out_ch.
+        for src_ch in (in_ch, out_ch):
+            stages += [
+                nn.Conv2d(src_ch, out_ch, 3, padding=1),
+                nn.BatchNorm2d(out_ch),
+                nn.ReLU(inplace=True),
+            ]
+        self.conv = nn.Sequential(*stages)
+    def forward(self, x):
+        return self.conv(x)
+class inconv(nn.Module):
+    '''Input stage of the U-Net: a single double_conv from in_ch to out_ch.'''
+    def __init__(self, in_ch, out_ch):
+        super().__init__()
+        self.conv = double_conv(in_ch, out_ch)
+    def forward(self, x):
+        return self.conv(x)
+class down(nn.Module):
+    '''Encoder stage: 2x2 max-pooling followed by a double_conv from in_ch to out_ch.'''
+    def __init__(self, in_ch, out_ch):
+        super().__init__()
+        pool = nn.MaxPool2d(2)
+        convs = double_conv(in_ch, out_ch)
+        self.mpconv = nn.Sequential(pool, convs)
+    def forward(self, x):
+        return self.mpconv(x)
+class Upsample(nn.Module):
+    '''Bilinear upsampling (align_corners=True) by a fixed scale factor, as a module.'''
+    def __init__(self,  scale_factor):
+        super().__init__()
+        self.scale_factor = scale_factor
+    def forward(self, x):
+        resize_args = dict(scale_factor=self.scale_factor, mode='bilinear', align_corners=True)
+        return F.interpolate(x, **resize_args)
+class up(nn.Module):
+    '''Decoder stage: upsample x1 by 2, pad it to the size of x2, concatenate, then double_conv.
+    in_ch is the channel count of the concatenated tensor; out_ch is what double_conv produces.
+    '''
+    def __init__(self, in_ch, out_ch, bilinear=True):
+        super().__init__()
+        # Fixed bilinear interpolation is the default; the original author notes that
+        # a learned upsampling would be nice but needs too much memory on their machine.
+        self.up = (Upsample(scale_factor=2) if bilinear
+                   else nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2))
+        self.conv = double_conv(in_ch, out_ch)
+    def forward(self, x1, x2):
+        upsampled = self.up(x1)
+        # dims 2 and 3 are height and width (each sample is CHW)
+        gap_h = x2.size(2) - upsampled.size(2)
+        gap_w = x2.size(3) - upsampled.size(3)
+        # F.pad takes (left, right, top, bottom); an odd remainder goes right / bottom.
+        left, top = gap_w // 2, gap_h // 2
+        upsampled = F.pad(upsampled, (left, gap_w - left, top, gap_h - top))
+        # for padding issues, see 
+        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
+        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
+        merged = torch.cat([x2, upsampled], dim=1)
+        return self.conv(merged)
+class outconv(nn.Module):
+    '''Output head: 1x1 convolution from in_ch to out_ch followed by a sigmoid.'''
+    def __init__(self, in_ch, out_ch):
+        super().__init__()
+        projection = nn.Conv2d(in_ch, out_ch, 1)
+        self.conv = nn.Sequential(projection, nn.Sigmoid())
+    def forward(self, x):
+        return self.conv(x)
 class UNet(nn.Module):
    def __init__(self, n_channels, n_classes):

--- a/models/unet_parts.py
+++ b/models/unet_parts.py
-# This code clone from https://github.com/milesial/Pytorch-UNet
-# LICENSE file : https://github.com/milesial/Pytorch-UNet/blob/master/LICENSE
-# sub-parts of the U-Net model
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-class double_conv(nn.Module):
-    '''(conv => BN => ReLU) * 2'''
-    def __init__(self, in_ch, out_ch):
-        super(double_conv, self).__init__()
-        self.conv = nn.Sequential(
-            nn.Conv2d(in_ch, out_ch, 3, padding=1),
-            nn.BatchNorm2d(out_ch),
-            nn.ReLU(inplace=True),
-            nn.Conv2d(out_ch, out_ch, 3, padding=1),
-            nn.BatchNorm2d(out_ch),
-            nn.ReLU(inplace=True)
-        )
-    def forward(self, x):
-        x = self.conv(x)
-        return x
-class inconv(nn.Module):
-    def __init__(self, in_ch, out_ch):
-        super(inconv, self).__init__()
-        self.conv = double_conv(in_ch, out_ch)
-    def forward(self, x):
-        x = self.conv(x)
-        return x
-class down(nn.Module):
-    def __init__(self, in_ch, out_ch):
-        super(down, self).__init__()
-        self.mpconv = nn.Sequential(
-            nn.MaxPool2d(2),
-            double_conv(in_ch, out_ch)
-        )
-    def forward(self, x):
-        x = self.mpconv(x)
-        return x
-class Upsample(nn.Module):
-    def __init__(self,  scale_factor):
-        super(Upsample, self).__init__()
-        self.scale_factor = scale_factor
-    def forward(self, x):
-        return F.interpolate(x, scale_factor=self.scale_factor,mode='bilinear', align_corners=True)
-class up(nn.Module):
-    def __init__(self, in_ch, out_ch, bilinear=True):
-        super(up, self).__init__()
-        #  would be a nice idea if the upsampling could be learned too,
-        #  but my machine do not have enough memory to handle all those weights
-        if bilinear:
-            self.up = Upsample(scale_factor=2)
-        else:
-            self.up = nn.ConvTranspose2d(in_ch//2, in_ch//2, 2, stride=2)
-        self.conv = double_conv(in_ch, out_ch)
-    def forward(self, x1, x2):
-        x1 = self.up(x1)
-        # input is CHW
-        diffY = x2.size()[2] - x1.size()[2]
-        diffX = x2.size()[3] - x1.size()[3]
-        x1 = F.pad(x1, (diffX // 2, diffX - diffX//2,
-                        diffY // 2, diffY - diffY//2))
-        # for padding issues, see 
-        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
-        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
-        x = torch.cat([x2, x1], dim=1)
-        x = self.conv(x)
-        return x
-class outconv(nn.Module):
-    def __init__(self, in_ch, out_ch):
-        super(outconv, self).__init__()
-        self.conv = nn.Sequential(
-            nn.Conv2d(in_ch, out_ch, 1),
-            nn.Sigmoid()
-        )
-    def forward(self, x):
-        x = self.conv(x)
-        return x
--- a/models/videoHD_model.py
+++ b/models/videoHD_model.py
@@ -15,7 +15,7 @@ class encoder_2d(nn.Module):
        ### downsample
        for i in range(n_downsampling):
            mult = 2**i
-            model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1),
+            model += [nn.ReflectionPad2d(1),nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=0),
                      norm_layer(ngf * mult * 2), activation]
        self.model = nn.Sequential(*model)
@@ -39,16 +39,6 @@ class decoder_2d(nn.Module):
        ### upsample         
        for i in range(n_downsampling):
            mult = 2**(n_downsampling - i)
-            # if i%2 ==0:
-            #     model += [  nn.Upsample(scale_factor = 2, mode='nearest'),
-            #     nn.ReflectionPad2d(1),
-            #     nn.Conv2d(ngf * mult, int(ngf * mult / 2),kernel_size=3, stride=1, padding=0),
-            #     norm_layer(int(ngf * mult / 2)),
-            #     nn.ReLU(True)]
-            # else:
-            #     model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, output_padding=1),
-            #                norm_layer(int(ngf * mult / 2)), activation]
            # model += [  nn.Upsample(scale_factor = 2, mode='nearest'),
            # nn.ReflectionPad2d(1),

--- a/models/video_model.py
+++ b/models/video_model.py
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from .unet_parts import *
 from .pix2pix_model import *

--- a/train/add/train.py
+++ b/train/add/train.py
@@ -2,8 +2,10 @@ import sys
 import os
 import random
 import datetime
+import time
 import numpy as np
+from matplotlib import pyplot as plt
 import cv2
 import torch
@@ -11,137 +13,144 @@ import torch.backends.cudnn as cudnn
 import torch.nn as nn
 from torch import optim
-import sys
 sys.path.append("..")
 sys.path.append("../..")
+from cores import Options
 from util import mosaic,util,ffmpeg,filt,data
 from util import image_processing as impro
-from models import unet_model
+from models import unet_model,BiSeNet_model
-from matplotlib import pyplot as plt
-import torch.backends.cudnn as cudnn
+'''
-LR = 0.0002
+--------------------------Get options--------------------------
-EPOCHS = 100
+'''
-BATCHSIZE = 16
+opt = Options()
-LOADSIZE = 256
+opt.parser.add_argument('--gpu_id',type=int,default=0, help='')
-FINESIZE = 224
+opt.parser.add_argument('--lr',type=float,default=0.001, help='')
-CONTINUE = True
+opt.parser.add_argument('--finesize',type=int,default=360, help='')
-use_gpu = True
+opt.parser.add_argument('--loadsize',type=int,default=400, help='')
-SAVE_FRE = 1
+opt.parser.add_argument('--batchsize',type=int,default=8, help='')
-MAX_LOAD = 30000
+opt.parser.add_argument('--model',type=str,default='BiSeNet', help='BiSeNet or UNet')
+opt.parser.add_argument('--maxepoch',type=int,default=100, help='')
+opt.parser.add_argument('--savefreq',type=int,default=5, help='')
-dir_img = './datasets/face/origin_image/'
+opt.parser.add_argument('--maxload',type=int,default=1000000, help='')
-dir_mask = './datasets/face/mask/'
+opt.parser.add_argument('--continuetrain', action='store_true', help='')
-dir_checkpoint = 'checkpoints/face/'
+opt.parser.add_argument('--startepoch',type=int,default=0, help='')
+opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='')
+opt.parser.add_argument('--savename',type=str,default='face', help='')
+'''
+--------------------------Init--------------------------
+'''
+opt = opt.getparse()
+dir_img = os.path.join(opt.dataset,'origin_image')
+dir_mask = os.path.join(opt.dataset,'mask')
+dir_checkpoint = os.path.join('checkpoints/',opt.savename)
+util.makedirs(dir_checkpoint)
+util.writelog(os.path.join(dir_checkpoint,'loss.txt'), 
+              str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
+torch.cuda.set_device(opt.gpu_id)
 def Totensor(img,use_gpu=True):
+    # Convert a numpy array into a float torch tensor and move it to the GPU when
+    # opt.use_gpu is set.
+    # NOTE(review): after this change the use_gpu argument is no longer read (the
+    # module-level opt.use_gpu decides instead), and `size` below is never used.
    size=img.shape[0]
    img = torch.from_numpy(img).float()
-    if use_gpu:
+    if opt.use_gpu:
        img = img.cuda()
    return img
+def loadimage(imagepaths,maskpaths,opt,test_flag = False):
+    # Read one batch of image/mask files from disk and return them as tensors.
+    # imagepaths / maskpaths: parallel lists of file paths (one mask per image).
+    # Returns (images, masks): images is batch x 3 x finesize x finesize and masks is
+    # batch x 1 x finesize x finesize, both divided by 255.0.
-def Toinputshape(imgs,masks,finesize,test_flag = False):
+    batchsize = len(imagepaths)
-    batchsize = len(imgs)
+    images = np.zeros((batchsize,3,opt.finesize,opt.finesize), dtype=np.float32)
-    result_imgs=[];result_masks=[]
+    masks = np.zeros((batchsize,1,opt.finesize,opt.finesize), dtype=np.float32)
-    for i in range(batchsize):
+    for i in range(len(imagepaths)):
-        # print(imgs[i].shape,masks[i].shape)
+        # Files are read here, per batch, instead of being preloaded into memory.
+        img = impro.resize(impro.imread(imagepaths[i]),opt.loadsize)
-        img,mask = data.random_transform_image(imgs[i], masks[i], finesize, test_flag)
+        mask = impro.resize(impro.imread(maskpaths[i],mod = 'gray'),opt.loadsize)      
-        # print(img.shape,mask.shape)
+        # The image and its mask go through the same random_transform_image call.
+        img,mask = data.random_transform_image(img, mask, opt.finesize, test_flag)
-        mask = (mask.reshape(1,finesize,finesize)/255.0)
+        # HWC -> CHW for the image.
+        images[i] = (img.transpose((2, 0, 1))/255.0)
-        img = (img.transpose((2, 0, 1))/255.0)
+        masks[i] = (mask.reshape(1,1,opt.finesize,opt.finesize)/255.0)
-        result_imgs.append(img)
+    images = Totensor(images,opt.use_gpu)
-        result_masks.append(mask)
+    masks = Totensor(masks,opt.use_gpu)
-    result_imgs = np.array(result_imgs)
-    result_masks  = np.array(result_masks)
+    return images,masks
-    return result_imgs,result_masks
-def batch_generator(images,masks,batchsize):
+'''
-    dataset_images = []
+--------------------------checking dataset--------------------------
-    dataset_masks = []
+'''
+print('checking dataset...')
-    for i in range(int(len(images)/batchsize)):
+imagepaths = sorted(util.Traversal(dir_img))[:opt.maxload]
-        dataset_images.append(images[i*batchsize:(i+1)*batchsize])
+maskpaths = sorted(util.Traversal(dir_mask))[:opt.maxload]
-        dataset_masks.append(masks[i*batchsize:(i+1)*batchsize])
+data.shuffledata(imagepaths, maskpaths)
-    if len(images)%batchsize != 0:
+if len(imagepaths) != len(maskpaths) :
-        dataset_images.append(images[len(images)-len(images)%batchsize:])
+    print('dataset error!')
-        dataset_masks.append(masks[len(images)-len(images)%batchsize:])
+    exit(0)
+img_num = len(imagepaths)
-    return dataset_images,dataset_masks
+print('find images:',img_num)
+imagepaths_train = (imagepaths[0:int(img_num*0.8)]).copy()
-def loadimage(dir_img,dir_mask,loadsize,eval_p):
+maskpaths_train = (maskpaths[0:int(img_num*0.8)]).copy()
-    t1 = datetime.datetime.now()
+imagepaths_eval = (imagepaths[int(img_num*0.8):]).copy()
-    imgnames = os.listdir(dir_img)
+maskpaths_eval = (maskpaths[int(img_num*0.8):]).copy()
-    # imgnames = imgnames[:100]   
-    random.shuffle(imgnames)
+'''
-    imgnames = imgnames[:MAX_LOAD]
+--------------------------def network--------------------------
-    print('load images:',len(imgnames))
+'''
-    imgnames = (f[:-4] for f in imgnames)
+if opt.model =='UNet':
-    images = []
+    net = unet_model.UNet(n_channels = 3, n_classes = 1)
-    masks = []
+elif opt.model =='BiSeNet':
-    for imgname in imgnames:
+    net = BiSeNet_model.BiSeNet(num_classes=1, context_path='resnet18')
-        img = impro.imread(dir_img+imgname+'.jpg')
-        mask = impro.imread(dir_mask+imgname+'.png',mod = 'gray')
+if opt.continuetrain:
-        img = impro.resize(img,loadsize)
-        mask = impro.resize(mask,loadsize)
-        images.append(img)
-        masks.append(mask)
-    train_images,train_masks = images[0:int(len(masks)*(1-eval_p))],masks[0:int(len(masks)*(1-eval_p))]
-    eval_images,eval_masks = images[int(len(masks)*(1-eval_p)):len(masks)],masks[int(len(masks)*(1-eval_p)):len(masks)]
-    t2 = datetime.datetime.now()
-    print('load data cost time:',(t2 - t1).seconds,'s')
-    return train_images,train_masks,eval_images,eval_masks
-util.makedirs(dir_checkpoint)
-print('loading data......')
-train_images,train_masks,eval_images,eval_masks = loadimage(dir_img,dir_mask,LOADSIZE,0.2)
-dataset_eval_images,dataset_eval_masks = batch_generator(eval_images,eval_masks,BATCHSIZE)
-dataset_train_images,dataset_train_masks = batch_generator(train_images,train_masks,BATCHSIZE)
-net = unet_model.UNet(n_channels = 3, n_classes = 1)
-if CONTINUE:
    if not os.path.isfile(os.path.join(dir_checkpoint,'last.pth')):
-        CONTINUE = False
+        opt.continuetrain = False
        print('can not load last.pth, training on init weight.')
-if CONTINUE:
+if opt.continuetrain:
-    net.load_state_dict(torch.load(dir_checkpoint+'last.pth'))
+    net.load_state_dict(torch.load(os.path.join(dir_checkpoint,'last.pth')))
-if use_gpu:
+    f = open(os.path.join(dir_checkpoint,'epoch_log.txt'),'r')
+    opt.startepoch = int(f.read())
+    f.close()
+if opt.use_gpu:
    net.cuda()
    cudnn.benchmark = True
+optimizer = torch.optim.Adam(net.parameters(), lr=opt.lr)
-optimizer = torch.optim.Adam(net.parameters(), lr=LR, betas=(0.9, 0.999))
+if opt.model =='UNet':
+    criterion = nn.BCELoss()
-criterion = nn.BCELoss()
+elif opt.model =='BiSeNet':
-# criterion = nn.L1Loss()
+    criterion = nn.BCELoss()
+    # criterion = BiSeNet_model.DiceLoss()
+'''
+--------------------------train--------------------------
+'''
+loss_plot = {'train':[],'eval':[]}
 print('begin training......')
-for epoch in range(EPOCHS):
+for epoch in range(opt.startepoch,opt.maxepoch):
-    random_save = random.randint(0, len(dataset_train_images))
+    random_save = random.randint(0, int(img_num*0.8/opt.batchsize))
+    data.shuffledata(imagepaths_train, maskpaths_train)
    starttime = datetime.datetime.now()
-    print('Epoch {}/{}.'.format(epoch + 1, EPOCHS))
+    util.writelog(os.path.join(dir_checkpoint,'loss.txt'),'Epoch {}/{}.'.format(epoch + 1, opt.maxepoch),True)
    net.train()
-    if use_gpu:
+    if opt.use_gpu:
        net.cuda()
    epoch_loss = 0
-    for i,(img,mask) in enumerate(zip(dataset_train_images,dataset_train_masks)):
+    for i in range(int(img_num*0.8/opt.batchsize)):
-        # print(epoch,i,img.shape,mask.shape)
+        img,mask = loadimage(imagepaths_train[i*opt.batchsize:(i+1)*opt.batchsize], maskpaths_train[i*opt.batchsize:(i+1)*opt.batchsize], opt)
-        img,mask = Toinputshape(img, mask, FINESIZE)
-        img = Totensor(img,use_gpu)
-        mask = Totensor(mask,use_gpu)
-        mask_pred = net(img)
+        if opt.model =='UNet':
-        loss = criterion(mask_pred, mask)
+            mask_pred = net(img)
-        epoch_loss += loss.item()
+            loss = criterion(mask_pred, mask)
+            epoch_loss += loss.item()
+        elif opt.model =='BiSeNet':
+            mask_pred, mask_pred_sup1, mask_pred_sup2 = net(img)
+            loss1 = criterion(mask_pred, mask)
+            loss2 = criterion(mask_pred_sup1, mask)
+            loss3 = criterion(mask_pred_sup2, mask)
+            loss = loss1 + loss2 + loss3
+            epoch_loss += loss1.item()
        optimizer.zero_grad()
        loss.backward()
@@ -151,30 +160,47 @@ for epoch in range(EPOCHS):
            data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'result.png'),True)
        if  i == random_save:
            data.showresult(img,mask,mask_pred,os.path.join(dir_checkpoint,'epoch_'+str(epoch+1)+'.png'),True)
+    epoch_loss = epoch_loss/int(img_num*0.8/opt.batchsize)
+    loss_plot['train'].append(epoch_loss)
-    # torch.cuda.empty_cache()
+    #val
-    # # net.eval()
    epoch_loss_eval = 0
    with torch.no_grad():
-    #net.eval()
+    # net.eval()
-        for i,(img,mask) in enumerate(zip(dataset_eval_images,dataset_eval_masks)):
+        for i in range(int(img_num*0.2/opt.batchsize)):
-            # print(epoch,i,img.shape,mask.shape)
+            img,mask = loadimage(imagepaths_eval[i*opt.batchsize:(i+1)*opt.batchsize], maskpaths_eval[i*opt.batchsize:(i+1)*opt.batchsize], opt,test_flag=True)
-            img,mask = Toinputshape(img, mask, FINESIZE,test_flag=True)
+            if opt.model =='UNet':
-            img = Totensor(img,use_gpu)
+                mask_pred = net(img)
-            mask = Totensor(mask,use_gpu)
+            elif opt.model =='BiSeNet':
-            mask_pred = net(img)
+                mask_pred, _, _ = net(img)
-            loss = criterion(mask_pred, mask)
+            # mask_pred = net(img)
+            loss= criterion(mask_pred, mask)
            epoch_loss_eval += loss.item()
+    epoch_loss_eval = epoch_loss_eval/int(img_num*0.2/opt.batchsize)
+    loss_plot['eval'].append(epoch_loss_eval)
    # torch.cuda.empty_cache()
+    #savelog
    endtime = datetime.datetime.now()
-    print('--- Epoch train_loss: {0:.6f} eval_loss: {1:.6f} Cost time: {2:} s'.format(
+    util.writelog(os.path.join(dir_checkpoint,'loss.txt'),
-        epoch_loss/len(dataset_train_images),
+                '--- Epoch train_loss: {0:.6f} eval_loss: {1:.6f} Cost time: {2:} s'.format(
-        epoch_loss_eval/len(dataset_eval_images),
+                    epoch_loss,
-        (endtime - starttime).seconds)),
+                    epoch_loss_eval,
-    torch.save(net.cpu().state_dict(),dir_checkpoint+'last.pth')
+                    (endtime - starttime).seconds),
+                True)
-    if (epoch+1)%SAVE_FRE == 0:
+    #plot
-        torch.save(net.cpu().state_dict(),dir_checkpoint+'epoch'+str(epoch+1)+'.pth')
+    plt.plot(np.linspace(opt.startepoch+1,epoch+1,epoch+1-opt.startepoch),loss_plot['train'],label='train')
+    plt.plot(np.linspace(opt.startepoch+1,epoch+1,epoch+1-opt.startepoch),loss_plot['eval'],label='eval')
+    plt.xlabel('Epoch')
+    plt.ylabel('BCELoss')
+    plt.legend(loc=1)
+    plt.savefig(os.path.join(dir_checkpoint,'loss.jpg'))
+    plt.close()
+    #save network
+    torch.save(net.cpu().state_dict(),os.path.join(dir_checkpoint,'last.pth'))
+    f = open(os.path.join(dir_checkpoint,'epoch_log.txt'),'w+')
+    f.write(str(epoch+1))
+    f.close()
+    if (epoch+1)%opt.savefreq == 0:
+        torch.save(net.cpu().state_dict(),os.path.join(dir_checkpoint,'epoch'+str(epoch+1)+'.pth'))
        print('network saved.')
--- a/train/clean/train.py
+++ b/train/clean/train.py
@@ -21,6 +21,7 @@ import torch.backends.cudnn as cudnn
 '''
 opt = Options()
+opt.parser.add_argument('--gpu_id',type=int,default=0, help='')
 opt.parser.add_argument('--N',type=int,default=25, help='')
 opt.parser.add_argument('--lr',type=float,default=0.0002, help='')
 opt.parser.add_argument('--beta1',type=float,default=0.5, help='')
@@ -32,14 +33,15 @@ opt.parser.add_argument('--lambda_gan',type=float,default=1, help='')
 opt.parser.add_argument('--finesize',type=int,default=256, help='')
 opt.parser.add_argument('--loadsize',type=int,default=286, help='')
 opt.parser.add_argument('--batchsize',type=int,default=1, help='')
-opt.parser.add_argument('--perload_num',type=int,default=16, help='')
+opt.parser.add_argument('--perload_num',type=int,default=16, help='number of images pool')
 opt.parser.add_argument('--norm',type=str,default='instance', help='')
+opt.parser.add_argument('--dataset',type=str,default='./datasets/face/', help='')
 opt.parser.add_argument('--maxiter',type=int,default=10000000, help='')
 opt.parser.add_argument('--savefreq',type=int,default=10000, help='')
 opt.parser.add_argument('--startiter',type=int,default=0, help='')
 opt.parser.add_argument('--continuetrain', action='store_true', help='')
-opt.parser.add_argument('--savename',type=str,default='MosaicNet', help='')
+opt.parser.add_argument('--savename',type=str,default='face', help='')
 '''
@@ -50,19 +52,27 @@ dir_checkpoint = os.path.join('checkpoints/',opt.savename)
 util.makedirs(dir_checkpoint)
 util.writelog(os.path.join(dir_checkpoint,'loss.txt'), 
              str(time.asctime(time.localtime(time.time())))+'\n'+util.opt2str(opt))
+torch.cuda.set_device(opt.gpu_id)
 N = opt.N
 loss_sum = [0.,0.,0.,0.]
 loss_plot = [[],[]]
 item_plot = []
-videos = os.listdir('./dataset')
+# list video dir 
-videos.sort()
+videonames = os.listdir(opt.dataset)
-lengths = []
+videonames.sort()
-print('check dataset...')
+lengths = [];tmp = []
-for video in videos:
+print('Check dataset...')
-    video_images = os.listdir('./dataset/'+video+'/ori')
+for video in videonames:
-    lengths.append(len(video_images))
+    if video != 'opt.txt':
+        video_images = os.listdir(os.path.join(opt.dataset,video,'origin_image'))
+        lengths.append(len(video_images))
+        tmp.append(video)
+videonames = tmp
+video_num = len(videonames)
+#def network
+print('Init network...')
 if opt.hd:
    netG = videoHD_model.MosaicNet(3*N+1, 3, norm=opt.norm)
 else:
@@ -71,7 +81,8 @@ loadmodel.show_paramsnumber(netG,'netG')
 if opt.gan:
    if opt.hd:
-        netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=2)    
+        #netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=1)
+        netD = pix2pixHD_model.define_D(6, 64, 3, norm = opt.norm, use_sigmoid=False, num_D=2,getIntermFeat=True)    
    else:
        netD = pix2pix_model.define_D(3*2, 64, 'basic', norm = opt.norm)
    netD.train()
@@ -106,36 +117,38 @@ if opt.use_gpu:
    cudnn.benchmark = True
 '''
--------------------------preload data--------------------------
+--------------------------preload data & data pool--------------------------
 '''
-def loaddata():
+def loaddata(video_index):
+    # Build one training sample from the video at videonames[video_index].
+    # Returns (input_img, ground_true) as tensors: input_img stacks N mosaiced frames
+    # (3 channels each) plus the centre frame's mask as the last channel; ground_true
+    # is the clean centre frame.
-    video_index = random.randint(0,len(videos)-1)
-    video = videos[video_index]
+    videoname = videonames[video_index]
    img_index = random.randint(int(N/2)+1,lengths[video_index]- int(N/2)-1)
    input_img = np.zeros((opt.loadsize,opt.loadsize,3*N+1), dtype='uint8')
+    # centre frame: its mask fills the last channel and its image is the target
+    this_mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index)+'.png'),'gray',loadsize=opt.loadsize)
+    input_img[:,:,-1] = this_mask
+    #print(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg'))
+    ground_true =  impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index)+'.jpg'),loadsize=opt.loadsize)
+    # Mosaic parameters are drawn once and reused for every frame in the loop below.
+    mosaic_size,mod,rect_rat,father = mosaic.get_random_parameter(ground_true,this_mask)
+    # mosaic the N frames around img_index on the fly and stack them channel-wise
    for i in range(0,N):
+        img =  impro.imread(os.path.join(opt.dataset,videoname,'origin_image','%05d'%(img_index+i-int(N/2))+'.jpg'),loadsize=opt.loadsize)
-        img = cv2.imread('./dataset/'+video+'/mosaic/output_'+'%05d'%(img_index+i-int(N/2))+'.png')
+        mask = impro.imread(os.path.join(opt.dataset,videoname,'mask','%05d'%(img_index+i-int(N/2))+'.png'),'gray',loadsize=opt.loadsize)
-        img = impro.resize(img,opt.loadsize)
+        img_mosaic = mosaic.addmosaic_base(img, mask, mosaic_size,model = mod,rect_rat=rect_rat,father=father)
-        input_img[:,:,i*3:(i+1)*3] = img
+        input_img[:,:,i*3:(i+1)*3] = img_mosaic
-    mask = cv2.imread('./dataset/'+video+'/mask/output_'+'%05d'%(img_index)+'.png',0)
+    # random augmentation, then conversion to tensors
-    mask = impro.resize(mask,opt.loadsize)
-    mask = impro.mask_threshold(mask,15,128)
-    input_img[:,:,-1] = mask
-    ground_true = cv2.imread('./dataset/'+video+'/ori/output_'+'%05d'%(img_index)+'.png')
-    ground_true = impro.resize(ground_true,opt.loadsize)
    input_img,ground_true = data.random_transform_video(input_img,ground_true,opt.finesize,N)
    input_img = data.im2tensor(input_img,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False)
    ground_true = data.im2tensor(ground_true,bgr2rgb=False,use_gpu=opt.use_gpu,use_transform = False,is0_1=False)
    return input_img,ground_true
-print('preloading data, please wait 5s...')
+print('Preloading data, please wait...')
 if opt.perload_num <= opt.batchsize:
    opt.perload_num = opt.batchsize*2
+#data pool
 input_imgs = torch.rand(opt.perload_num,N*3+1,opt.finesize,opt.finesize).cuda()
 ground_trues = torch.rand(opt.perload_num,3,opt.finesize,opt.finesize).cuda()
 load_cnt = 0
@@ -144,14 +157,15 @@ def preload():
    global load_cnt   
    while 1:
        try:
+            video_index = random.randint(0,video_num-1)
            ran = random.randint(0, opt.perload_num-1)
-            input_imgs[ran],ground_trues[ran] = loaddata()
+            input_imgs[ran],ground_trues[ran] = loaddata(video_index)
            load_cnt += 1
            # time.sleep(0.1)
        except Exception as e:
            print("error:",e)
 import threading
-t = threading.Thread(target=preload,args=())  #t为新创建的线程
+t = threading.Thread(target=preload,args=()) 
 t.daemon = True
 t.start()
 time_start=time.time()

--- a/util/data.py
+++ b/util/data.py
@@ -3,7 +3,7 @@ import numpy as np
 import torch
 import torchvision.transforms as transforms
 import cv2
-from .image_processing import color_adjust
+from .image_processing import color_adjust,dctblur
 transform = transforms.Compose([  
    transforms.ToTensor(),  
@@ -61,6 +61,11 @@ def im2tensor(image_numpy, imtype=np.uint8, gray=False,bgr2rgb = True, reshape =
        image_tensor = image_tensor.cuda()
    return image_tensor
+def shuffledata(data,target):
+    '''Shuffle data and target in place, replaying the same numpy RNG state for both.
+    Sequences of equal length therefore receive the same permutation and stay
+    aligned element by element. Nothing is returned.
+    '''
+    rng_snapshot = np.random.get_state()
+    for sequence in (data, target):
+        # Rewind the global RNG so each sequence is shuffled from the same state.
+        np.random.set_state(rng_snapshot)
+        np.random.shuffle(sequence)
 def random_transform_video(src,target,finesize,N):
@@ -78,8 +83,8 @@ def random_transform_video(src,target,finesize,N):
        target = target[:,::-1,:]
    #random color
-    alpha = random.uniform(-0.3,0.3)
+    alpha = random.uniform(-0.1,0.1)
-    beta  = random.uniform(-0.2,0.2)
+    beta  = random.uniform(-0.1,0.1)
    b     = random.uniform(-0.05,0.05)
    g     = random.uniform(-0.05,0.05)
    r     = random.uniform(-0.05,0.05)
@@ -87,39 +92,54 @@ def random_transform_video(src,target,finesize,N):
        src[:,:,i*3:(i+1)*3] = color_adjust(src[:,:,i*3:(i+1)*3],alpha,beta,b,g,r)
    target = color_adjust(target,alpha,beta,b,g,r)
-    # random_num = 15
+    #random blur
-    # bright = random.randint(-random_num*2,random_num*2)
+    if random.random()<0.5:
-    # for i in range(N*3): src[:,:,i]=np.clip(src[:,:,i].astype('int')+bright,0,255).astype('uint8')
+        interpolations = [cv2.INTER_LINEAR,cv2.INTER_CUBIC,cv2.INTER_LANCZOS4]
-    # for i in range(3): target[:,:,i]=np.clip(target[:,:,i].astype('int')+bright,0,255).astype('uint8')
+        size_ran = random.uniform(0.7,1.5)
+        interpolation_up = interpolations[random.randint(0,2)]
-    return src,target
+        interpolation_down =interpolations[random.randint(0,2)]
+        tmp = cv2.resize(src[:,:,:3*N], (int(finesize*size_ran),int(finesize*size_ran)),interpolation=interpolation_up)
+        src[:,:,:3*N] = cv2.resize(tmp, (finesize,finesize),interpolation=interpolation_down)
-def random_transform_image(img,mask,finesize,test_flag = False):
+        tmp = cv2.resize(target, (int(finesize*size_ran),int(finesize*size_ran)),interpolation=interpolation_up)
+        target = cv2.resize(tmp, (finesize,finesize),interpolation=interpolation_down)
-    # randomsize = int(finesize*(1.2+0.2*random.random())+2)
+    return src,target
+def random_transform_single(img,out_shape):
+    out_h,out_w = out_shape
+    img = cv2.resize(img,(int(out_w*random.uniform(1.1, 1.5)),int(out_h*random.uniform(1.1, 1.5))))
    h,w = img.shape[:2]
-    loadsize = min((h,w))
+    h_move = int((h-out_h)*random.random())
-    a = (float(h)/float(w))*random.uniform(0.9, 1.1)
+    w_move = int((w-out_w)*random.random())
+    img = img[h_move:h_move+out_h,w_move:w_move+out_w]
-    if h<w:
+    if random.random()<0.5:
-        mask = cv2.resize(mask, (int(loadsize/a),loadsize))
+        if random.random()<0.5:
-        img = cv2.resize(img, (int(loadsize/a),loadsize))
+            img = img[:,::-1]
-    else:
+        else:
-        mask = cv2.resize(mask, (loadsize,int(loadsize*a)))
+            img = img[::-1,:]
-        img = cv2.resize(img, (loadsize,int(loadsize*a)))
+    if img.shape[0] != out_h or img.shape[1]!= out_w :
+        img = cv2.resize(img,(out_w,out_h))
-    # mask = randomsize(mask,loadsize)
+    return img
-    # img = randomsize(img,loadsize)
+def random_transform_image(img,mask,finesize,test_flag = False):
+    #random scale
+    if random.random()<0.5:
+        h,w = img.shape[:2]
+        loadsize = min((h,w))
+        a = (float(h)/float(w))*random.uniform(0.9, 1.1)
+        if h<w:
+            mask = cv2.resize(mask, (int(loadsize/a),loadsize))
+            img = cv2.resize(img, (int(loadsize/a),loadsize))
+        else:
+            mask = cv2.resize(mask, (loadsize,int(loadsize*a)))
+            img = cv2.resize(img, (loadsize,int(loadsize*a)))
    #random crop
    h,w = img.shape[:2]
    h_move = int((h-finesize)*random.random())
    w_move = int((w-finesize)*random.random())
-    # print(h,w,h_move,w_move)
    img_crop = img[h_move:h_move+finesize,w_move:w_move+finesize]
    mask_crop = mask[h_move:h_move+finesize,w_move:w_move+finesize]
@@ -137,10 +157,6 @@ def random_transform_image(img,mask,finesize,test_flag = False):
    #random color
    img = color_adjust(img,ran=True)
-    # random_num = 15
-    # for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+random.randint(-random_num,random_num),0,255).astype('uint8')
-    # bright = random.randint(-random_num*2,random_num*2)
-    # for i in range(3): img[:,:,i]=np.clip(img[:,:,i].astype('int')+bright,0,255).astype('uint8')
    #random flip
    if random.random()<0.5:
@@ -152,11 +168,19 @@ def random_transform_image(img,mask,finesize,test_flag = False):
            mask = mask[::-1,:]
    #random blur
-    if random.random()>0.5:
+    if random.random()<0.5:
-        size_ran = random.uniform(0.5,1.5)
+        img = dctblur(img,random.randint(1,15))
-        img = cv2.resize(img, (int(finesize*size_ran),int(finesize*size_ran)))
-        img = cv2.resize(img, (finesize,finesize))
+        # interpolations = [cv2.INTER_LINEAR,cv2.INTER_CUBIC,cv2.INTER_LANCZOS4]
-        #img = cv2.blur(img, (random.randint(1,3), random.randint(1,3)))
+        # size_ran = random.uniform(0.7,1.5)
+        # img = cv2.resize(img, (int(finesize*size_ran),int(finesize*size_ran)),interpolation=interpolations[random.randint(0,2)])
+        # img = cv2.resize(img, (finesize,finesize),interpolation=interpolations[random.randint(0,2)])
+    #check shape
+    if img.shape[0]!= finesize or img.shape[1]!= finesize or mask.shape[0]!= finesize or mask.shape[1]!= finesize:
+        img = cv2.resize(img,(finesize,finesize))
+        mask = cv2.resize(mask,(finesize,finesize))
+        print('warning! shape error.')
    return img,mask
 def showresult(img1,img2,img3,name,is0_1 = False):

--- a/util/ffmpeg.py
+++ b/util/ffmpeg.py
@@ -2,11 +2,18 @@ import os,json
 # ffmpeg 3.4.6
-def video2image(videopath,imagepath,fps=0):
+def video2image(videopath,imagepath,fps=0,start_time=0,last_time=0):
+    '''
+    Split a video into images by running ffmpeg through os.system.
+    videopath:  input video
+    imagepath:  output path/pattern, handed to ffmpeg as-is
+    fps:        output frame rate (-r), 0->do not pass -r
+    start_time: start position (-ss), 0->convert from the beginning
+    last_time:  duration (-t), only used when start_time is not 0
+    start_time/last_time are converted with str(), so both numbers and
+    time strings can be passed.
+    '''
-    if fps == 0:
+    if start_time == 0:
-        os.system('ffmpeg -i "'+videopath+'" -f image2 '+imagepath)
+        if fps == 0:
+            os.system('ffmpeg -i "'+videopath+'" -f image2 '+'-q:v -0 '+imagepath)
+        else:
+            os.system('ffmpeg -i "'+videopath+'" -r '+str(fps)+' -f image2 '+'-q:v -0 '+imagepath)
    else:
-        os.system('ffmpeg -i "'+videopath+'" -r '+str(fps)+' -f image2 '+imagepath)
+        if fps == 0:
+            os.system('ffmpeg -ss '+str(start_time)+' -t '+str(last_time)+' -i "'+videopath+'" -f image2 '+'-q:v -0 '+imagepath)
+        else:
+            os.system('ffmpeg -ss '+str(start_time)+' -t '+str(last_time)+' -i "'+videopath+'" -r '+str(fps)+' -f image2 '+'-q:v -0 '+imagepath)
 def video2voice(videopath,voicepath):
    os.system('ffmpeg -i "'+videopath+'" -f mp3 '+voicepath)
@@ -53,4 +60,4 @@ def continuous_screenshot(videopath,savedir,fps):
    fps:       save how many images per second
    '''
    videoname = os.path.splitext(os.path.basename(videopath))[0]
-    os.system('ffmpeg -i "'+videopath+'" -vf fps='+str(fps)+' '+savedir+'/'+videoname+'_%05d.jpg')
+    os.system('ffmpeg -i "'+videopath+'" -vf fps='+str(fps)+' -q:v -0 '+savedir+'/'+videoname+'_%06d.jpg')
--- a/util/image_processing.py
+++ b/util/image_processing.py
@@ -3,13 +3,24 @@ import numpy as np
 import random
 import platform
 system_type = 'Linux'
 if 'Windows' in platform.platform():
    system_type = 'Windows'
-def imread(file_path,mod = 'normal'):
+DCT_Q = np.array([[8,16,19,22,26,27,29,34],
+                [16,16,22,24,27,29,34,37],
+                [19,22,26,27,29,34,34,38],
+                [22,22,26,27,29,34,37,40],
+                [22,26,27,29,32,35,40,48],
+                [26,27,29,32,35,40,48,58],
+                [26,27,29,34,38,46,56,59],
+                [27,29,35,38,46,56,69,83]])
+def imread(file_path,mod = 'normal',loadsize = 0):
    '''
-    mod = 'normal' | 'gray' | 'all'
+    mod:  'normal' | 'gray' | 'all'
+    loadsize: 0->original
    '''
    if system_type == 'Linux':
        if mod == 'normal':
@@ -26,6 +37,9 @@ def imread(file_path,mod = 'normal'):
            img = cv2.imdecode(np.fromfile(file_path,dtype=np.uint8),0)
        else:
            img = cv2.imdecode(np.fromfile(file_path,dtype=np.uint8),-1)
+    if loadsize != 0:
+        img = resize(img, loadsize, interpolation=cv2.INTER_CUBIC)
    return img
@@ -40,6 +54,13 @@ def imwrite(file_path,img):
        cv2.imencode('.jpg', img)[1].tofile(file_path)
 def resize(img,size,interpolation=cv2.INTER_LINEAR):
+    '''
+    cv2.INTER_NEAREST      最邻近插值点法
+    cv2.INTER_LINEAR        双线性插值法
+    cv2.INTER_AREA         邻域像素再取样插补
+    cv2.INTER_CUBIC        双立方插补，4*4大小的补点
+    cv2.INTER_LANCZOS4     8x8像素邻域的Lanczos插值
+    '''
    h, w = img.shape[:2]
    if np.min((w,h)) ==size:
        return img
@@ -55,8 +76,6 @@ def resize_like(img,img_like):
    return img
 def ch_one2three(img):
-    #zeros = np.zeros(img.shape[:2], dtype = "uint8")
-    # ret,thresh = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
    res = cv2.merge([img, img, img])
    return res
@@ -78,11 +97,11 @@ def color_adjust(img,alpha=1,beta=0,b=0,g=0,r=0,ran = False):
    '''
    img = img.astype('float')
    if ran:
-        alpha = random.uniform(-0.2,0.2)
+        alpha = random.uniform(-0.1,0.1)
-        beta  = random.uniform(-0.2,0.2)
+        beta  = random.uniform(-0.1,0.1)
-        b     = random.uniform(-0.1,0.1)
+        b     = random.uniform(-0.05,0.05)
-        g     = random.uniform(-0.1,0.1)
+        g     = random.uniform(-0.05,0.05)
-        r     = random.uniform(-0.1,0.1)
+        r     = random.uniform(-0.05,0.05)
    img = (1+alpha)*img+255.0*beta
    bgr = [b*255.0,g*255.0,r*255.0]
    for i in range(3): img[:,:,i]=img[:,:,i]+bgr[i]
@@ -98,14 +117,6 @@ def makedataset(target_image,orgin_image):
    img[0:256,256:512] = orgin_image[0:256,int(w/2-256/2):int(w/2+256/2)]
    return img
-def image2folat(img,ch):
-    size=img.shape[0]
-    if ch == 1:
-        img = (img[:,:,0].reshape(1,size,size)/255.0).astype(np.float32)
-    else:
-        img = (img.transpose((2, 0, 1))/255.0).astype(np.float32)
-    return img
 def spiltimage(img,size = 128):
    h, w = img.shape[:2]
    # size = min(h,w)
@@ -133,6 +144,34 @@ def mergeimage(img1,img2,orgin_image,size = 128):
    result_img = cv2.add(new_img1,new_img2)
    return result_img
+def block_dct_and_idct(g,QQF):
+    '''
+    Quantize one block in the DCT domain and transform it back.
+    g:   block to process (passed to cv2.dct)
+    QQF: quantization table, applied element-wise to the DCT coefficients
+    return: rounded reconstruction of the block
+    '''
+    coeffs = cv2.dct(g)
+    levels = np.round(16.0*coeffs/QQF)       # quantized coefficient levels
+    dequantized = np.round(levels*QQF/16)    # back to coefficient scale
+    return np.round(cv2.idct(dequantized))
+def image_dct_and_idct(I,QF):
+    '''
+    Run block_dct_and_idct over every complete 8x8 block of a 2-D array.
+    I:  single-channel image; it is modified in place and also returned
+    QF: factor multiplied into DCT_Q to build the quantization table
+    Rows/columns beyond the last complete 8x8 block are left untouched.
+    '''
+    rows,cols = I.shape
+    table = DCT_Q*QF
+    for top in range(0,8*int(rows/8),8):
+        for left in range(0,8*int(cols/8),8):
+            block = I[top:top+8,left:left+8]
+            I[top:top+8,left:left+8] = block_dct_and_idct(block,table)
+    return I
+def dctblur(img,Q):
+    '''
+    Degrade an image by quantizing its 8x8 block DCT coefficients.
+    img: 2-D (single channel) or 3-D (h,w,ch) array; the input is not modified
+    Q:   1~20, 1->best
+    return: uint8 image with the same shape as the input
+    Only complete 8x8 blocks are processed: when h or w is not a multiple
+    of 8 the remaining border rows/columns are passed through unchanged.
+    '''
+    # astype returns a float32 copy, so the in-place block processing below
+    # never touches the caller's array
+    img = img.astype(np.float32)
+    if img.ndim == 2:
+        img = image_dct_and_idct(img, Q)
+    elif img.ndim == 3:
+        for i in range(img.shape[2]):
+            img[:,:,i] = image_dct_and_idct(img[:,:,i], Q)
+    return (np.clip(img,0,255)).astype(np.uint8)
 def find_mostlikely_ROI(mask):
    contours,hierarchy=cv2.findContours(mask, cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
    if len(contours)>0:
@@ -199,8 +238,20 @@ def mask_area(mask):
    return area
-def replace_mosaic(img_origin,img_fake,x,y,size,no_father):
+def Q_lapulase(resImg):
-    img_fake = resize(img_fake,size*2,interpolation=cv2.INTER_LANCZOS4)
+    '''
+    Evaluate image quality as the variance of the Laplacian.
+    resImg: BGR image (converted with cv2.COLOR_BGR2GRAY)
+    return: score
+        score > 20   normal
+        score > 50   clear
+    '''
+    gray = resize(cv2.cvtColor(resImg, cv2.COLOR_BGR2GRAY),512)
+    return cv2.Laplacian(gray, cv2.CV_64F).var()
+def replace_mosaic(img_origin,img_fake,mask,x,y,size,no_father):
+    img_fake = cv2.resize(img_fake,(size*2,size*2),interpolation=cv2.INTER_LANCZOS4)
    if no_father:
        img_origin[y-size:y+size,x-size:x+size]=img_fake
        img_result = img_origin
@@ -212,13 +263,20 @@ def replace_mosaic(img_origin,img_fake,x,y,size,no_father):
        #eclosion
        eclosion_num = int(size/5)
        entad = int(eclosion_num/2+2)
-        mask = np.zeros(img_origin.shape, dtype='uint8')
-        mask = cv2.rectangle(mask,(x-size+entad,y-size+entad),(x+size-entad,y+size-entad),(255,255,255),-1)
+        # mask = np.zeros(img_origin.shape, dtype='uint8')
+        # mask = cv2.rectangle(mask,(x-size+entad,y-size+entad),(x+size-entad,y+size-entad),(255,255,255),-1)
+        mask = cv2.resize(mask,(img_origin.shape[1],img_origin.shape[0]))
+        mask = ch_one2three(mask)
        mask = (cv2.blur(mask, (eclosion_num, eclosion_num)))
-        mask = mask/255.0
+        mask_tmp = np.zeros_like(mask)
+        mask_tmp[y-size:y+size,x-size:x+size] = mask[y-size:y+size,x-size:x+size]# Fix edge overflow
+        mask = mask_tmp/255.0
        img_tmp = np.zeros(img_origin.shape)
        img_tmp[y-size:y+size,x-size:x+size]=img_fake
        img_result = img_origin.copy()
        img_result = (img_origin*(1-mask)+img_tmp*mask).astype('uint8')
    return img_result
\ No newline at end of file
--- a/util/mosaic.py
+++ b/util/mosaic.py
@@ -10,10 +10,19 @@ def addmosaic(img,mask,opt):
    elif opt.mosaic_size == 0:
        img = addmosaic_autosize(img, mask, opt.mosaic_mod)
    else:
-        img = addmosaic_normal(img,mask,opt.mosaic_size,opt.output_size,model = opt.mosaic_mod)
+        img = addmosaic_base(img,mask,opt.mosaic_size,opt.output_size,model = opt.mosaic_mod)
    return img
-def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6):
+def addmosaic_base(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6,father=0):
+    '''
+    img: input image
+    mask: input mask
+    n: mosaic size
+    out_size: output size  0->original
+    model : squa_avg squa_mid squa_random squa_avg_circle_edge rect_avg
+    rect_rat: if model==rect_avg , mosaic w/h=rect_rat
+    father : father size, -1->no 0->auto
+    '''
    n = int(n)
    if out_size:
        img = resize(img,out_size)      
@@ -44,9 +53,9 @@ def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6):
            for j in range(int(w/n)):
                img_mosaic[i*n:(i+1)*n,j*n:(j+1)*n,:]=img[i*n:(i+1)*n,j*n:(j+1)*n,:].mean(0).mean(0)
        mask = cv2.threshold(mask,127,255,cv2.THRESH_BINARY)[1]
-        mask = ch_one2three(mask)
+        _mask = ch_one2three(mask)
-        mask_inv = cv2.bitwise_not(mask)
+        mask_inv = cv2.bitwise_not(_mask)
-        imgroi1 = cv2.bitwise_and(mask,img_mosaic)
+        imgroi1 = cv2.bitwise_and(_mask,img_mosaic)
        imgroi2 = cv2.bitwise_and(mask_inv,img)
        img_mosaic = cv2.add(imgroi1,imgroi2)
@@ -58,12 +67,21 @@ def addmosaic_normal(img,mask,n,out_size = 0,model = 'squa_avg',rect_rat = 1.6):
                if mask[int(i*n_h+n_h/2),int(j*n_w+n_w/2)] == 255:
                    img_mosaic[i*n_h:(i+1)*n_h,j*n_w:(j+1)*n_w,:]=img[i*n_h:(i+1)*n_h,j*n_w:(j+1)*n_w,:].mean(0).mean(0)
+    if father != -1:
+        if father==0:
+            mask = (cv2.blur(mask, (n, n)))
+        else:
+            mask = (cv2.blur(mask, (father, father)))
+        mask = ch_one2three(mask)/255.0
+        img_mosaic = (img*(1-mask)+img_mosaic*mask).astype('uint8')
    return img_mosaic
 def get_autosize(img,mask,area_type = 'normal'):
    h,w = img.shape[:2]
-    mask = cv2.resize(mask,(w,h))
+    size = np.min([h,w])
-    alpha = np.min((w,h))/512
+    mask = resize(mask,size)
+    alpha = size/512
    try:
        if area_type == 'normal':
            area = mask_area(mask)
@@ -85,66 +103,32 @@ def get_autosize(img,mask,area_type = 'normal'):
        pass
    return size
-def addmosaic_autosize(img,mask,model,area_type = 'normal'):
+def get_random_parameter(img,mask):
-    h,w = img.shape[:2]
+    # mosaic size
-    mask = cv2.resize(mask,(w,h))
+    p = np.array([0.5,0.5])
-    alpha = np.min((w,h))/512
+    mod = np.random.choice(['normal','bounding'], p = p.ravel())
-    try:
+    mosaic_size = get_autosize(img,mask,area_type = mod)
-        if area_type == 'normal':
+    mosaic_size = int(mosaic_size*random.uniform(0.9,2.1))
-            area = mask_area(mask)
-        elif area_type == 'bounding':
-            w,h = cv2.boundingRect(mask)[2:]
-            area = w*h
-    except:
-        area = 0
-    area = area/(alpha*alpha)
-    if area>50000:
-        img_mosaic = addmosaic_normal(img,mask,alpha*((area-50000)/50000+12),model = model)
-    elif 20000<area<=50000:
-        img_mosaic = addmosaic_normal(img,mask,alpha*((area-20000)/30000+8),model = model)
-    elif 5000<area<=20000:
-        img_mosaic = addmosaic_normal(img,mask,alpha*((area-5000)/20000+7),model = model)
-    elif 0<=area<=5000:
-        img_mosaic = addmosaic_normal(img,mask,alpha*((area-0)/5000+6),model = model)
-    else:
-        pass
-    return img_mosaic
-def addmosaic_random(img,mask,area_type = 'normal'):
+    # mosaic mod
-    # img = resize(img,512)
+    p = np.array([0.25, 0.25, 0.1, 0.4])
-    h,w = img.shape[:2]
+    mod = np.random.choice(['squa_mid','squa_avg','squa_avg_circle_edge','rect_avg'], p = p.ravel())
-    mask = cv2.resize(mask,(w,h))
-    alpha = np.min((w,h))/512
+    # rect_rat for rect_avg
-    #area_avg=5925*4
+    rect_rat = random.uniform(1.1,1.6)
-    try:
-        if area_type == 'normal':
+    # father size
-            area = mask_area(mask)
+    father = int(mosaic_size*random.uniform(0,1.5))
-        elif area_type == 'bounding':
-            w,h = cv2.boundingRect(mask)[2:]
+    return mosaic_size,mod,rect_rat,father
-            area = w*h
-    except:
-        area = 0
+def addmosaic_autosize(img,mask,model,area_type = 'normal'):
+    '''
+    Add mosaic with a size chosen automatically by get_autosize.
+    img:       input image
+    mask:      input mask
+    model:     mosaic model, forwarded to addmosaic_base
+    area_type: 'normal' | 'bounding', forwarded to get_autosize
+    '''
-    area = area/(alpha*alpha)
+    mosaic_size = get_autosize(img,mask,area_type = area_type)
-    if area>50000:
+    img_mosaic = addmosaic_base(img,mask,mosaic_size,model = model)
-        img_mosaic = random_mod(img,mask,alpha*random.uniform(8,30)) #16,30
-    elif 20000<area<=50000:
-        img_mosaic = random_mod(img,mask,alpha*random.uniform(8,20)) #12,20
-    elif 5000<area<=20000:
-        img_mosaic = random_mod(img,mask,alpha*random.uniform(8,15))
-    elif 0<=area<=5000:
-        img_mosaic = random_mod(img,mask,alpha*random.uniform(4,10))
-    else:
-        pass
    return img_mosaic
-def random_mod(img,mask,n):
+def addmosaic_random(img,mask):
-    ran=random.random()
+    mosaic_size,mod,rect_rat,father = get_random_parameter(img,mask)
-    if ran < 0.3:
+    img_mosaic = addmosaic_base(img,mask,mosaic_size,model = mod,rect_rat=rect_rat,father=father)
-        img = addmosaic_normal(img,mask,n,model = 'squa_mid')
+    return img_mosaic
-    if 0.3 <= ran < 0.5:
\ No newline at end of file
-        img = addmosaic_normal(img,mask,n,model = 'squa_avg')
-    elif 0.5 <= ran <0.6:
-        img = addmosaic_normal(img,mask,n,model = 'squa_avg_circle_edge')
-    else:
-        img = addmosaic_normal(img,mask,n,model = 'rect_avg')
-    return img
\ No newline at end of file
--- a/util/util.py
+++ b/util/util.py
@@ -40,6 +40,13 @@ def is_videos(paths):
            tmp.append(path)
    return tmp  
+def is_dirs(paths):
+    '''
+    Return the entries of paths that are existing directories, order preserved.
+    '''
+    return [path for path in paths if os.path.isdir(path)]
 def  writelog(path,log,isprint=False):
    f = open(path,'a+')
    f.write(log+'\n')
@@ -72,7 +79,15 @@ def file_init(opt):
    if not os.path.isdir(opt.result_dir):
        os.makedirs(opt.result_dir)
        print('makedir:',opt.result_dir)
-    clean_tempfiles()
+    clean_tempfiles(True)
+def second2stamp(s):
+    '''
+    Convert a number of seconds into a "HH:MM:SS" string.
+    Fractional seconds are dropped.
+    '''
+    hours = int(s/3600)
+    remainder = int(s%3600)     # whole seconds left inside the current hour
+    minutes,seconds = divmod(remainder,60)
+    return "%02d:%02d:%02d" % (hours, minutes, seconds)
 def get_bar(percent,num = 25):
    bar = '['