diff --git a/cores/core.py b/cores/core.py
index fab7da2f17a795ba457874efa416c68f2dd7240b..e990a214056287d198a4cc926d0c9ecd3af26176 100644
--- a/cores/core.py
+++ b/cores/core.py
@@ -244,11 +244,15 @@ def cleanmosaic_video_byframe(opt,netG,netM):
 
 def cleanmosaic_video_fusion(opt,netG,netM):
     path = opt.media_path
-    N = 25
-    if 'HD' in os.path.basename(opt.model_path):
-        INPUT_SIZE = 256
-    else:
-        INPUT_SIZE = 128
+    N,T,S = 2,5,3
+    LEFT_FRAME = (N*S)
+    POOL_NUM = LEFT_FRAME*2+1
+    INPUT_SIZE = 256
+    FRAME_POS = np.linspace(0, (T-1)*S,T,dtype=np.int64)
+    img_pool = []
+    previous_frame = None
+    init_flag = True
+
     fps,imagepaths,height,width = video_init(opt,path)
     positions = get_mosaic_positions(opt,netM,imagepaths,savemask=True)
     t1 = time.time()
@@ -258,37 +262,41 @@ def cleanmosaic_video_fusion(opt,netG,netM):
     # clean mosaic
     print('Clean Mosaic:')
     length = len(imagepaths)
-
-    img_pool = []
-    mosaic_input = np.zeros((INPUT_SIZE,INPUT_SIZE,3*N+1), dtype='uint8')
-
+
     for i,imagepath in enumerate(imagepaths,0):
         x,y,size = positions[i][0],positions[i][1],positions[i][2]
-
+        input_stream = []
         # image read stream
-        mask = cv2.imread(os.path.join(opt.temp_dir+'/mosaic_mask',imagepath),0)
-        if i==0 :
-            for j in range(0,N):
-                img_pool.append(impro.imread(os.path.join(opt.temp_dir+'/video2image',imagepaths[np.clip(i+j-12,0,len(imagepaths)-1)])))
-        else:
+        if i==0 :# init
+            for j in range(POOL_NUM):
+                img_pool.append(impro.imread(os.path.join(opt.temp_dir+'/video2image',imagepaths[np.clip(i+j-LEFT_FRAME,0,len(imagepaths)-1)])))
+        else: # load next frame
             img_pool.pop(0)
-            img_pool.append(impro.imread(os.path.join(opt.temp_dir+'/video2image',imagepaths[np.clip(i+12,0,len(imagepaths)-1)])))
-        img_origin = img_pool[12]
+            img_pool.append(impro.imread(os.path.join(opt.temp_dir+'/video2image',imagepaths[np.clip(i+LEFT_FRAME,0,len(imagepaths)-1)])))
+        img_origin = img_pool[LEFT_FRAME]
         img_result = img_origin.copy()
 
         if size>100:
             try:#Avoid unknown errors
-                #reshape to network input shape
-                for k in range(N):
-                    mosaic_input[:,:,k*3:(k+1)*3] = impro.resize(img_pool[k][y-size:y+size,x-size:x+size], INPUT_SIZE)
-                mask_input = impro.resize(mask,np.min(img_origin.shape[:2]))[y-size:y+size,x-size:x+size]
-                mosaic_input[:,:,-1] = impro.resize(mask_input, INPUT_SIZE)
-                mosaic_input_tensor = data.im2tensor(mosaic_input,bgr2rgb=False,gpu_id=opt.gpu_id)
-                unmosaic_pred = netG(mosaic_input_tensor)
-                img_fake = data.tensor2im(unmosaic_pred,rgb2bgr = False)
+                for pos in FRAME_POS:
+                    input_stream.append(impro.resize(img_pool[pos][y-size:y+size,x-size:x+size], INPUT_SIZE)[:,:,::-1])
+                if init_flag:
+                    init_flag = False
+                    previous_frame = input_stream[N]
+                    previous_frame = data.im2tensor(previous_frame,bgr2rgb=True,gpu_id=opt.gpu_id)
+
+                input_stream = np.array(input_stream).reshape(1,T,INPUT_SIZE,INPUT_SIZE,3).transpose((0,4,1,2,3))
+                input_stream = data.to_tensor(data.normalize(input_stream),gpu_id=opt.gpu_id)
+                unmosaic_pred = netG(input_stream,previous_frame)
+                img_fake = data.tensor2im(unmosaic_pred,rgb2bgr = True)
+                previous_frame = unmosaic_pred
+                mask = cv2.imread(os.path.join(opt.temp_dir+'/mosaic_mask',imagepath),0)
                 img_result = impro.replace_mosaic(img_origin,img_fake,mask,x,y,size,opt.no_feather)
             except Exception as e:
-                print('Warning:',e)
+                init_flag = True
+                print('Error:',e)
+        else:
+            init_flag = True
        cv2.imwrite(os.path.join(opt.temp_dir+'/replace_mosaic',imagepath),img_result)
         os.remove(os.path.join(opt.temp_dir+'/video2image',imagepath))
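Note on the new temporal sampling above: with N,T,S = 2,5,3 the loop keeps a sliding pool of POOL_NUM = 13 consecutive frames and feeds the generator T = 5 of them, spaced S = 3 frames apart and centered on the frame being cleaned (img_pool[LEFT_FRAME]). A minimal standalone sketch of that indexing, using only the constants defined in the hunk:

import numpy as np

N, T, S = 2, 5, 3
LEFT_FRAME = N * S               # 6 frames of context on each side
POOL_NUM = LEFT_FRAME * 2 + 1    # 13 frames kept in the sliding pool
FRAME_POS = np.linspace(0, (T - 1) * S, T, dtype=np.int64)

print(POOL_NUM)    # 13
print(FRAME_POS)   # [ 0  3  6  9 12]; FRAME_POS[N] == LEFT_FRAME, the current frame
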
diff --git a/cores/options.py b/cores/options.py
index 58759f8c468c41a5cb3e5fd7e2dd94d2aad8ffca..f1d612ba6e9d79ea0a97e9b88cd4129498ab3118 100644
--- a/cores/options.py
+++ b/cores/options.py
@@ -11,6 +11,7 @@ class Options():
 
     def initialize(self):
         #base
+        self.parser.add_argument('--debug', action='store_true', help='if specified, start debug mode')
         self.parser.add_argument('--gpu_id', type=str,default='0', help='if -1, use cpu')
         self.parser.add_argument('--media_path', type=str, default='./imgs/ruoruo.jpg',help='your videos or images path')
         self.parser.add_argument('-ss', '--start_time', type=str, default='00:00:00',help='start position of video, default is the beginning of video')
@@ -58,8 +59,7 @@ class Options():
 
         model_name = os.path.basename(self.opt.model_path)
         self.opt.temp_dir = os.path.join(self.opt.temp_dir, 'DeepMosaics_temp')
-
-
+
         if self.opt.gpu_id != '-1':
             os.environ["CUDA_VISIBLE_DEVICES"] = str(self.opt.gpu_id)
             import torch
@@ -70,53 +70,59 @@ class Options():
 
         if test_flag:
             if not os.path.exists(self.opt.media_path):
-                print('Error: Bad media path!')
-                input('Please press any key to exit.\n')
-                sys.exit(0)
-
-            if self.opt.mode == 'auto':
-                if 'clean' in model_name or self.opt.traditional:
-                    self.opt.mode = 'clean'
-                elif 'add' in model_name:
-                    self.opt.mode = 'add'
-                elif 'style' in model_name or 'edges' in model_name:
-                    self.opt.mode = 'style'
-                else:
-                    print('Please input running model!')
+                print('Error: Media does not exist!')
                 input('Please press any key to exit.\n')
                 sys.exit(0)
-
-            if self.opt.output_size == 0 and self.opt.mode == 'style':
-                self.opt.output_size = 512
-
-            if 'edges' in model_name or 'edges' in self.opt.preprocess:
-                self.opt.edges = True
-
-            if self.opt.netG == 'auto' and self.opt.mode =='clean':
-                if 'unet_128' in model_name:
-                    self.opt.netG = 'unet_128'
-                elif 'resnet_9blocks' in model_name:
-                    self.opt.netG = 'resnet_9blocks'
-                elif 'HD' in model_name and 'video' not in model_name:
-                    self.opt.netG = 'HD'
-                elif 'video' in model_name:
-                    self.opt.netG = 'video'
-                else:
-                    print('Type of Generator error!')
+            if not os.path.exists(self.opt.model_path):
+                print('Error: Model does not exist!')
                 input('Please press any key to exit.\n')
                 sys.exit(0)
 
-            if self.opt.ex_mult == 'auto':
-                if 'face' in model_name:
-                    self.opt.ex_mult = 1.1
+            if self.opt.mode == 'auto':
+                if 'clean' in model_name or self.opt.traditional:
+                    self.opt.mode = 'clean'
+                elif 'add' in model_name:
+                    self.opt.mode = 'add'
+                elif 'style' in model_name or 'edges' in model_name:
+                    self.opt.mode = 'style'
+                else:
+                    print('Please check model_path!')
+                    input('Please press any key to exit.\n')
+                    sys.exit(0)
+
+            if self.opt.output_size == 0 and self.opt.mode == 'style':
+                self.opt.output_size = 512
+
+            if 'edges' in model_name or 'edges' in self.opt.preprocess:
+                self.opt.edges = True
+
+            if self.opt.netG == 'auto' and self.opt.mode =='clean':
+                if 'unet_128' in model_name:
+                    self.opt.netG = 'unet_128'
+                elif 'resnet_9blocks' in model_name:
+                    self.opt.netG = 'resnet_9blocks'
+                elif 'video' in model_name:
+                    self.opt.netG = 'video'
+                else:
+                    print('Type of Generator error!')
+                    input('Please press any key to exit.\n')
+                    sys.exit(0)
+
+            if self.opt.ex_mult == 'auto':
+                if 'face' in model_name:
+                    self.opt.ex_mult = 1.1
+                else:
+                    self.opt.ex_mult = 1.5
             else:
-                    self.opt.ex_mult = 1.5
-            else:
-                self.opt.ex_mult = float(self.opt.ex_mult)
-
-            if self.opt.mosaic_position_model_path == 'auto':
-                _path = os.path.join(os.path.split(self.opt.model_path)[0],'mosaic_position.pth')
-                self.opt.mosaic_position_model_path = _path
-                # print(self.opt.mosaic_position_model_path)
+                self.opt.ex_mult = float(self.opt.ex_mult)
+
+            if self.opt.mosaic_position_model_path == 'auto':
+                _path = os.path.join(os.path.split(self.opt.model_path)[0],'mosaic_position.pth')
+                if os.path.isfile(_path):
+                    self.opt.mosaic_position_model_path = _path
+                else:
+                    input('Please check mosaic_position_model_path!')
+                    input('Please press any key to exit.\n')
+                    sys.exit(0)
 
         return self.opt
\ No newline at end of file
diff --git a/deepmosaic.py b/deepmosaic.py
index 2571d38df6c1f9d50cc670f84465f47f44fd419d..1d5c4e6530d5bf65e96ee77b3b99133bf23559e0 100644
--- a/deepmosaic.py
+++ b/deepmosaic.py
@@ -68,8 +68,11 @@ def main():
             print('This type of file is not supported')
 
     util.clean_tempfiles(opt, tmp_init = False)
-
+
 if __name__ == '__main__':
+    if opt.debug:
+        main()
+        sys.exit(0)
     try:
         main()
         print('Finished!')
diff --git a/models/loadmodel.py b/models/loadmodel.py
index 8e5ed1c543e6f6ee64441addbef98752dd2671ab..16124c469cf098b1d81b352c20d44a0387919ff9 100755
--- a/models/loadmodel.py
+++ b/models/loadmodel.py
@@ -1,23 +1,23 @@
 import torch
 from . import model_util
-from .pix2pix_model import define_G
-from .pix2pixHD_model import define_G as define_G_HD
-from .video_model import MosaicNet
-from .videoHD_model import MosaicNet as MosaicNet_HD
+from .pix2pix_model import define_G as pix2pix_G
+from .pix2pixHD_model import define_G as pix2pixHD_G
+# from .video_model import MosaicNet
+# from .videoHD_model import MosaicNet as MosaicNet_HD
 from .BiSeNet_model import BiSeNet
+from .BVDNet import define_G as video_G
 
 def show_paramsnumber(net,netname='net'):
     parameters = sum(param.numel() for param in net.parameters())
     parameters = round(parameters/1e6,2)
     print(netname+' parameters: '+str(parameters)+'M')
 
-
 def pix2pix(opt):
     # print(opt.model_path,opt.netG)
     if opt.netG == 'HD':
-        netG = define_G_HD(3, 3, 64, 'global' ,4)
+        netG = pix2pixHD_G(3, 3, 64, 'global' ,4)
     else:
-        netG = define_G(3, 3, 64, opt.netG, norm='batch',use_dropout=True, init_type='normal', gpu_ids=[])
+        netG = pix2pix_G(3, 3, 64, opt.netG, norm='batch',use_dropout=True, init_type='normal', gpu_ids=[])
     show_paramsnumber(netG,'netG')
     netG.load_state_dict(torch.load(opt.model_path))
     netG = model_util.todevice(netG,opt.gpu_id)
@@ -27,9 +27,9 @@ def pix2pix(opt):
 
 def style(opt):
     if opt.edges:
-        netG = define_G(1, 3, 64, 'resnet_9blocks', norm='instance',use_dropout=True, init_type='normal', gpu_ids=[])
+        netG = pix2pix_G(1, 3, 64, 'resnet_9blocks', norm='instance',use_dropout=True, init_type='normal', gpu_ids=[])
     else:
-        netG = define_G(3, 3, 64, 'resnet_9blocks', norm='instance',use_dropout=False, init_type='normal', gpu_ids=[])
+        netG = pix2pix_G(3, 3, 64, 'resnet_9blocks', norm='instance',use_dropout=False, init_type='normal', gpu_ids=[])
 
     #in other to load old pretrain model
     #https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/models/base_model.py
@@ -51,10 +51,7 @@ def style(opt):
     return netG
 
 def video(opt):
-    if 'HD' in opt.model_path:
-        netG = MosaicNet_HD(3*25+1, 3, norm='instance')
-    else:
-        netG = MosaicNet(3*25+1, 3,norm = 'batch')
+    netG = video_G(N=2,n_blocks=1,gpu_id=opt.gpu_id)
     show_paramsnumber(netG,'netG')
     netG.load_state_dict(torch.load(opt.model_path))
     netG = model_util.todevice(netG,opt.gpu_id)
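Note: the rewritten video() now returns a two-input generator, matching the netG(input_stream,previous_frame) call in cleanmosaic_video_fusion. A hedged shape sketch of that call (shapes inferred from the reshape/transpose in the cores/core.py hunk; netG here stands for the model returned by loadmodel.video(), which is not constructed in this snippet):

import numpy as np
import torch

T, INPUT_SIZE = 5, 256
stream = np.zeros((1, T, INPUT_SIZE, INPUT_SIZE, 3), dtype=np.float32)
stream = torch.from_numpy(stream.transpose((0, 4, 1, 2, 3)))  # -> (1, 3, T, 256, 256)
previous = torch.zeros((1, 3, INPUT_SIZE, INPUT_SIZE))        # -> (1, 3, 256, 256)
# unmosaic_pred = netG(stream, previous)                      # netG = loadmodel.video(opt)
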
diff --git a/models/videoHD_model.py b/models/videoHD_model.py
deleted file mode 100644
index 20e901f2c199e82a9e540b8067fe4917df338bd3..0000000000000000000000000000000000000000
--- a/models/videoHD_model.py
+++ /dev/null
@@ -1,173 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from .pix2pixHD_model import *
-
-
-class encoder_2d(nn.Module):
-    def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d,
-                 padding_type='reflect'):
-        assert(n_blocks >= 0)
-        super(encoder_2d, self).__init__()
-        activation = nn.ReLU(True)
-
-        model = [nn.ReflectionPad2d(3), nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0), norm_layer(ngf), activation]
-        ### downsample
-        for i in range(n_downsampling):
-            mult = 2**i
-            model += [nn.ReflectionPad2d(1),nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=0),
-                      norm_layer(ngf * mult * 2), activation]
-
-        self.model = nn.Sequential(*model)
-    def forward(self, input):
-        return self.model(input)
-
-class decoder_2d(nn.Module):
-    def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d,
-                 padding_type='reflect'):
-        assert(n_blocks >= 0)
-        super(decoder_2d, self).__init__()
-        activation = nn.ReLU(True)
-
-        model = []
-
-        ### resnet blocks
-        mult = 2**n_downsampling
-        for i in range(n_blocks):
-            model += [ResnetBlock(ngf * mult, padding_type=padding_type, activation=activation, norm_layer=norm_layer)]
-
-        ### upsample
-        for i in range(n_downsampling):
-            mult = 2**(n_downsampling - i)
-
-            # model += [ nn.Upsample(scale_factor = 2, mode='nearest'),
-            #            nn.ReflectionPad2d(1),
-            #            nn.Conv2d(ngf * mult, int(ngf * mult / 2),kernel_size=3, stride=1, padding=0),
-            #            norm_layer(int(ngf * mult / 2)),
-            #            nn.ReLU(True)]
-            model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1, output_padding=1),
-                      norm_layer(int(ngf * mult / 2)), activation]
-        model += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0), nn.Tanh()]
-        self.model = nn.Sequential(*model)
-
-    def forward(self, input):
-        return self.model(input)
-
-
-class conv_3d(nn.Module):
-    def __init__(self,inchannel,outchannel,kernel_size=3,stride=2,padding=1,norm_layer_3d=nn.BatchNorm3d,use_bias=True):
-        super(conv_3d, self).__init__()
-        self.conv = nn.Sequential(
-            nn.Conv3d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=padding, bias=use_bias),
-            norm_layer_3d(outchannel),
-            nn.ReLU(inplace=True),
-        )
-
-    def forward(self, x):
-        x = self.conv(x)
-        return x
-
-class conv_2d(nn.Module):
-    def __init__(self,inchannel,outchannel,kernel_size=3,stride=1,padding=1,norm_layer_2d=nn.BatchNorm2d,use_bias=True):
-        super(conv_2d, self).__init__()
-        self.conv = nn.Sequential(
-            nn.ReflectionPad2d(padding),
-            nn.Conv2d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=0, bias=use_bias),
-            norm_layer_2d(outchannel),
-            nn.ReLU(inplace=True),
-        )
-
-    def forward(self, x):
-        x = self.conv(x)
-        return x
-
-
-class encoder_3d(nn.Module):
-    def __init__(self,in_channel,norm_layer_2d,norm_layer_3d,use_bias):
-        super(encoder_3d, self).__init__()
-        self.inconv = conv_3d(1, 64, 7, 2, 3,norm_layer_3d,use_bias)
-        self.down1 = conv_3d(64, 128, 3, 2, 1,norm_layer_3d,use_bias)
-        self.down2 = conv_3d(128, 256, 3, 2, 1,norm_layer_3d,use_bias)
-        self.down3 = conv_3d(256, 512, 3, 2, 1,norm_layer_3d,use_bias)
-        self.down4 = conv_3d(512, 1024, 3, 1, 1,norm_layer_3d,use_bias)
-        self.pool = nn.AvgPool3d((5,1,1))
-        # self.conver2d = nn.Sequential(
-        #     nn.Conv2d(256*int(in_channel/4), 256, kernel_size=3, stride=1, padding=1, bias=use_bias),
-        #     norm_layer_2d(256),
-        #     nn.ReLU(inplace=True),
-        # )
-
-
-    def forward(self, x):
-
-        x = x.view(x.size(0),1,x.size(1),x.size(2),x.size(3))
-        x = self.inconv(x)
-        x = self.down1(x)
-        x = self.down2(x)
-        x = self.down3(x)
-        x = self.down4(x)
-        #print(x.size())
-        x = self.pool(x)
-        #print(x.size())
-        # torch.Size([1, 1024, 16, 16])
-        # torch.Size([1, 512, 5, 16, 16])
-
-
-        x = x.view(x.size(0),x.size(1),x.size(3),x.size(4))
-
-        # x = self.conver2d(x)
-
-        return x
-
-    # def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d,
-    #              padding_type='reflect')
-
-class ALL(nn.Module):
-    def __init__(self, in_channel, out_channel,norm_layer_2d,norm_layer_3d,use_bias):
-        super(ALL, self).__init__()
-
-        self.encoder_2d = encoder_2d(4,3,64,4,norm_layer=norm_layer_2d,padding_type='reflect')
-        self.encoder_3d = encoder_3d(in_channel,norm_layer_2d,norm_layer_3d,use_bias)
-        self.decoder_2d = decoder_2d(4,3,64,4,norm_layer=norm_layer_2d,padding_type='reflect')
-        # self.shortcut_cov = conv_2d(3,64,7,1,3,norm_layer_2d,use_bias)
-        self.merge1 = conv_2d(2048,1024,3,1,1,norm_layer_2d,use_bias)
-        # self.merge2 = nn.Sequential(
-        #     conv_2d(128,64,3,1,1,norm_layer_2d,use_bias),
-        #     nn.ReflectionPad2d(3),
-        #     nn.Conv2d(64, out_channel, kernel_size=7, padding=0),
-        #     nn.Tanh()
-        # )
-
-    def forward(self, x):
-
-        N = int((x.size()[1])/3)
-        x_2d = torch.cat((x[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:], x[:,N-1:N,:,:]), 1)
-        #shortcut_2d = x[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:]
-
-        x_2d = self.encoder_2d(x_2d)
-        x_3d = self.encoder_3d(x)
-        #x = x_2d + x_3d
-        x = torch.cat((x_2d,x_3d),1)
-        x = self.merge1(x)
-
-        x = self.decoder_2d(x)
-        #shortcut_2d = self.shortcut_cov(shortcut_2d)
-        #x = torch.cat((x,shortcut_2d),1)
-        #x = self.merge2(x)
-
-        return x
-
-def MosaicNet(in_channel, out_channel, norm='batch'):
-
-    if norm == 'batch':
-        # norm_layer_2d = nn.BatchNorm2d
-        # norm_layer_3d = nn.BatchNorm3d
-        norm_layer_2d = functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True)
-        norm_layer_3d = functools.partial(nn.BatchNorm3d, affine=True, track_running_stats=True)
-        use_bias = False
-    elif norm == 'instance':
-        norm_layer_2d = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
-        norm_layer_3d = functools.partial(nn.InstanceNorm3d, affine=False, track_running_stats=False)
-        use_bias = True
-
-    return ALL(in_channel, out_channel, norm_layer_2d, norm_layer_3d, use_bias)
diff --git a/models/video_model.py b/models/video_model.py
deleted file mode 100644
index 4a095c6a577176ac10a1318adf8a71278c234c89..0000000000000000000000000000000000000000
--- a/models/video_model.py
+++ /dev/null
@@ -1,216 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from .pix2pix_model import *
-
-
-class encoder_2d(nn.Module):
-    """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations.
-
-    We adapt Torch code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style)
-    """
-
-    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='reflect'):
-        """Construct a Resnet-based generator
-
-        Parameters:
-            input_nc (int)      -- the number of channels in input images
-            output_nc (int)     -- the number of channels in output images
-            ngf (int)           -- the number of filters in the last conv layer
-            norm_layer          -- normalization layer
-            use_dropout (bool)  -- if use dropout layers
-            n_blocks (int)      -- the number of ResNet blocks
-            padding_type (str)  -- the name of padding layer in conv layers: reflect | replicate | zero
-        """
-        assert(n_blocks >= 0)
-        super(encoder_2d, self).__init__()
-        if type(norm_layer) == functools.partial:
-            use_bias = norm_layer.func == nn.InstanceNorm2d
-        else:
-            use_bias = norm_layer == nn.InstanceNorm2d
-
-        model = [nn.ReflectionPad2d(3),
-                 nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
-                 norm_layer(ngf),
-                 nn.ReLU(True)]
-
-        n_downsampling = 2
-        for i in range(n_downsampling):  # add downsampling layers
-            mult = 2 ** i
-            model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias=use_bias),
-                      norm_layer(ngf * mult * 2),
-                      nn.ReLU(True)]
-            #torch.Size([1, 256, 32, 32])
-
-        self.model = nn.Sequential(*model)
-
-    def forward(self, input):
-        """Standard forward"""
-        return self.model(input)
-
-
-class decoder_2d(nn.Module):
-    """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations.
-
-    We adapt Torch code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style)
-    """
-
-    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='reflect'):
-        """Construct a Resnet-based generator
-
-        Parameters:
-            input_nc (int)      -- the number of channels in input images
-            output_nc (int)     -- the number of channels in output images
-            ngf (int)           -- the number of filters in the last conv layer
-            norm_layer          -- normalization layer
-            use_dropout (bool)  -- if use dropout layers
-            n_blocks (int)      -- the number of ResNet blocks
-            padding_type (str)  -- the name of padding layer in conv layers: reflect | replicate | zero
-        """
-        super(decoder_2d, self).__init__()
-        if type(norm_layer) == functools.partial:
-            use_bias = norm_layer.func == nn.InstanceNorm2d
-        else:
-            use_bias = norm_layer == nn.InstanceNorm2d
-
-        model = []
-
-        n_downsampling = 2
-        mult = 2 ** n_downsampling
-        for i in range(n_blocks):  # add ResNet blocks
-            model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]
-            #torch.Size([1, 256, 32, 32])
-
-        for i in range(n_downsampling):  # add upsampling layers
-            mult = 2 ** (n_downsampling - i)
-            # model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
-            #                              kernel_size=3, stride=2,
-            #                              padding=1, output_padding=1,
-            #                              bias=use_bias),
-            #           norm_layer(int(ngf * mult / 2)),
-            #           nn.ReLU(True)]
-            #https://distill.pub/2016/deconv-checkerboard/
-            #https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/190
-
-            model += [ nn.Upsample(scale_factor = 2, mode='nearest'),
-                       nn.ReflectionPad2d(1),
-                       nn.Conv2d(ngf * mult, int(ngf * mult / 2),kernel_size=3, stride=1, padding=0),
-                       norm_layer(int(ngf * mult / 2)),
-                       nn.ReLU(True)]
-        # model += [nn.ReflectionPad2d(3)]
-        # model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
-        # model += [nn.Tanh()]
-        # model += [nn.Sigmoid()]
-
-        self.model = nn.Sequential(*model)
-
-    def forward(self, input):
-        """Standard forward"""
-        return self.model(input)
-
-
-
-class conv_3d(nn.Module):
-    def __init__(self,inchannel,outchannel,kernel_size=3,stride=2,padding=1,norm_layer_3d=nn.BatchNorm3d,use_bias=True):
-        super(conv_3d, self).__init__()
-        self.conv = nn.Sequential(
-            nn.Conv3d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=padding, bias=use_bias),
-            norm_layer_3d(outchannel),
-            nn.ReLU(inplace=True),
-        )
-
-    def forward(self, x):
-        x = self.conv(x)
-        return x
-
-class conv_2d(nn.Module):
-    def __init__(self,inchannel,outchannel,kernel_size=3,stride=1,padding=1,norm_layer_2d=nn.BatchNorm2d,use_bias=True):
-        super(conv_2d, self).__init__()
-        self.conv = nn.Sequential(
-            nn.ReflectionPad2d(padding),
-            nn.Conv2d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=0, bias=use_bias),
-            norm_layer_2d(outchannel),
-            nn.ReLU(inplace=True),
-        )
-
-    def forward(self, x):
-        x = self.conv(x)
-        return x
-
-
-class encoder_3d(nn.Module):
-    def __init__(self,in_channel,norm_layer_2d,norm_layer_3d,use_bias):
-        super(encoder_3d, self).__init__()
-        self.down1 = conv_3d(1, 64, 3, 2, 1,norm_layer_3d,use_bias)
-        self.down2 = conv_3d(64, 128, 3, 2, 1,norm_layer_3d,use_bias)
-        self.down3 = conv_3d(128, 256, 3, 1, 1,norm_layer_3d,use_bias)
-        self.conver2d = nn.Sequential(
-            nn.Conv2d(256*int(in_channel/4), 256, kernel_size=3, stride=1, padding=1, bias=use_bias),
-            norm_layer_2d(256),
-            nn.ReLU(inplace=True),
-        )
-
-
-    def forward(self, x):
-
-        x = x.view(x.size(0),1,x.size(1),x.size(2),x.size(3))
-        x = self.down1(x)
-        x = self.down2(x)
-        x = self.down3(x)
-
-        x = x.view(x.size(0),x.size(1)*x.size(2),x.size(3),x.size(4))
-
-        x = self.conver2d(x)
-
-        return x
-
-
-
-class ALL(nn.Module):
-    def __init__(self, in_channel, out_channel,norm_layer_2d,norm_layer_3d,use_bias):
-        super(ALL, self).__init__()
-
-        self.encoder_2d = encoder_2d(4,-1,64,norm_layer=norm_layer_2d,n_blocks=9)
-        self.encoder_3d = encoder_3d(in_channel,norm_layer_2d,norm_layer_3d,use_bias)
-        self.decoder_2d = decoder_2d(4,3,64,norm_layer=norm_layer_2d,n_blocks=9)
-        self.shortcut_cov = conv_2d(3,64,7,1,3,norm_layer_2d,use_bias)
-        self.merge1 = conv_2d(512,256,3,1,1,norm_layer_2d,use_bias)
-        self.merge2 = nn.Sequential(
-            conv_2d(128,64,3,1,1,norm_layer_2d,use_bias),
-            nn.ReflectionPad2d(3),
-            nn.Conv2d(64, out_channel, kernel_size=7, padding=0),
-            nn.Tanh()
-        )
-
-    def forward(self, x):
-
-        N = int((x.size()[1])/3)
-        x_2d = torch.cat((x[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:], x[:,N-1:N,:,:]), 1)
-        shortcut_2d = x[:,int((N-1)/2)*3:(int((N-1)/2)+1)*3,:,:]
-
-        x_2d = self.encoder_2d(x_2d)
-
-        x_3d = self.encoder_3d(x)
-        x = torch.cat((x_2d,x_3d),1)
-        x = self.merge1(x)
-        x = self.decoder_2d(x)
-        shortcut_2d = self.shortcut_cov(shortcut_2d)
-        x = torch.cat((x,shortcut_2d),1)
-        x = self.merge2(x)
-
-        return x
-
-def MosaicNet(in_channel, out_channel, norm='batch'):
-
-    if norm == 'batch':
-        # norm_layer_2d = nn.BatchNorm2d
-        # norm_layer_3d = nn.BatchNorm3d
-        norm_layer_2d = functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True)
-        norm_layer_3d = functools.partial(nn.BatchNorm3d, affine=True, track_running_stats=True)
-        use_bias = False
-    elif norm == 'instance':
-        norm_layer_2d = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
-        norm_layer_3d = functools.partial(nn.InstanceNorm3d, affine=False, track_running_stats=False)
-        use_bias = True
-
-    return ALL(in_channel, out_channel, norm_layer_2d, norm_layer_3d, use_bias)
diff --git a/train/clean/train.py b/train/clean/train.py
index fbcd2b37b9f29a57c7b2d1905c1c01bf931d7d08..cf8371c1b478b701f3f0fc05846b3efac8019f73 100644
--- a/train/clean/train.py
+++ b/train/clean/train.py
@@ -32,6 +32,7 @@ opt.parser.add_argument('--finesize',type=int,default=256, help='')
 opt.parser.add_argument('--loadsize',type=int,default=286, help='')
 opt.parser.add_argument('--batchsize',type=int,default=1, help='')
 opt.parser.add_argument('--no_gan', action='store_true', help='if specified, do not use gan')
+opt.parser.add_argument('--n_blocks',type=int,default=4, help='')
 opt.parser.add_argument('--n_layers_D',type=int,default=1, help='')
 opt.parser.add_argument('--num_D',type=int,default=3, help='')
 opt.parser.add_argument('--lambda_L2',type=float,default=100, help='')
@@ -89,7 +90,7 @@ TBGlobalWriter = SummaryWriter(tensorboard_savedir)
 if opt.gpu_id != '-1' and len(opt.gpu_id) == 1:
     torch.backends.cudnn.benchmark = True
 
-netG = BVDNet.define_G(opt.N,gpu_id=opt.gpu_id)
+netG = BVDNet.define_G(opt.N,opt.n_blocks,gpu_id=opt.gpu_id)
 optimizer_G = torch.optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, opt.beta2))
 lossfun_L2 = nn.MSELoss()
 lossfun_VGG = model_util.VGGLoss(opt.gpu_id)
diff --git a/util/data.py b/util/data.py
index 7d472a3286a7b728434d6d1ee81a8bfd75c3e15c..60628a57093a98cfc6cacc5db19b7648a348e17d 100755
--- a/util/data.py
+++ b/util/data.py
@@ -13,6 +13,15 @@ def to_tensor(data,gpu_id):
         data = data.cuda()
     return data
 
+def normalize(data):
+    '''
+    normalize to -1 ~ 1
+    '''
+    return (data.astype(np.float32)/255.0-0.5)/0.5
+
+def anti_normalize(data):
+    return np.clip((data*0.5+0.5)*255,0,255).astype(np.uint8)
+
 def tensor2im(image_tensor, gray=False, rgb2bgr = True ,is0_1 = False, batch_index=0):
     image_tensor =image_tensor.data
     image_numpy = image_tensor[batch_index].cpu().float().numpy()
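Quick sanity check for the new helpers added to util/data.py (function bodies copied verbatim from the hunk so the snippet is self-contained): normalize maps uint8 [0,255] to float32 [-1,1], and anti_normalize inverts it with clipping.

import numpy as np

def normalize(data):
    return (data.astype(np.float32)/255.0-0.5)/0.5

def anti_normalize(data):
    return np.clip((data*0.5+0.5)*255,0,255).astype(np.uint8)

img = np.array([[0, 128, 255]], dtype=np.uint8)
print(normalize(img))                  # [[-1.  0.00392157  1.]]
print(anti_normalize(normalize(img)))  # [[  0 128 255]] -- round-trip is lossless here
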
diff --git a/util/image_processing.py b/util/image_processing.py
index c8324757c6c322e5b811c5baef13cf17d265e5d2..6f1dd91308b2e5274806cd894a064f550c8d5748 100755
--- a/util/image_processing.py
+++ b/util/image_processing.py
@@ -104,6 +104,12 @@ def color_adjust(img,alpha=0,beta=0,b=0,g=0,r=0,ran = False):
 
     return (np.clip(img,0,255)).astype('uint8')
 
+def CAdaIN(src,dst):
+    '''
+    make src has dst's style
+    '''
+    return np.std(dst)*((src-np.mean(src))/np.std(src))+np.mean(dst)
+
 def makedataset(target_image,orgin_image):
     target_image = resize(target_image,256)
     orgin_image = resize(orgin_image,256)
@@ -177,35 +183,31 @@ def mask_area(mask):
     except:
         area = 0
     return area
-
+import time
 def replace_mosaic(img_origin,img_fake,mask,x,y,size,no_feather):
-    img_fake = cv2.resize(img_fake,(size*2,size*2),interpolation=cv2.INTER_LANCZOS4)
+    img_fake = cv2.resize(img_fake,(size*2,size*2),interpolation=cv2.INTER_CUBIC)
     if no_feather:
         img_origin[y-size:y+size,x-size:x+size]=img_fake
-        img_result = img_origin
+        return img_origin
     else:
-        #color correction
-        RGB_origin = img_origin[y-size:y+size,x-size:x+size].mean(0).mean(0)
-        RGB_fake = img_fake.mean(0).mean(0)
-        for i in range(3):img_fake[:,:,i] = np.clip(img_fake[:,:,i]+RGB_origin[i]-RGB_fake[i],0,255)
+        # #color correction
+        # RGB_origin = img_origin[y-size:y+size,x-size:x+size].mean(0).mean(0)
+        # RGB_fake = img_fake.mean(0).mean(0)
+        # for i in range(3):img_fake[:,:,i] = np.clip(img_fake[:,:,i]+RGB_origin[i]-RGB_fake[i],0,255)
         #eclosion
-        eclosion_num = int(size/5)
-        entad = int(eclosion_num/2+2)
+        eclosion_num = int(size/10)+2
 
-        mask = cv2.resize(mask,(img_origin.shape[1],img_origin.shape[0]))
-        mask = ch_one2three(mask)
-
-        mask = (cv2.blur(mask, (eclosion_num, eclosion_num)))
-        mask_tmp = np.zeros_like(mask)
-        mask_tmp[y-size:y+size,x-size:x+size] = mask[y-size:y+size,x-size:x+size]# Fix edge overflow
-        mask = mask_tmp/255.0
+        mask_crop = cv2.resize(mask,(img_origin.shape[1],img_origin.shape[0]))[y-size:y+size,x-size:x+size]
+        mask_crop = ch_one2three(mask_crop)
+
+        mask_crop = (cv2.blur(mask_crop, (eclosion_num, eclosion_num)))
+        mask_crop = mask_crop/255.0
 
-        img_tmp = np.zeros(img_origin.shape)
-        img_tmp[y-size:y+size,x-size:x+size]=img_fake
-        img_result = img_origin.copy()
-        img_result = (img_origin*(1-mask)+img_tmp*mask).astype('uint8')
+        img_crop = img_origin[y-size:y+size,x-size:x+size]
+        img_origin[y-size:y+size,x-size:x+size] = np.clip((img_crop*(1-mask_crop)+img_fake*mask_crop),0,255).astype('uint8')
+
+        return img_origin
 
-    return img_result
 
 def Q_lapulase(resImg):
     '''
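Usage note for the new CAdaIN helper above (body copied from the hunk): it re-standardizes src so its mean and standard deviation match dst's, an AdaIN-style color match. A small demonstration on float arrays (float inputs assumed here, to avoid uint8 wraparound):

import numpy as np

def CAdaIN(src, dst):
    return np.std(dst)*((src-np.mean(src))/np.std(src))+np.mean(dst)

rng = np.random.default_rng(0)
src = rng.random((8, 8)).astype(np.float32) * 50 + 10   # arbitrary "content" patch
dst = rng.random((8, 8)).astype(np.float32) * 200       # arbitrary "style" patch
out = CAdaIN(src, dst)
print(np.allclose(out.mean(), dst.mean(), atol=1e-3))   # True: mean transferred
print(np.allclose(out.std(),  dst.std(),  atol=1e-3))   # True: std transferred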