From ef4309ab00505b30dec156ec1c8ec5cc4d928840 Mon Sep 17 00:00:00 2001
From: lzzyzlbb <287246233@qq.com>
Date: Fri, 17 Sep 2021 15:33:12 +0800
Subject: [PATCH] 1. fix wav2lip md, 2. fix fom mobile bug (#425)

* add fom lite training

* add fom lite training

* modify according to review

* modify according to review

* fix fom training error

* 1. fix wav2lip md, 2. fix fom mobile bug
---
 docs/zh_CN/tutorials/wav2lip.md            |  6 +-
 ppgan/models/firstorder_model.py           | 12 ++--
 ppgan/models/generators/occlusion_aware.py | 64 +++++++++++++---------
 3 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/docs/zh_CN/tutorials/wav2lip.md b/docs/zh_CN/tutorials/wav2lip.md
index 900a12b..6217c4d 100644
--- a/docs/zh_CN/tutorials/wav2lip.md
+++ b/docs/zh_CN/tutorials/wav2lip.md
@@ -13,17 +13,17 @@ Wav2Lip generates lip movements for the person in a video that are synchronized with the input audio
 ```
 cd applications
-python tools/wav2lip.py \
+python tools/wav2lip.py \
     --face ../docs/imgs/mona7s.mp4 \
     --audio ../docs/imgs/guangquan.m4a \
-    --outfile pp_guangquan_mona7s.mp4
+    --outfile pp_guangquan_mona7s.mp4 \
     --face_enhancement
 ```
 
 **Parameters:**
 - face: the input video or image; the lip movements of the person in it are synthesized to match the audio
 - audio: the audio that drives the lip synthesis; the person in the video is lip-synced to this audio
 - outfile: the synthesized output video
-- face_enhancement: apply face enhancement, defaults to false
+- face_enhancement: apply face enhancement; if the flag is omitted, enhancement is disabled by default
 
 ### 2.2 Training
 1. Our model is trained on the LRS2 dataset. See [here](https://github.com/Rudrabha/Wav2Lip#training-on-datasets-other-than-lrs2) for advice on training with other datasets.
diff --git a/ppgan/models/firstorder_model.py b/ppgan/models/firstorder_model.py
index baa9d6a..a8a59f6 100755
--- a/ppgan/models/firstorder_model.py
+++ b/ppgan/models/firstorder_model.py
@@ -154,7 +154,6 @@ class FirstOrderModel(BaseModel):
         self.optimizers['optimizer_KP'].clear_grad()
         self.optimizers['optimizer_Gen'].clear_grad()
         self.backward_G()
-        outs = {}
         self.optimizers['optimizer_KP'].step()
         self.optimizers['optimizer_Gen'].step()
 
@@ -314,11 +313,6 @@ class FirstOrderModelMobile(FirstOrderModel):
             self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
             for param in self.nets['kp_detector'].parameters():
                 param.stop_gradient = True
-        elif self.mode == "both":
-            checkpoint = paddle.load(self.kp_weight_path)
-            self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
-            checkpoint = paddle.load(self.gen_weight_path)
-            self.nets['generator'].set_state_dict(checkpoint['generator'])
 
         self.kp_detector_ori.set_state_dict(checkpoint['kp_detector'])
         for param in self.kp_detector_ori.parameters():
@@ -348,6 +342,11 @@ class FirstOrderModelMobile(FirstOrderModel):
         elif self.mode == "both":
             super(FirstOrderModelMobile,
                   self).setup_optimizers(lr_cfg, optimizer)
+            checkpoint = paddle.load(self.kp_weight_path)
+            self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
+            checkpoint = paddle.load(self.gen_weight_path)
+            self.nets['generator'].set_state_dict(checkpoint['generator'])
+
         # define loss functions
         self.losses = {}
 
@@ -375,7 +374,6 @@ class FirstOrderModelMobile(FirstOrderModel):
         self.optimizers['optimizer_Gen'].clear_grad()
         self.backward_G()
         self.optimizers['optimizer_Gen'].step()
-        outs = {}
 
         # update D
         if self.train_params['loss_weights']['generator_gan'] != 0:
diff --git a/ppgan/models/generators/occlusion_aware.py b/ppgan/models/generators/occlusion_aware.py
index abf5195..a19c115 100644
--- a/ppgan/models/generators/occlusion_aware.py
+++ b/ppgan/models/generators/occlusion_aware.py
@@ -48,7 +48,8 @@ class OcclusionAwareGenerator(nn.Layer):
                 num_kp=num_kp,
                 num_channels=num_channels,
                 estimate_occlusion_map=estimate_occlusion_map,
-                **dense_motion_params, mobile_net=mobile_net)
+                **dense_motion_params,
+                mobile_net=mobile_net)
         else:
             self.dense_motion_network = None
 
@@ -64,10 +65,10 @@ class OcclusionAwareGenerator(nn.Layer):
                 in_features = min(max_features, block_expansion * (2**i))
                 out_features = min(max_features, block_expansion * (2**(i + 1)))
                 down_blocks.append(
-                    MobileDownBlock2d(in_features,
-                                     out_features,
-                                     kernel_size=(3, 3),
-                                     padding=(1, 1)))
+                    MobileDownBlock2d(in_features,
+                                      out_features,
+                                      kernel_size=(3, 3),
+                                      padding=(1, 1)))
         else:
             for i in range(num_down_blocks):
                 in_features = min(max_features, block_expansion * (2**i))
@@ -83,20 +84,22 @@ class OcclusionAwareGenerator(nn.Layer):
         if mobile_net:
             for i in range(num_down_blocks):
                 in_features = min(max_features,
-                                 block_expansion * (2**(num_down_blocks - i)))
-                out_features = min(max_features,
-                                 block_expansion * (2**(num_down_blocks - i - 1)))
+                                  block_expansion * (2**(num_down_blocks - i)))
+                out_features = min(
+                    max_features,
+                    block_expansion * (2**(num_down_blocks - i - 1)))
                 up_blocks.append(
                     MobileUpBlock2d(in_features,
-                                   out_features,
-                                   kernel_size=(3, 3),
-                                   padding=(1, 1)))
+                                    out_features,
+                                    kernel_size=(3, 3),
+                                    padding=(1, 1)))
         else:
             for i in range(num_down_blocks):
                 in_features = min(max_features,
                                   block_expansion * (2**(num_down_blocks - i)))
-                out_features = min(max_features,
-                                  block_expansion * (2**(num_down_blocks - i - 1)))
+                out_features = min(
+                    max_features,
+                    block_expansion * (2**(num_down_blocks - i - 1)))
                 up_blocks.append(
                     UpBlock2d(in_features,
                               out_features,
@@ -107,10 +110,12 @@ class OcclusionAwareGenerator(nn.Layer):
         self.bottleneck = paddle.nn.Sequential()
         in_features = min(max_features, block_expansion * (2**num_down_blocks))
         if mobile_net:
-            for i in range(num_bottleneck_blocks): 
+            for i in range(num_bottleneck_blocks):
                 self.bottleneck.add_sublayer(
                     'r' + str(i),
-                    MobileResBlock2d(in_features, kernel_size=(3, 3), padding=(1, 1)))
+                    MobileResBlock2d(in_features,
+                                     kernel_size=(3, 3),
+                                     padding=(1, 1)))
         else:
             for i in range(num_bottleneck_blocks):
                 self.bottleneck.add_sublayer(
@@ -125,6 +130,7 @@ class OcclusionAwareGenerator(nn.Layer):
         self.num_channels = num_channels
         self.inference = inference
         self.pad = 5
+        self.mobile_net = mobile_net
 
     def deform_input(self, inp, deformation):
         _, h_old, w_old, _ = deformation.shape
@@ -137,14 +143,18 @@ class OcclusionAwareGenerator(nn.Layer):
                                         align_corners=False)
         deformation = deformation.transpose([0, 2, 3, 1])
         if self.inference:
-            identity_grid = make_coordinate_grid((h, w),
-                                                 type=inp.dtype)
+            identity_grid = make_coordinate_grid((h, w), type=inp.dtype)
             identity_grid = identity_grid.reshape([1, h, w, 2])
-            visualization_matrix = np.zeros((h,w)).astype("float32")
-            visualization_matrix[self.pad:h-self.pad, self.pad:w-self.pad] = 1.0
-            gauss_kernel = paddle.to_tensor(cv2.GaussianBlur(visualization_matrix , (9, 9), 0.0, borderType=cv2.BORDER_ISOLATED))
+            visualization_matrix = np.zeros((h, w)).astype("float32")
+            visualization_matrix[self.pad:h - self.pad,
+                                 self.pad:w - self.pad] = 1.0
+            gauss_kernel = paddle.to_tensor(
+                cv2.GaussianBlur(visualization_matrix, (9, 9),
+                                 0.0,
+                                 borderType=cv2.BORDER_ISOLATED))
             gauss_kernel = gauss_kernel.unsqueeze(0).unsqueeze(-1)
-            deformation = gauss_kernel * deformation + (1-gauss_kernel) * identity_grid
+            deformation = gauss_kernel * deformation + (
+                1 - gauss_kernel) * identity_grid
 
         return F.grid_sample(inp,
                              deformation,
@@ -182,12 +192,12 @@ class OcclusionAwareGenerator(nn.Layer):
                                           size=out.shape[2:],
                                           mode='bilinear',
                                           align_corners=False)
-            if self.inference:
-                h,w = occlusion_map.shape[2:]
-                occlusion_map[:,:,0:self.pad,:] = 1.0
-                occlusion_map[:,:,:,0:self.pad] = 1.0
-                occlusion_map[:,:,h-self.pad:h,:] = 1.0
-                occlusion_map[:,:,:,w-self.pad:w] = 1.0
+            if self.inference and not self.mobile_net:
+                h, w = occlusion_map.shape[2:]
+                occlusion_map[:, :, 0:self.pad, :] = 1.0
+                occlusion_map[:, :, :, 0:self.pad] = 1.0
+                occlusion_map[:, :, h - self.pad:h, :] = 1.0
+                occlusion_map[:, :, :, w - self.pad:w] = 1.0
             out = out * occlusion_map
 
         output_dict["deformed"] = self.deform_input(source_image,
-- 
GitLab