Unverified commit ef4309ab, authored by lzzyzlbb, committed by GitHub

1.fix wav2lip md, 2.fix fom mobile bug (#425)

* add fom lite training

* add fom lite training

* modify according to review

* modify according to review

* fix fom training error

* 1.fix wav2lip md, 2.fix fom mobile bug
Parent 25ba89f1
Documentation — wav2lip tutorial:

````diff
@@ -13,17 +13,17 @@ Wav2Lip generates lip motion for the person in a video that is synchronized with the input audio
 ```
 cd applications
 python tools/wav2lip.py \
     --face ../docs/imgs/mona7s.mp4 \
     --audio ../docs/imgs/guangquan.m4a \
-    --outfile pp_guangquan_mona7s.mp4
+    --outfile pp_guangquan_mona7s.mp4 \
     --face_enhancement
 ```
 **Parameter description:**
 - face: video or image; the lips of the person in it are re-synthesized to stay in sync with the audio
 - audio: audio that drives the lip synthesis; the person in the video lip-syncs to it
 - outfile: the synthesized output video
-- face_enhancement: apply face enhancement; defaults to false
+- face_enhancement: apply face enhancement; if the flag is omitted, enhancement is disabled
 ### 2.2 Training
 1. Our model is trained on the LRS2 dataset. See [here](https://github.com/Rudrabha/Wav2Lip#training-on-datasets-other-than-lrs2) for suggestions on training with other datasets.
````
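The corrected command adds the missing `\` line continuation, without which the shell would have treated `--face_enhancement` as a separate command rather than an option; the reworded description likewise reflects that the switch is off unless explicitly passed. A minimal sketch of that flag behaviour, assuming `tools/wav2lip.py` parses its options with `argparse` (only the option names come from the document; the parser code is illustrative):

```python
import argparse

# Hypothetical reconstruction of the relevant wav2lip.py options.
parser = argparse.ArgumentParser(description="Wav2Lip lip-sync (sketch)")
parser.add_argument("--face", required=True,
                    help="video or image whose lips are re-synthesized")
parser.add_argument("--audio", required=True,
                    help="audio that drives the lip motion")
parser.add_argument("--outfile", default="result.mp4",
                    help="path of the synthesized video")
# store_true: defaults to False, so enhancement is off unless the flag is given
parser.add_argument("--face_enhancement", action="store_true")

args = parser.parse_args(["--face", "mona7s.mp4", "--audio", "guangquan.m4a"])
assert args.face_enhancement is False  # flag omitted -> enhancement disabled
```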
Model code — FirstOrderModel / FirstOrderModelMobile:

```diff
@@ -154,7 +154,6 @@ class FirstOrderModel(BaseModel):
         self.optimizers['optimizer_KP'].clear_grad()
         self.optimizers['optimizer_Gen'].clear_grad()
         self.backward_G()
-        outs = {}
         self.optimizers['optimizer_KP'].step()
         self.optimizers['optimizer_Gen'].step()
@@ -314,11 +313,6 @@ class FirstOrderModelMobile(FirstOrderModel):
             self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
             for param in self.nets['kp_detector'].parameters():
                 param.stop_gradient = True
-        elif self.mode == "both":
-            checkpoint = paddle.load(self.kp_weight_path)
-            self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
-            checkpoint = paddle.load(self.gen_weight_path)
-            self.nets['generator'].set_state_dict(checkpoint['generator'])
         self.kp_detector_ori.set_state_dict(checkpoint['kp_detector'])
         for param in self.kp_detector_ori.parameters():
@@ -348,6 +342,11 @@ class FirstOrderModelMobile(FirstOrderModel):
         elif self.mode == "both":
             super(FirstOrderModelMobile,
                   self).setup_optimizers(lr_cfg, optimizer)
+            checkpoint = paddle.load(self.kp_weight_path)
+            self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
+            checkpoint = paddle.load(self.gen_weight_path)
+            self.nets['generator'].set_state_dict(checkpoint['generator'])
         # define loss functions
         self.losses = {}
@@ -375,7 +374,6 @@ class FirstOrderModelMobile(FirstOrderModel):
         self.optimizers['optimizer_Gen'].clear_grad()
         self.backward_G()
         self.optimizers['optimizer_Gen'].step()
-        outs = {}
         # update D
         if self.train_params['loss_weights']['generator_gan'] != 0:
```
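Two things change here: the unused `outs = {}` assignments are dropped from both training loops, and the "both" distillation mode now restores the pretrained kp_detector and generator weights inside `setup_optimizers`, after the parent class has built the optimizers, instead of in the earlier setup path deleted above. A minimal sketch of the resulting order of operations (the `paddle.load`/`set_state_dict` calls and attribute names mirror the diff; the helper is a hypothetical stand-in for `super().setup_optimizers`):

```python
import paddle

def build_optimizers(model, lr_cfg, optimizer_cfg):
    """Stand-in for super().setup_optimizers(...) in this sketch."""
    ...

def setup_both_mode(model, lr_cfg, optimizer_cfg):
    # 1. Build the optimizers first, as the parent class does.
    build_optimizers(model, lr_cfg, optimizer_cfg)
    # 2. Only then restore the pretrained weights, so nothing done while
    #    setting up the optimizers can clobber the loaded parameters.
    checkpoint = paddle.load(model.kp_weight_path)
    model.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
    checkpoint = paddle.load(model.gen_weight_path)
    model.nets['generator'].set_state_dict(checkpoint['generator'])
```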
Generator code — OcclusionAwareGenerator (mostly line-wrapping cleanup, plus the new `self.mobile_net` attribute used by the fix below):

```diff
@@ -48,7 +48,8 @@ class OcclusionAwareGenerator(nn.Layer):
                 num_kp=num_kp,
                 num_channels=num_channels,
                 estimate_occlusion_map=estimate_occlusion_map,
-                **dense_motion_params, mobile_net=mobile_net)
+                **dense_motion_params,
+                mobile_net=mobile_net)
         else:
             self.dense_motion_network = None
@@ -64,10 +65,10 @@ class OcclusionAwareGenerator(nn.Layer):
                 in_features = min(max_features, block_expansion * (2**i))
                 out_features = min(max_features, block_expansion * (2**(i + 1)))
                 down_blocks.append(
                     MobileDownBlock2d(in_features,
                                       out_features,
                                       kernel_size=(3, 3),
                                       padding=(1, 1)))
         else:
             for i in range(num_down_blocks):
                 in_features = min(max_features, block_expansion * (2**i))
@@ -83,20 +84,22 @@ class OcclusionAwareGenerator(nn.Layer):
         if mobile_net:
             for i in range(num_down_blocks):
                 in_features = min(max_features,
                                   block_expansion * (2**(num_down_blocks - i)))
-                out_features = min(max_features,
-                                   block_expansion * (2**(num_down_blocks - i - 1)))
+                out_features = min(
+                    max_features,
+                    block_expansion * (2**(num_down_blocks - i - 1)))
                 up_blocks.append(
                     MobileUpBlock2d(in_features,
                                     out_features,
                                     kernel_size=(3, 3),
                                     padding=(1, 1)))
         else:
             for i in range(num_down_blocks):
                 in_features = min(max_features,
                                   block_expansion * (2**(num_down_blocks - i)))
-                out_features = min(max_features,
-                                   block_expansion * (2**(num_down_blocks - i - 1)))
+                out_features = min(
+                    max_features,
+                    block_expansion * (2**(num_down_blocks - i - 1)))
                 up_blocks.append(
                     UpBlock2d(in_features,
                               out_features,
@@ -107,10 +110,12 @@ class OcclusionAwareGenerator(nn.Layer):
         self.bottleneck = paddle.nn.Sequential()
         in_features = min(max_features, block_expansion * (2**num_down_blocks))
         if mobile_net:
             for i in range(num_bottleneck_blocks):
                 self.bottleneck.add_sublayer(
                     'r' + str(i),
-                    MobileResBlock2d(in_features, kernel_size=(3, 3), padding=(1, 1)))
+                    MobileResBlock2d(in_features,
+                                     kernel_size=(3, 3),
+                                     padding=(1, 1)))
         else:
             for i in range(num_bottleneck_blocks):
                 self.bottleneck.add_sublayer(
@@ -125,6 +130,7 @@ class OcclusionAwareGenerator(nn.Layer):
         self.num_channels = num_channels
         self.inference = inference
         self.pad = 5
+        self.mobile_net = mobile_net

     def deform_input(self, inp, deformation):
         _, h_old, w_old, _ = deformation.shape
@@ -137,14 +143,18 @@ class OcclusionAwareGenerator(nn.Layer):
                                         align_corners=False)
             deformation = deformation.transpose([0, 2, 3, 1])
         if self.inference:
-            identity_grid = make_coordinate_grid((h, w),
-                                                 type=inp.dtype)
+            identity_grid = make_coordinate_grid((h, w), type=inp.dtype)
             identity_grid = identity_grid.reshape([1, h, w, 2])
-            visualization_matrix = np.zeros((h,w)).astype("float32")
-            visualization_matrix[self.pad:h-self.pad, self.pad:w-self.pad] = 1.0
-            gauss_kernel = paddle.to_tensor(cv2.GaussianBlur(visualization_matrix , (9, 9), 0.0, borderType=cv2.BORDER_ISOLATED))
+            visualization_matrix = np.zeros((h, w)).astype("float32")
+            visualization_matrix[self.pad:h - self.pad,
+                                 self.pad:w - self.pad] = 1.0
+            gauss_kernel = paddle.to_tensor(
+                cv2.GaussianBlur(visualization_matrix, (9, 9),
+                                 0.0,
+                                 borderType=cv2.BORDER_ISOLATED))
             gauss_kernel = gauss_kernel.unsqueeze(0).unsqueeze(-1)
-            deformation = gauss_kernel * deformation + (1-gauss_kernel) * identity_grid
+            deformation = gauss_kernel * deformation + (
+                1 - gauss_kernel) * identity_grid
         return F.grid_sample(inp,
                              deformation,
```
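In `deform_input`, the inference path blends the predicted deformation grid with an identity grid near the image border: a hard interior mask is Gaussian-blurred into a soft weight, so the warp fades smoothly to identity at the edges (`BORDER_ISOLATED` keeps the blur from reading past the mask). A standalone numpy/cv2 sketch of that masking step (shapes simplified; the mask construction and blend formula mirror the code above):

```python
import cv2
import numpy as np

h, w, pad = 64, 64, 5

# 1.0 in the interior, 0.0 in a pad-wide border ring.
mask = np.zeros((h, w), dtype="float32")
mask[pad:h - pad, pad:w - pad] = 1.0
# Soften the hard edge so the warp fades out gradually.
mask = cv2.GaussianBlur(mask, (9, 9), 0.0, borderType=cv2.BORDER_ISOLATED)
mask = mask[..., None]  # broadcast over the (x, y) channel

deformation = np.random.rand(h, w, 2).astype("float32")  # predicted warp grid
identity = np.stack(np.meshgrid(np.linspace(-1, 1, w),
                                np.linspace(-1, 1, h)),
                    axis=-1).astype("float32")            # identity warp grid
blended = mask * deformation + (1.0 - mask) * identity    # identity at borders
```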
Forward pass, same class — the mobile-specific occlusion fix:

```diff
@@ -182,12 +192,12 @@ class OcclusionAwareGenerator(nn.Layer):
                                               size=out.shape[2:],
                                               mode='bilinear',
                                               align_corners=False)
-            if self.inference:
-                h,w = occlusion_map.shape[2:]
-                occlusion_map[:,:,0:self.pad,:] = 1.0
-                occlusion_map[:,:,:,0:self.pad] = 1.0
-                occlusion_map[:,:,h-self.pad:h,:] = 1.0
-                occlusion_map[:,:,:,w-self.pad:w] = 1.0
+            if self.inference and not self.mobile_net:
+                h, w = occlusion_map.shape[2:]
+                occlusion_map[:, :, 0:self.pad, :] = 1.0
+                occlusion_map[:, :, :, 0:self.pad] = 1.0
+                occlusion_map[:, :, h - self.pad:h, :] = 1.0
+                occlusion_map[:, :, :, w - self.pad:w] = 1.0
             out = out * occlusion_map
             output_dict["deformed"] = self.deform_input(source_image,
```
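This condition change is the "fom mobile bug" fix from the commit title: at inference time the border of the predicted occlusion map is forced to 1.0 (fully un-occluded), but the fill is now skipped when the mobile generator is used. A minimal paddle sketch of the guarded border fill (the indexing mirrors the diff; the function wrapper is illustrative):

```python
import paddle

def fill_occlusion_border(occlusion_map, pad=5, inference=True,
                          mobile_net=False):
    """Force a pad-wide border of an NCHW occlusion map to 1.0;
    skipped for the mobile generator, per this commit's fix."""
    if inference and not mobile_net:
        h, w = occlusion_map.shape[2:]
        occlusion_map[:, :, 0:pad, :] = 1.0
        occlusion_map[:, :, :, 0:pad] = 1.0
        occlusion_map[:, :, h - pad:h, :] = 1.0
        occlusion_map[:, :, :, w - pad:w] = 1.0
    return occlusion_map

# Example: the interior stays 0.5, the 5-pixel border becomes 1.0.
occ = fill_occlusion_border(paddle.full([1, 1, 64, 64], 0.5))
```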