Unverified commit ef4309ab, authored by lzzyzlbb, committed by GitHub

1.fix wav2lip md, 2.fix fom mobile bug (#425)

* add fom lite training

* add fom lite training

* modify according to review

* modify according to review

* fix fom train error

* 1.fix wav2lip md, 2.fix fom mobile bug
Parent 25ba89f1
@@ -13,17 +13,17 @@ Wav2Lip generates lip movements for the person in a video that are synchronized with the input audio
 ```
 cd applications
 python tools/wav2lip.py \
     --face ../docs/imgs/mona7s.mp4 \
     --audio ../docs/imgs/guangquan.m4a \
-    --outfile pp_guangquan_mona7s.mp4
+    --outfile pp_guangquan_mona7s.mp4 \
+    --face_enhancement
 ```
 **Parameters:**
 - face: video or image; the lip shape of the person in it is synthesized from the audio so that it stays in sync with the audio
 - audio: the audio that drives lip synthesis; the person in the video lip-syncs to this audio
 - outfile: path of the synthesized output video
-- face_enhancement: add face enhancement; defaults to false
+- face_enhancement: add face enhancement; when the flag is omitted, enhancement is disabled by default
 ### 2.2 Training
 1. Our model is trained on the LRS2 dataset. See [here](https://github.com/Rudrabha/Wav2Lip#training-on-datasets-other-than-lrs2) for suggestions on training with other datasets.
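The reworded `face_enhancement` description above amounts to saying the option is a bare boolean switch: passing `--face_enhancement` enables enhancement, omitting it leaves enhancement off. A minimal argparse sketch of such a flag (it mirrors only the documented options; the actual `tools/wav2lip.py` may wire its arguments differently):

```python
# Minimal sketch, assuming argparse; this mirrors the documented CLI flags
# only, not the real contents of tools/wav2lip.py.
import argparse

parser = argparse.ArgumentParser(description="Wav2Lip lip-sync inference")
parser.add_argument("--face", required=True,
                    help="video or image whose lips will be re-synthesized")
parser.add_argument("--audio", required=True,
                    help="audio that drives the lip synthesis")
parser.add_argument("--outfile", required=True,
                    help="path of the synthesized output video")
# A bare flag: present -> True, omitted -> False, as the doc fix describes.
parser.add_argument("--face_enhancement", action="store_true",
                    help="enable face enhancement (disabled when omitted)")

args = parser.parse_args()
print(args.face_enhancement)  # False unless --face_enhancement is passed
```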
@@ -154,7 +154,6 @@ class FirstOrderModel(BaseModel):
         self.optimizers['optimizer_KP'].clear_grad()
         self.optimizers['optimizer_Gen'].clear_grad()
         self.backward_G()
-        outs = {}
         self.optimizers['optimizer_KP'].step()
         self.optimizers['optimizer_Gen'].step()
@@ -314,11 +313,6 @@ class FirstOrderModelMobile(FirstOrderModel):
             self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
             for param in self.nets['kp_detector'].parameters():
                 param.stop_gradient = True
-        elif self.mode == "both":
-            checkpoint = paddle.load(self.kp_weight_path)
-            self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
-            checkpoint = paddle.load(self.gen_weight_path)
-            self.nets['generator'].set_state_dict(checkpoint['generator'])
         self.kp_detector_ori.set_state_dict(checkpoint['kp_detector'])
         for param in self.kp_detector_ori.parameters():
@@ -348,6 +342,11 @@ class FirstOrderModelMobile(FirstOrderModel):
         elif self.mode == "both":
             super(FirstOrderModelMobile,
                   self).setup_optimizers(lr_cfg, optimizer)
+            checkpoint = paddle.load(self.kp_weight_path)
+            self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
+            checkpoint = paddle.load(self.gen_weight_path)
+            self.nets['generator'].set_state_dict(checkpoint['generator'])
 
         # define loss functions
         self.losses = {}
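Taken together, the two hunks above relocate the pretrained-weight loading for the "both" training mode: instead of being restored where the checkpoint paths are first handled, the `kp_detector` and `generator` weights are now loaded inside `setup_optimizers`, after the parent class has built the optimizers, so training starts from the intended checkpoints. A condensed sketch of the resulting ordering (only `kp_weight_path`, `gen_weight_path`, and the net names come from the diff; the class body and `build_optimizers` stand-in are illustrative):

```python
# Condensed sketch of the "both"-mode flow after this fix; the surrounding
# FirstOrderModelMobile class from ppgan is shown schematically.
import paddle

class FirstOrderModelMobileSketch:
    def setup_optimizers(self, lr_cfg, optimizer):
        # 1) Build the optimizers first (the parent-class call in the diff;
        #    build_optimizers here is an illustrative stand-in).
        self.build_optimizers(lr_cfg, optimizer)
        # 2) Only then restore the pretrained weights, so subsequent training
        #    steps run from the intended checkpoints rather than fresh init.
        checkpoint = paddle.load(self.kp_weight_path)
        self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
        checkpoint = paddle.load(self.gen_weight_path)
        self.nets['generator'].set_state_dict(checkpoint['generator'])
```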
@@ -375,7 +374,6 @@ class FirstOrderModelMobile(FirstOrderModel):
         self.optimizers['optimizer_Gen'].clear_grad()
         self.backward_G()
         self.optimizers['optimizer_Gen'].step()
-        outs = {}
 
         # update D
         if self.train_params['loss_weights']['generator_gan'] != 0:
@@ -48,7 +48,8 @@ class OcclusionAwareGenerator(nn.Layer):
                 num_kp=num_kp,
                 num_channels=num_channels,
                 estimate_occlusion_map=estimate_occlusion_map,
-                **dense_motion_params, mobile_net=mobile_net)
+                **dense_motion_params,
+                mobile_net=mobile_net)
         else:
             self.dense_motion_network = None
@@ -64,10 +65,10 @@ class OcclusionAwareGenerator(nn.Layer):
                 in_features = min(max_features, block_expansion * (2**i))
                 out_features = min(max_features, block_expansion * (2**(i + 1)))
                 down_blocks.append(
                     MobileDownBlock2d(in_features,
                                       out_features,
                                       kernel_size=(3, 3),
                                       padding=(1, 1)))
         else:
             for i in range(num_down_blocks):
                 in_features = min(max_features, block_expansion * (2**i))
@@ -83,20 +84,22 @@ class OcclusionAwareGenerator(nn.Layer):
         if mobile_net:
             for i in range(num_down_blocks):
                 in_features = min(max_features,
                                   block_expansion * (2**(num_down_blocks - i)))
-                out_features = min(max_features,
-                                   block_expansion * (2**(num_down_blocks - i - 1)))
+                out_features = min(
+                    max_features,
+                    block_expansion * (2**(num_down_blocks - i - 1)))
                 up_blocks.append(
                     MobileUpBlock2d(in_features,
                                     out_features,
                                     kernel_size=(3, 3),
                                     padding=(1, 1)))
         else:
             for i in range(num_down_blocks):
                 in_features = min(max_features,
                                   block_expansion * (2**(num_down_blocks - i)))
-                out_features = min(max_features,
-                                   block_expansion * (2**(num_down_blocks - i - 1)))
+                out_features = min(
+                    max_features,
+                    block_expansion * (2**(num_down_blocks - i - 1)))
                 up_blocks.append(
                     UpBlock2d(in_features,
                               out_features,
@@ -107,10 +110,12 @@ class OcclusionAwareGenerator(nn.Layer):
         self.bottleneck = paddle.nn.Sequential()
         in_features = min(max_features, block_expansion * (2**num_down_blocks))
         if mobile_net:
             for i in range(num_bottleneck_blocks):
                 self.bottleneck.add_sublayer(
                     'r' + str(i),
-                    MobileResBlock2d(in_features, kernel_size=(3, 3), padding=(1, 1)))
+                    MobileResBlock2d(in_features,
+                                     kernel_size=(3, 3),
+                                     padding=(1, 1)))
         else:
             for i in range(num_bottleneck_blocks):
                 self.bottleneck.add_sublayer(
@@ -125,6 +130,7 @@ class OcclusionAwareGenerator(nn.Layer):
         self.num_channels = num_channels
         self.inference = inference
         self.pad = 5
+        self.mobile_net = mobile_net
 
     def deform_input(self, inp, deformation):
         _, h_old, w_old, _ = deformation.shape
@@ -137,14 +143,18 @@ class OcclusionAwareGenerator(nn.Layer):
                                         align_corners=False)
             deformation = deformation.transpose([0, 2, 3, 1])
         if self.inference:
-            identity_grid = make_coordinate_grid((h, w),
-                                                 type=inp.dtype)
+            identity_grid = make_coordinate_grid((h, w), type=inp.dtype)
             identity_grid = identity_grid.reshape([1, h, w, 2])
-            visualization_matrix = np.zeros((h,w)).astype("float32")
-            visualization_matrix[self.pad:h-self.pad, self.pad:w-self.pad] = 1.0
-            gauss_kernel = paddle.to_tensor(cv2.GaussianBlur(visualization_matrix , (9, 9), 0.0, borderType=cv2.BORDER_ISOLATED))
+            visualization_matrix = np.zeros((h, w)).astype("float32")
+            visualization_matrix[self.pad:h - self.pad,
+                                 self.pad:w - self.pad] = 1.0
+            gauss_kernel = paddle.to_tensor(
+                cv2.GaussianBlur(visualization_matrix, (9, 9),
+                                 0.0,
+                                 borderType=cv2.BORDER_ISOLATED))
             gauss_kernel = gauss_kernel.unsqueeze(0).unsqueeze(-1)
-            deformation = gauss_kernel * deformation + (1-gauss_kernel) * identity_grid
+            deformation = gauss_kernel * deformation + (
+                1 - gauss_kernel) * identity_grid
 
         return F.grid_sample(inp,
                              deformation,
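The reformatted block above is the inference-time border trick in `deform_input`: a mask that is 1 in the interior and 0 within `self.pad` pixels of the border is Gaussian-blurred, then used to blend the predicted deformation field with an identity sampling grid, so pixels near the frame edge fall back smoothly to their original positions instead of warping. A standalone sketch of that blend (`pad` mirrors `self.pad`, the (9, 9) kernel comes from the diff; the function name and shapes are illustrative):

```python
# Standalone sketch of the border blend above; assumes deformation has shape
# [N, h, w, 2] and identity_grid has shape [1, h, w, 2], as in the diff.
import cv2
import numpy as np
import paddle

def blend_with_identity(deformation, identity_grid, pad=5):
    """Keep the predicted warp in the interior; fade to the identity grid
    within `pad` pixels of the border via a Gaussian-blurred mask."""
    _, h, w, _ = deformation.shape
    mask = np.zeros((h, w), dtype="float32")
    mask[pad:h - pad, pad:w - pad] = 1.0  # 1 inside, 0 on the border band
    mask = cv2.GaussianBlur(mask, (9, 9), 0.0,
                            borderType=cv2.BORDER_ISOLATED)
    mask = paddle.to_tensor(mask).unsqueeze(0).unsqueeze(-1)  # [1, h, w, 1]
    return mask * deformation + (1 - mask) * identity_grid
```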
@@ -182,12 +192,12 @@ class OcclusionAwareGenerator(nn.Layer):
                                               size=out.shape[2:],
                                               mode='bilinear',
                                               align_corners=False)
-            if self.inference:
-                h,w = occlusion_map.shape[2:]
-                occlusion_map[:,:,0:self.pad,:] = 1.0
-                occlusion_map[:,:,:,0:self.pad] = 1.0
-                occlusion_map[:,:,h-self.pad:h,:] = 1.0
-                occlusion_map[:,:,:,w-self.pad:w] = 1.0
+            if self.inference and not self.mobile_net:
+                h, w = occlusion_map.shape[2:]
+                occlusion_map[:, :, 0:self.pad, :] = 1.0
+                occlusion_map[:, :, :, 0:self.pad] = 1.0
+                occlusion_map[:, :, h - self.pad:h, :] = 1.0
+                occlusion_map[:, :, :, w - self.pad:w] = 1.0
             out = out * occlusion_map
 
         output_dict["deformed"] = self.deform_input(source_image,
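This last hunk is the mobile bug fix proper: at inference the generator forces a `self.pad`-wide frame of the occlusion map to 1.0 (fully visible) so warped features are not attenuated at the image border, and with the newly stored `self.mobile_net` attribute that forcing is now skipped for the mobile generator. A standalone sketch of the guarded border handling (the commit does not state why the mobile variant must skip it, so the guard is reproduced as-is):

```python
# Standalone sketch of the guarded border handling above; `pad` mirrors
# self.pad (5) and mobile_net mirrors the attribute added in this commit.
import paddle

def force_visible_border(occlusion_map, pad=5, mobile_net=False):
    """Set a pad-wide frame of occlusion_map ([N, 1, h, w]) to 1.0 at
    inference; skipped entirely for the mobile generator per this fix."""
    if mobile_net:
        return occlusion_map
    h, w = occlusion_map.shape[2:]
    occlusion_map[:, :, 0:pad, :] = 1.0      # top rows
    occlusion_map[:, :, :, 0:pad] = 1.0      # left columns
    occlusion_map[:, :, h - pad:h, :] = 1.0  # bottom rows
    occlusion_map[:, :, :, w - pad:w] = 1.0  # right columns
    return occlusion_map
```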