Unverified commit 36027357 authored by lzzyzlbb, committed by GitHub

Add FOM mobile model (change 7*7 convs into 3*3 convs) (#466)

* fix benchmark

* fix fom mobile model

* add fom mobile

* add fom mobile

* add fom mobile

* add fom mobile
Parent 3dbb5ac0
......@@ -112,7 +112,7 @@ model:
lr_scheduler:
name: MultiStepDecay
-  epoch_milestones: [237360, 356040]
+  epoch_milestones: [2000000000000] # effectively fixes the learning rate; change back to [237360, 356040] for "both"-mode fine-tuning
lr_generator: 2.0e-4
lr_discriminator: 2.0e-4
lr_kp_detector: 2.0e-4
......@@ -139,11 +139,11 @@ log_config:
visiual_interval: 10
validate:
-  interval: 3000
+  interval: 20000000000 # effectively disables validation to speed up training; set to 20000 to monitor progress
save_img: true
snapshot_config:
-  interval: 1
+  interval: 5
optimizer:
name: Adam
......
......@@ -148,10 +148,12 @@ python -u tools/first-order-demo.py \
--mobile_net
```
Compression currently uses MobileNet-style blocks plus pruning. Comparison with the original model:
-| | Size (MB) | reconstruction loss |
+| | Size (MB) | l1 loss |
| :--------------: | :--------------: | :-----------------: |
-| Original | 229 | 0.012058867 |
-| Compressed | 6.1 | 0.015025159 |
+| Original | 229 | 0.041781392 |
+| Compressed | 10.1 | 0.047878753 |
For face-keypoint evaluation metrics, see https://github.com/AliaksandrSiarohin/pose-evaluation.
**Training:**
First, set mode in configs/firstorder_vox_mobile_256.yaml to kp_detector and train the compressed version
......
......@@ -91,7 +91,7 @@ class FirstOrderPredictor(BasePredictor):
self.image_size = image_size
if weight_path is None:
if mobile_net:
-                vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox_mobile.pdparams'
+                vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-mobile.pdparams'
else:
if self.image_size == 512:
......
......@@ -73,6 +73,8 @@ class FirstOrderModel(BaseModel):
discriminator_cfg.update({'train_params': train_params})
self.nets['Dis'] = build_discriminator(discriminator_cfg)
self.visualizer = Visualizer()
self.test_loss = []
self.is_train = False
def setup_lr_schedulers(self, lr_cfg):
self.kp_lr = MultiStepDecay(learning_rate=lr_cfg['lr_kp_detector'],
......@@ -131,9 +133,7 @@ class FirstOrderModel(BaseModel):
"""Run forward pass; called by both functions <optimize_parameters> and <test>."""
self.losses_generator, self.generated = \
self.nets['Gen_Full'](self.input_data.copy(), self.nets['discriminator'])
-        self.visual_items['driving_source_gen'] = self.visualizer.visualize(
-            self.input_data['driving'].detach(),
-            self.input_data['source'].detach(), self.generated)
def backward_G(self):
loss_values = [val.mean() for val in self.losses_generator.values()]
......@@ -150,6 +150,7 @@ class FirstOrderModel(BaseModel):
self.losses.update(dict(zip(losses_discriminator.keys(), loss_values)))
def train_iter(self, optimizers=None):
self.is_train = True
self.forward()
# update G
self.set_requires_grad(self.nets['discriminator'], False)
......@@ -167,10 +168,12 @@ class FirstOrderModel(BaseModel):
self.optimizers['optimizer_Dis'].step()
def test_iter(self, metrics=None):
if not self.is_train:
self.is_train = True
self.setup_net_parallel()
self.nets['kp_detector'].eval()
self.nets['generator'].eval()
loss_list = []
with paddle.no_grad():
kp_source = self.nets['kp_detector'](self.input_data['video'][:, :,
0])
......@@ -181,10 +184,13 @@ class FirstOrderModel(BaseModel):
out = self.nets['generator'](source,
kp_source=kp_source,
kp_driving=kp_driving)
out.update({'kp_source': kp_source, 'kp_driving': kp_driving})
loss = paddle.abs(out['prediction'] -
driving).mean().cpu().numpy()
-                    loss_list.append(loss)
-        print("Reconstruction loss: %s" % np.mean(loss_list))
+                    self.test_loss.append(loss)
+                    self.visual_items['driving_source_gen'] = self.visualizer.visualize(
+                        driving, source, out)
+        print("Reconstruction loss: %s" % np.mean(self.test_loss))
self.nets['kp_detector'].train()
self.nets['generator'].train()
......@@ -289,6 +295,9 @@ class FirstOrderModelMobile(FirstOrderModel):
self.kp_weight_path = kp_weight_path
self.gen_weight_path = gen_weight_path
self.visualizer = Visualizer()
self.test_loss = []
self.is_train = False
def setup_net_parallel(self):
if isinstance(self.nets['Gen_Full'], paddle.DataParallel):
......@@ -302,24 +311,24 @@ class FirstOrderModelMobile(FirstOrderModel):
self.nets['generator'] = self.nets['Gen_Full'].generator
self.nets['discriminator'] = self.nets['Dis'].discriminator
self.kp_detector_ori = self.Gen_Full_ori.kp_extractor
if self.is_train:
return
from ppgan.utils.download import get_path_from_url
vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-cpk.pdparams'
weight_path = get_path_from_url(vox_cpk_weight_url)
checkpoint = paddle.load(weight_path)
if (self.mode == "kp_detector"):
print("load pretrained generator... ")
self.nets['generator'].set_state_dict(checkpoint['generator'])
for param in self.nets['generator'].parameters():
param.stop_gradient = True
elif self.mode == "generator":
print("load pretrained kp_detector... ")
self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
for param in self.nets['kp_detector'].parameters():
param.stop_gradient = True
self.kp_detector_ori.set_state_dict(checkpoint['kp_detector'])
for param in self.kp_detector_ori.parameters():
param.stop_gradient = True
def setup_optimizers(self, lr_cfg, optimizer):
self.setup_net_parallel()
# init params
......@@ -344,6 +353,7 @@ class FirstOrderModelMobile(FirstOrderModel):
elif self.mode == "both":
super(FirstOrderModelMobile,
self).setup_optimizers(lr_cfg, optimizer)
print("load both pretrained kp_detector and generator")
checkpoint = paddle.load(self.kp_weight_path)
self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
checkpoint = paddle.load(self.gen_weight_path)
......@@ -362,6 +372,7 @@ class FirstOrderModelMobile(FirstOrderModel):
self.nets['Gen_Full'](self.input_data.copy(), self.nets['discriminator'])
def train_iter(self, optimizers=None):
self.is_train = True
if (self.mode == "both"):
super(FirstOrderModelMobile, self).train_iter(optimizers=optimizers)
return
......
......@@ -41,6 +41,25 @@ class OcclusionAwareGenerator(nn.Layer):
else:
self.dense_motion_network = None
if mobile_net:
self.first = nn.Sequential(
SameBlock2d(num_channels,
num_channels,
kernel_size=3,
padding=1,
mobile_net=mobile_net),
SameBlock2d(num_channels,
num_channels,
kernel_size=3,
padding=1,
mobile_net=mobile_net),
SameBlock2d(num_channels,
block_expansion,
kernel_size=3,
padding=1,
mobile_net=mobile_net)
)
else:
self.first = SameBlock2d(num_channels,
block_expansion,
kernel_size=(7, 7),
......@@ -109,7 +128,27 @@ class OcclusionAwareGenerator(nn.Layer):
self.bottleneck.add_sublayer(
'r' + str(i),
ResBlock2d(in_features, kernel_size=(3, 3), padding=(1, 1)))
if mobile_net:
self.final = nn.Sequential(
nn.Conv2D(block_expansion,
block_expansion,
kernel_size=3,
weight_attr=nn.initializer.KaimingUniform(),
padding=1),
nn.ReLU(),
nn.Conv2D(block_expansion,
block_expansion,
kernel_size=3,
weight_attr=nn.initializer.KaimingUniform(),
padding=1),
nn.ReLU(),
nn.Conv2D(block_expansion,
num_channels,
kernel_size=3,
weight_attr=nn.initializer.KaimingUniform(),
padding=1)
)
else:
self.final = nn.Conv2D(block_expansion,
num_channels,
kernel_size=(7, 7),
......
......@@ -31,12 +31,53 @@ class DenseMotionNetwork(nn.Layer):
num_blocks=num_blocks,
mobile_net=mobile_net)
if mobile_net:
self.mask = nn.Sequential(
nn.Conv2D(self.hourglass.out_filters,
self.hourglass.out_filters,
kernel_size=3,
weight_attr=nn.initializer.KaimingUniform(),
padding=1),
nn.ReLU(),
nn.Conv2D(self.hourglass.out_filters,
self.hourglass.out_filters,
kernel_size=3,
weight_attr=nn.initializer.KaimingUniform(),
padding=1),
nn.ReLU(),
nn.Conv2D(self.hourglass.out_filters,
num_kp + 1,
kernel_size=3,
weight_attr=nn.initializer.KaimingUniform(),
padding=1))
else:
self.mask = nn.Conv2D(self.hourglass.out_filters,
num_kp + 1,
kernel_size=(7, 7),
padding=(3, 3))
if estimate_occlusion_map:
if mobile_net:
self.occlusion = nn.Sequential(
nn.Conv2D(self.hourglass.out_filters,
self.hourglass.out_filters,
kernel_size=3,
padding=1,
weight_attr=nn.initializer.KaimingUniform()),
nn.ReLU(),
nn.Conv2D(self.hourglass.out_filters,
self.hourglass.out_filters,
kernel_size=3,
weight_attr=nn.initializer.KaimingUniform(),
padding=1),
nn.ReLU(),
nn.Conv2D(self.hourglass.out_filters,
1,
kernel_size=3,
padding=1,
weight_attr=nn.initializer.KaimingUniform())
)
else:
self.occlusion = nn.Conv2D(self.hourglass.out_filters,
1,
kernel_size=(7, 7),
......@@ -50,7 +91,8 @@ class DenseMotionNetwork(nn.Layer):
if self.scale_factor != 1:
self.down = AntiAliasInterpolation2d(num_channels,
-                                                 self.scale_factor)
+                                                 self.scale_factor,
+                                                 mobile_net=mobile_net)
def create_heatmap_representations(self, source_image, kp_driving,
kp_source):
......
......@@ -302,7 +302,8 @@ class SameBlock2d(nn.Layer):
kernel_size=kernel_size,
padding=padding,
groups=groups,
-                              bias_attr=(mobile_net == False))
+                              bias_attr=(mobile_net == False),  # no conv bias for mobile_net: SyncBatchNorm follows
+                              weight_attr=nn.initializer.KaimingUniform())
self.norm = SyncBatchNorm(out_features)
def forward(self, x):
......@@ -425,8 +426,12 @@ class AntiAliasInterpolation2d(nn.Layer):
"""
Band-limited downsampling, for better preservation of the input signal.
"""
-    def __init__(self, channels, scale):
+    def __init__(self, channels, scale, mobile_net=False):
super(AntiAliasInterpolation2d, self).__init__()
if mobile_net:
sigma = 0.25
kernel_size = 3
else:
sigma = (1 / scale - 1) / 2
kernel_size = 2 * round(sigma * 4) + 1
self.ka = kernel_size // 2
......
......@@ -31,7 +31,24 @@ class KPDetector(nn.Layer):
max_features=max_features,
num_blocks=num_blocks,
mobile_net=mobile_net)
if mobile_net:
self.kp = nn.Sequential(
nn.Conv2D(in_channels=self.predictor.out_filters,
out_channels=self.predictor.out_filters,
kernel_size=3,
weight_attr=nn.initializer.KaimingUniform(),
padding=pad),
nn.Conv2D(in_channels=self.predictor.out_filters,
out_channels=self.predictor.out_filters,
kernel_size=3,
weight_attr=nn.initializer.KaimingUniform(),
padding=pad),
nn.Conv2D(in_channels=self.predictor.out_filters,
out_channels=num_kp,
kernel_size=3,
weight_attr=nn.initializer.KaimingUniform(),
padding=pad))
else:
self.kp = nn.Conv2D(in_channels=self.predictor.out_filters,
out_channels=num_kp,
kernel_size=(7, 7),
......@@ -39,6 +56,31 @@ class KPDetector(nn.Layer):
if estimate_jacobian:
self.num_jacobian_maps = 1 if single_jacobian_map else num_kp
if mobile_net:
self.jacobian = nn.Sequential(
nn.Conv2D(in_channels=self.predictor.out_filters,
out_channels=self.predictor.out_filters,
kernel_size=3,
padding=pad),
nn.Conv2D(in_channels=self.predictor.out_filters,
out_channels=self.predictor.out_filters,
kernel_size=3,
padding=pad),
nn.Conv2D(in_channels=self.predictor.out_filters,
out_channels=4 * self.num_jacobian_maps,
kernel_size=3,
padding=pad),
)
self.jacobian[0].weight.set_value(
paddle.zeros(self.jacobian[0].weight.shape, dtype='float32'))
self.jacobian[1].weight.set_value(
paddle.zeros(self.jacobian[1].weight.shape, dtype='float32'))
self.jacobian[2].weight.set_value(
paddle.zeros(self.jacobian[2].weight.shape, dtype='float32'))
self.jacobian[2].bias.set_value(
paddle.to_tensor([1, 0, 0, 1] *
self.num_jacobian_maps).astype('float32'))
else:
self.jacobian = nn.Conv2D(in_channels=self.predictor.out_filters,
out_channels=4 * self.num_jacobian_maps,
kernel_size=(7, 7),
......@@ -55,7 +97,8 @@ class KPDetector(nn.Layer):
self.scale_factor = scale_factor
if self.scale_factor != 1:
self.down = AntiAliasInterpolation2d(num_channels,
-                                                 self.scale_factor)
+                                                 self.scale_factor,
+                                                 mobile_net=mobile_net)
def gaussian2kp(self, heatmap):
"""
......
import paddle.inference as paddle_infer
import argparse
import numpy as np
import cv2
import imageio
import time
from tqdm import tqdm
import os
from functools import reduce
def read_img(path):
img = imageio.imread(path)
if img.ndim == 2:
img = np.expand_dims(img, axis=2)
# some images have 4 channels
if img.shape[2] > 3:
img = img[:,:,:3]
return img
def read_video(path):
reader = imageio.get_reader(path)
fps = reader.get_meta_data()['fps']
driving_video = []
try:
for im in reader:
driving_video.append(im)
except RuntimeError:
    print("Failed to read the driving video!")
reader.close()
return driving_video, fps
def face_detection(img_ori, weight_path):
config = paddle_infer.Config(
os.path.join(weight_path, '__model__'),
os.path.join(weight_path, '__params__'))
config.disable_gpu()
# disable print log when predict
config.disable_glog_info()
# enable shared memory
config.enable_memory_optim()
# disable feed, fetch OP, needed by zero_copy_run
config.switch_use_feed_fetch_ops(False)
predictor = paddle_infer.create_predictor(config)
img = img_ori.astype(np.float32)
mean = np.array([123, 117, 104])[np.newaxis, np.newaxis, :]
std = np.array([127.502231, 127.502231, 127.502231])[np.newaxis, np.newaxis, :]
img -= mean
img /= std
img = img[:, :, [2, 1, 0]]
img = img[np.newaxis].transpose([0, 3, 1, 2])
input_names = predictor.get_input_names()
input_tensor = predictor.get_input_handle(input_names[0])
input_tensor.copy_from_cpu(img)
predictor.run()
output_names = predictor.get_output_names()
boxes_tensor = predictor.get_output_handle(output_names[0])
np_boxes = boxes_tensor.copy_to_cpu()
if reduce(lambda x, y: x * y, np_boxes.shape) < 6:
print('[WARNING] No face detected.')
exit()
# scale normalized box coords back to pixels (boxes: [label, score, x1, y1, x2, y2])
h, w = img.shape[2:]
np_boxes[:, 2] *= w
np_boxes[:, 3] *= h
np_boxes[:, 4] *= w
np_boxes[:, 5] *= h
expect_boxes = (np_boxes[:, 1] > 0.5) & (np_boxes[:, 0] > -1)
rect = np_boxes[expect_boxes, :][0][2:]
bh = rect[3] - rect[1]
bw = rect[2] - rect[0]
cy = rect[1] + int(bh / 2)
cx = rect[0] + int(bw / 2)
margin = max(bh, bw)
y1 = max(0, cy - margin)
x1 = max(0, cx - int(0.8 * margin))
y2 = min(h, cy + margin)
x2 = min(w, cx + int(0.8 * margin))
return int(y1), int(y2), int(x1), int(x2)
def main():
args = parse_args()
source_path = args.source_path
driving_path = args.driving_path
source_img = read_img(source_path)
# TODO: add blazeface static model
#left, right, up, bottom = face_detection(source_img, "/workspace/PaddleDetection/static/inference_model/blazeface/")
source = source_img #[left:right, up:bottom]
source = cv2.resize(source, (256, 256)) / 255.0
source = source[np.newaxis].astype(np.float32).transpose([0, 3, 1, 2])
driving_video, fps = read_video(driving_path)
driving_video = [cv2.resize(frame, (256, 256)) / 255.0 for frame in driving_video]
driving_len = len(driving_video)
driving_video = np.array(driving_video).astype(np.float32).transpose([0, 3, 1, 2])
# create the inference configs for the two exported models
kp_detector_config = paddle_infer.Config(args.model_prefix+"/kp_detector.pdmodel", args.model_prefix+"/kp_detector.pdiparams")
generator_config = paddle_infer.Config(args.model_prefix+"/generator.pdmodel", args.model_prefix+"/generator.pdiparams")
kp_detector_config.set_mkldnn_cache_capacity(10)
kp_detector_config.enable_mkldnn()
generator_config.set_mkldnn_cache_capacity(10)
generator_config.enable_mkldnn()
kp_detector_config.disable_gpu()
kp_detector_config.set_cpu_math_library_num_threads(6)
generator_config.disable_gpu()
generator_config.set_cpu_math_library_num_threads(6)
# create the predictors from the configs
kp_detector_predictor = paddle_infer.create_predictor(kp_detector_config)
generator_predictor = paddle_infer.create_predictor(generator_config)
# get the input names
kp_detector_input_names = kp_detector_predictor.get_input_names()
kp_detector_input_handle = kp_detector_predictor.get_input_handle(kp_detector_input_names[0])
kp_detector_input_handle.reshape([args.batch_size, 3, 256, 256])
kp_detector_input_handle.copy_from_cpu(source)
kp_detector_predictor.run()
kp_detector_output_names = kp_detector_predictor.get_output_names()
kp_detector_output_handle = kp_detector_predictor.get_output_handle(kp_detector_output_names[0])
source_j = kp_detector_output_handle.copy_to_cpu()
kp_detector_output_handle = kp_detector_predictor.get_output_handle(kp_detector_output_names[1])
source_v = kp_detector_output_handle.copy_to_cpu()
kp_detector_input_handle.reshape([args.batch_size, 3, 256, 256])
kp_detector_input_handle.copy_from_cpu(driving_video[0:1])
kp_detector_predictor.run()
kp_detector_output_names = kp_detector_predictor.get_output_names()
kp_detector_output_handle = kp_detector_predictor.get_output_handle(kp_detector_output_names[0])
driving_init_j = kp_detector_output_handle.copy_to_cpu()
kp_detector_output_handle = kp_detector_predictor.get_output_handle(kp_detector_output_names[1])
driving_init_v = kp_detector_output_handle.copy_to_cpu()
start_time = time.time()
results = []
for i in tqdm(range(0, driving_len)):
kp_detector_input_handle.copy_from_cpu(driving_video[i:i+1])
kp_detector_predictor.run()
kp_detector_output_names = kp_detector_predictor.get_output_names()
kp_detector_output_handle = kp_detector_predictor.get_output_handle(kp_detector_output_names[0])
driving_j = kp_detector_output_handle.copy_to_cpu()
kp_detector_output_handle = kp_detector_predictor.get_output_handle(kp_detector_output_names[1])
driving_v = kp_detector_output_handle.copy_to_cpu()
generator_inputs = [source, source_j, source_v, driving_j, driving_v, driving_init_j, driving_init_v]
generator_input_names = generator_predictor.get_input_names()
for j in range(len(generator_input_names)):  # j avoids shadowing the frame index i
    generator_input_handle = generator_predictor.get_input_handle(generator_input_names[j])
    generator_input_handle.copy_from_cpu(generator_inputs[j])
generator_predictor.run()
generator_output_names = generator_predictor.get_output_names()
generator_output_handle = generator_predictor.get_output_handle(generator_output_names[0])
output_data = generator_output_handle.copy_to_cpu()
output_data = np.transpose(output_data, [0, 2, 3, 1])[0] * 255.0
# TODO: add blazeface static model
#frame = source_img.copy()
#frame[left:right, up:bottom] = cv2.resize(output_data.astype(np.uint8), (bottom - up, right - left), cv2.INTER_AREA)
results.append(output_data.astype(np.uint8))
print("Inference time: %.2f s" % (time.time() - start_time))
imageio.mimsave(args.output_path, results, fps=fps)
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--model_profix", type=str, help="model filename profix")
parser.add_argument("--batch_size", type=int, default=1, help="batch size")
parser.add_argument("--source_path", type=str, default=1, help="source_path")
parser.add_argument("--driving_path", type=str, default=1, help="driving_path")
parser.add_argument("--output_path", type=str, default=1, help="output_path")
return parser.parse_args()
if __name__ == "__main__":
main()
\ No newline at end of file