diff --git a/configs/firstorder_vox_mobile_256.yaml b/configs/firstorder_vox_mobile_256.yaml
index c1eb26cd8bfcbd7ba620ee446a267ccc72afde1d..59234916e9021c44af888e02bd04c94350e97107 100644
--- a/configs/firstorder_vox_mobile_256.yaml
+++ b/configs/firstorder_vox_mobile_256.yaml
@@ -4,8 +4,8 @@ output_dir: output_dir
 dataset:
   train:
     name: FirstOrderDataset
-    batch_size: 1
-    num_workers: 1
+    batch_size: 8
+    num_workers: 4
     use_shared_memory: False
     phase: train
     dataroot: data/first_order/Voxceleb/
@@ -38,7 +38,10 @@ dataset:
 
 model:
-  name: FirstOrderModel
+  name: FirstOrderModelMobile
+  mode: generator # one of: kp_detector, generator, both
+  kp_weight_path: None # when mode is both: path to the compressed kp_detector weights from stage 1
+  gen_weight_path: None # when mode is both: path to the compressed generator weights from stage 2
   common_params:
     num_kp: 10
     num_channels: 3
@@ -64,6 +67,25 @@ model:
       num_blocks: 5
       scale_factor: 0.25
       mobile_net: True
+  generator_ori:
+    name: FirstOrderGenerator
+    kp_detector_cfg:
+      temperature: 0.1
+      block_expansion: 32
+      max_features: 1024
+      scale_factor: 0.25
+      num_blocks: 5
+    generator_cfg:
+      block_expansion: 64
+      max_features: 512
+      num_down_blocks: 2
+      num_bottleneck_blocks: 6
+      estimate_occlusion_map: True
+      dense_motion_params:
+        block_expansion: 64
+        max_features: 1024
+        num_blocks: 5
+        scale_factor: 0.25
   discriminator:
     name: FirstOrderDiscriminator
     discriminator_cfg:
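The three compression stages described in the tutorial below differ only in the `mode`, `kp_weight_path`, and `gen_weight_path` fields of this config. A minimal sketch of a stage driver, assuming PyYAML is available; the checkpoint paths are hypothetical placeholders, and note that rewriting the file this way drops YAML comments:

```python
import subprocess
import yaml

CFG = "configs/firstorder_vox_mobile_256.yaml"

def run_stage(mode, kp_weights=None, gen_weights=None):
    """Rewrite the stage-specific fields of the config, then launch training."""
    with open(CFG) as f:
        cfg = yaml.safe_load(f)
    cfg["model"]["mode"] = mode
    if kp_weights:
        cfg["model"]["kp_weight_path"] = kp_weights
    if gen_weights:
        cfg["model"]["gen_weight_path"] = gen_weights
    with open(CFG, "w") as f:
        yaml.safe_dump(cfg, f)
    subprocess.run(["python", "tools/main.py", "--config-file", CFG], check=True)

run_stage("kp_detector")                 # stage 1: compress the kp_detector
run_stage("generator")                   # stage 2: compress the generator
run_stage("both",                        # stage 3: joint training
          kp_weights="output_dir/kp_stage.pdparams",    # hypothetical paths
          gen_weights="output_dir/gen_stage.pdparams")
```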
diff --git a/docs/zh_CN/tutorials/motion_driving.md b/docs/zh_CN/tutorials/motion_driving.md
index bdf7a2dffd88775dd40755d978a37ed610ba0cf1..3210a0530a76f0d8a7a8c4fb90eeaac96143ffd2 100644
--- a/docs/zh_CN/tutorials/motion_driving.md
+++ b/docs/zh_CN/tutorials/motion_driving.md
@@ -103,6 +103,7 @@
 export CUDA_VISIBLE_DEVICES=0
 python tools/main.py --config-file configs/dataset_name.yaml
 ```
 - Multi-GPU training:
+Change nn.BatchNorm to nn.SyncBatchNorm in `/ppgan/modules/first_order.py`.
 ```
 export CUDA_VISIBLE_DEVICES=0,1,2,3
 python -m paddle.distributed.launch \
@@ -133,7 +134,6 @@ python -m paddle.distributed.launch \
 
 ### 3. Model compression
 
-Data processing is the same as above. The model consists of a kp_detector and a generator: first fix the original generator and train the compressed kp_detector, then fix the original kp_detector and train the compressed generator, and finally train the two compressed models together while adding an intermediate distillation loss.
 
 **Inference:**
 ```
@@ -153,14 +153,19 @@ python -u tools/first-order-demo.py \
 | Original | 229 | 0.012058867 |
 | Compressed | 6.1 | 0.015025159 |
 
+**Training:**
+First set `mode` in configs/firstorder_vox_mobile_256.yaml to kp_detector and train the compressed kp_detector while the original generator stays fixed. Then set `mode` to generator and train the compressed generator while the original kp_detector stays fixed. Finally, set `mode` to both, point `kp_weight_path` and `gen_weight_path` in the config to the weights trained in the first two stages, and train the two networks together.
+```
+export CUDA_VISIBLE_DEVICES=0
+python tools/main.py --config-file configs/firstorder_vox_mobile_256.yaml
+```
+
 ### 4. Model deployment
 #### 4.1 Export the model
 Use the `tools/fom_export.py` script to export the model and the config file used at deployment time; the config file is named `firstorder_vox_mobile_256.yml`. The export script is as follows:
 ```bash
 # Export the FOM model
-Change nn.SyncBatchNorm to nn.BatchNorm in `/ppgan/modules/first_order.py`, since export does not support SyncBatchNorm yet, and change
-out = out[:, :, ::int_inv_scale, ::int_inv_scale] to
-out = paddle.fluid.layers.resize_nearest(out, scale=self.scale)
 
 python tools/export_model.py \
     --config-file configs/firstorder_vox_mobile_256.yaml \
@@ -169,10 +174,10 @@ python tools/export_model.py \
     --export_model output_inference/
 ```
 The inference model is exported to the `output_inference/fom_dy2st/` directory as `model.pdiparams`, `model.pdiparams.info`, and `model.pdmodel`.
-
+- [Pretrained model](https://paddlegan.bj.bcebos.com/applications/first_order_model/paddle_lite/inference/lite.zip)
 #### 4.2 Deployment with Paddle Lite
-- [Deploy the FOM model with Paddle Lite](./lite/README.md)
+- [Deploy the FOM model with Paddle Lite](https://github.com/PaddlePaddle/PaddleGAN/tree/develop/deploy/lite)
 - [FOM-Lite-Demo](https://paddlegan.bj.bcebos.com/applications/first_order_model/paddle_lite/apk/face_detection_demo%202.zip). For more details, see [Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite).
 Known issues:
 (a) Paddle Lite performs slightly worse than Paddle Inference; optimization is in progress.
@@ -190,4 +195,3 @@ python tools/export_model.py \
 }
 ```
-
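As a companion to the deployment section above, a minimal Paddle Inference sketch for the exported keypoint detector. The file names here assume the `kp_detector` prefix that `paddle.jit.save` uses in `export_model()` below (the tutorial mentions `model.pdmodel`, so adjust the paths to whatever the export actually produces); the input shape assumes a 256x256 model:

```python
import numpy as np
import paddle.inference as paddle_infer

config = paddle_infer.Config("output_inference/fom_dy2st/kp_detector.pdmodel",
                             "output_inference/fom_dy2st/kp_detector.pdiparams")
predictor = paddle_infer.create_predictor(config)

# Feed one source frame, NCHW, normalized to [0, 1].
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
input_handle.copy_from_cpu(np.random.rand(1, 3, 256, 256).astype("float32"))
predictor.run()

# The first output tensor of the detector (the keypoint 'value' array).
output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
print(output_handle.copy_to_cpu().shape)
```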
diff --git a/ppgan/models/firstorder_model.py b/ppgan/models/firstorder_model.py
index 6626f7b994da45d57baf8a818744089e3275bebc..75cb8289338b5aef873d3807a41404010a7b46d8 100755
--- a/ppgan/models/firstorder_model.py
+++ b/ppgan/models/firstorder_model.py
@@ -31,6 +31,7 @@ import paddle.nn.functional as F
 import cv2
 import os
 
+
 def init_weight(net):
     def reset_func(m):
         if isinstance(m, (nn.BatchNorm, nn.BatchNorm2D, nn.SyncBatchNorm)):
@@ -86,7 +87,7 @@ class FirstOrderModel(BaseModel):
             "gen_lr": self.gen_lr,
             "dis_lr": self.dis_lr
         }
-        
+
     def setup_net_parallel(self):
         if isinstance(self.nets['Gen_Full'], paddle.DataParallel):
             self.nets['kp_detector'] = self.nets[
                 'Gen_Full']._layers.kp_extractor
@@ -200,13 +201,14 @@ class FirstOrderModel(BaseModel):
                 kp_driving['jacobian'],
                 paddle.inverse(kp_driving_initial['jacobian']))
             kp_norm['jacobian'] = paddle.matmul(jacobian_diff,
-                                                 kp_source['jacobian'])
-        out = self.generator(source, kp_source=kp_source, kp_driving=kp_norm)
+                                                kp_source['jacobian'])
+        out = self.generator(source,
+                             kp_source=kp_source,
+                             kp_driving=kp_norm)
         return out['prediction']
-
     def export_model(self, export_model=None, output_dir=None, inputs_size=[]):
-        
+
         source = paddle.rand(shape=inputs_size[0], dtype='float32')
         driving = paddle.rand(shape=inputs_size[1], dtype='float32')
         value = paddle.rand(shape=inputs_size[2], dtype='float32')
@@ -216,16 +218,172 @@
         driving1 = {'value': value, 'jacobian': j}
         driving2 = {'value': value2, 'jacobian': j2}
         driving3 = {'value': value, 'jacobian': j}
-        
+
         outpath = os.path.join(output_dir, "fom_dy2st")
         if not os.path.exists(outpath):
             os.makedirs(outpath)
-        paddle.jit.save(self.nets['Gen_Full'].kp_extractor, os.path.join(outpath, "kp_detector"), input_spec=[source])
+        paddle.jit.save(self.nets['Gen_Full'].kp_extractor,
+                        os.path.join(outpath, "kp_detector"),
+                        input_spec=[source])
         infer_generator = self.InferGenerator()
         infer_generator.set_generator(self.nets['Gen_Full'].generator)
-        paddle.jit.save(infer_generator, os.path.join(outpath, "generator"), input_spec=[source, driving1, driving2, driving3])
+        paddle.jit.save(infer_generator,
+                        os.path.join(outpath, "generator"),
+                        input_spec=[source, driving1, driving2, driving3])
+
+
+@MODELS.register()
+class FirstOrderModelMobile(FirstOrderModel):
+    """Compressed (mobile) variant of the First Order Motion model from the paper
+    https://proceedings.neurips.cc/paper/2019/file/31c0b36aef265d9221af80872ceb62f9-Paper.pdf.
+    """
+    def __init__(self,
+                 common_params,
+                 train_params,
+                 generator_ori,
+                 generator,
+                 mode,
+                 kp_weight_path=None,
+                 gen_weight_path=None,
+                 discriminator=None):
+        # Call BaseModel.__init__ directly: FirstOrderModel.__init__ would
+        # build the full-size networks, which this class replaces below.
+        super(FirstOrderModel, self).__init__()
+        modes = ["kp_detector", "generator", "both"]
+        assert mode in modes
+        # define local variables
+        self.input_data = None
+        self.generated = None
+        self.losses_generator = None
+        self.train_params = train_params
+        # build the original (teacher) networks; they stay fixed for distillation
+        generator_ori_cfg = generator_ori
+        generator_ori_cfg.update({'common_params': common_params})
+        generator_ori_cfg.update({'train_params': train_params})
+        generator_ori_cfg.update(
+            {'dis_scales': discriminator.discriminator_cfg.scales})
+        self.Gen_Full_ori = build_generator(generator_ori_cfg)
+        discriminator_cfg = discriminator
+        discriminator_cfg.update({'common_params': common_params})
+        discriminator_cfg.update({'train_params': train_params})
+        self.nets['Dis'] = build_discriminator(discriminator_cfg)
+        # define the compressed (student) networks
+        generator_cfg = generator
+        generator_cfg.update({'common_params': common_params})
+        generator_cfg.update({'train_params': train_params})
+        generator_cfg.update(
+            {'dis_scales': discriminator.discriminator_cfg.scales})
+        if mode == "kp_detector":
+            print("just train kp_detector, fix generator")
+            generator_cfg.update(
+                {'generator_cfg': generator_ori_cfg['generator_cfg']})
+        elif mode == "generator":
+            print("just train generator, fix kp_detector")
+            generator_cfg.update(
+                {'kp_detector_cfg': generator_ori_cfg['kp_detector_cfg']})
+        elif mode == "both":
+            print("train both kp_detector and generator")
+        self.mode = mode
+        self.nets['Gen_Full'] = build_generator(generator_cfg)
+        self.kp_weight_path = kp_weight_path
+        self.gen_weight_path = gen_weight_path
+        self.visualizer = Visualizer()
+
+    def setup_net_parallel(self):
+        if isinstance(self.nets['Gen_Full'], paddle.DataParallel):
+            self.nets['kp_detector'] = self.nets[
+                'Gen_Full']._layers.kp_extractor
+            self.nets['generator'] = self.nets['Gen_Full']._layers.generator
+            self.kp_detector_ori = self.Gen_Full_ori._layers.kp_extractor
+            self.nets['discriminator'] = self.nets['Dis']._layers.discriminator
+        else:
+            self.nets['kp_detector'] = self.nets['Gen_Full'].kp_extractor
+            self.nets['generator'] = self.nets['Gen_Full'].generator
+            self.kp_detector_ori = self.Gen_Full_ori.kp_extractor
+            self.nets['discriminator'] = self.nets['Dis'].discriminator
+
+        from ppgan.utils.download import get_path_from_url
+        vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-cpk.pdparams'
+        weight_path = get_path_from_url(vox_cpk_weight_url)
+        checkpoint = paddle.load(weight_path)
+        if self.mode == "kp_detector":
+            # load and freeze the original generator
+            self.nets['generator'].set_state_dict(checkpoint['generator'])
+            for param in self.nets['generator'].parameters():
+                param.stop_gradient = True
+        elif self.mode == "generator":
+            # load and freeze the original kp_detector
+            self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
+            for param in self.nets['kp_detector'].parameters():
+                param.stop_gradient = True
+        elif self.mode == "both":
+            # load the two compressed networks trained in the first two stages
+            checkpoint = paddle.load(self.kp_weight_path)
+            self.nets['kp_detector'].set_state_dict(checkpoint['kp_detector'])
+            checkpoint = paddle.load(self.gen_weight_path)
+            self.nets['generator'].set_state_dict(checkpoint['generator'])
+
+        self.kp_detector_ori.set_state_dict(checkpoint['kp_detector'])
+        for param in self.kp_detector_ori.parameters():
+            param.stop_gradient = True
+
+    def setup_optimizers(self, lr_cfg, optimizer):
+        self.setup_net_parallel()
+        # init params
+        init_weight(self.nets['discriminator'])
+        self.optimizers['optimizer_Dis'] = build_optimizer(
+            optimizer,
+            self.dis_lr,
+            parameters=self.nets['discriminator'].parameters())
+
+        if self.mode == "kp_detector":
+            init_weight(self.nets['kp_detector'])
+            self.optimizers['optimizer_KP'] = build_optimizer(
+                optimizer,
+                self.kp_lr,
+                parameters=self.nets['kp_detector'].parameters())
+        elif self.mode == "generator":
+            init_weight(self.nets['generator'])
+            self.optimizers['optimizer_Gen'] = build_optimizer(
+                optimizer,
+                self.gen_lr,
+                parameters=self.nets['generator'].parameters())
+        elif self.mode == "both":
+            super(FirstOrderModelMobile,
+                  self).setup_optimizers(lr_cfg, optimizer)
+        # define loss functions
+        self.losses = {}
+
+    def forward(self):
+        """Run forward pass; called by both <train_iter> and <test_iter>."""
+        if self.mode == "kp_detector":
+            # pass the frozen original detector so the generator adds the
+            # keypoint distillation loss
+            self.losses_generator, self.generated = \
+                self.nets['Gen_Full'](self.input_data.copy(),
+                                      self.nets['discriminator'],
+                                      self.kp_detector_ori)
+        else:
+            self.losses_generator, self.generated = \
+                self.nets['Gen_Full'](self.input_data.copy(),
+                                      self.nets['discriminator'])
+
+    def train_iter(self, optimizers=None):
+        if self.mode == "both":
+            super(FirstOrderModelMobile, self).train_iter(optimizers=optimizers)
+            return
+        self.forward()
+        # update G
+        self.set_requires_grad(self.nets['discriminator'], False)
+        if self.mode == "kp_detector":
+            self.optimizers['optimizer_KP'].clear_grad()
+            self.backward_G()
+            self.optimizers['optimizer_KP'].step()
+        if self.mode == "generator":
+            self.optimizers['optimizer_Gen'].clear_grad()
+            self.backward_G()
+            self.optimizers['optimizer_Gen'].step()
+
+        # update D
+        if self.train_params['loss_weights']['generator_gan'] != 0:
+            self.set_requires_grad(self.nets['discriminator'], True)
+            self.optimizers['optimizer_Dis'].clear_grad()
+            self.backward_D()
+            self.optimizers['optimizer_Dis'].step()
 
 
 class Visualizer:
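The staged training above relies entirely on `stop_gradient` to fix the counterpart network. A self-contained illustration of that mechanism, using toy layers rather than the FOM networks:

```python
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

student = nn.Linear(4, 4)
teacher = nn.Linear(4, 4)
for p in teacher.parameters():
    p.stop_gradient = True  # same freezing trick as setup_net_parallel()

x = paddle.rand([2, 4])
loss = F.mse_loss(student(x), teacher(x))
loss.backward()
print(student.weight.grad is not None)  # True: the student receives gradients
print(teacher.weight.grad)              # None: the frozen teacher does not
```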
diff --git a/ppgan/models/generators/generator_firstorder.py b/ppgan/models/generators/generator_firstorder.py
index a9e8b223f98cb7d6bb231d13e765f9cbc6524279..0c99cc1be95ff97f6df92c8df9cafcb0c644e33b 100755
--- a/ppgan/models/generators/generator_firstorder.py
+++ b/ppgan/models/generators/generator_firstorder.py
@@ -68,7 +68,7 @@ class FirstOrderGenerator(nn.Layer):
         if sum(self.loss_weights['perceptual']) != 0:
             self.vgg = VGG19()
 
-    def forward(self, x, discriminator):
+    def forward(self, x, discriminator, kp_extractor_ori=None):
         kp_source = self.kp_extractor(x['source'])
         kp_driving = self.kp_extractor(x['driving'])
         generated = self.generator(x['source'],
@@ -151,6 +151,19 @@ class FirstOrderGenerator(nn.Layer):
                 value = paddle.abs(eye - value).mean()
                 loss_values['equivariance_jacobian'] = self.loss_weights[
                     'equivariance_jacobian'] * value
+
+        if kp_extractor_ori is not None:
+            # Keypoint distillation: L1 distance between the frozen original
+            # detector's keypoints and the compressed detector's keypoints
+            # (kp_source/kp_driving computed above), on both frames.
+            recon_loss = paddle.nn.loss.L1Loss()
+            kp_distillation_loss_source = recon_loss(
+                kp_extractor_ori(x['source'])['value'], kp_source['value'])
+            kp_distillation_loss_driving = recon_loss(
+                kp_extractor_ori(x['driving'])['value'], kp_driving['value'])
+            loss_values["kp_distillation_loss"] = (
+                kp_distillation_loss_source + kp_distillation_loss_driving)
+
         return loss_values, generated
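For reference, the distillation term added above, pulled out as a standalone sketch; `kp_ori` and `kp_mobile` stand for the frozen original and compressed detectors, each returning a dict with a `'value'` entry:

```python
import paddle

def kp_distillation_loss(kp_ori, kp_mobile, source, driving):
    """L1 distance between teacher and student keypoints, accumulated
    over the source and driving frames (mirrors the loss above)."""
    recon_loss = paddle.nn.L1Loss()
    loss = recon_loss(kp_ori(source)['value'], kp_mobile(source)['value'])
    loss += recon_loss(kp_ori(driving)['value'], kp_mobile(driving)['value'])
    return loss
```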
diff --git a/ppgan/modules/first_order.py b/ppgan/modules/first_order.py
index af7c1241eee7b63feec98ff17dda49a433ad6baf..0f71068d8cb72207cd76a1342d151ddc4b86845b 100644
--- a/ppgan/modules/first_order.py
+++ b/ppgan/modules/first_order.py
@@ -20,9 +20,9 @@ import paddle.nn.functional as F
 
 
 def SyncBatchNorm(*args, **kwargs):
-    """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm instead"""
-    if paddle.get_device() == 'cpu':
-        return nn.BatchNorm(*args, **kwargs)
+    """Use nn.SyncBatchNorm only for multi-card runs; otherwise (single card, CPU, export) fall back to nn.BatchNorm."""
+    if paddle.distributed.get_world_size() > 1:
+        return nn.SyncBatchNorm(*args, **kwargs)
     else:
         return nn.BatchNorm(*args, **kwargs)
 
@@ -123,20 +122,30 @@ class ResBlock2d(nn.Layer):
         out += x
         return out
 
+
 class MobileResBlock2d(nn.Layer):
     """
     Res block, preserve spatial resolution.
     """
-
     def __init__(self, in_features, kernel_size, padding):
         super(MobileResBlock2d, self).__init__()
         out_features = in_features * 2
-        self.conv_pw = nn.Conv2D(in_channels=in_features, out_channels=out_features, kernel_size=1,
-                                 padding=0, bias_attr=False)
-        self.conv_dw = nn.Conv2D(in_channels=out_features, out_channels=out_features, kernel_size=kernel_size,
-                                 padding=padding, groups=out_features, bias_attr=False)
-        self.conv_pw_linear = nn.Conv2D(in_channels=out_features, out_channels=in_features, kernel_size=1,
-                                        padding=0, bias_attr=False)
+        self.conv_pw = nn.Conv2D(in_channels=in_features,
+                                 out_channels=out_features,
+                                 kernel_size=1,
+                                 padding=0,
+                                 bias_attr=False)
+        self.conv_dw = nn.Conv2D(in_channels=out_features,
+                                 out_channels=out_features,
+                                 kernel_size=kernel_size,
+                                 padding=padding,
+                                 groups=out_features,
+                                 bias_attr=False)
+        self.conv_pw_linear = nn.Conv2D(in_channels=out_features,
+                                        out_channels=in_features,
+                                        kernel_size=1,
+                                        padding=0,
+                                        bias_attr=False)
         self.norm1 = SyncBatchNorm(in_features)
         self.norm_pw = SyncBatchNorm(out_features)
         self.norm_dw = SyncBatchNorm(out_features)
@@ -184,21 +193,33 @@ class UpBlock2d(nn.Layer):
         out = F.relu(out)
         return out
 
+
 class MobileUpBlock2d(nn.Layer):
     """
     Upsampling block for use in decoder.
     """
-
-    def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1):
+    def __init__(self,
+                 in_features,
+                 out_features,
+                 kernel_size=3,
+                 padding=1,
+                 groups=1):
         super(MobileUpBlock2d, self).__init__()
-        self.conv = nn.Conv2D(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size,
-                              padding=padding, groups=in_features, bias_attr=False)
-        self.conv1 = nn.Conv2D(in_channels=in_features, out_channels=out_features, kernel_size=1,
-                               padding=0, bias_attr=False)
+        self.conv = nn.Conv2D(in_channels=in_features,
+                              out_channels=in_features,
+                              kernel_size=kernel_size,
+                              padding=padding,
+                              groups=in_features,
+                              bias_attr=False)
+        self.conv1 = nn.Conv2D(in_channels=in_features,
+                               out_channels=out_features,
+                               kernel_size=1,
+                               padding=0,
+                               bias_attr=False)
         self.norm = SyncBatchNorm(in_features)
         self.norm1 = SyncBatchNorm(out_features)
-    
+
     def forward(self, x):
         out = F.interpolate(x, scale_factor=2)
         out = self.conv(out)
@@ -210,7 +231,6 @@ class MobileUpBlock2d(nn.Layer):
         return out
 
 
-
 class DownBlock2d(nn.Layer):
     """
     Downsampling block for use in encoder.
@@ -242,17 +262,29 @@ class MobileDownBlock2d(nn.Layer):
     """
    Downsampling block for use in encoder.
""" - - def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1): + def __init__(self, + in_features, + out_features, + kernel_size=3, + padding=1, + groups=1): super(MobileDownBlock2d, self).__init__() - self.conv = nn.Conv2D(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size, - padding=padding, groups=in_features, bias_attr=False) + self.conv = nn.Conv2D(in_channels=in_features, + out_channels=in_features, + kernel_size=kernel_size, + padding=padding, + groups=in_features, + bias_attr=False) self.norm = SyncBatchNorm(in_features) self.pool = nn.AvgPool2D(kernel_size=(2, 2)) - self.conv1 = nn.Conv2D(in_features, out_features, kernel_size=1, padding=0, stride=1, bias_attr=False) + self.conv1 = nn.Conv2D(in_features, + out_features, + kernel_size=1, + padding=0, + stride=1, + bias_attr=False) self.norm1 = SyncBatchNorm(out_features) - def forward(self, x): out = self.conv(x) @@ -282,7 +314,7 @@ class SameBlock2d(nn.Layer): kernel_size=kernel_size, padding=padding, groups=groups, - bias_attr=(mobile_net==False)) + bias_attr=(mobile_net == False)) self.norm = SyncBatchNorm(out_features) def forward(self, x): @@ -301,7 +333,7 @@ class Encoder(nn.Layer): in_features, num_blocks=3, max_features=256, - mobile_net = False): + mobile_net=False): super(Encoder, self).__init__() down_blocks = [] @@ -310,13 +342,16 @@ class Encoder(nn.Layer): down_blocks.append( MobileDownBlock2d(in_features if i == 0 else min( max_features, block_expansion * (2**i)), - min(max_features, block_expansion * (2**(i + 1))), - kernel_size=3, padding=1)) + min(max_features, + block_expansion * (2**(i + 1))), + kernel_size=3, + padding=1)) else: down_blocks.append( DownBlock2d(in_features if i == 0 else min( max_features, block_expansion * (2**i)), - min(max_features, block_expansion * (2**(i + 1))), + min(max_features, + block_expansion * (2**(i + 1))), kernel_size=3, padding=1)) self.down_blocks = nn.LayerList(down_blocks) @@ -337,7 +372,7 @@ class Decoder(nn.Layer): in_features, num_blocks=3, max_features=256, - mobile_net = False): + mobile_net=False): super(Decoder, self).__init__() up_blocks = [] @@ -346,14 +381,18 @@ class Decoder(nn.Layer): out_filters = min(max_features, block_expansion * (2**i)) if mobile_net: in_filters = (1 if i == num_blocks - 1 else 2) * min( - max_features, block_expansion * (2**(i + 1))) + max_features, block_expansion * (2**(i + 1))) up_blocks.append( - MobileUpBlock2d(in_filters, out_filters, kernel_size=3, padding=1)) + MobileUpBlock2d(in_filters, + out_filters, + kernel_size=3, + padding=1)) else: in_filters = (1 if i == num_blocks - 1 else 2) * min( max_features, block_expansion * (2**(i + 1))) up_blocks.append( - UpBlock2d(in_filters, out_filters, kernel_size=3, padding=1)) + UpBlock2d(in_filters, out_filters, kernel_size=3, + padding=1)) self.up_blocks = nn.LayerList(up_blocks) self.out_filters = block_expansion + in_features @@ -378,10 +417,16 @@ class Hourglass(nn.Layer): max_features=256, mobile_net=False): super(Hourglass, self).__init__() - self.encoder = Encoder(block_expansion, in_features, num_blocks, - max_features, mobile_net=mobile_net) - self.decoder = Decoder(block_expansion, in_features, num_blocks, - max_features, mobile_net=mobile_net) + self.encoder = Encoder(block_expansion, + in_features, + num_blocks, + max_features, + mobile_net=mobile_net) + self.decoder = Decoder(block_expansion, + in_features, + num_blocks, + max_features, + mobile_net=mobile_net) self.out_filters = self.decoder.out_filters def forward(self, x):