From 1d8cd182219e83653570ce03d9be8b5d5a955187 Mon Sep 17 00:00:00 2001 From: FNRE <287246233@qq.com> Date: Wed, 19 May 2021 10:59:33 +0800 Subject: [PATCH] fix fom error (#319) * 1.fix error with 4 channels of image of fom predictor. 2.fix error of fom evaluate 3.fix lapstyle vgg network --- configs/firstorder_vox_256.yaml | 2 +- ppgan/apps/first_order_predictor.py | 12 +- ppgan/datasets/firstorder_dataset.py | 3 +- ppgan/models/firstorder_model.py | 9 +- ppgan/models/generators/generater_lapstyle.py | 112 +++++++++--------- 5 files changed, 76 insertions(+), 62 deletions(-) diff --git a/configs/firstorder_vox_256.yaml b/configs/firstorder_vox_256.yaml index 884eb9b..4fd1258 100755 --- a/configs/firstorder_vox_256.yaml +++ b/configs/firstorder_vox_256.yaml @@ -115,7 +115,7 @@ log_config: visiual_interval: 10 validate: - interval: 10 + interval: 3000 save_img: false snapshot_config: diff --git a/ppgan/apps/first_order_predictor.py b/ppgan/apps/first_order_predictor.py index fe1a49d..011aafa 100644 --- a/ppgan/apps/first_order_predictor.py +++ b/ppgan/apps/first_order_predictor.py @@ -103,6 +103,16 @@ class FirstOrderPredictor(BasePredictor): self.cfg, self.weight_path) self.multi_person = multi_person + def read_img(self, path): + img = imageio.imread(path) + img = img.astype(np.float32) + if img.ndim == 2: + img = np.expand_dims(img, axis=2) + # some images have 4 channels + if img.shape[2] > 3: + img = img[:,:,:3] + return img + def run(self, source_image, driving_video): def get_prediction(face_image): if self.find_best_frame or self.best_frame is not None: @@ -138,7 +148,7 @@ class FirstOrderPredictor(BasePredictor): adapt_movement_scale=self.adapt_scale) return predictions - source_image = imageio.imread(source_image) + source_image = self.read_img(source_image) reader = imageio.get_reader(driving_video) fps = reader.get_meta_data()['fps'] driving_video = [] diff --git a/ppgan/datasets/firstorder_dataset.py b/ppgan/datasets/firstorder_dataset.py index 
41d5cc2..e2ad0f5 100755 --- a/ppgan/datasets/firstorder_dataset.py +++ b/ppgan/datasets/firstorder_dataset.py @@ -251,7 +251,8 @@ class FramesDataset(Dataset): out['driving'] = out['source'] out['source'] = buf else: - video = np.stack(video_array, axis=0) / 255.0 + video = np.stack(video_array, axis=0).astype( + np.float32) / 255.0 out['video'] = video.transpose(3, 0, 1, 2) out['name'] = video_name return out diff --git a/ppgan/models/firstorder_model.py b/ppgan/models/firstorder_model.py index 0b690aa..482450d 100755 --- a/ppgan/models/firstorder_model.py +++ b/ppgan/models/firstorder_model.py @@ -86,18 +86,20 @@ class FirstOrderModel(BaseModel): "gen_lr": self.gen_lr, "dis_lr": self.dis_lr } - - def setup_optimizers(self, lr_cfg, optimizer): + + def setup_net_parallel(self): if isinstance(self.nets['Gen_Full'], paddle.DataParallel): self.nets['kp_detector'] = self.nets[ 'Gen_Full']._layers.kp_extractor self.nets['generator'] = self.nets['Gen_Full']._layers.generator self.nets['discriminator'] = self.nets['Dis']._layers.discriminator else: - self.nets['kp_detector'] = self.nets['Gen_Full'].kp_extractor self.nets['generator'] = self.nets['Gen_Full'].generator self.nets['discriminator'] = self.nets['Dis'].discriminator + + def setup_optimizers(self, lr_cfg, optimizer): + self.setup_net_parallel() # init params init_weight(self.nets['kp_detector']) init_weight(self.nets['generator']) @@ -163,6 +165,7 @@ class FirstOrderModel(BaseModel): self.optimizers['optimizer_Dis'].step() def test_iter(self, metrics=None): + self.setup_net_parallel() self.nets['kp_detector'].eval() self.nets['generator'].eval() loss_list = [] diff --git a/ppgan/models/generators/generater_lapstyle.py b/ppgan/models/generators/generater_lapstyle.py index 53c0911..2871a69 100644 --- a/ppgan/models/generators/generater_lapstyle.py +++ b/ppgan/models/generators/generater_lapstyle.py @@ -167,61 +167,6 @@ class DecoderNet(nn.Layer): return out -vgg = nn.Sequential( - nn.Conv2D(3, 3, (1, 1)), - 
nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(3, 64, (3, 3)), - nn.ReLU(), # relu1-1 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(64, 64, (3, 3)), - nn.ReLU(), # relu1-2 - nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True), - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(64, 128, (3, 3)), - nn.ReLU(), # relu2-1 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(128, 128, (3, 3)), - nn.ReLU(), # relu2-2 - nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True), - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(128, 256, (3, 3)), - nn.ReLU(), # relu3-1 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(256, 256, (3, 3)), - nn.ReLU(), # relu3-2 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(256, 256, (3, 3)), - nn.ReLU(), # relu3-3 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(256, 256, (3, 3)), - nn.ReLU(), # relu3-4 - nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True), - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(256, 512, (3, 3)), - nn.ReLU(), # relu4-1, this is the last layer used - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(512, 512, (3, 3)), - nn.ReLU(), # relu4-2 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(512, 512, (3, 3)), - nn.ReLU(), # relu4-3 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(512, 512, (3, 3)), - nn.ReLU(), # relu4-4 - nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True), - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(512, 512, (3, 3)), - nn.ReLU(), # relu5-1 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(512, 512, (3, 3)), - nn.ReLU(), # relu5-2 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(512, 512, (3, 3)), - nn.ReLU(), # relu5-3 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(512, 512, (3, 3)), - nn.ReLU() # relu5-4 -) @GENERATORS.register() @@ -233,7 +178,62 @@ class Encoder(nn.Layer): """ def __init__(self): super(Encoder, self).__init__() - vgg_net = vgg + vgg_net = nn.Sequential( + nn.Conv2D(3, 3, (1, 1)), + nn.Pad2D([1, 1, 1, 
1], mode='reflect'), + nn.Conv2D(3, 64, (3, 3)), + nn.ReLU(), # relu1-1 + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(64, 64, (3, 3)), + nn.ReLU(), # relu1-2 + nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True), + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(64, 128, (3, 3)), + nn.ReLU(), # relu2-1 + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(128, 128, (3, 3)), + nn.ReLU(), # relu2-2 + nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True), + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(128, 256, (3, 3)), + nn.ReLU(), # relu3-1 + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(256, 256, (3, 3)), + nn.ReLU(), # relu3-2 + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(256, 256, (3, 3)), + nn.ReLU(), # relu3-3 + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(256, 256, (3, 3)), + nn.ReLU(), # relu3-4 + nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True), + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(256, 512, (3, 3)), + nn.ReLU(), # relu4-1, this is the last layer used + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(512, 512, (3, 3)), + nn.ReLU(), # relu4-2 + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(512, 512, (3, 3)), + nn.ReLU(), # relu4-3 + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(512, 512, (3, 3)), + nn.ReLU(), # relu4-4 + nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True), + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(512, 512, (3, 3)), + nn.ReLU(), # relu5-1 + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(512, 512, (3, 3)), + nn.ReLU(), # relu5-2 + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(512, 512, (3, 3)), + nn.ReLU(), # relu5-3 + nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Conv2D(512, 512, (3, 3)), + nn.ReLU() # relu5-4 + ) + weight_path = get_path_from_url( 'https://paddlegan.bj.bcebos.com/models/vgg_normalised.pdparams') vgg_net.set_dict(paddle.load(weight_path)) -- GitLab