refine psgan code (#67)

* update psgan pretrained model link in the tutorial doc

refine psgan code (#67)
* update psgan pretrained model link in the tutorial doc
e54169d5 · lijianshe02 · GitHub · 25335873 · e54169d5 · e54169d5
16 changed files
--- a/docs/en_US/tutorials/motion_driving.md
+++ b/docs/en_US/tutorials/motion_driving.md
@@ -13,9 +13,10 @@
 Users can upload the prepared source image and driving video, then substitute the paths of the source image and driving video for the `source_image` and `driving_video` parameters in the following command. It will generate a video file named `result.mp4` in the `output` folder, which is the animated video file.
 ```
-python -u tools/first-order-demo.py \
+cd applications/
-     --driving_video ./ravel_10.mp4  \
+python -u tools/first-order-demo.py  \
-     --source_image ./sudaqiang.png \
+     --driving_video ../docs/imgs/fom_dv.mp4 \
+     --source_image ../docs/imgs/fom_source_image.png \
     --relative --adapt_scale
 ```

--- a/docs/en_US/tutorials/psgan.md
+++ b/docs/en_US/tutorials/psgan.md
@@ -10,15 +10,17 @@ This paper is to address the makeup transfer task, which aims to transfer the ma
 ## 2. How to use
 ### 2.1 Test
+The pretrained model can be downloaded from the following link: [psgan_weight](https://paddlegan.bj.bcebos.com/models/psgan_weight.pkl)
 Run the following command to complete the makeup transfer task. It will generate the transferred image in the current path when the program runs successfully.
 ```
-cd applications
+python tools/psgan_infer.py \
-python tools/ps_demo.py \  
  --config-file configs/makeup.yaml \
  --model_path /your/model/path \
-  --source_path  /your/source/image/path  \
+  --source_path  docs/imgs/ps_source.png  \
-  --reference_dir /your/ref/image/path
+  --reference_dir docs/imgs/ref \
+  --evaluate-only True
 ```
 **params:**
 - config-file: PSGAN network configuration file, yaml format
@@ -77,7 +79,7 @@ Notation: In train phase, the `isTrain` value in makeup.yaml file is `True`, but
 Model|Dataset|BatchSize|Inference speed|Download
 ---|:--:|:--:|:--:|:--:
-PSGAN|MT-Dataset| 1 | 1.9s/image (GPU:P40) | [model]()
+PSGAN|MT-Dataset| 1 | 1.9s/image (GPU:P40) | [model](https://paddlegan.bj.bcebos.com/models/psgan_weight.pkl)
 ## 3. Result
 ![](../../imgs/makeup_shifter.png)

--- a/docs/imgs/fom_dv.mp4
+++ b/docs/imgs/fom_dv.mp4
--- a/docs/imgs/fom_source_image.png
+++ b/docs/imgs/fom_source_image.png
--- a/docs/imgs/ps_source.png
+++ b/docs/imgs/ps_source.png
--- a/docs/imgs/ref/ps_ref.png
+++ b/docs/imgs/ref/ps_ref.png
--- a/docs/zh_CN/tutorials/motion_driving.md
+++ b/docs/zh_CN/tutorials/motion_driving.md
@@ -17,9 +17,10 @@ First order motion model的任务是image animation，给定一张源图片，
 用户可以上传自己准备的视频和图片，并将如下命令中的source_image参数和driving_video参数分别换成自己的图片和视频路径，然后运行如下命令，就可以完成动作表情迁移，程序运行成功后，会在output文件夹生成名为result.mp4的视频文件，该文件即为动作迁移后的视频。本项目中提供了原始图片和驱动视频供展示使用。运行的命令如下所示：
 ```
+cd applications/
 python -u tools/first-order-demo.py  \
-     --driving_video ./ravel_10.mp4 \
+     --driving_video ../docs/imgs/fom_dv.mp4 \
-     --source_image ./sudaqiang.png \
+     --source_image ../docs/imgs/fom_source_image.png \
     --relative --adapt_scale
 ```

--- a/docs/zh_CN/tutorials/psgan.md
+++ b/docs/zh_CN/tutorials/psgan.md
@@ -10,15 +10,17 @@
 ## 2. 使用方法
 ### 2.1 测试
+预训练模型可以从如下地址下载: [psgan_weight](https://paddlegan.bj.bcebos.com/models/psgan_weight.pkl)
 运行如下命令，就可以完成妆容迁移，程序运行成功后，会在当前文件夹生成妆容迁移后的图片文件。本项目中提供了原始图片和参考图片供展示使用，具体命令如下所示：
 ```
-cd applications/
+python tools/psgan_infer.py \
-python tools/ps_demo.py \  
  --config-file configs/makeup.yaml \
  --model_path /your/model/path \
-  --source_path  /your/source/image/path  \
+  --source_path  docs/imgs/ps_source.png  \
-  --reference_dir /your/ref/image/path
+  --reference_dir docs/imgs/ref \
+  --evaluate-only True
 ```
 **参数说明:**
 - config-file: PSGAN网络的参数配置文件，格式为yaml
@@ -73,7 +75,7 @@ data
 ### 2.3 模型
 Model|Dataset|BatchSize|Inference speed|Download
 ---|:--:|:--:|:--:|:--:
-PSGAN|MT-Dataset| 1 | 1.9s(GPU:P40) | [model]()
+PSGAN|MT-Dataset| 1 | 1.9s(GPU:P40) | [model](https://paddlegan.bj.bcebos.com/models/psgan_weight.pkl)
 ## 3. 妆容迁移结果展示

--- a/applications/tools/ps_demo.py
+++ b/applications/tools/ps_demo.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,6 +31,7 @@ from ppgan.utils.filesystem import load
 from ppgan.engine.trainer import Trainer
 from ppgan.models.builder import build_model
 from ppgan.utils.preprocess import *
+from .base_predictor import BasePredictor
 def toImage(net_output):
@@ -52,14 +53,17 @@ def mask2image(mask: np.array, format="HWC"):
    return canvas
+PS_WEIGHT_URL = "https://paddlegan.bj.bcebos.com/models/psgan_weight.pkl"
 class PreProcess:
    def __init__(self, config, need_parser=True):
        self.img_size = 256
        self.transform = transform = T.Compose([
            T.Resize(size=256),
-            T.Permute(to_rgb=False),
+            T.ToTensor(),
        ])
-        self.norm = T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])
+        self.norm = T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        if need_parser:
            self.face_parser = futils.mask.FaceParser()
        self.up_ratio = 0.6 / 0.85
@@ -82,8 +86,6 @@ class PreProcess:
        mask = cv2.resize(mask.numpy(), (self.img_size, self.img_size),
                          interpolation=cv2.INTER_NEAREST)
        mask = mask.astype(np.uint8)
-        mask_color = mask2image(mask)
-        cv2.imwrite('mask_temp.png', mask_color)
        mask_tensor = paddle.to_tensor(mask)
        lms = futils.dlib.landmarks(image, face) * self.img_size / image.width
@@ -97,7 +99,7 @@ class PreProcess:
        image = self.transform(np_image)
        return [
-            self.norm(image),
+            self.norm(image).unsqueeze(0),
            np.float32(mask_aug),
            np.float32(P_np),
            np.float32(mask)
@@ -145,11 +147,12 @@ class Inference:
            if with_face:
                return None, None
            return
-        for i in range(len(source_input) - 1):
+        for i in range(1, len(source_input) - 1):
            source_input[i] = paddle.to_tensor(
                np.expand_dims(source_input[i], 0))
-        for i in range(len(reference_input) - 1):
+        for i in range(1, len(reference_input) - 1):
            reference_input[i] = paddle.to_tensor(
                np.expand_dims(reference_input[i], 0))
@@ -163,10 +166,9 @@ class Inference:
            'consis_mask': consis_mask
        }
        state_dicts = load(self.model_path)
-        net = getattr(self.model, 'netG')
+        for net_name, net in self.model.nets.items():
-        net.set_dict(state_dicts['netG'])
+            net.set_state_dict(state_dicts[net_name])
        result, _ = self.model.test(input_data)
-        print('result shape: ', result.shape)
        min_, max_ = result.min(), result.max()
        result += -min_
        result = paddle.divide(result, max_ - min_ + 1e-5)
@@ -174,38 +176,42 @@ class Inference:
        if with_face:
            return img, crop_face
-        img.save('before.png')
        return img
-def main(args, cfg, save_path='transferred_image.png'):
+class PSGANPredictor(BasePredictor):
+    def __init__(self, args, cfg, output_path='output'):
-    setup(args, cfg)
+        self.args = args
+        self.cfg = cfg
-    inference = Inference(cfg, args.model_path)
+        self.weight_path = self.args.model_path
-    postprocess = PostProcess(cfg)
+        if self.weight_path is None:
+            cur_path = os.path.abspath(os.path.dirname(__file__))
-    source = Image.open(args.source_path).convert("RGB")
+            self.weight_path = get_path_from_url(PS_WEIGHT_URL, cur_path)
-    reference_paths = list(Path(args.reference_dir).glob("*"))
+        self.output_path = output_path
-    np.random.shuffle(reference_paths)
-    for reference_path in reference_paths:
+    def run(self):
-        if not reference_path.is_file():
+        setup(self.args, self.cfg)
-            print(reference_path, "is not a valid file.")
+        inference = Inference(self.cfg, self.weight_path)
-            continue
+        postprocess = PostProcess(self.cfg)
-        reference = Image.open(reference_path).convert("RGB")
+        source = Image.open(self.args.source_path).convert("RGB")
+        reference_paths = list(Path(self.args.reference_dir).glob("*"))
-        # Transfer the psgan from reference to source.
+        np.random.shuffle(reference_paths)
-        image, face = inference.transfer(source, reference, with_face=True)
+        for reference_path in reference_paths:
-        image.save('before.png')
+            if not reference_path.is_file():
-        source_crop = source.crop(
+                print(reference_path, "is not a valid file.")
-            (face.left(), face.top(), face.right(), face.bottom()))
+                continue
-        image = postprocess(source_crop, image)
-        image.save(save_path)
+            reference = Image.open(reference_path).convert("RGB")
+            # Transfer the psgan from reference to source.
-if __name__ == '__main__':
+            image, face = inference.transfer(source, reference, with_face=True)
-    args = parse_args()
+            source_crop = source.crop(
-    cfg = get_config(args.config_file)
+                (face.left(), face.top(), face.right(), face.bottom()))
-    main(args, cfg)
+            image = postprocess(source_crop, image)
+            ref_img_name = os.path.split(reference_path)[1]
+            save_path = os.path.join(self.output_path,
+                                     'transfered_ref_' + ref_img_name)
+            image.save(save_path)
--- a/ppgan/faceutils/dlibutils/lms.dat
+++ b/ppgan/faceutils/dlibutils/lms.dat
--- a/ppgan/faceutils/mask/face_parser.py
+++ b/ppgan/faceutils/mask/face_parser.py
@@ -23,7 +23,7 @@ from paddle.utils.download import get_path_from_url
 import pickle
 from .model import BiSeNet
-BISENET_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/bisnet.pdparams'
+BISENET_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/bisenet.pdparams'
 class FaceParser:
@@ -65,7 +65,7 @@ class FaceParser:
        image = image.transpose((2, 0, 1))
        image = self.transforms(image)
-        state_dict, _ = paddle.load(self.save_pth)
+        state_dict = paddle.load(self.save_pth)
        self.net.set_dict(state_dict)
        self.net.eval()
@@ -75,8 +75,6 @@ class FaceParser:
            out = self.net(image)[0]
            parsing = out.squeeze(0).argmax(0)  #argmax(0).astype('float32')
-        #parsing = paddle.nn.functional.embedding(x=self.dict, weight=parsing)
        parse_np = parsing.numpy()
        h, w = parse_np.shape
        result = np.zeros((h, w))

--- a/ppgan/models/generators/makeup.py
+++ b/ppgan/models/generators/makeup.py
@@ -296,31 +296,65 @@ class MANet(paddle.nn.Layer):
        # x -> src img
        x = self.encoder(x)
        _, c, h, w = x.shape
-        x_flat = x.reshape([-1, c, h * w])
-        x_flat = self.w * x_flat
-        if x_p is not None:
-            x_flat = paddle.concat([x_flat, x_p], axis=1)
        _, c2, h2, w2 = y.shape
-        y_flat = y.reshape([-1, c2, h2 * w2])
-        y_flat = self.w * y_flat
+        mask_x = F.interpolate(mask_x, size=(64, 64))
-        if y_p is not None:
+        mask_x = mask_x.transpose((1, 0, 2, 3))
-            y_flat = paddle.concat([y_flat, y_p], axis=1)
+        mask_x_re = mask_x.tile([1, x.shape[1], 1, 1])
-        a_ = paddle.matmul(x_flat, y_flat, transpose_x=True) * 200.0
+        mask_x_diff_re = mask_x.tile([1, x_p.shape[1], 1, 1])
+        mask_y = F.interpolate(mask_y, size=(64, 64))
-        # mask softmax
+        mask_y = mask_y.transpose((1, 0, 2, 3))
-        if consistency_mask is not None:
+        mask_y_re = mask_y.tile([1, y.shape[1], 1, 1])
-            a_ = a_ - 100.0 * (1 - consistency_mask)
+        mask_y_diff_re = mask_y.tile([1, y_p.shape[1], 1, 1])
+        x_re = x.tile([3, 1, 1, 1])
+        y_re = y.tile([3, 1, 1, 1])
+        x_flat = x_re * mask_x_re
+        y_flat = y_re * mask_y_re
+        x_p = x_p.tile([3, 1, 1, 1]) * mask_x_diff_re
+        y_p = y_p.tile([3, 1, 1, 1]) * mask_y_diff_re
+        norm_x = paddle.norm(x_p, axis=1,
+                             keepdim=True).tile([1, x_p.shape[1], 1, 1])
+        norm_x = paddle.where(norm_x == 0, paddle.to_tensor(1e10), norm_x)
+        x_p = x_p / norm_x
+        norm_y = paddle.norm(y_p, axis=1,
+                             keepdim=True).tile([1, y_p.shape[1], 1, 1])
+        norm_y = paddle.where(norm_y == 0, paddle.to_tensor(1e10), norm_y)
+        y_p = y_p / norm_y
+        x_flat = paddle.concat([x_flat * 0.01, x_p], axis=1)
+        y_flat = paddle.concat([y_flat * 0.01, y_p], axis=1)
+        x_flat_re = x_flat.reshape([3, x_flat.shape[1], h * w])
+        y_flat_re = y_flat.reshape([3, y_flat.shape[1], h2 * w2])
+        a_ = paddle.matmul(x_flat_re, y_flat_re, transpose_x=True)
+        with paddle.no_grad():
+            a_mask = a_ != 0
+        a_ *= 200
        a = F.softmax(a_, axis=-1)
+        a = a * a_mask
        gamma, beta = self.simple_spade(y)
+        gamma = gamma.tile([3, 1, 1, 1]) * mask_y
+        beta = beta.tile([3, 1, 1, 1]) * mask_y
        beta = beta.reshape([-1, h2 * w2, 1])
        beta = paddle.matmul(a, beta)
+        beta = beta.transpose((0, 2, 1))
        beta = beta.reshape([-1, 1, h2, w2])
        gamma = gamma.reshape([-1, h2 * w2, 1])
        gamma = paddle.matmul(a, gamma)
+        gamma = gamma.transpose((0, 2, 1))
        gamma = gamma.reshape([-1, 1, h2, w2])
+        beta = (beta[0] + beta[1] + beta[2]).unsqueeze(0)
+        gamma = (gamma[0] + gamma[1] + gamma[2]).unsqueeze(0)
        x = x * (1 + gamma) + beta
        for i in range(self.repeat_num):

--- a/ppgan/models/makeup_model.py
+++ b/ppgan/models/makeup_model.py
@@ -323,9 +323,9 @@ class MakeupModel(BaseModel):
        g_B_eye_loss_his = self.criterionL1(fake_B_eye_masked, fake_match_eye_B)
        self.loss_G_A_his = (g_A_eye_loss_his + g_A_lip_loss_his +
-                             g_A_skin_loss_his * 0.1) * 0.01
+                             g_A_skin_loss_his * 0.1) * 0.1
        self.loss_G_B_his = (g_B_eye_loss_his + g_B_lip_loss_his +
-                             g_B_skin_loss_his * 0.1) * 0.01
+                             g_B_skin_loss_his * 0.1) * 0.1
        self.losses['G_A_his_loss'] = self.loss_G_A_his
        self.losses['G_B_his_loss'] = self.loss_G_A_his
@@ -343,9 +343,9 @@ class MakeupModel(BaseModel):
        self.loss_B_vgg = self.criterionL2(vgg_fake_B,
                                           vgg_r) * lambda_B * lambda_vgg
-        self.loss_rec = (self.loss_cycle_A + self.loss_cycle_B +
+        self.loss_rec = (self.loss_cycle_A * 0.2 + self.loss_cycle_B * 0.2 +
-                         self.loss_A_vgg + self.loss_B_vgg) * 0.2
+                         self.loss_A_vgg + self.loss_B_vgg) * 0.5
-        self.loss_idt = (self.loss_idt_A + self.loss_idt_B) * 0.2
+        self.loss_idt = (self.loss_idt_A + self.loss_idt_B) * 0.1
        self.losses['G_A_vgg_loss'] = self.loss_A_vgg
        self.losses['G_B_vgg_loss'] = self.loss_B_vgg

--- a/ppgan/utils/options.py
+++ b/ppgan/utils/options.py
@@ -57,7 +57,7 @@ def parse_args():
    parser.add_argument("--reference_dir",
                        default="",
                        help="path to reference images")
-    parser.add_argument("--model_path", default="", help="model for loading")
+    parser.add_argument("--model_path", default=None, help="model for loading")
    args = parser.parse_args()

--- a/ppgan/utils/preprocess.py
+++ b/ppgan/utils/preprocess.py
@@ -30,11 +30,9 @@ def generate_P_from_lmks(lmks, resize, w, h):
    diff = fix - lmks
    diff = diff.transpose(1, 2, 0)
    diff = cv2.resize(diff, diff_size, interpolation=cv2.INTER_NEAREST)
-    diff = diff.transpose(2, 0, 1).reshape(136, -1)
+    diff = diff.transpose(2, 0, 1)
-    norm = np.linalg.norm(diff, axis=0)
-    P_np = diff / norm
-    return P_np
+    return diff
 def copy_area(tar, src, lms):

--- a/tools/psgan_infer.py
+++ b/tools/psgan_infer.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+from ppgan.utils.options import parse_args
+from ppgan.utils.config import get_config
+from ppgan.apps.psgan_predictor import PSGANPredictor
+if __name__ == '__main__':
+    args = parse_args()
+    cfg = get_config(args.config_file)
+    predictor = PSGANPredictor(args, cfg)
+    predictor.run()