"""Command-line entry point for the face-parsing demo.

Builds a ``FaceParsePredictor`` and runs it on the image given with
``--input_image``. Passing ``--cpu`` switches Paddle to the CPU device
before the predictor is created.
"""
import argparse

import paddle
from ppgan.apps.face_parse_predictor import FaceParsePredictor

parser = argparse.ArgumentParser()
# The image path is mandatory: without ``required=True`` argparse would
# silently hand ``None`` on to the predictor instead of reporting a usage
# error to the user.
parser.add_argument("--input_image",
                    type=str,
                    required=True,
                    help="path to source image")

parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.")

if __name__ == "__main__":
    args = parser.parse_args()

    # The device has to be selected before the predictor is constructed.
    if args.cpu:
        paddle.set_device('cpu')

    predictor = FaceParsePredictor()
    predictor.run(args.input_image)
1. Face parsing introduction + +Face parsing addresses the task of parsing facial components from face images. We utilize BiSeNet to handle this problem and focus on computing the pixel-wise label map of a face image. It is useful for a variety of tasks, including recognition, animation, and synthesis. This application is now working in our makeup transfer model. + +## 2. How to use + +### 2.1 Test + +Run the following command to complete the face parsing task. The output will be the segmented face-component mask for the input image. + +``` +cd applications +python tools/face_parse.py --input_image ../docs/imgs/face.png +``` + +**params:** + +- input_image: path of the input face image + +## 3. Results +![](../../imgs/face_parse_out.png) + +### 4. Reference + +``` +@misc{yu2018bisenet, + title={BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation}, + author={Changqian Yu and Jingbo Wang and Chao Peng and Changxin Gao and Gang Yu and Nong Sang}, + year={2018}, + eprint={1808.00897}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` diff --git a/docs/imgs/face.png b/docs/imgs/face.png new file mode 100644 index 0000000000000000000000000000000000000000..e856cbf182be0f56783525fd668ede7fd1d4d168 Binary files /dev/null and b/docs/imgs/face.png differ diff --git a/docs/imgs/face_parse_out.png b/docs/imgs/face_parse_out.png new file mode 100644 index 0000000000000000000000000000000000000000..5cf052358e00b90554f2f13debab1409caf8498d Binary files /dev/null and b/docs/imgs/face_parse_out.png differ diff --git a/docs/zh_CN/apis/apps.md b/docs/zh_CN/apis/apps.md index c59ad969fa603e3259c788dd623f289cef497964..79ea36b68ac1b2d83d0487ecb0f4cedcad68c02b 100644 --- a/docs/zh_CN/apis/apps.md +++ b/docs/zh_CN/apis/apps.md @@ -1,6 +1,6 @@ # Applications接口说明 -ppgan.apps包含超分、插针、上色、换妆、图像动画生成等应用,接口使用简洁,并内置了已训练好的模型,可以直接用来做应用。 +ppgan.apps包含超分、插帧、上色、换妆、图像动画生成、人脸解析等应用,接口使用简洁,并内置了已训练好的模型,可以直接用来做应用。 ## 公共用法 @@ -244,7 +244,7 @@ run(video_path) > > **返回值** > -> 
> > - tuple(str, str): 前者超分后的视频每帧图片的保存路径,后者为昨晚超分的视频路径。 +> > - tuple(str, str): 前者超分后的视频每帧图片的保存路径,后者为做完超分的视频路径。 @@ -254,7 +254,7 @@ run(video_path) ppgan.apps.DAINPredictor(output='output', weight_path=None,time_step=None, use_gpu=True, key_frame_thread=0,remove_duplicates=False) ``` -> 构建插针DAIN模型的实例。DAIN: Depth-Aware Video Frame Interpolation,论文链接: https://arxiv.org/abs/1904.00830 ,对视频做插针,获得帧率更高的视频。 +> 构建插帧DAIN模型的实例。DAIN: Depth-Aware Video Frame Interpolation,论文链接: https://arxiv.org/abs/1904.00830 ,对视频做插帧,获得帧率更高的视频。 > > **示例** > @@ -269,7 +269,7 @@ ppgan.apps.DAINPredictor(output='output', weight_path=None,time_step=None, use > > > - output_path (str): 设置预测输出的保存路径,默认是output。注意,保存路径为设置output/DAIN。 > > - weight_path (str): 指定模型路径,默认是None,则会自动下载内置的已经训练好的模型。 -> > - time_step (float): 帧率变化的倍数为 1./time_step,例如,如果time_step为0.5,则2倍插针,为0.25,则为4倍插针。 +> > - time_step (float): 帧率变化的倍数为 1./time_step,例如,如果time_step为0.5,则2倍插帧,为0.25,则为4倍插帧。 > > - use_gpu (bool): 是否使用GPU做预测,默认是True。 > > - remove_duplicates (bool): 是否去除重复帧,默认是False。 @@ -295,7 +295,7 @@ run(video_path) ppgan.apps.FirstOrderPredictor(output='output', weight_path=None,config=None, relative=False, adapt_scale=False,find_best_frame=False, best_frame=None) ``` -> 构建FirsrOrder模型的实例,此模型用来做Image Animation,既给定一张源图片和一个驱动视频,生成一段视频,其中住体是源图片,动作是驱动视频中的动作。论文是First Order Motion Model for Image Animation,论文链接: https://arxiv.org/abs/2003.00196 。 +> 构建FirstOrder模型的实例,此模型用来做Image Animation,即给定一张源图片和一个驱动视频,生成一段视频,其中主体是源图片,动作是驱动视频中的动作。论文是First Order Motion Model for Image Animation,论文链接: https://arxiv.org/abs/2003.00196 。 > > **示例** > @@ -330,3 +330,24 @@ run(source_image,driving_video) > **返回值** > > > 无。 + +## ppgan.apps.FaceParsePredictor + +```python +ppgan.apps.FaceParsePredictor(output_path='output') +``` +> 构建人脸解析模型实例,此模型用来做人脸解析, 即给定一个输入的人脸图像,人脸解析将为每个语义成分(如头发、嘴唇、鼻子、耳朵等)分配一个像素级标签。我们用BiSeNet来完成这项任务。论文是 BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation, 论文链接: https://arxiv.org/abs/1808.00897v1. 
+ +> **参数:** +> +> > - input_image: 输入待解析的图片文件路径 + +> **示例:** +> +> ``` +> from ppgan.apps import FaceParsePredictor +> parser = FaceParsePredictor() +> parser.run('docs/imgs/face.png') +> ``` +> **返回值:** +> > - mask(numpy.ndarray): 返回解析完成的人脸成分mask矩阵, 数据类型为numpy.ndarray diff --git a/docs/zh_CN/tutorials/face_parse.md b/docs/zh_CN/tutorials/face_parse.md new file mode 100644 index 0000000000000000000000000000000000000000..24c1a622f644f065cdaf12cd1ab6e6544064d44b --- /dev/null +++ b/docs/zh_CN/tutorials/face_parse.md @@ -0,0 +1,34 @@ +# 人脸解析 + +## 1. 人脸解析简介 + +人脸解析是语义图像分割的一种特殊情况,人脸解析是计算人脸图像中不同语义成分(如头发、嘴唇、鼻子、眼睛等)的像素级标签映射。给定一个输入的人脸图像,人脸解析将为每个语义成分分配一个像素级标签。我们利用BiSeNet来解决这个问题。人脸解析工具在很多任务中都有应用,如识别,动画以及合成等。这个工具我们目前应用在换妆模型上。 +## 2. 使用方法 + +### 2.1 测试 + +运行如下命令,可以完成人脸解析任务,程序运行成功后,会在`output`文件夹生成解析后的图片文件。具体命令如下所示: +``` +cd applications +python tools/face_parse.py --input_image ../docs/imgs/face.png +``` + +**参数:** + +- input_image: 输入待解析的图片文件路径 + +## 3. 结果展示 +![](../../imgs/face_parse_out.png) + +### 4. 
参考文献 + +``` +@misc{yu2018bisenet, + title={BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation}, + author={Changqian Yu and Jingbo Wang and Chao Peng and Changxin Gao and Gang Yu and Nong Sang}, + year={2018}, + eprint={1808.00897}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` diff --git a/ppgan/apps/__init__.py b/ppgan/apps/__init__.py index ed269bf8e8e1e904d6e8c23f9b447ae498f7711d..19e2cf8a05df53a716abb75707642d0d436ce6e6 100644 --- a/ppgan/apps/__init__.py +++ b/ppgan/apps/__init__.py @@ -18,3 +18,4 @@ from .deoldify_predictor import DeOldifyPredictor from .realsr_predictor import RealSRPredictor from .edvr_predictor import EDVRPredictor from .first_order_predictor import FirstOrderPredictor +from .face_parse_predictor import FaceParsePredictor diff --git a/ppgan/apps/face_parse_predictor.py b/ppgan/apps/face_parse_predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..4cf0084e1821f45497ae8712f2804b39702bd238 --- /dev/null +++ b/ppgan/apps/face_parse_predictor.py @@ -0,0 +1,59 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import os
import sys
import argparse

from PIL import Image
import numpy as np
import cv2

import ppgan.faceutils as futils
from ppgan.utils.preprocess import *
from ppgan.utils.visual import mask2image
from .base_predictor import BasePredictor


class FaceParsePredictor(BasePredictor):
    """Parse the first detected face of an image and save a color mask.

    Args:
        output_path (str): directory that receives ``face_parse.png``.
            It is created on demand when ``run`` saves its result.
    """
    def __init__(self, output_path='output'):
        self.output_path = output_path
        # Size the cropped face is resized to before it is handed to the
        # parser.
        self.input_size = (512, 512)
        # Ratios forwarded unchanged to ``futils.dlib.crop``.
        # NOTE(review): presumably margins around the detected face box --
        # confirm against ``ppgan.faceutils.dlib.crop``.
        self.up_ratio = 0.6 / 0.85
        self.down_ratio = 0.2 / 0.85
        self.width_ratio = 0.2 / 0.85
        self.face_parser = futils.mask.FaceParser()

    def run(self, image):
        """Parse one image and write the visualization to disk.

        Args:
            image: path (or anything ``PIL.Image.open`` accepts) of the
                input picture.

        Returns:
            The color image produced by ``mask2image`` for the parsed mask,
            or ``None`` when the detector reports no face. The same image is
            written to ``<output_path>/face_parse.png``.
        """
        image = Image.open(image).convert("RGB")
        face = futils.dlib.detect(image)

        if not face:
            # Nothing to parse; callers get None and no file is written.
            return None

        # Only the first detection is used.
        face_on_image = face[0]
        image, face, crop_face = futils.dlib.crop(image, face_on_image,
                                                  self.up_ratio,
                                                  self.down_ratio,
                                                  self.width_ratio)
        np_image = np.array(image)
        mask = self.face_parser.parse(
            np.float32(cv2.resize(np_image, self.input_size)))
        mask = cv2.resize(mask.numpy(), (256, 256))
        mask = mask.astype(np.uint8)
        mask = mask2image(mask)

        # The original code referenced the bare name ``output_path`` here,
        # which is undefined inside ``run`` and raised NameError before the
        # result could be saved. ``exist_ok=True`` also removes the
        # check-then-create race.
        os.makedirs(self.output_path, exist_ok=True)
        save_path = os.path.join(self.output_path, 'face_parse.png')
        cv2.imwrite(save_path, mask)
        return mask
def mask2image(mask: np.ndarray, format="HWC"):
    """Render a 2-D label mask as a color image.

    Every label from ``0`` up to and including ``mask.max()`` is painted
    with its own color drawn from NumPy's global random generator, so the
    colors differ between calls unless the caller seeds ``np.random``.
    Pixels whose value is outside that range (e.g. negative) stay black.

    Args:
        mask: 2-D array of class indices with shape ``(H, W)``.
        format: accepted for backward compatibility; it is not used and the
            result is always laid out as ``(H, W, 3)``.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` and shape ``(H, W, 3)``.

    Raises:
        ValueError: if ``mask`` is not 2-D (from unpacking ``mask.shape``).
    """
    H, W = mask.shape

    canvas = np.zeros((H, W, 3), dtype=np.uint8)
    if mask.size == 0:
        # ``mask.max()`` raises on an empty array; there is nothing to paint.
        return canvas

    # ``+ 1`` so the highest label is colored too; ``range(mask.max())``
    # stopped one short and always left the last class black.
    for label in range(int(mask.max()) + 1):
        color = (np.random.rand(3) * 255).astype(np.uint8)
        # Labels are mutually exclusive, so a masked assignment is
        # equivalent to accumulating ``(mask == label) * color``.
        canvas[mask == label] = color
    return canvas