diff --git a/modules/image/Image_gan/gan/first_order_motion/README.md b/modules/image/Image_gan/gan/first_order_motion/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ffca34eb9f96a9037a0b95e23b2ae20ded537b16
--- /dev/null
+++ b/modules/image/Image_gan/gan/first_order_motion/README.md
@@ -0,0 +1,95 @@
+# first_order_motion
+
+|Module Name|first_order_motion|
+| :--- | :---: |
+|Category|image - image generation|
+|Network|S3FD|
+|Dataset|-|
+|Fine-tuning supported or not|No|
+|Module Size|343MB|
+|Latest update date|2021-12-24|
+|Data indicators|-|
+
+
+## I. Basic Information
+
+- ### Application Effect Display
+  - Sample results:
+
+    Input image
+
+    Input video
+
+    Output video
+
+- ### Module Introduction
+
+  - First Order Motion performs image animation: given a source image and a driving video, the person in the source image is driven to reproduce the motions of the person in the driving video.
+
+
+## II. Installation
+
+- ### 1. Environmental Dependence
+
+  - paddlepaddle >= 2.1.0
+  - paddlehub >= 2.1.0 | [How to install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst)
+
+- ### 2. Installation
+
+  - ```shell
+    $ hub install first_order_motion
+    ```
+  - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_ch/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_ch/get_start/mac_quickstart.md)
+
+## III. Module API Prediction
+
+- ### 1. Command line Prediction
+
+  - ```shell
+    $ hub run first_order_motion --source_image "/PATH/TO/IMAGE" --driving_video "/PATH/TO/VIDEO" --use_gpu
+    ```
+  - This invokes the motion-driven video generation model from the command line. For more information, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst)
+
+- ### 2. Prediction Code Example
+
+  - ```python
+    import paddlehub as hub
+
+    module = hub.Module(name="first_order_motion")
+    module.generate(
+        source_image="/PATH/TO/IMAGE",
+        driving_video="/PATH/TO/VIDEO",
+        ratio=0.4,
+        image_size=256,
+        output_dir='./motion_driving_result/',
+        filename='result.mp4',
+        use_gpu=False)
+    ```
+
+- ### 3. API
+
+  - ```python
+    generate(self, source_image=None, driving_video=None, ratio=0.4, image_size=256, output_dir='./motion_driving_result/', filename='result.mp4', use_gpu=False)
+    ```
+  - Motion-driven video generation API.
+
+  - **Parameters**
+    - source_image (str): path to the source image. Both single-person and multi-person images are supported; the expressions and motions of the person in the driving video are transferred to the person(s) in this image.
+    - driving_video (str): path to the driving video whose expressions and motions are to be transferred.
+    - ratio (float): proportion of the source image that each pasted-back generated face region occupies. Tune it according to the output quality, especially when several faces are close to each other. Default is 0.4; the recommended range is [0.4, 0.5].
+    - image_size (int): size of the face image, 256 by default; can be set to 512.
+    - output_dir (str): directory for saving the result.
+    - filename (str): filename of the result.
+    - use_gpu (bool): whether to use the GPU.
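+
+  - **Example**: a minimal sketch for tuning `ratio` when faces in the source image sit close together (the input paths below are placeholders to replace with your own files):
+
+  - ```python
+    import paddlehub as hub
+
+    module = hub.Module(name="first_order_motion")
+    module.generate(
+        source_image="group_photo.jpg",  # placeholder: multi-person source image
+        driving_video="driving.mp4",     # placeholder: driving video
+        ratio=0.45,                      # slightly larger paste-back face region
+        image_size=256,
+        output_dir='./motion_driving_result/',
+        filename='group_result.mp4',
+        use_gpu=True)
+    ```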
+
+
+## IV. Release Note
+
+* 1.0.0
+
+  First release
+
+  - ```shell
+    $ hub install first_order_motion==1.0.0
+    ```
diff --git a/modules/image/Image_gan/gan/first_order_motion/model.py b/modules/image/Image_gan/gan/first_order_motion/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..35b180d4283f86644ab16d1170e99f6d8bb5d5cf
--- /dev/null
+++ b/modules/image/Image_gan/gan/first_order_motion/model.py
@@ -0,0 +1,352 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import os
+import sys
+import math
+import pickle
+
+import yaml
+import imageio
+import numpy as np
+from tqdm import tqdm
+from scipy.spatial import ConvexHull
+import cv2
+import paddle
+from ppgan.utils.download import get_path_from_url
+from ppgan.utils.animate import normalize_kp
+from ppgan.modules.keypoint_detector import KPDetector
+from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator
+from ppgan.faceutils import face_detection
+
+
+class FirstOrderPredictor:
+ def __init__(self,
+ weight_path=None,
+ config=None,
+ image_size=256,
+ relative=True,
+ adapt_scale=False,
+ find_best_frame=False,
+ best_frame=None,
+ face_detector='sfd',
+ multi_person=False,
+ face_enhancement=True,
+ batch_size=1,
+ mobile_net=False):
+ if config is not None and isinstance(config, str):
+ with open(config) as f:
+ self.cfg = yaml.load(f, Loader=yaml.SafeLoader)
+ elif isinstance(config, dict):
+ self.cfg = config
+ elif config is None:
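+            # built-in default configuration, matching the vox weights downloaded below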
+ self.cfg = {
+ 'model': {
+ 'common_params': {
+ 'num_kp': 10,
+ 'num_channels': 3,
+ 'estimate_jacobian': True
+ },
+ 'generator': {
+ 'kp_detector_cfg': {
+ 'temperature': 0.1,
+ 'block_expansion': 32,
+ 'max_features': 1024,
+ 'scale_factor': 0.25,
+ 'num_blocks': 5
+ },
+ 'generator_cfg': {
+ 'block_expansion': 64,
+ 'max_features': 512,
+ 'num_down_blocks': 2,
+ 'num_bottleneck_blocks': 6,
+ 'estimate_occlusion_map': True,
+ 'dense_motion_params': {
+ 'block_expansion': 64,
+ 'max_features': 1024,
+ 'num_blocks': 5,
+ 'scale_factor': 0.25
+ }
+ }
+ }
+ }
+ }
+ self.image_size = image_size
+ if weight_path is None:
+ if mobile_net:
+ vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-mobile.pdparams'
+
+ else:
+ if self.image_size == 512:
+ vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-cpk-512.pdparams'
+ else:
+ vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-cpk.pdparams'
+ weight_path = get_path_from_url(vox_cpk_weight_url)
+
+ self.weight_path = weight_path
+ self.relative = relative
+ self.adapt_scale = adapt_scale
+ self.find_best_frame = find_best_frame
+ self.best_frame = best_frame
+ self.face_detector = face_detector
+ self.generator, self.kp_detector = self.load_checkpoints(self.cfg, self.weight_path)
+ self.multi_person = multi_person
+ self.face_enhancement = face_enhancement
+ self.batch_size = batch_size
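+        # optionally post-process generated frames with ppgan's face enhancement to sharpen faces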
+ if face_enhancement:
+ from ppgan.faceutils.face_enhancement import FaceEnhancement
+ self.faceenhancer = FaceEnhancement(batch_size=batch_size)
+
+ def read_img(self, path):
+ img = imageio.imread(path)
+ if img.ndim == 2:
+ img = np.expand_dims(img, axis=2)
+        # some images have 4 channels (RGBA); keep only the first three (RGB)
+ if img.shape[2] > 3:
+ img = img[:, :, :3]
+ return img
+
+ def run(self, source_image, driving_video, ratio, image_size, output_dir, filename):
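+        """Animate each face found in source_image with the motions of driving_video and write the result to output_dir/filename."""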
+ self.ratio = ratio
+ self.image_size = image_size
+ self.output = output_dir
+ self.filename = filename
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+
+ def get_prediction(face_image):
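+            # If requested, pick the driving frame whose pose best matches the
+            # source image, then animate forward and backward from that frame and
+            # stitch the halves, so relative motion starts from an aligned pose.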
+ if self.find_best_frame or self.best_frame is not None:
+ i = self.best_frame if self.best_frame is not None else self.find_best_frame_func(
+ source_image, driving_video)
+
+ print("Best frame: " + str(i))
+ driving_forward = driving_video[i:]
+ driving_backward = driving_video[:(i + 1)][::-1]
+ predictions_forward = self.make_animation(
+ face_image,
+ driving_forward,
+ self.generator,
+ self.kp_detector,
+ relative=self.relative,
+ adapt_movement_scale=self.adapt_scale)
+ predictions_backward = self.make_animation(
+ face_image,
+ driving_backward,
+ self.generator,
+ self.kp_detector,
+ relative=self.relative,
+ adapt_movement_scale=self.adapt_scale)
+ predictions = predictions_backward[::-1] + predictions_forward[1:]
+ else:
+ predictions = self.make_animation(
+ face_image,
+ driving_video,
+ self.generator,
+ self.kp_detector,
+ relative=self.relative,
+ adapt_movement_scale=self.adapt_scale)
+ return predictions
+
+ source_image = self.read_img(source_image)
+ reader = imageio.get_reader(driving_video)
+ fps = reader.get_meta_data()['fps']
+ driving_video = []
+ try:
+ for im in reader:
+ driving_video.append(im)
+ except RuntimeError:
+ print("Read driving video error!")
+ pass
+ reader.close()
+
+ driving_video = [cv2.resize(frame, (self.image_size, self.image_size)) / 255.0 for frame in driving_video]
+ results = []
+
+ bboxes = self.extract_bbox(source_image.copy())
+        print(str(len(bboxes)) + " person(s) detected")
+
+        # animate each detected face crop; without multi_person, only the first face box is processed
+ for rec in bboxes:
+ face_image = source_image.copy()[rec[1]:rec[3], rec[0]:rec[2]]
+ face_image = cv2.resize(face_image, (self.image_size, self.image_size)) / 255.0
+ predictions = get_prediction(face_image)
+ results.append({'rec': rec, 'predict': [predictions[i] for i in range(predictions.shape[0])]})
+ if len(bboxes) == 1 or not self.multi_person:
+ break
+ out_frame = []
+
+ for i in range(len(driving_video)):
+ frame = source_image.copy()
+ for result in results:
+ x1, y1, x2, y2, _ = result['rec']
+ h = y2 - y1
+ w = x2 - x1
+ out = result['predict'][i]
+                out = cv2.resize(out.astype(np.uint8), (w, h))
+ if len(results) == 1:
+ frame[y1:y2, x1:x2] = out
+ break
+ else:
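+                    # with several faces, paste the prediction back through a circular
+                    # mask whose radius scales with `ratio`, avoiding hard rectangular
+                    # seams between adjacent faces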
+ patch = np.zeros(frame.shape).astype('uint8')
+ patch[y1:y2, x1:x2] = out
+ mask = np.zeros(frame.shape[:2]).astype('uint8')
+ cx = int((x1 + x2) / 2)
+ cy = int((y1 + y2) / 2)
+ cv2.circle(mask, (cx, cy), math.ceil(h * self.ratio), (255, 255, 255), -1, 8, 0)
+ frame = cv2.copyTo(patch, mask, frame)
+
+ out_frame.append(frame)
+        imageio.mimsave(os.path.join(self.output, self.filename), out_frame, fps=fps)
+
+ def load_checkpoints(self, config, checkpoint_path):
+
+ generator = OcclusionAwareGenerator(
+ **config['model']['generator']['generator_cfg'], **config['model']['common_params'], inference=True)
+
+ kp_detector = KPDetector(**config['model']['generator']['kp_detector_cfg'], **config['model']['common_params'])
+
+ checkpoint = paddle.load(self.weight_path)
+ generator.set_state_dict(checkpoint['generator'])
+
+ kp_detector.set_state_dict(checkpoint['kp_detector'])
+
+ generator.eval()
+ kp_detector.eval()
+
+ return generator, kp_detector
+
+ def make_animation(self,
+ source_image,
+ driving_video,
+ generator,
+ kp_detector,
+ relative=True,
+ adapt_movement_scale=True):
+ with paddle.no_grad():
+ predictions = []
+ source = paddle.to_tensor(source_image[np.newaxis].astype(np.float32)).transpose([0, 3, 1, 2])
+
+ driving = paddle.to_tensor(np.array(driving_video).astype(np.float32)).transpose([0, 3, 1, 2])
+ kp_source = kp_detector(source)
+ kp_driving_initial = kp_detector(driving[0:1])
+ kp_source_batch = {}
+ kp_source_batch["value"] = paddle.tile(kp_source["value"], repeat_times=[self.batch_size, 1, 1])
+ kp_source_batch["jacobian"] = paddle.tile(kp_source["jacobian"], repeat_times=[self.batch_size, 1, 1, 1])
+ source = paddle.tile(source, repeat_times=[self.batch_size, 1, 1, 1])
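+            # the tiled source frames and keypoints are sliced to the actual
+            # mini-batch size on each iteration below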
+ begin_idx = 0
+ for frame_idx in tqdm(range(int(np.ceil(float(driving.shape[0]) / self.batch_size)))):
+ frame_num = min(self.batch_size, driving.shape[0] - begin_idx)
+ driving_frame = driving[begin_idx:begin_idx + frame_num]
+ kp_driving = kp_detector(driving_frame)
+ kp_source_img = {}
+ kp_source_img["value"] = kp_source_batch["value"][0:frame_num]
+ kp_source_img["jacobian"] = kp_source_batch["jacobian"][0:frame_num]
+
+ kp_norm = normalize_kp(
+ kp_source=kp_source,
+ kp_driving=kp_driving,
+ kp_driving_initial=kp_driving_initial,
+ use_relative_movement=relative,
+ use_relative_jacobian=relative,
+ adapt_movement_scale=adapt_movement_scale)
+
+ out = generator(source[0:frame_num], kp_source=kp_source_img, kp_driving=kp_norm)
+ img = np.transpose(out['prediction'].numpy(), [0, 2, 3, 1]) * 255.0
+
+ if self.face_enhancement:
+ img = self.faceenhancer.enhance_from_batch(img)
+
+ predictions.append(img)
+ begin_idx += frame_num
+ return np.concatenate(predictions)
+
+ def find_best_frame_func(self, source, driving):
+ import face_alignment
+
+ def normalize_kp(kp):
+ kp = kp - kp.mean(axis=0, keepdims=True)
+ area = ConvexHull(kp[:, :2]).volume
+ area = np.sqrt(area)
+ kp[:, :2] = kp[:, :2] / area
+ return kp
+
+ fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=True)
+
+ kp_source = fa.get_landmarks(255 * source)[0]
+ kp_source = normalize_kp(kp_source)
+ norm = float('inf')
+ frame_num = 0
+ for i, image in tqdm(enumerate(driving)):
+ kp_driving = fa.get_landmarks(255 * image)[0]
+ kp_driving = normalize_kp(kp_driving)
+ new_norm = (np.abs(kp_source - kp_driving)**2).sum()
+ if new_norm < norm:
+ norm = new_norm
+ frame_num = i
+ return frame_num
+
+ def extract_bbox(self, image):
+ detector = face_detection.FaceAlignment(
+ face_detection.LandmarksType._2D, flip_input=False, face_detector=self.face_detector)
+
+ frame = [image]
+ predictions = detector.get_detections_for_image(np.array(frame))
+ person_num = len(predictions)
+ if person_num == 0:
+ return np.array([])
+ results = []
+ h, w, _ = image.shape
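+        # expand each detection to a crop with margin max(bh, bw): full margin
+        # vertically, 0.8x horizontally, clipped to the image bounds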
+ for rect in predictions:
+ bh = rect[3] - rect[1]
+ bw = rect[2] - rect[0]
+ cy = rect[1] + int(bh / 2)
+ cx = rect[0] + int(bw / 2)
+ margin = max(bh, bw)
+ y1 = max(0, cy - margin)
+ x1 = max(0, cx - int(0.8 * margin))
+ y2 = min(h, cy + margin)
+ x2 = min(w, cx + int(0.8 * margin))
+ area = (y2 - y1) * (x2 - x1)
+ results.append([x1, y1, x2, y2, area])
+ # if a person has more than one bbox, keep the largest one
+ # maybe greedy will be better?
+        results.sort(key=lambda box: box[4], reverse=True)
+ results_box = [results[0]]
+ for i in range(1, person_num):
+ num = len(results_box)
+ add_person = True
+ for j in range(num):
+ pre_person = results_box[j]
+ iou = self.IOU(pre_person[0], pre_person[1], pre_person[2], pre_person[3], pre_person[4], results[i][0],
+ results[i][1], results[i][2], results[i][3], results[i][4])
+ if iou > 0.5:
+ add_person = False
+ break
+ if add_person:
+ results_box.append(results[i])
+ boxes = np.array(results_box)
+ return boxes
+
+ def IOU(self, ax1, ay1, ax2, ay2, sa, bx1, by1, bx2, by2, sb):
+        # sa and sb are the precomputed areas of boxes a and b
+ x1, y1 = max(ax1, bx1), max(ay1, by1)
+ x2, y2 = min(ax2, bx2), min(ay2, by2)
+ w = x2 - x1
+ h = y2 - y1
+ if w < 0 or h < 0:
+ return 0.0
+ else:
+ return 1.0 * w * h / (sa + sb - w * h)
diff --git a/modules/image/Image_gan/gan/first_order_motion/module.py b/modules/image/Image_gan/gan/first_order_motion/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3d5ecb07b5756865d0e41678f2234520cbd46f6
--- /dev/null
+++ b/modules/image/Image_gan/gan/first_order_motion/module.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import argparse
+
+import paddle
+from paddlehub.module.module import moduleinfo, runnable
+
+from .model import FirstOrderPredictor
+
+
+@moduleinfo(
+ name="first_order_motion", type="CV/gan", author="paddlepaddle", author_email="", summary="", version="1.0.0")
+class FirstOrderMotion:
+ def __init__(self):
+ self.pretrained_model = os.path.join(self.directory, "vox-cpk.pdparams")
+ self.network = FirstOrderPredictor(weight_path=self.pretrained_model, face_enhancement=True)
+
+ def generate(self,
+ source_image=None,
+ driving_video=None,
+ ratio=0.4,
+ image_size=256,
+ output_dir='./motion_driving_result/',
+ filename='result.mp4',
+ use_gpu=False):
+        '''
+        Motion-driven video generation.
+
+        source_image (str): path to the source image.
+        driving_video (str): path to the driving video.
+        ratio (float): paste-back margin ratio.
+        image_size (int): size of the face image, 256 or 512.
+        output_dir (str): directory for saving the result.
+        filename (str): filename of the result.
+        use_gpu (bool): if True, use GPU for computation, otherwise CPU.
+        '''
+ paddle.disable_static()
+ place = 'gpu:0' if use_gpu else 'cpu'
+ place = paddle.set_device(place)
+        if source_image is None or driving_video is None:
+ print('No image or driving video provided. Please input an image and a driving video.')
+ return
+ self.network.run(source_image, driving_video, ratio, image_size, output_dir, filename)
+
+ @runnable
+ def run_cmd(self, argvs: list):
+ """
+ Run as a command.
+ """
+ self.parser = argparse.ArgumentParser(
+ description="Run the {} module.".format(self.name),
+ prog='hub run {}'.format(self.name),
+ usage='%(prog)s',
+ add_help=True)
+
+ self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+ self.arg_config_group = self.parser.add_argument_group(
+ title="Config options", description="Run configuration for controlling module behavior, not required.")
+ self.add_module_config_arg()
+ self.add_module_input_arg()
+ self.args = self.parser.parse_args(argvs)
+        self.generate(
+            source_image=self.args.source_image,
+            driving_video=self.args.driving_video,
+            ratio=self.args.ratio,
+            image_size=self.args.image_size,
+            output_dir=self.args.output_dir,
+            filename=self.args.filename,
+            use_gpu=self.args.use_gpu)
+        return
+
+ def add_module_config_arg(self):
+ """
+ Add the command config options.
+ """
+ self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not")
+
+        self.arg_config_group.add_argument(
+            '--output_dir', type=str, default='motion_driving_result', help='output directory for saving the result.')
+        self.arg_config_group.add_argument(
+            "--filename", type=str, default='result.mp4', help="filename of the saved result.")
+
+ def add_module_input_arg(self):
+ """
+ Add the command input options.
+ """
+ self.arg_input_group.add_argument("--source_image", type=str, help="path to source image")
+ self.arg_input_group.add_argument("--driving_video", type=str, help="path to driving video")
+ self.arg_input_group.add_argument("--ratio", dest="ratio", type=float, default=0.4, help="margin ratio")
+ self.arg_input_group.add_argument(
+ "--image_size", dest="image_size", type=int, default=256, help="size of image")
diff --git a/modules/image/Image_gan/gan/first_order_motion/requirements.txt b/modules/image/Image_gan/gan/first_order_motion/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..67e9bb6fa840355e9ed0d44b7134850f1fe22fe1
--- /dev/null
+++ b/modules/image/Image_gan/gan/first_order_motion/requirements.txt
@@ -0,0 +1 @@
+ppgan