From a95ce33bcdf7875e4a435177d54fce806b4df045 Mon Sep 17 00:00:00 2001 From: wangna11BD <79366697+wangna11BD@users.noreply.github.com> Date: Mon, 18 Oct 2021 21:38:18 +0800 Subject: [PATCH] fix docs (#445) --- README_cn.md | 8 +-- docs/en_US/apis/apps.md | 71 ++++--------------- .../industrial_solution/photo_color_en.md | 5 +- .../industrial_solution/photo_color_cn.md | 4 +- docs/zh_CN/industrial_solution/photo_sr_cn.md | 6 +- .../industrial_solution/video_color_cn.md | 6 +- .../industrial_solution/video_frame_cn.md | 6 +- docs/zh_CN/industrial_solution/video_sr_cn.md | 8 +-- paddlegan-wechaty-demo/README.md | 17 +++-- 9 files changed, 41 insertions(+), 90 deletions(-) mode change 120000 => 100644 docs/en_US/apis/apps.md diff --git a/README_cn.md b/README_cn.md index b0a4ab4..94197c0 100644 --- a/README_cn.md +++ b/README_cn.md @@ -71,7 +71,7 @@ GAN--生成对抗网络,被“卷积网络之父”**Yann LeCun(杨立昆)
- 
+ 

- ⭐ **新增人脸融合能力,结合新版First Order Motion,实现人脸完美融合并带有丰富表情(๑^ں^๑)** ⭐
  - **[完整在线教程](https://aistudio.baidu.com/aistudio/projectdetail/2254031 )**

@@ -104,8 +104,8 @@ GAN--生成对抗网络,被“卷积网络之父”**Yann LeCun(杨立昆)

 - [快速开始](./docs/zh_CN/get_started.md)
 - [数据准备](./docs/zh_CN/data_prepare.md)
-- [API接口使用文档](./docs/zh_CN/apis/apps.md)
-- [配置文件/Config使用说明](./docs/zh_CN/config_doc.md)
+- [API接口使用文档](./docs/en_US/apis/apps.md)
+- [配置文件/Config使用说明](./docs/en_US/config_doc.md)

 ## 模型库

@@ -165,7 +165,7 @@ GAN--生成对抗网络,被“卷积网络之父”**Yann LeCun(杨立昆)

 ### 老视频修复
- +
diff --git a/docs/en_US/apis/apps.md b/docs/en_US/apis/apps.md deleted file mode 120000 index b51ff01..0000000 --- a/docs/en_US/apis/apps.md +++ /dev/null @@ -1,502 +0,0 @@ -# Introduction of Prediction Interface - -PaddleGAN(ppgan.apps)provides prediction APIs covering multiple applications, including super resolution, video frame interpolation, colorization, makeup shifter, image animation, face parsing, etc. The integral pre-trained high-performance models enable users' flexible and efficient usage and inference. - -* Colorization: - * [DeOldify](#ppgan.apps.DeOldifyPredictor) - * [DeepRemaster](#ppgan.apps.DeepRemasterPredictor) -* Super Resolution: - * [RealSR](#ppgan.apps.RealSRPredictor) - * [EDVR](#ppgan.apps.EDVRPredictor) -* Video Frame Interpolation: - * [DAIN](#ppgan.apps.DAINPredictor) -* Motion Driving: - * [FirstOrder](#ppgan.apps.FirstOrderPredictor) -* Face: - * [FaceFaceParse](#ppgan.apps.FaceParsePredictor) -* Image Animation: - * [AnimeGAN](#ppgan.apps.AnimeGANPredictor) -* Lip-syncing: - * [Wav2Lip](#ppgan.apps.Wav2LipPredictor) - - -## Public Usage - -### Switch of CPU and GPU - -By default, GPU devices with the [PaddlePaddle](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/windows-pip.html) GPU environment package installed conduct inference by using GPU. If the CPU environment package is installed, CPU is used for inference. - -If manual switch of CPU and GPU is needed,you can do the following: - - -``` -import paddle -paddle.set_device('cpu') #set as CPU -#paddle.set_device('gpu') #set as GPU -``` - -## ppgan.apps.DeOldifyPredictor - -```python -ppgan.apps.DeOldifyPredictor(output='output', weight_path=None, render_factor=32) -``` - -> Build the instance of DeOldify. DeOldify is a coloring model based on GAN. The interface supports the colorization of images or videos. The recommended video format is mp4. -> -> **Example** -> -> ```python -> from ppgan.apps import DeOldifyPredictor -> deoldify = DeOldifyPredictor() -> deoldify.run("docs/imgs/test_old.jpeg") -> ``` - -> **Parameters** -> -> > - output (str): path of the output image, default: output. Note that the save path should be set as output/DeOldify. -> > - weight_path (str): path of the model, default: None,pre-trained integral model will then be automatically downloaded. -> > - artistic (bool): whether to use "artistic" model, which may produce interesting colors, but there are more glitches. -> > - render_factor (int): the zoom factor during image rendering and colorization. The image will be zoomed to a square with side length of 16xrender_factor before being colorized. For example, with a default value of 32,the entered image will be resized to (16x32=512) 512x512. Normally,the smaller the render_factor,the faster the computation and the more vivid the colors. Therefore, old images with low quality usually benefits from lowering the value of rendering factor. The higher the value, the better the image quality, but the color may fade slightly. - -### run - -```python -run(input) -``` - -> The execution interface after building the instance. - -> **Parameters** -> -> > - input (str|np.ndarray|Image.Image): the input image or video files。For images, it could be its path, np.ndarray, or PIL.Image type. For videos, it could only be the file path. -> ->**Return Value** -> ->> - tuple(pred_img(np.array), out_paht(str)): for image input, return the predicted image, PIL.Image type and the path where the image is saved. 
-> > - tuple(frame_path(str), out_path(str)): for video input, frame_path is the save path of the images after colorizing each frame of the video, and out_path is the save path of the colorized video. - -### run_image - -```python -run_image(img) -``` - -> The interface of image colorization. - -> **Parameters** -> -> > - img (str|np.ndarray|Image.Image): input image,it could be the path of the image, np.ndarray, or PIL.Image type. -> ->**Return Value** -> ->> - pred_img(PIL.Image): return the predicted image, PIL.Image type. - -### run_video - -```python -run_video(video) -``` - -> The interface of video colorization. - -> **Parameters** -> -> > - Video (str): path of the input video files. -> -> **Return Value** -> -> > - tuple(frame_path(str), out_path(str)): frame_path is the save path of the images after colorizing each frame of the video, and out_path is the save path of the colorized video. - - - -## ppgan.apps.DeepRemasterPredictor - -```python -ppgan.apps.DeepRemasterPredictor(output='output', weight_path=None, colorization=False, reference_dir=None, mindim=360) -``` - -> Build the instance of DeepRemasterPredictor. DeepRemaster is a GAN-based coloring and restoring model, which can provide input reference frames. Only video input is available now, and the recommended format is mp4. -> -> **Example** -> -> ``` -> from ppgan.apps import DeepRemasterPredictor -> deep_remaster = DeepRemasterPredictor() -> deep_remaster.run("docs/imgs/test_old.jpeg") -> ``` -> -> - -> **Parameters** -> -> > - output (str): path of the output image, default: output. Note that the path should be set as output/DeepRemaster. -> > - weight_path (str): path of the model, default: None,pre-trained integral model will then be automatically downloaded. -> > - colorization (bool): whether to enable the coloring function, default: False, only the restoring function will be executed. -> > - reference_dir(str|None): path of the reference frame when the coloring function is on, no reference frame is also allowed. -> > - mindim(int): minimum side length of the resized image before prediction. - -### run - -```python -run(video_path) -``` - -> The execution interface after building the instance. - -> **Parameters** -> -> > - video_path (str): path of the video file. -> > -> > **Return Value** -> > -> > - tuple(str, str)): return two types of str, the former is the save path of each frame of the colorized video, the latter is the save path of the colorized video. - - - -## ppgan.apps.RealSRPredictor - -```python -ppgan.apps.RealSRPredictor(output='output', weight_path=None) -``` - -> Build the instance of RealSR。RealSR, Real-World Super-Resolution via Kernel Estimation and Noise Injection, is launched by CVPR 2020 Workshops in its super resolution model based on real-world images training. The interface imposes 4x super resolution on the input image or video. The recommended video format is mp4. -> -> *Note: the size of the input image should be less than 1000x1000pix。 -> -> **Example** -> -> ``` -> from ppgan.apps import RealSRPredictor -> sr = RealSRPredictor() -> sr.run("docs/imgs/test_sr.jpeg") -> ``` - -> **Parameters** -> -> > - output (str): path of the output image, default: output. Note that the path should be set as output/RealSR. -> > - weight_path (str): path of the model, default: None,pre-trained integral model will then be automatically downloaded. - -```python -run(video_path) -``` - -> The execution interface after building the instance. 
- -> **Parameters** -> -> > - video_path (str): path of the video file. -> ->**Return Value** -> ->> - tuple(pred_img(np.array), out_paht(str)): for image input, return the predicted image, PIL.Image type and the path where the image is saved. -> > - tuple(frame_path(str), out_path(str)): for video input, frame_path is the save path of each frame of the video after super resolution, and out_path is the save path of the video after super resolution. - -### run_image - -```python -run_image(img) -``` - -> The interface of image super resolution. - -> **Parameter** -> -> > - img (str|np.ndarray|Image.Image): input image, it could be the path of the image, np.ndarray, or PIL.Image type. -> -> **Return Value** -> -> > - pred_img(PIL.Image): return the predicted image, PIL.Image type. - -### run_video - -```python -run_video(video) -``` - -> The interface of video super resolution. - -> **Parameter** -> -> > - Video (str): path of the video file. -> -> **Return Value** -> -> > - tuple(frame_path(str), out_path(str)): frame_path is the save path of each frame of the video after super resolution, and out_path is the save path of the video after super resolution. - - - -## ppgan.apps.EDVRPredictor - -```python -ppgan.apps.EDVRPredictor(output='output', weight_path=None) -``` - -> Build the instance of RealSR. EDVR is a model designed for video super resolution. For more details, see the paper, EDVR: Video Restoration with Enhanced Deformable Convolutional Networks (https://arxiv.org/abs/1905.02716). The interface imposes 2x super resolution on the input video. The recommended video format is mp4. -> -> *Note: The interface is only available in static graph, add the following codes to enable static graph before using it: -> -> ``` -> import paddle -> paddle.enable_static() #enable static graph -> paddle.disable_static() #disable static graph -> ``` -> -> **Parameter** -> -> ``` -> from ppgan.apps import EDVRPredictor -> sr = EDVRPredictor() -> # test a video file -> sr.run("docs/imgs/test.mp4") -> ``` - -> **参数** -> -> > - output (str): path of the output image, default: output. Note that the path should be set as output/EDVR. -> > - weight_path (str): path of the model, default: None,pre-trained integral model will then be automatically downloaded. - -```python -run(video_path) -``` - -> The execution interface after building the instance. - -> **Parameter** -> -> > - video_path (str): path of the video files. -> -> **Return Value** -> -> > - tuple(str, str): the former is the save path of each frame of the video after super resolution, the latter is the save path of the video after super resolution. - - - -## ppgan.apps.DAINPredictor - -```python -ppgan.apps.DAINPredictor(output='output', weight_path=None,time_step=None, use_gpu=True, key_frame_thread=0,remove_duplicates=False) -``` - -> Build the instance of DAIN model. DAIN supports video frame interpolation, producing videos with higher frame rate. For more details, see the paper, DAIN: Depth-Aware Video Frame interpolation (https://arxiv.org/abs/1904.00830). 
-> -> *Note: The interface is only available in static graph, add the following codes to enable static graph before using it: -> -> ``` -> import paddle -> paddle.enable_static() #enable static graph -> paddle.disable_static() #disable static graph -> ``` -> -> **Example** -> -> ``` -> from ppgan.apps import DAINPredictor -> dain = DAINPredictor(time_step=0.5) # With no defualt value, time_step need to be manually specified -> # test a video file -> dain.run("docs/imgs/test.mp4") -> ``` - -> **Parameters** -> -> > - output_path (str): path of the predicted output, default: output. Note that the path should be set as output/DAIN. -> > - weight_path (str): path of the model, default: None, pre-trained integral model will then be automatically downloaded. -> > - time_step (float): the frame rate changes by a factor of 1./time_step, e.g. 2x frames if time_step is 0.5 and 4x frames if it is 0.25. -> > - use_gpu (bool): whether to make predictions by using GPU, default: True. -> > - remove_duplicates (bool): whether to remove duplicates, default: False. - -```python -run(video_path) -``` - -> The execution interface after building the instance. - -> **Parameters** -> -> > - video_path (str): path of the video file. -> -> **Return Value** -> -> > - tuple(str, str): for video input, frame_path is the save path of the image after colorizing each frame of the video, and out_path is the save path of the colorized video. - - - -## ppgan.apps.FirstOrderPredictor - -```python -ppgan.apps.FirstOrderPredictor(output='output', weight_path=None,config=None, relative=False, adapt_scale=False,find_best_frame=False, best_frame=None) -``` - -> Build the instance of FirstOrder model. The model is dedicated to Image Animation, i.e., generating a video sequence so that an object in a source image is animated according to the motion of a driving video. -> -> For more details, see paper, First Order Motion Model for Image Animation (https://arxiv.org/abs/2003.00196) . -> -> **Example** -> -> ``` -> from ppgan.apps import FirstOrderPredictor -> animate = FirstOrderPredictor() -> # test a video file -> animate.run("source.png","driving.mp4") -> ``` - -> **Parameters** -> -> > - output_path (str): path of the predicted output, default: output. Note that the path should be set as output/result.mp4. -> > - weight_path (str): path of the model, default: None, pre-trained integral model will then be automatically downloaded. -> > - config (dict|str|None): model configuration, it can be a dictionary type or a YML file, and the default value None is adopted. When the weight is None by default, the config also needs to adopt the default value None. otherwise, the configuration here should be consistent with the corresponding weight. -> > - relative (bool): indicate whether the relative or absolute coordinates of key points in the video are used in the program, default: False. -> > - adapt_scale (bool): adapt movement scale based on convex hull of key points, default: False. -> > - find_best_frame (bool): whether to start generating from the frame that best matches the source image, which exclusively applies to face applications and requires libraries with face alignment. -> > - best_frame (int): set the number of the starting frame, default: None, that is, starting from the first frame(counting from 1). - -```python -run(source_image,driving_video) -``` - -> The execution interface after building the instance, the predicted video is save in output/result.mp4. 
- -> **Parameters** -> -> > - source_image (str): input the source image。 -> > - driving_video (str): input the driving video, mp4 format recommended. -> -> **Return Value** -> -> > None. - -## ppgan.apps.FaceParsePredictor - -```pyhton -ppgan.apps.FaceParsePredictor(output_path='output') -``` -> Build the instance of the face parsing model. The model is devoted to address the task of distributing a pixel-wise label to each semantic components (e.g. hair, lips, nose, ears, etc.) in accordance with the input facial image. The task proceeds with the help of BiseNet. -> -> For more details, see the paper, BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation (https://arxiv.org/abs/1808.00897v1). -> -> *Note: dlib package is needed for this interface, use the following codes to install it: -> -> ``` -> pip install dlib -> ``` - -> It may take long to install this package under Windows, please be patient. -> -> **Parameters:** -> -> > - input_image: path of the input image to be parsed -> > - output_path: path of the output to be saved - -> **Example:** -> -> ``` -> from ppgan.apps import FaceParsePredictor -> parser = FaceParsePredictor() -> parser.run('docs/imgs/face.png') -> ``` -> **Return Value:** -> -> > - mask(numpy.ndarray): return the mask matrix of the parsed facial components, data type: numpy.ndarray. - -## ppgan.apps.AnimeGANPredictor - -```pyhton -ppgan.apps.AnimeGANPredictor(output_path='output_dir',weight_path=None,use_adjust_brightness=True) -``` -> Adopt the AnimeGAN v2 to realize the animation of scenery images. -> -> For more details, see the paper, AnimeGAN: A Novel Lightweight GAN for Photo Animation (https://link.springer.com/chapter/10.1007/978-981-15-5577-0_18). - -> **Parameters:** -> -> > - input_image: path of the input image to be parsed. - -> **Example:** -> -> ``` -> from ppgan.apps import AnimeGANPredictor -> predictor = AnimeGANPredictor() -> predictor.run('docs/imgs/animeganv2_test.jpg') -> ``` -> **Return Value:** -> -> > - anime_image(numpy.ndarray): return the stylized scenery image. - - -## ppgan.apps.MiDaSPredictor - -```pyhton -ppgan.apps.MiDaSPredictor(output=None, weight_path=None) -``` - -> MiDaSv2 is a monocular depth estimation model (see https://github.com/intel-isl/MiDaS). Monocular depth estimation is a method used to compute depth from a singe RGB image. -> -> For more details, see the paper Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer (https://arxiv.org/abs/1907.01341v3). - -> **Example** -> -> ```python -> from ppgan.apps import MiDaSPredictor -> # if set output, will write depth pfm and png file in output/MiDaS -> model = MiDaSPredictor() -> prediction = model.run() -> ``` -> -> Color display of the depth image: -> -> ```python -> import numpy as np -> import PIL.Image as Image -> import matplotlib as mpl -> import matplotlib.cm as cm -> -> vmax = np.percentile(prediction, 95) -> normalizer = mpl.colors.Normalize(vmin=prediction.min(), vmax=vmax) -> mapper = cm.ScalarMappable(norm=normalizer, cmap='magma') -> colormapped_im = (mapper.to_rgba(prediction)[:, :, :3] * 255).astype(np.uint8) -> im = Image.fromarray(colormapped_im) -> im.save('test_disp.jpeg') -> ``` -> -> **Parameters:** -> -> > - output (str): path of the output, if it is None, no pfm and png depth image will be saved. -> > - weight_path (str): path of the model, default: None, pre-trained integral model will then be automatically downloaded. 
- -> **Return Value:** -> -> > - prediction (numpy.ndarray): return the prediction. -> > - pfm_f (str): return the save path of pfm files if the output path is set. -> > - png_f (str): return the save path of png files if the output path is set. - - -## ppgan.apps.Wav2LipPredictor - -```python -ppgan.apps.Wav2LipPredictor(face=None, ausio_seq=None, outfile=None) - -``` - -> Build the instance for the Wav2Lip model, which is used for lip generation, i.e., achieving the synchronization of lip movements on a talking face video and the voice from an input audio. -> -> For more details, see the paper, A Lip Sync Expert Is All You Need for Speech to Lip Generation In the Wild (http://arxiv.org/abs/2008.10010). -> -> **Example** -> -> ``` -> from ppgan.apps import Wav2LipPredictor - -> import ppgan -> predictor = Wav2LipPredictor() -> predictor.run('/home/aistudio/先烈.jpeg', '/home/aistudio/pp_guangquan_zhenzhu46s.mp4','wav2lip') - -> ``` - -> **Parameters:** - -> - face (str): path of images or videos containing human face. -> - audio_seq (str): path of the input audio, any processable format in ffmpeg is supported, including `.wav`, `.mp3`, `.m4a` etc. -> - outfile (str): path of the output video file. - ->**Return Value** -> ->> None diff --git a/docs/en_US/apis/apps.md b/docs/en_US/apis/apps.md new file mode 100644 index 0000000..81b851c --- /dev/null +++ b/docs/en_US/apis/apps.md @@ -0,0 +1,455 @@ +# Introduction of Prediction Interface + +PaddleGAN(ppgan.apps)provides prediction APIs covering multiple applications, including super resolution, video frame interpolation, colorization, makeup shifter, image animation, face parsing, etc. The integral pre-trained high-performance models enable users' flexible and efficient usage and inference. + +* Colorization: + * [DeOldify](#ppgan.apps.DeOldifyPredictor) + * [DeepRemaster](#ppgan.apps.DeepRemasterPredictor) +* Super Resolution: + * [RealSR](#ppgan.apps.RealSRPredictor) + * [EDVR](#ppgan.apps.EDVRPredictor) +* Video Frame Interpolation: + * [DAIN](#ppgan.apps.DAINPredictor) +* Motion Driving: + * [FirstOrder](#ppgan.apps.FirstOrderPredictor) +* Face: + * [FaceFaceParse](#ppgan.apps.FaceParsePredictor) +* Image Animation: + * [AnimeGAN](#ppgan.apps.AnimeGANPredictor) +* Lip-syncing: + * [Wav2Lip](#ppgan.apps.Wav2LipPredictor) + + +## Public Usage + +### Switch of CPU and GPU + +By default, GPU devices with the [PaddlePaddle](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/windows-pip.html) GPU environment package installed conduct inference by using GPU. If the CPU environment package is installed, CPU is used for inference. + +If manual switch of CPU and GPU is needed,you can do the following: + + +``` +import paddle +paddle.set_device('cpu') #set as CPU +#paddle.set_device('gpu') #set as GPU +``` + +## ppgan.apps.DeOldifyPredictor + +```python +ppgan.apps.DeOldifyPredictor(output='output', weight_path=None, render_factor=32) +``` + +> Build the instance of DeOldify. DeOldify is a coloring model based on GAN. The interface supports the colorization of images or videos. The recommended video format is mp4. +> +> **Example** +> +> ```python +> from ppgan.apps import DeOldifyPredictor +> deoldify = DeOldifyPredictor() +> deoldify.run("docs/imgs/test_old.jpeg") +> ``` +> **Parameters** +> +> > - output (str): path of the output image, default: output. Note that the save path should be set as output/DeOldify. 
+> > - weight_path (str): path of the model, default: None,pre-trained integral model will then be automatically downloaded. +> > - artistic (bool): whether to use "artistic" model, which may produce interesting colors, but there are more glitches. +> > - render_factor (int): the zoom factor during image rendering and colorization. The image will be zoomed to a square with side length of 16xrender_factor before being colorized. For example, with a default value of 32,the entered image will be resized to (16x32=512) 512x512. Normally,the smaller the render_factor,the faster the computation and the more vivid the colors. Therefore, old images with low quality usually benefits from lowering the value of rendering factor. The higher the value, the better the image quality, but the color may fade slightly. +### run + +```python +run(input) +``` + +> The execution interface after building the instance. +> **Parameters** +> +> > - input (str|np.ndarray|Image.Image): the input image or video files。For images, it could be its path, np.ndarray, or PIL.Image type. For videos, it could only be the file path. +> +>**Return Value** +> +>> - tuple(pred_img(np.array), out_paht(str)): for image input, return the predicted image, PIL.Image type and the path where the image is saved. +> > - tuple(frame_path(str), out_path(str)): for video input, frame_path is the save path of the images after colorizing each frame of the video, and out_path is the save path of the colorized video. +### run_image + +```python +run_image(img) +``` + +> The interface of image colorization. +> **Parameters** +> +> > - img (str|np.ndarray|Image.Image): input image,it could be the path of the image, np.ndarray, or PIL.Image type. +> +>**Return Value** +> +>> - pred_img(PIL.Image): return the predicted image, PIL.Image type. +### run_video + +```python +run_video(video) +``` + +> The interface of video colorization. +> **Parameters** +> +> > - Video (str): path of the input video files. +> +> **Return Value** +> +> > - tuple(frame_path(str), out_path(str)): frame_path is the save path of the images after colorizing each frame of the video, and out_path is the save path of the colorized video. + + +## ppgan.apps.DeepRemasterPredictor + +```python +ppgan.apps.DeepRemasterPredictor(output='output', weight_path=None, colorization=False, reference_dir=None, mindim=360) +``` + +> Build the instance of DeepRemasterPredictor. DeepRemaster is a GAN-based coloring and restoring model, which can provide input reference frames. Only video input is available now, and the recommended format is mp4. +> +> **Example** +> +> ``` +> from ppgan.apps import DeepRemasterPredictor +> deep_remaster = DeepRemasterPredictor() +> deep_remaster.run("docs/imgs/test_old.jpeg") +> ``` +> +> +> **Parameters** +> +> > - output (str): path of the output image, default: output. Note that the path should be set as output/DeepRemaster. +> > - weight_path (str): path of the model, default: None,pre-trained integral model will then be automatically downloaded. +> > - colorization (bool): whether to enable the coloring function, default: False, only the restoring function will be executed. +> > - reference_dir(str|None): path of the reference frame when the coloring function is on, no reference frame is also allowed. +> > - mindim(int): minimum side length of the resized image before prediction. +### run + +```python +run(video_path) +``` + +> The execution interface after building the instance. +> **Parameters** +> +> > - video_path (str): path of the video file. 
+> > +> > **Return Value** +> > +> > - tuple(str, str)): return two types of str, the former is the save path of each frame of the colorized video, the latter is the save path of the colorized video. + + +## ppgan.apps.RealSRPredictor + +```python +ppgan.apps.RealSRPredictor(output='output', weight_path=None) +``` + +> Build the instance of RealSR。RealSR, Real-World Super-Resolution via Kernel Estimation and Noise Injection, is launched by CVPR 2020 Workshops in its super resolution model based on real-world images training. The interface imposes 4x super resolution on the input image or video. The recommended video format is mp4. +> +> *Note: the size of the input image should be less than 1000x1000pix。 +> +> **Example** +> +> ``` +> from ppgan.apps import RealSRPredictor +> sr = RealSRPredictor() +> sr.run("docs/imgs/test_sr.jpeg") +> ``` +> **Parameters** +> +> > - output (str): path of the output image, default: output. Note that the path should be set as output/RealSR. +> > - weight_path (str): path of the model, default: None,pre-trained integral model will then be automatically downloaded. +```python +run(video_path) +``` + +> The execution interface after building the instance. +> **Parameters** +> +> > - video_path (str): path of the video file. +> +>**Return Value** +> +>> - tuple(pred_img(np.array), out_paht(str)): for image input, return the predicted image, PIL.Image type and the path where the image is saved. +> > - tuple(frame_path(str), out_path(str)): for video input, frame_path is the save path of each frame of the video after super resolution, and out_path is the save path of the video after super resolution. +### run_image + +```python +run_image(img) +``` + +> The interface of image super resolution. +> **Parameter** +> +> > - img (str|np.ndarray|Image.Image): input image, it could be the path of the image, np.ndarray, or PIL.Image type. +> +> **Return Value** +> +> > - pred_img(PIL.Image): return the predicted image, PIL.Image type. +### run_video + +```python +run_video(video) +``` + +> The interface of video super resolution. +> **Parameter** +> +> > - Video (str): path of the video file. +> +> **Return Value** +> +> > - tuple(frame_path(str), out_path(str)): frame_path is the save path of each frame of the video after super resolution, and out_path is the save path of the video after super resolution. + + +## ppgan.apps.EDVRPredictor + +```python +ppgan.apps.EDVRPredictor(output='output', weight_path=None) +``` + +> Build the instance of RealSR. EDVR is a model designed for video super resolution. For more details, see the paper, EDVR: Video Restoration with Enhanced Deformable Convolutional Networks (https://arxiv.org/abs/1905.02716). The interface imposes 2x super resolution on the input video. The recommended video format is mp4. +> +> *Note: The interface is only available in static graph, add the following codes to enable static graph before using it: +> +> ``` +> import paddle +> paddle.enable_static() #enable static graph +> paddle.disable_static() #disable static graph +> ``` +> +> **Parameter** +> +> ``` +> from ppgan.apps import EDVRPredictor +> sr = EDVRPredictor() +> # test a video file +> sr.run("docs/imgs/test.mp4") +> ``` +> **参数** +> +> > - output (str): path of the output image, default: output. Note that the path should be set as output/EDVR. +> > - weight_path (str): path of the model, default: None,pre-trained integral model will then be automatically downloaded. 
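+>
+> A minimal end-to-end sketch combining the static-graph note above with the example in this section (the input path is a placeholder):
+>
+> ```python
+> import paddle
+> from ppgan.apps import EDVRPredictor
+>
+> paddle.enable_static()                       # EDVR currently only runs in static graph mode
+> sr = EDVRPredictor()                         # pre-trained weights are downloaded automatically
+> frames_path, video_path = sr.run("docs/imgs/test.mp4")   # results are saved under output/EDVR
+> paddle.disable_static()                      # switch back to dynamic graph when done
+> ```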
+```python +run(video_path) +``` + +> The execution interface after building the instance. +> **Parameter** +> +> > - video_path (str): path of the video files. +> +> **Return Value** +> +> > - tuple(str, str): the former is the save path of each frame of the video after super resolution, the latter is the save path of the video after super resolution. + + +## ppgan.apps.DAINPredictor + +```python +ppgan.apps.DAINPredictor(output='output', weight_path=None,time_step=None, use_gpu=True, key_frame_thread=0,remove_duplicates=False) +``` + +> Build the instance of DAIN model. DAIN supports video frame interpolation, producing videos with higher frame rate. For more details, see the paper, DAIN: Depth-Aware Video Frame interpolation (https://arxiv.org/abs/1904.00830). +> +> *Note: The interface is only available in static graph, add the following codes to enable static graph before using it: +> +> ``` +> import paddle +> paddle.enable_static() #enable static graph +> paddle.disable_static() #disable static graph +> ``` +> +> **Example** +> +> ``` +> from ppgan.apps import DAINPredictor +> dain = DAINPredictor(time_step=0.5) # With no defualt value, time_step need to be manually specified +> # test a video file +> dain.run("docs/imgs/test.mp4") +> ``` +> **Parameters** +> +> > - output_path (str): path of the predicted output, default: output. Note that the path should be set as output/DAIN. +> > - weight_path (str): path of the model, default: None, pre-trained integral model will then be automatically downloaded. +> > - time_step (float): the frame rate changes by a factor of 1./time_step, e.g. 2x frames if time_step is 0.5 and 4x frames if it is 0.25. +> > - use_gpu (bool): whether to make predictions by using GPU, default: True. +> > - remove_duplicates (bool): whether to remove duplicates, default: False. +```python +run(video_path) +``` + +> The execution interface after building the instance. +> **Parameters** +> +> > - video_path (str): path of the video file. +> +> **Return Value** +> +> > - tuple(str, str): for video input, frame_path is the save path of the image after colorizing each frame of the video, and out_path is the save path of the colorized video. + + +## ppgan.apps.FirstOrderPredictor + +```python +ppgan.apps.FirstOrderPredictor(output='output', weight_path=None,config=None, relative=False, adapt_scale=False,find_best_frame=False, best_frame=None) +``` + +> Build the instance of FirstOrder model. The model is dedicated to Image Animation, i.e., generating a video sequence so that an object in a source image is animated according to the motion of a driving video. +> +> For more details, see paper, First Order Motion Model for Image Animation (https://arxiv.org/abs/2003.00196) . +> +> **Example** +> +> ``` +> from ppgan.apps import FirstOrderPredictor +> animate = FirstOrderPredictor() +> # test a video file +> animate.run("source.png","driving.mp4") +> ``` +> **Parameters** +> +> > - output_path (str): path of the predicted output, default: output. Note that the path should be set as output/result.mp4. +> > - weight_path (str): path of the model, default: None, pre-trained integral model will then be automatically downloaded. +> > - config (dict|str|None): model configuration, it can be a dictionary type or a YML file, and the default value None is adopted. When the weight is None by default, the config also needs to adopt the default value None. otherwise, the configuration here should be consistent with the corresponding weight. 
+> > - relative (bool): indicate whether the relative or absolute coordinates of key points in the video are used in the program, default: False. +> > - adapt_scale (bool): adapt movement scale based on convex hull of key points, default: False. +> > - find_best_frame (bool): whether to start generating from the frame that best matches the source image, which exclusively applies to face applications and requires libraries with face alignment. +> > - best_frame (int): set the number of the starting frame, default: None, that is, starting from the first frame(counting from 1). +```python +run(source_image,driving_video) +``` + +> The execution interface after building the instance, the predicted video is save in output/result.mp4. +> **Parameters** +> +> > - source_image (str): input the source image。 +> > - driving_video (str): input the driving video, mp4 format recommended. +> +> **Return Value** +> +> > None. +## ppgan.apps.FaceParsePredictor + +```pyhton +ppgan.apps.FaceParsePredictor(output_path='output') +``` +> Build the instance of the face parsing model. The model is devoted to address the task of distributing a pixel-wise label to each semantic components (e.g. hair, lips, nose, ears, etc.) in accordance with the input facial image. The task proceeds with the help of BiseNet. +> +> For more details, see the paper, BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation (https://arxiv.org/abs/1808.00897v1). +> +> *Note: dlib package is needed for this interface, use the following codes to install it: +> +> ``` +> pip install dlib +> ``` +> It may take long to install this package under Windows, please be patient. +> +> **Parameters:** +> +> > - input_image: path of the input image to be parsed +> > - output_path: path of the output to be saved +> **Example:** +> +> ``` +> from ppgan.apps import FaceParsePredictor +> parser = FaceParsePredictor() +> parser.run('docs/imgs/face.png') +> ``` +> **Return Value:** +> +> > - mask(numpy.ndarray): return the mask matrix of the parsed facial components, data type: numpy.ndarray. +## ppgan.apps.AnimeGANPredictor + +```pyhton +ppgan.apps.AnimeGANPredictor(output_path='output_dir',weight_path=None,use_adjust_brightness=True) +``` +> Adopt the AnimeGAN v2 to realize the animation of scenery images. +> +> For more details, see the paper, AnimeGAN: A Novel Lightweight GAN for Photo Animation (https://link.springer.com/chapter/10.1007/978-981-15-5577-0_18). +> **Parameters:** +> +> > - input_image: path of the input image to be parsed. +> **Example:** +> +> ``` +> from ppgan.apps import AnimeGANPredictor +> predictor = AnimeGANPredictor() +> predictor.run('docs/imgs/animeganv2_test.jpg') +> ``` +> **Return Value:** +> +> > - anime_image(numpy.ndarray): return the stylized scenery image. + +## ppgan.apps.MiDaSPredictor + +```pyhton +ppgan.apps.MiDaSPredictor(output=None, weight_path=None) +``` + +> MiDaSv2 is a monocular depth estimation model (see https://github.com/intel-isl/MiDaS). Monocular depth estimation is a method used to compute depth from a singe RGB image. +> +> For more details, see the paper Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer (https://arxiv.org/abs/1907.01341v3). 
+> **Example** +> +> ```python +> from ppgan.apps import MiDaSPredictor +> # if set output, will write depth pfm and png file in output/MiDaS +> model = MiDaSPredictor() +> prediction = model.run() +> ``` +> +> Color display of the depth image: +> +> ```python +> import numpy as np +> import PIL.Image as Image +> import matplotlib as mpl +> import matplotlib.cm as cm +> +> vmax = np.percentile(prediction, 95) +> normalizer = mpl.colors.Normalize(vmin=prediction.min(), vmax=vmax) +> mapper = cm.ScalarMappable(norm=normalizer, cmap='magma') +> colormapped_im = (mapper.to_rgba(prediction)[:, :, :3] * 255).astype(np.uint8) +> im = Image.fromarray(colormapped_im) +> im.save('test_disp.jpeg') +> ``` +> +> **Parameters:** +> +> > - output (str): path of the output, if it is None, no pfm and png depth image will be saved. +> > - weight_path (str): path of the model, default: None, pre-trained integral model will then be automatically downloaded. +> **Return Value:** +> +> > - prediction (numpy.ndarray): return the prediction. +> > - pfm_f (str): return the save path of pfm files if the output path is set. +> > - png_f (str): return the save path of png files if the output path is set. + +## ppgan.apps.Wav2LipPredictor + +```python +ppgan.apps.Wav2LipPredictor(face=None, ausio_seq=None, outfile=None) +``` + +> Build the instance for the Wav2Lip model, which is used for lip generation, i.e., achieving the synchronization of lip movements on a talking face video and the voice from an input audio. +> +> For more details, see the paper, A Lip Sync Expert Is All You Need for Speech to Lip Generation In the Wild (http://arxiv.org/abs/2008.10010). +> +> **Example** +> +> ``` +> from ppgan.apps import Wav2LipPredictor +> import ppgan +> predictor = Wav2LipPredictor() +> predictor.run('/home/aistudio/先烈.jpeg', '/home/aistudio/pp_guangquan_zhenzhu46s.mp4','wav2lip') +> ``` +> **Parameters:** +> - face (str): path of images or videos containing human face. +> - audio_seq (str): path of the input audio, any processable format in ffmpeg is supported, including `.wav`, `.mp3`, `.m4a` etc. +> - outfile (str): path of the output video file. +>**Return Value** +> +>> None diff --git a/docs/en_US/industrial_solution/photo_color_en.md b/docs/en_US/industrial_solution/photo_color_en.md index 16ef14b..83c58b7 100644 --- a/docs/en_US/industrial_solution/photo_color_en.md +++ b/docs/en_US/industrial_solution/photo_color_en.md @@ -1,9 +1,9 @@ # Image Colorization -PaddleGAN provides [DeOldify](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsdeoldifypredictor) model for image colorization. +PaddleGAN provides [DeOldify](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsdeoldifypredictor) model for image colorization. ## DeOldifyPredictor -[DeOldify](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsdeoldifypredictor) generates the adversarial network with a self-attentive mechanism. The generator is a U-NET structured network with better effects in image/video coloring. +[DeOldify](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsdeoldifypredictor) generates the adversarial network with a self-attentive mechanism. The generator is a U-NET structured network with better effects in image/video coloring.
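A minimal usage sketch based on the API reference above (the input path is a placeholder; for an image input, `run` returns the colorized image together with its save path):

```python
from ppgan.apps import DeOldifyPredictor

deoldify = DeOldifyPredictor(render_factor=32)                 # artistic=True is also available
pred_img, out_path = deoldify.run("docs/imgs/test_old.jpeg")   # results are saved under output/DeOldify
```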
@@ -41,4 +41,3 @@ deoldify.run("/home/aistudio/先烈.jpg") #原图片所在路径 **1. [Old Beijing City Video Restoration](https://aistudio.baidu.com/aistudio/projectdetail/1161285)** **2. [PaddleGAN ❤️ 520 Edition](https://aistudio.baidu.com/aistudio/projectdetail/1956943?channelType=0&channel=0)** - diff --git a/docs/zh_CN/industrial_solution/photo_color_cn.md b/docs/zh_CN/industrial_solution/photo_color_cn.md index db8872c..5427b80 100644 --- a/docs/zh_CN/industrial_solution/photo_color_cn.md +++ b/docs/zh_CN/industrial_solution/photo_color_cn.md @@ -1,9 +1,9 @@ # 图片上色 -针对图片的上色,PaddleGAN提供了[DeOldify](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsdeoldifypredictor)模型。 +针对图片的上色,PaddleGAN提供了[DeOldify](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsdeoldifypredictor)模型。 ## DeOldifyPredictor -[DeOldify](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsdeoldifypredictor)采用自注意力机制的生成对抗网络,生成器是一个U-NET结构的网络。在图像/视频的上色方面有着较好的效果。 +[DeOldify](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsdeoldifypredictor)采用自注意力机制的生成对抗网络,生成器是一个U-NET结构的网络。在图像/视频的上色方面有着较好的效果。
diff --git a/docs/zh_CN/industrial_solution/photo_sr_cn.md b/docs/zh_CN/industrial_solution/photo_sr_cn.md index 18895bb..f87010a 100644 --- a/docs/zh_CN/industrial_solution/photo_sr_cn.md +++ b/docs/zh_CN/industrial_solution/photo_sr_cn.md @@ -5,7 +5,7 @@ [完整模型教程](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/tutorials/single_image_super_resolution.md) -[RealSR](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsrealsrpredictor)模型通过估计各种模糊内核以及实际噪声分布,为现实世界的图像设计一种新颖的真实图片降采样框架。基于该降采样框架,可以获取与真实世界图像共享同一域的低分辨率图像。并且提出了一个旨在提高感知度的真实世界超分辨率模型。对合成噪声数据和真实世界图像进行的大量实验表明,该模型能够有效降低了噪声并提高了视觉质量。 +[RealSR](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsrealsrpredictor)模型通过估计各种模糊内核以及实际噪声分布,为现实世界的图像设计一种新颖的真实图片降采样框架。基于该降采样框架,可以获取与真实世界图像共享同一域的低分辨率图像。并且提出了一个旨在提高感知度的真实世界超分辨率模型。对合成噪声数据和真实世界图像进行的大量实验表明,该模型能够有效降低了噪声并提高了视觉质量。
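A minimal usage sketch based on the RealSR API reference (the input path is a placeholder; per the API doc, the input image should be smaller than 1000x1000 px):

```python
from ppgan.apps import RealSRPredictor

sr = RealSRPredictor()                                  # pre-trained weights are downloaded automatically
pred_img, out_path = sr.run("docs/imgs/test_sr.jpeg")   # 4x super resolution; results are saved under output/RealSR
```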
@@ -35,7 +35,7 @@ deep_remaster.run("docs/imgs/先烈.jpg") #原图片所在路径 --process_order DeepRemaster \ #对原视频处理的顺序 --output output_dir #成品视频所在的路径 ``` -## ESRGAN +## ESRGAN [完整模型教程](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/tutorials/single_image_super_resolution.md) @@ -46,7 +46,7 @@ deep_remaster.run("docs/imgs/先烈.jpg") #原图片所在路径 | esrgan_psnr_x4 | DIV2K | [esrgan_psnr_x4](https://paddlegan.bj.bcebos.com/models/esrgan_psnr_x4.pdparams) | esrgan_x4 | DIV2K | [esrgan_x4](https://paddlegan.bj.bcebos.com/models/esrgan_x4.pdparams) -## LESRCNN +## LESRCNN [完整模型教程](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/tutorials/single_image_super_resolution.md) diff --git a/docs/zh_CN/industrial_solution/video_color_cn.md b/docs/zh_CN/industrial_solution/video_color_cn.md index ea90043..09e1c6b 100644 --- a/docs/zh_CN/industrial_solution/video_color_cn.md +++ b/docs/zh_CN/industrial_solution/video_color_cn.md @@ -1,9 +1,9 @@ # 视频上色 -针对视频上色,PaddleGAN提供两种上色模型:[DeOldify](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsdeoldifypredictor)与[DeepRemaster](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsdeepremasterpredictor)。 +针对视频上色,PaddleGAN提供两种上色模型:[DeOldify](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsdeoldifypredictor)与[DeepRemaster](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsdeepremasterpredictor)。 ## DeOldifyPredictor -[DeOldify](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsdeoldifypredictor)采用自注意力机制的生成对抗网络,生成器是一个U-NET结构的网络。在图像/视频的上色方面有着较好的效果。 +[DeOldify](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsdeoldifypredictor)采用自注意力机制的生成对抗网络,生成器是一个U-NET结构的网络。在图像/视频的上色方面有着较好的效果。
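A minimal usage sketch for video colorization with DeOldify, based on the API reference (the input path is a placeholder; for a video input, `run` returns the directory of colorized frames and the path of the colorized video):

```python
from ppgan.apps import DeOldifyPredictor

deoldify = DeOldifyPredictor(render_factor=32)
frames_path, out_path = deoldify.run("/home/aistudio/Peking_input360p_clip6_5s.mp4")
```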
@@ -38,7 +38,7 @@ deoldify.run("/home/aistudio/Peking_input360p_clip6_5s.mp4") #原视频所在路 ## DeepRemasterPredictor -[DeepRemaster](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsdeepremasterpredictor) 模型目前只能用于对视频上色,基于时空卷积神经网络和自注意力机制。并且能够根据输入的任意数量的参考帧对视频中的每一帧图片进行上色。 +[DeepRemaster](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsdeepremasterpredictor) 模型目前只能用于对视频上色,基于时空卷积神经网络和自注意力机制。并且能够根据输入的任意数量的参考帧对视频中的每一帧图片进行上色。 ![](../../imgs/remaster_network.png)
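A minimal usage sketch for DeepRemaster based on the API reference (the input path is a placeholder; `reference_dir` may point to a folder of reference frames or be left as None):

```python
from ppgan.apps import DeepRemasterPredictor

remaster = DeepRemasterPredictor(colorization=True, reference_dir=None)   # colorization=False runs restoration only
frames_path, out_path = remaster.run("/home/aistudio/Peking_input360p_clip6_5s.mp4")
```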
diff --git a/docs/zh_CN/industrial_solution/video_frame_cn.md b/docs/zh_CN/industrial_solution/video_frame_cn.md index de13585..a31e9d2 100644 --- a/docs/zh_CN/industrial_solution/video_frame_cn.md +++ b/docs/zh_CN/industrial_solution/video_frame_cn.md @@ -1,10 +1,10 @@ # 视频补帧 -针对老视频的流畅度提升,PaddleGAN提供了[DAIN](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsdainpredictor)模型接口。 +针对老视频的流畅度提升,PaddleGAN提供了[DAIN](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsdainpredictor)模型接口。 ## DAIN -[DAIN](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsdainpredictor)模型通过探索深度的信息来显式检测遮挡。并且开发了一个深度感知的流投影层来合成中间流。在视频补帧方面有较好的效果。 +[DAIN](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsdainpredictor)模型通过探索深度的信息来显式检测遮挡。并且开发了一个深度感知的流投影层来合成中间流。在视频补帧方面有较好的效果。
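A minimal usage sketch based on the DAIN API reference (the input path is a placeholder; `time_step` has no default and must be set, and the interface currently requires static graph mode):

```python
import paddle
from ppgan.apps import DAINPredictor

paddle.enable_static()                       # DAIN currently only runs in static graph mode
dain = DAINPredictor(time_step=0.5)          # 0.5 doubles the frame rate, 0.25 quadruples it
frames_path, out_path = dain.run("docs/imgs/test.mp4")
paddle.disable_static()
```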
@@ -26,7 +26,7 @@ ppgan.apps.DAINPredictor( - `remove_duplicates (bool,可选的)`: 是否删除重复帧,默认值:`False`. ### 使用方式 -**1. API预测** +**1. API预测** 除了定义输入视频路径外,此接口还需定义time_step,同时,目前API预测方式只支持在静态图下运行,需加上启动静态图命令,后续会支持动态图,敬请期待~ diff --git a/docs/zh_CN/industrial_solution/video_sr_cn.md b/docs/zh_CN/industrial_solution/video_sr_cn.md index 0ea7641..0526fe0 100644 --- a/docs/zh_CN/industrial_solution/video_sr_cn.md +++ b/docs/zh_CN/industrial_solution/video_sr_cn.md @@ -6,7 +6,7 @@ [完整模型教程](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/tutorials/single_image_super_resolution.md) -[RealSR](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsrealsrpredictor)模型通过估计各种模糊内核以及实际噪声分布,为现实世界的图像设计一种新颖的真实图片降采样框架。基于该降采样框架,可以获取与真实世界图像共享同一域的低分辨率图像。并且提出了一个旨在提高感知度的真实世界超分辨率模型。对合成噪声数据和真实世界图像进行的大量实验表明,该模型能够有效降低了噪声并提高了视觉质量。 +[RealSR](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsrealsrpredictor)模型通过估计各种模糊内核以及实际噪声分布,为现实世界的图像设计一种新颖的真实图片降采样框架。基于该降采样框架,可以获取与真实世界图像共享同一域的低分辨率图像。并且提出了一个旨在提高感知度的真实世界超分辨率模型。对合成噪声数据和真实世界图像进行的大量实验表明,该模型能够有效降低了噪声并提高了视觉质量。
@@ -44,9 +44,9 @@ deep_remaster.run("/home/aistudio/Peking_input360p_clip6_5s.mp4") #原视频所 [完整模型教程](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/tutorials/video_super_resolution.md) -[EDVR](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsedvrpredictor)模型提出了一个新颖的视频具有增强可变形卷积的还原框架:第一,为了处理大动作而设计的一个金字塔,级联和可变形(PCD)对齐模块,使用可变形卷积以从粗到精的方式在特征级别完成对齐;第二,提出时空注意力机制(TSA)融合模块,在时间和空间上都融合了注意机制,用以增强复原的功能。 +[EDVR](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsedvrpredictor)模型提出了一个新颖的视频具有增强可变形卷积的还原框架:第一,为了处理大动作而设计的一个金字塔,级联和可变形(PCD)对齐模块,使用可变形卷积以从粗到精的方式在特征级别完成对齐;第二,提出时空注意力机制(TSA)融合模块,在时间和空间上都融合了注意机制,用以增强复原的功能。 -[EDVR](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsedvrpredictor)模型是一个基于连续帧的超分模型,能够有效利用帧间的信息,速度比[RealSR](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md#ppganappsrealsrpredictor)模型快。 +[EDVR](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsedvrpredictor)模型是一个基于连续帧的超分模型,能够有效利用帧间的信息,速度比[RealSR](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md#ppganappsrealsrpredictor)模型快。
@@ -63,7 +63,7 @@ ppgan.apps.EDVRPredictor(output='output', weight_path=None) ### 使用方式 -**1. API预测** +**1. API预测** 目前API预测方式只支持在静态图下运行,需加上启动静态图命令,后续会支持动态图,敬请期待~ diff --git a/paddlegan-wechaty-demo/README.md b/paddlegan-wechaty-demo/README.md index 4d3e15f..821052f 100644 --- a/paddlegan-wechaty-demo/README.md +++ b/paddlegan-wechaty-demo/README.md @@ -40,7 +40,7 @@ 3. 安装项目所需的PaddleGAN的module - 此demo以`first order motion`为示例,其他module根据项目所需安装,更多的模型请查阅[PaddleGAN模型API接口说明](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/zh_CN/apis/apps.md)。 + 此demo以`first order motion`为示例,其他module根据项目所需安装,更多的模型请查阅[PaddleGAN模型API接口说明](https://github.com/PaddlePaddle/PaddleGAN/blob/develop/docs/en_US/apis/apps.md)。 4. Set token for your bot @@ -82,9 +82,9 @@ async def on_message(msg: Message): Message Handler for the Bot """ ### PaddleGAN fom - + global fom, source, driving - + if isinstance(msg.text(), str) and len(msg.text()) > 0 \ and msg._payload.type == MessageType.MESSAGE_TYPE_TEXT \ and "fom" in msg.text(): @@ -95,21 +95,21 @@ async def on_message(msg: Message): if fom and msg._payload.type == MessageType.MESSAGE_TYPE_IMAGE: fileBox = await msg.to_file_box() await fileBox.to_file("test_fom/source.jpg", True) - + bot_response = u"好嘞, 收到图片" await msg.say(bot_response) source = True - + if fom and msg._payload.type == MessageType.MESSAGE_TYPE_VIDEO: fileBox = await msg.to_file_box() await fileBox.to_file("test_fom/driving.mp4", True) - + bot_response = u"好嘞, 收到驱动视频" await msg.say(bot_response) driving = True - + if source and driving: bot_response = u"都收到啦,稍等一下嘿嘿" await msg.say(bot_response) @@ -120,7 +120,7 @@ async def on_message(msg: Message): file_box = FileBox.from_file("test_fom/result.mp4") await msg.say(file_box) - ### + ### ``` @@ -128,4 +128,3 @@ async def on_message(msg: Message):
- -- GitLab