diff --git a/modules/audio/keyword_spotting/kwmlp_speech_commands/README.md b/modules/audio/keyword_spotting/kwmlp_speech_commands/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3e3357a09341435e312e2c12314e1e85b30cff53 --- /dev/null +++ b/modules/audio/keyword_spotting/kwmlp_speech_commands/README.md @@ -0,0 +1,98 @@ +# kwmlp_speech_commands + +|模型名称|kwmlp_speech_commands| +| :--- | :---: | +|类别|语音-关键词识别| +|网络|Keyword-MLP| +|数据集|Google Speech Commands V2| +|是否支持Fine-tuning|否| +|模型大小|1.6MB| +|最新更新日期|2022-01-04| +|数据指标|ACC 97.56%| + +## 一、模型基本信息 + +### 模型介绍 + +kwmlp_speech_commands采用了 [Keyword-MLP](https://arxiv.org/pdf/2110.07749v1.pdf) 的轻量级模型结构,并在 [Google Speech Commands V2](https://arxiv.org/abs/1804.03209) 数据集上进行了预训练,在其测试集的测试结果为 ACC 97.56%。 + +
+ + +更多详情请参考 +- [Speech Commands: A Dataset for Limited-Vocabulary Speech Recognition](https://arxiv.org/abs/1804.03209) +- [ATTENTION-FREE KEYWORD SPOTTING](https://arxiv.org/pdf/2110.07749v1.pdf) +- [Keyword-MLP](https://github.com/AI-Research-BD/Keyword-MLP) + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install kwmlp_speech_commands + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + ```python + import paddlehub as hub + + model = hub.Module( + name='kwmlp_speech_commands', + version='1.0.0') + + # 通过下列链接可下载示例音频 + # https://paddlehub.bj.bcebos.com/paddlehub_dev/go.wav + + # Keyword spotting + score, label = model.keyword_recognize('no.wav') + print(score, label) + # [0.89498246] no + score, label = model.keyword_recognize('go.wav') + print(score, label) + # [0.8997176] go + score, label = model.keyword_recognize('one.wav') + print(score, label) + # [0.88598305] one + ``` + +- ### 2、API + - ```python + def keyword_recognize( + wav: os.PathLike, + ) + ``` + - 检测音频中包含的关键词。 + + - **参数** + + - `wav`:输入的包含关键词的音频文件,格式为`*.wav`。 + + - **返回** + + - 输出结果的得分和对应的关键词标签。 + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install kwmlp_speech_commands + ``` diff --git a/modules/audio/keyword_spotting/kwmlp_speech_commands/__init__.py b/modules/audio/keyword_spotting/kwmlp_speech_commands/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..185a92b8d94d3426d616c0624f0f2ee04339349e --- /dev/null +++ b/modules/audio/keyword_spotting/kwmlp_speech_commands/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/modules/audio/keyword_spotting/kwmlp_speech_commands/feature.py b/modules/audio/keyword_spotting/kwmlp_speech_commands/feature.py new file mode 100644 index 0000000000000000000000000000000000000000..900a2eab26e4414b487d6d7858381ee302a107e8 --- /dev/null +++ b/modules/audio/keyword_spotting/kwmlp_speech_commands/feature.py @@ -0,0 +1,59 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
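+# Feature front-end for the KW-MLP keyword spotting model: compute_mfcc() extracts
+# a log-Mel spectrogram with paddleaudio's MelSpectrogram, then applies the
+# orthonormal DCT matrix built by create_dct() to turn a batch of waveforms of
+# shape (B, T) into MFCC features of shape (B, n_mels, L).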
+import math + +import numpy as np +import paddle +import paddleaudio + + +def create_dct(n_mfcc: int, n_mels: int, norm: str = 'ortho'): + n = paddle.arange(float(n_mels)) + k = paddle.arange(float(n_mfcc)).unsqueeze(1) + dct = paddle.cos(math.pi / float(n_mels) * (n + 0.5) * k) # size (n_mfcc, n_mels) + if norm is None: + dct *= 2.0 + else: + assert norm == "ortho" + dct[0] *= 1.0 / math.sqrt(2.0) + dct *= math.sqrt(2.0 / float(n_mels)) + return dct.t() + + +def compute_mfcc( + x: paddle.Tensor, + sr: int = 16000, + n_mels: int = 40, + n_fft: int = 480, + win_length: int = 480, + hop_length: int = 160, + f_min: float = 0.0, + f_max: float = None, + center: bool = False, + top_db: float = 80.0, + norm: str = 'ortho', +): + fbank = paddleaudio.features.spectrum.MelSpectrogram( + sr=sr, + n_mels=n_mels, + n_fft=n_fft, + win_length=win_length, + hop_length=hop_length, + f_min=0.0, + f_max=f_max, + center=center)(x) # waveforms batch ~ (B, T) + log_fbank = paddleaudio.features.spectrum.power_to_db(fbank, top_db=top_db) + dct_matrix = create_dct(n_mfcc=n_mels, n_mels=n_mels, norm=norm) + mfcc = paddle.matmul(log_fbank.transpose((0, 2, 1)), dct_matrix).transpose((0, 2, 1)) # (B, n_mels, L) + return mfcc diff --git a/modules/audio/keyword_spotting/kwmlp_speech_commands/kwmlp.py b/modules/audio/keyword_spotting/kwmlp_speech_commands/kwmlp.py new file mode 100644 index 0000000000000000000000000000000000000000..df8c37e6fb14d1f5c43b0080410d3288406cfa77 --- /dev/null +++ b/modules/audio/keyword_spotting/kwmlp_speech_commands/kwmlp.py @@ -0,0 +1,143 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
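+# Paddle implementation of the Keyword-MLP (gMLP-style) architecture: MFCC frames
+# are projected into patch embeddings, passed through a stack of residual gMLPBlock
+# layers whose SpatialGatingUnit mixes information across time steps, then
+# mean-pooled and classified by the final linear layer of KW_MLP.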
+import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +class Residual(nn.Layer): + def __init__(self, fn): + super().__init__() + self.fn = fn + + def forward(self, x): + return self.fn(x) + x + + +class PreNorm(nn.Layer): + def __init__(self, dim, fn): + super().__init__() + self.fn = fn + self.norm = nn.LayerNorm(dim) + + def forward(self, x, **kwargs): + x = self.norm(x) + return self.fn(x, **kwargs) + + +class PostNorm(nn.Layer): + def __init__(self, dim, fn): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.fn = fn + + def forward(self, x, **kwargs): + return self.norm(self.fn(x, **kwargs)) + + +class SpatialGatingUnit(nn.Layer): + def __init__(self, dim, dim_seq, act=nn.Identity(), init_eps=1e-3): + super().__init__() + dim_out = dim // 2 + + self.norm = nn.LayerNorm(dim_out) + self.proj = nn.Conv1D(dim_seq, dim_seq, 1) + + self.act = act + + init_eps /= dim_seq + + def forward(self, x): + res, gate = x.split(2, axis=-1) + gate = self.norm(gate) + + weight, bias = self.proj.weight, self.proj.bias + gate = F.conv1d(gate, weight, bias) + + return self.act(gate) * res + + +class gMLPBlock(nn.Layer): + def __init__(self, *, dim, dim_ff, seq_len, act=nn.Identity()): + super().__init__() + self.proj_in = nn.Sequential(nn.Linear(dim, dim_ff), nn.GELU()) + + self.sgu = SpatialGatingUnit(dim_ff, seq_len, act) + self.proj_out = nn.Linear(dim_ff // 2, dim) + + def forward(self, x): + x = self.proj_in(x) + x = self.sgu(x) + x = self.proj_out(x) + return x + + +class Rearrange(nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x): + x = x.transpose([0, 1, 3, 2]).squeeze(1) + return x + + +class Reduce(nn.Layer): + def __init__(self, axis=1): + super().__init__() + self.axis = axis + + def forward(self, x): + x = x.mean(axis=self.axis, keepdim=False) + return x + + +class KW_MLP(nn.Layer): + """Keyword-MLP.""" + + def __init__(self, + input_res=[40, 98], + patch_res=[40, 1], + num_classes=35, + dim=64, + depth=12, + ff_mult=4, + channels=1, + prob_survival=0.9, + pre_norm=False, + **kwargs): + super().__init__() + image_height, image_width = input_res + patch_height, patch_width = patch_res + assert (image_height % patch_height) == 0 and ( + image_width % patch_width) == 0, 'image height and width must be divisible by patch size' + num_patches = (image_height // patch_height) * (image_width // patch_width) + + P_Norm = PreNorm if pre_norm else PostNorm + + dim_ff = dim * ff_mult + + self.to_patch_embed = nn.Sequential(Rearrange(), nn.Linear(channels * patch_height * patch_width, dim)) + + self.prob_survival = prob_survival + + self.layers = nn.LayerList( + [Residual(P_Norm(dim, gMLPBlock(dim=dim, dim_ff=dim_ff, seq_len=num_patches))) for i in range(depth)]) + + self.to_logits = nn.Sequential(nn.LayerNorm(dim), Reduce(axis=1), nn.Linear(dim, num_classes)) + + def forward(self, x): + x = self.to_patch_embed(x) + layers = self.layers + x = nn.Sequential(*layers)(x) + return self.to_logits(x) diff --git a/modules/audio/keyword_spotting/kwmlp_speech_commands/module.py b/modules/audio/keyword_spotting/kwmlp_speech_commands/module.py new file mode 100644 index 0000000000000000000000000000000000000000..34342de360f2927236429baaa41789993038bd5a --- /dev/null +++ b/modules/audio/keyword_spotting/kwmlp_speech_commands/module.py @@ -0,0 +1,86 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import numpy as np +import paddle +import paddleaudio + +from .feature import compute_mfcc +from .kwmlp import KW_MLP +from paddlehub.module.module import moduleinfo +from paddlehub.utils.log import logger + + +@moduleinfo( + name="kwmlp_speech_commands", + version="1.0.0", + summary="", + author="paddlepaddle", + author_email="", + type="audio/language_identification") +class KWS(paddle.nn.Layer): + def __init__(self): + super(KWS, self).__init__() + ckpt_path = os.path.join(self.directory, 'assets', 'model.pdparams') + label_path = os.path.join(self.directory, 'assets', 'label.txt') + + self.label_list = [] + with open(label_path, 'r') as f: + for l in f: + self.label_list.append(l.strip()) + + self.sr = 16000 + model_conf = { + 'input_res': [40, 98], + 'patch_res': [40, 1], + 'num_classes': 35, + 'channels': 1, + 'dim': 64, + 'depth': 12, + 'pre_norm': False, + 'prob_survival': 0.9, + } + self.model = KW_MLP(**model_conf) + self.model.set_state_dict(paddle.load(ckpt_path)) + self.model.eval() + + def load_audio(self, wav): + wav = os.path.abspath(os.path.expanduser(wav)) + assert os.path.isfile(wav), 'Please check wav file: {}'.format(wav) + waveform, _ = paddleaudio.load(wav, sr=self.sr, mono=True, normal=False) + return waveform + + def keyword_recognize(self, wav): + waveform = self.load_audio(wav) + + # fix_length to 1s + if len(waveform) > self.sr: + waveform = waveform[:self.sr] + else: + waveform = np.pad(waveform, (0, self.sr - len(waveform))) + + logits = self(paddle.to_tensor(waveform)).reshape([-1]) + probs = paddle.nn.functional.softmax(logits) + idx = paddle.argmax(probs) + return probs[idx].numpy(), self.label_list[idx] + + def forward(self, x): + if len(x.shape) == 1: # x: waveform tensors with (B, T) shape + x = x.unsqueeze(0) + + mfcc = compute_mfcc(x).unsqueeze(1) # (B, C, n_mels, L) + logits = self.model(mfcc).squeeze(1) + + return logits diff --git a/modules/audio/keyword_spotting/kwmlp_speech_commands/requirements.txt b/modules/audio/keyword_spotting/kwmlp_speech_commands/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..defe617fa36bc5ab7b72438034c785ee2b3ac3c9 --- /dev/null +++ b/modules/audio/keyword_spotting/kwmlp_speech_commands/requirements.txt @@ -0,0 +1 @@ +paddleaudio==0.1.0 diff --git a/modules/audio/language_identification/ecapa_tdnn_common_language/README.md b/modules/audio/language_identification/ecapa_tdnn_common_language/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f648202e4c97c1a1707bb1f0a0d98949735f047d --- /dev/null +++ b/modules/audio/language_identification/ecapa_tdnn_common_language/README.md @@ -0,0 +1,100 @@ +# ecapa_tdnn_common_language + +|模型名称|ecapa_tdnn_common_language| +| :--- | :---: | +|类别|语音-语言识别| +|网络|ECAPA-TDNN| +|数据集|CommonLanguage| +|是否支持Fine-tuning|否| +|模型大小|79MB| +|最新更新日期|2021-12-30| +|数据指标|ACC 84.9%| + +## 一、模型基本信息 + +### 模型介绍 + +ecapa_tdnn_common_language采用了[ECAPA-TDNN](https://arxiv.org/abs/2005.07143)的模型结构,并在[CommonLanguage](https://zenodo.org/record/5036977/)数据集上进行了预训练,在其测试集的测试结果为 ACC 84.9%。 + +
+ + +更多详情请参考 +- [CommonLanguage](https://zenodo.org/record/5036977#.Yc19b5Mzb0o) +- [ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in TDNN Based Speaker Verification](https://arxiv.org/pdf/2005.07143.pdf) +- [The SpeechBrain Toolkit](https://github.com/speechbrain/speechbrain) + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install ecapa_tdnn_common_language + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + ```python + import paddlehub as hub + + model = hub.Module( + name='ecapa_tdnn_common_language', + version='1.0.0') + + # 通过下列链接可下载示例音频 + # https://paddlehub.bj.bcebos.com/paddlehub_dev/zh.wav + # https://paddlehub.bj.bcebos.com/paddlehub_dev/en.wav + # https://paddlehub.bj.bcebos.com/paddlehub_dev/it.wav + + # Language Identification + score, label = model.language_identify('zh.wav') + print(score, label) + # array([0.6214552], dtype=float32), 'Chinese_China' + score, label = model.language_identify('en.wav') + print(score, label) + # array([0.37193954], dtype=float32), 'English' + score, label = model.language_identify('it.wav') + print(score, label) + # array([0.46913534], dtype=float32), 'Italian' + ``` + +- ### 2、API + - ```python + def language_identify( + wav: os.PathLike, + ) + ``` + - 判断输入人声音频的语言类别。 + + - **参数** + + - `wav`:输入的说话人的音频文件,格式为`*.wav`。 + + - **返回** + + - 输出结果的得分和对应的语言类别。 + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install ecapa_tdnn_common_language + ``` diff --git a/modules/audio/language_identification/ecapa_tdnn_common_language/__init__.py b/modules/audio/language_identification/ecapa_tdnn_common_language/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..185a92b8d94d3426d616c0624f0f2ee04339349e --- /dev/null +++ b/modules/audio/language_identification/ecapa_tdnn_common_language/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/modules/audio/language_identification/ecapa_tdnn_common_language/ecapa_tdnn.py b/modules/audio/language_identification/ecapa_tdnn_common_language/ecapa_tdnn.py new file mode 100644 index 0000000000000000000000000000000000000000..950a9df7dd465abf56b30b5594e9b16adb49e573 --- /dev/null +++ b/modules/audio/language_identification/ecapa_tdnn_common_language/ecapa_tdnn.py @@ -0,0 +1,406 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import os + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def length_to_mask(length, max_len=None, dtype=None): + assert len(length.shape) == 1 + + if max_len is None: + max_len = length.max().astype('int').item() # using arange to generate mask + mask = paddle.arange(max_len, dtype=length.dtype).expand((len(length), max_len)) < length.unsqueeze(1) + + if dtype is None: + dtype = length.dtype + + mask = paddle.to_tensor(mask, dtype=dtype) + return mask + + +class Conv1d(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding="same", + dilation=1, + groups=1, + bias=True, + padding_mode="reflect", + ): + super(Conv1d, self).__init__() + + self.kernel_size = kernel_size + self.stride = stride + self.dilation = dilation + self.padding = padding + self.padding_mode = padding_mode + + self.conv = nn.Conv1D( + in_channels, + out_channels, + self.kernel_size, + stride=self.stride, + padding=0, + dilation=self.dilation, + groups=groups, + bias_attr=bias, + ) + + def forward(self, x): + if self.padding == "same": + x = self._manage_padding(x, self.kernel_size, self.dilation, self.stride) + else: + raise ValueError("Padding must be 'same'. Got {self.padding}") + + return self.conv(x) + + def _manage_padding(self, x, kernel_size: int, dilation: int, stride: int): + L_in = x.shape[-1] # Detecting input shape + padding = self._get_padding_elem(L_in, stride, kernel_size, dilation) # Time padding + x = F.pad(x, padding, mode=self.padding_mode, data_format="NCL") # Applying padding + return x + + def _get_padding_elem(self, L_in: int, stride: int, kernel_size: int, dilation: int): + if stride > 1: + n_steps = math.ceil(((L_in - kernel_size * dilation) / stride) + 1) + L_out = stride * (n_steps - 1) + kernel_size * dilation + padding = [kernel_size // 2, kernel_size // 2] + else: + L_out = (L_in - dilation * (kernel_size - 1) - 1) // stride + 1 + + padding = [(L_in - L_out) // 2, (L_in - L_out) // 2] + + return padding + + +class BatchNorm1d(nn.Layer): + def __init__( + self, + input_size, + eps=1e-05, + momentum=0.9, + weight_attr=None, + bias_attr=None, + data_format='NCL', + use_global_stats=None, + ): + super(BatchNorm1d, self).__init__() + + self.norm = nn.BatchNorm1D( + input_size, + epsilon=eps, + momentum=momentum, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format, + use_global_stats=use_global_stats, + ) + + def forward(self, x): + x_n = self.norm(x) + return x_n + + +class TDNNBlock(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + dilation, + activation=nn.ReLU, + ): + super(TDNNBlock, self).__init__() + self.conv = Conv1d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + dilation=dilation, + ) + self.activation = activation() + self.norm = BatchNorm1d(input_size=out_channels) + + def forward(self, x): + return self.norm(self.activation(self.conv(x))) + + +class Res2NetBlock(nn.Layer): + def __init__(self, in_channels, out_channels, scale=8, dilation=1): + super(Res2NetBlock, self).__init__() + assert 
in_channels % scale == 0 + assert out_channels % scale == 0 + + in_channel = in_channels // scale + hidden_channel = out_channels // scale + + self.blocks = nn.LayerList( + [TDNNBlock(in_channel, hidden_channel, kernel_size=3, dilation=dilation) for i in range(scale - 1)]) + self.scale = scale + + def forward(self, x): + y = [] + for i, x_i in enumerate(paddle.chunk(x, self.scale, axis=1)): + if i == 0: + y_i = x_i + elif i == 1: + y_i = self.blocks[i - 1](x_i) + else: + y_i = self.blocks[i - 1](x_i + y_i) + y.append(y_i) + y = paddle.concat(y, axis=1) + return y + + +class SEBlock(nn.Layer): + def __init__(self, in_channels, se_channels, out_channels): + super(SEBlock, self).__init__() + + self.conv1 = Conv1d(in_channels=in_channels, out_channels=se_channels, kernel_size=1) + self.relu = paddle.nn.ReLU() + self.conv2 = Conv1d(in_channels=se_channels, out_channels=out_channels, kernel_size=1) + self.sigmoid = paddle.nn.Sigmoid() + + def forward(self, x, lengths=None): + L = x.shape[-1] + if lengths is not None: + mask = length_to_mask(lengths * L, max_len=L) + mask = mask.unsqueeze(1) + total = mask.sum(axis=2, keepdim=True) + s = (x * mask).sum(axis=2, keepdim=True) / total + else: + s = x.mean(axis=2, keepdim=True) + + s = self.relu(self.conv1(s)) + s = self.sigmoid(self.conv2(s)) + + return s * x + + +class AttentiveStatisticsPooling(nn.Layer): + def __init__(self, channels, attention_channels=128, global_context=True): + super().__init__() + + self.eps = 1e-12 + self.global_context = global_context + if global_context: + self.tdnn = TDNNBlock(channels * 3, attention_channels, 1, 1) + else: + self.tdnn = TDNNBlock(channels, attention_channels, 1, 1) + self.tanh = nn.Tanh() + self.conv = Conv1d(in_channels=attention_channels, out_channels=channels, kernel_size=1) + + def forward(self, x, lengths=None): + C, L = x.shape[1], x.shape[2] # KP: (N, C, L) + + def _compute_statistics(x, m, axis=2, eps=self.eps): + mean = (m * x).sum(axis) + std = paddle.sqrt((m * (x - mean.unsqueeze(axis)).pow(2)).sum(axis).clip(eps)) + return mean, std + + if lengths is None: + lengths = paddle.ones([x.shape[0]]) + + # Make binary mask of shape [N, 1, L] + mask = length_to_mask(lengths * L, max_len=L) + mask = mask.unsqueeze(1) + + # Expand the temporal context of the pooling layer by allowing the + # self-attention to look at global properties of the utterance. 
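+        # Concretely, when global_context is enabled the masked per-utterance mean
+        # and std are tiled along the time axis and concatenated with x, so the
+        # attention weights can condition on utterance-level statistics.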
+ if self.global_context: + total = mask.sum(axis=2, keepdim=True).astype('float32') + mean, std = _compute_statistics(x, mask / total) + mean = mean.unsqueeze(2).tile((1, 1, L)) + std = std.unsqueeze(2).tile((1, 1, L)) + attn = paddle.concat([x, mean, std], axis=1) + else: + attn = x + + # Apply layers + attn = self.conv(self.tanh(self.tdnn(attn))) + + # Filter out zero-paddings + attn = paddle.where(mask.tile((1, C, 1)) == 0, paddle.ones_like(attn) * float("-inf"), attn) + + attn = F.softmax(attn, axis=2) + mean, std = _compute_statistics(x, attn) + + # Append mean and std of the batch + pooled_stats = paddle.concat((mean, std), axis=1) + pooled_stats = pooled_stats.unsqueeze(2) + + return pooled_stats + + +class SERes2NetBlock(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + res2net_scale=8, + se_channels=128, + kernel_size=1, + dilation=1, + activation=nn.ReLU, + ): + super(SERes2NetBlock, self).__init__() + self.out_channels = out_channels + self.tdnn1 = TDNNBlock( + in_channels, + out_channels, + kernel_size=1, + dilation=1, + activation=activation, + ) + self.res2net_block = Res2NetBlock(out_channels, out_channels, res2net_scale, dilation) + self.tdnn2 = TDNNBlock( + out_channels, + out_channels, + kernel_size=1, + dilation=1, + activation=activation, + ) + self.se_block = SEBlock(out_channels, se_channels, out_channels) + + self.shortcut = None + if in_channels != out_channels: + self.shortcut = Conv1d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + ) + + def forward(self, x, lengths=None): + residual = x + if self.shortcut: + residual = self.shortcut(x) + + x = self.tdnn1(x) + x = self.res2net_block(x) + x = self.tdnn2(x) + x = self.se_block(x, lengths) + + return x + residual + + +class ECAPA_TDNN(nn.Layer): + def __init__( + self, + input_size, + lin_neurons=192, + activation=nn.ReLU, + channels=[512, 512, 512, 512, 1536], + kernel_sizes=[5, 3, 3, 3, 1], + dilations=[1, 2, 3, 4, 1], + attention_channels=128, + res2net_scale=8, + se_channels=128, + global_context=True, + ): + + super(ECAPA_TDNN, self).__init__() + assert len(channels) == len(kernel_sizes) + assert len(channels) == len(dilations) + self.channels = channels + self.blocks = nn.LayerList() + self.emb_size = lin_neurons + + # The initial TDNN layer + self.blocks.append(TDNNBlock( + input_size, + channels[0], + kernel_sizes[0], + dilations[0], + activation, + )) + + # SE-Res2Net layers + for i in range(1, len(channels) - 1): + self.blocks.append( + SERes2NetBlock( + channels[i - 1], + channels[i], + res2net_scale=res2net_scale, + se_channels=se_channels, + kernel_size=kernel_sizes[i], + dilation=dilations[i], + activation=activation, + )) + + # Multi-layer feature aggregation + self.mfa = TDNNBlock( + channels[-1], + channels[-1], + kernel_sizes[-1], + dilations[-1], + activation, + ) + + # Attentive Statistical Pooling + self.asp = AttentiveStatisticsPooling( + channels[-1], + attention_channels=attention_channels, + global_context=global_context, + ) + self.asp_bn = BatchNorm1d(input_size=channels[-1] * 2) + + # Final linear transformation + self.fc = Conv1d( + in_channels=channels[-1] * 2, + out_channels=self.emb_size, + kernel_size=1, + ) + + def forward(self, x, lengths=None): + xl = [] + for layer in self.blocks: + try: + x = layer(x, lengths=lengths) + except TypeError: + x = layer(x) + xl.append(x) + + # Multi-layer feature aggregation + x = paddle.concat(xl[1:], axis=1) + x = self.mfa(x) + + # Attentive Statistical Pooling + x = self.asp(x, lengths=lengths) + x = 
self.asp_bn(x) + + # Final linear transformation + x = self.fc(x) + + return x + + +class Classifier(nn.Layer): + def __init__(self, backbone, num_class, dtype=paddle.float32): + super(Classifier, self).__init__() + self.backbone = backbone + self.params = nn.ParameterList( + [paddle.create_parameter(shape=[num_class, self.backbone.emb_size], dtype=dtype)]) + + def forward(self, x): + emb = self.backbone(x.transpose([0, 2, 1])).transpose([0, 2, 1]) + logits = F.linear(F.normalize(emb.squeeze(1)), F.normalize(self.params[0]).transpose([1, 0])) + + return logits diff --git a/modules/audio/language_identification/ecapa_tdnn_common_language/feature.py b/modules/audio/language_identification/ecapa_tdnn_common_language/feature.py new file mode 100644 index 0000000000000000000000000000000000000000..09b930ebfd4cd56c9be1bc107f4ca6fc5f948027 --- /dev/null +++ b/modules/audio/language_identification/ecapa_tdnn_common_language/feature.py @@ -0,0 +1,112 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import paddle +import paddleaudio +from paddleaudio.features.spectrum import hz_to_mel +from paddleaudio.features.spectrum import mel_to_hz +from paddleaudio.features.spectrum import power_to_db +from paddleaudio.features.spectrum import Spectrogram +from paddleaudio.features.window import get_window + + +def compute_fbank_matrix(sample_rate: int = 16000, + n_fft: int = 400, + n_mels: int = 80, + f_min: int = 0.0, + f_max: int = 8000.0): + mel = paddle.linspace(hz_to_mel(f_min, htk=True), hz_to_mel(f_max, htk=True), n_mels + 2, dtype=paddle.float32) + hz = mel_to_hz(mel, htk=True) + + band = hz[1:] - hz[:-1] + band = band[:-1] + f_central = hz[1:-1] + + n_stft = n_fft // 2 + 1 + all_freqs = paddle.linspace(0, sample_rate // 2, n_stft) + all_freqs_mat = all_freqs.tile([f_central.shape[0], 1]) + + f_central_mat = f_central.tile([all_freqs_mat.shape[1], 1]).transpose([1, 0]) + band_mat = band.tile([all_freqs_mat.shape[1], 1]).transpose([1, 0]) + + slope = (all_freqs_mat - f_central_mat) / band_mat + left_side = slope + 1.0 + right_side = -slope + 1.0 + + fbank_matrix = paddle.maximum(paddle.zeros_like(left_side), paddle.minimum(left_side, right_side)) + + return fbank_matrix + + +def compute_log_fbank( + x: paddle.Tensor, + sample_rate: int = 16000, + n_fft: int = 400, + hop_length: int = 160, + win_length: int = 400, + n_mels: int = 80, + window: str = 'hamming', + center: bool = True, + pad_mode: str = 'constant', + f_min: float = 0.0, + f_max: float = None, + top_db: float = 80.0, +): + + if f_max is None: + f_max = sample_rate / 2 + + spect = Spectrogram( + n_fft=n_fft, hop_length=hop_length, win_length=win_length, window=window, center=center, pad_mode=pad_mode)(x) + + fbank_matrix = compute_fbank_matrix( + sample_rate=sample_rate, + n_fft=n_fft, + n_mels=n_mels, + f_min=f_min, + f_max=f_max, + ) + fbank = paddle.matmul(fbank_matrix, spect) + log_fbank = power_to_db(fbank, top_db=top_db).transpose([0, 2, 1]) + return log_fbank + + +def 
compute_stats(x: paddle.Tensor, mean_norm: bool = True, std_norm: bool = False, eps: float = 1e-10): + if mean_norm: + current_mean = paddle.mean(x, axis=0) + else: + current_mean = paddle.to_tensor([0.0]) + + if std_norm: + current_std = paddle.std(x, axis=0) + else: + current_std = paddle.to_tensor([1.0]) + + current_std = paddle.maximum(current_std, eps * paddle.ones_like(current_std)) + + return current_mean, current_std + + +def normalize( + x: paddle.Tensor, + global_mean: paddle.Tensor = None, + global_std: paddle.Tensor = None, +): + + for i in range(x.shape[0]): # (B, ...) + if global_mean is None and global_std is None: + mean, std = compute_stats(x[i]) + x[i] = (x[i] - mean) / std + else: + x[i] = (x[i] - global_mean) / global_std + return x diff --git a/modules/audio/language_identification/ecapa_tdnn_common_language/module.py b/modules/audio/language_identification/ecapa_tdnn_common_language/module.py new file mode 100644 index 0000000000000000000000000000000000000000..1950deaf1b5843c5f69269bb6982691739b0332e --- /dev/null +++ b/modules/audio/language_identification/ecapa_tdnn_common_language/module.py @@ -0,0 +1,85 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import re +from typing import List +from typing import Union + +import numpy as np +import paddle +import paddleaudio + +from .ecapa_tdnn import Classifier +from .ecapa_tdnn import ECAPA_TDNN +from .feature import compute_log_fbank +from .feature import normalize +from paddlehub.module.module import moduleinfo +from paddlehub.utils.log import logger + + +@moduleinfo( + name="ecapa_tdnn_common_language", + version="1.0.0", + summary="", + author="paddlepaddle", + author_email="", + type="audio/language_identification") +class LanguageIdentification(paddle.nn.Layer): + def __init__(self): + super(LanguageIdentification, self).__init__() + ckpt_path = os.path.join(self.directory, 'assets', 'model.pdparams') + label_path = os.path.join(self.directory, 'assets', 'label.txt') + + self.label_list = [] + with open(label_path, 'r') as f: + for l in f: + self.label_list.append(l.strip()) + + self.sr = 16000 + model_conf = { + 'input_size': 80, + 'channels': [1024, 1024, 1024, 1024, 3072], + 'kernel_sizes': [5, 3, 3, 3, 1], + 'dilations': [1, 2, 3, 4, 1], + 'attention_channels': 128, + 'lin_neurons': 192 + } + self.model = Classifier( + backbone=ECAPA_TDNN(**model_conf), + num_class=45, + ) + self.model.set_state_dict(paddle.load(ckpt_path)) + self.model.eval() + + def load_audio(self, wav): + wav = os.path.abspath(os.path.expanduser(wav)) + assert os.path.isfile(wav), 'Please check wav file: {}'.format(wav) + waveform, _ = paddleaudio.load(wav, sr=self.sr, mono=True, normal=False) + return waveform + + def language_identify(self, wav): + waveform = self.load_audio(wav) + logits = self(paddle.to_tensor(waveform)).reshape([-1]) + idx = paddle.argmax(logits) + return logits[idx].numpy(), self.label_list[idx] + + def forward(self, x): + if len(x.shape) == 
1: + x = x.unsqueeze(0) + + fbank = compute_log_fbank(x) # x: waveform tensors with (B, T) shape + norm_fbank = normalize(fbank) + logits = self.model(norm_fbank).squeeze(1) + + return logits diff --git a/modules/audio/language_identification/ecapa_tdnn_common_language/requirements.txt b/modules/audio/language_identification/ecapa_tdnn_common_language/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..defe617fa36bc5ab7b72438034c785ee2b3ac3c9 --- /dev/null +++ b/modules/audio/language_identification/ecapa_tdnn_common_language/requirements.txt @@ -0,0 +1 @@ +paddleaudio==0.1.0 diff --git a/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/README.md b/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/README.md new file mode 100644 index 0000000000000000000000000000000000000000..70da7371cc411e535a4b53fd74a46c9a2521a016 --- /dev/null +++ b/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/README.md @@ -0,0 +1,128 @@ +# ecapa_tdnn_voxceleb + +|模型名称|ecapa_tdnn_voxceleb| +| :--- | :---: | +|类别|语音-声纹识别| +|网络|ECAPA-TDNN| +|数据集|VoxCeleb| +|是否支持Fine-tuning|否| +|模型大小|79MB| +|最新更新日期|2021-12-30| +|数据指标|EER 0.69%| + +## 一、模型基本信息 + +### 模型介绍 + +ecapa_tdnn_voxceleb采用了[ECAPA-TDNN](https://arxiv.org/abs/2005.07143)的模型结构,并在[VoxCeleb](http://www.robots.ox.ac.uk/~vgg/data/voxceleb/)数据集上进行了预训练,在VoxCeleb1的声纹识别测试集([veri_test.txt](https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test.txt))上的测试结果为 EER 0.69%,达到了该数据集的SOTA。 + +
+ + + +更多详情请参考 +- [VoxCeleb: a large-scale speaker identification dataset](https://www.robots.ox.ac.uk/~vgg/publications/2017/Nagrani17/nagrani17.pdf) +- [ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in TDNN Based Speaker Verification](https://arxiv.org/pdf/2005.07143.pdf) +- [The SpeechBrain Toolkit](https://github.com/speechbrain/speechbrain) + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install ecapa_tdnn_voxceleb + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + ```python + import paddlehub as hub + + model = hub.Module( + name='ecapa_tdnn_voxceleb', + threshold=0.25, + version='1.0.0') + + # 通过下列链接可下载示例音频 + # https://paddlehub.bj.bcebos.com/paddlehub_dev/sv1.wav + # https://paddlehub.bj.bcebos.com/paddlehub_dev/sv2.wav + + # Speaker Embedding + embedding = model.speaker_embedding('sv1.wav') + print(embedding.shape) + # (192,) + + # Speaker Verification + score, pred = model.speaker_verify('sv1.wav', 'sv2.wav') + print(score, pred) + # [0.16354457], [False] + ``` + +- ### 2、API + - ```python + def __init__( + threshold: float, + ) + ``` + - 初始化声纹模型,确定判别阈值。 + + - **参数** + + - `threshold`:设定模型判别声纹相似度的得分阈值,默认为 0.25。 + + - ```python + def speaker_embedding( + wav: os.PathLike, + ) + ``` + - 获取输入音频的声纹特征 + + - **参数** + + - `wav`:输入的说话人的音频文件,格式为`*.wav`。 + + - **返回** + + - 输出纬度为 (192,) 的声纹特征向量。 + + - ```python + def speaker_verify( + wav1: os.PathLike, + wav2: os.PathLike, + ) + ``` + - 对比两段音频,分别计算其声纹特征的相似度得分,并判断是否为同一说话人。 + + - **参数** + + - `wav1`:输入的说话人1的音频文件,格式为`*.wav`。 + - `wav2`:输入的说话人2的音频文件,格式为`*.wav`。 + + - **返回** + + - 返回声纹相似度得分[-1, 1]和预测结果。 + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install ecapa_tdnn_voxceleb + ``` diff --git a/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/__init__.py b/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..185a92b8d94d3426d616c0624f0f2ee04339349e --- /dev/null +++ b/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/ecapa_tdnn.py b/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/ecapa_tdnn.py new file mode 100644 index 0000000000000000000000000000000000000000..59950860985414aaca3a46657cd11cd9645c223c --- /dev/null +++ b/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/ecapa_tdnn.py @@ -0,0 +1,392 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import os + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def length_to_mask(length, max_len=None, dtype=None): + assert len(length.shape) == 1 + + if max_len is None: + max_len = length.max().astype('int').item() # using arange to generate mask + mask = paddle.arange(max_len, dtype=length.dtype).expand((len(length), max_len)) < length.unsqueeze(1) + + if dtype is None: + dtype = length.dtype + + mask = paddle.to_tensor(mask, dtype=dtype) + return mask + + +class Conv1d(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding="same", + dilation=1, + groups=1, + bias=True, + padding_mode="reflect", + ): + super(Conv1d, self).__init__() + + self.kernel_size = kernel_size + self.stride = stride + self.dilation = dilation + self.padding = padding + self.padding_mode = padding_mode + + self.conv = nn.Conv1D( + in_channels, + out_channels, + self.kernel_size, + stride=self.stride, + padding=0, + dilation=self.dilation, + groups=groups, + bias_attr=bias, + ) + + def forward(self, x): + if self.padding == "same": + x = self._manage_padding(x, self.kernel_size, self.dilation, self.stride) + else: + raise ValueError("Padding must be 'same'. 
Got {self.padding}") + + return self.conv(x) + + def _manage_padding(self, x, kernel_size: int, dilation: int, stride: int): + L_in = x.shape[-1] # Detecting input shape + padding = self._get_padding_elem(L_in, stride, kernel_size, dilation) # Time padding + x = F.pad(x, padding, mode=self.padding_mode, data_format="NCL") # Applying padding + return x + + def _get_padding_elem(self, L_in: int, stride: int, kernel_size: int, dilation: int): + if stride > 1: + n_steps = math.ceil(((L_in - kernel_size * dilation) / stride) + 1) + L_out = stride * (n_steps - 1) + kernel_size * dilation + padding = [kernel_size // 2, kernel_size // 2] + else: + L_out = (L_in - dilation * (kernel_size - 1) - 1) // stride + 1 + + padding = [(L_in - L_out) // 2, (L_in - L_out) // 2] + + return padding + + +class BatchNorm1d(nn.Layer): + def __init__( + self, + input_size, + eps=1e-05, + momentum=0.9, + weight_attr=None, + bias_attr=None, + data_format='NCL', + use_global_stats=None, + ): + super(BatchNorm1d, self).__init__() + + self.norm = nn.BatchNorm1D( + input_size, + epsilon=eps, + momentum=momentum, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format, + use_global_stats=use_global_stats, + ) + + def forward(self, x): + x_n = self.norm(x) + return x_n + + +class TDNNBlock(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + dilation, + activation=nn.ReLU, + ): + super(TDNNBlock, self).__init__() + self.conv = Conv1d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + dilation=dilation, + ) + self.activation = activation() + self.norm = BatchNorm1d(input_size=out_channels) + + def forward(self, x): + return self.norm(self.activation(self.conv(x))) + + +class Res2NetBlock(nn.Layer): + def __init__(self, in_channels, out_channels, scale=8, dilation=1): + super(Res2NetBlock, self).__init__() + assert in_channels % scale == 0 + assert out_channels % scale == 0 + + in_channel = in_channels // scale + hidden_channel = out_channels // scale + + self.blocks = nn.LayerList( + [TDNNBlock(in_channel, hidden_channel, kernel_size=3, dilation=dilation) for i in range(scale - 1)]) + self.scale = scale + + def forward(self, x): + y = [] + for i, x_i in enumerate(paddle.chunk(x, self.scale, axis=1)): + if i == 0: + y_i = x_i + elif i == 1: + y_i = self.blocks[i - 1](x_i) + else: + y_i = self.blocks[i - 1](x_i + y_i) + y.append(y_i) + y = paddle.concat(y, axis=1) + return y + + +class SEBlock(nn.Layer): + def __init__(self, in_channels, se_channels, out_channels): + super(SEBlock, self).__init__() + + self.conv1 = Conv1d(in_channels=in_channels, out_channels=se_channels, kernel_size=1) + self.relu = paddle.nn.ReLU() + self.conv2 = Conv1d(in_channels=se_channels, out_channels=out_channels, kernel_size=1) + self.sigmoid = paddle.nn.Sigmoid() + + def forward(self, x, lengths=None): + L = x.shape[-1] + if lengths is not None: + mask = length_to_mask(lengths * L, max_len=L) + mask = mask.unsqueeze(1) + total = mask.sum(axis=2, keepdim=True) + s = (x * mask).sum(axis=2, keepdim=True) / total + else: + s = x.mean(axis=2, keepdim=True) + + s = self.relu(self.conv1(s)) + s = self.sigmoid(self.conv2(s)) + + return s * x + + +class AttentiveStatisticsPooling(nn.Layer): + def __init__(self, channels, attention_channels=128, global_context=True): + super().__init__() + + self.eps = 1e-12 + self.global_context = global_context + if global_context: + self.tdnn = TDNNBlock(channels * 3, attention_channels, 1, 1) + else: + self.tdnn = TDNNBlock(channels, 
attention_channels, 1, 1) + self.tanh = nn.Tanh() + self.conv = Conv1d(in_channels=attention_channels, out_channels=channels, kernel_size=1) + + def forward(self, x, lengths=None): + C, L = x.shape[1], x.shape[2] # KP: (N, C, L) + + def _compute_statistics(x, m, axis=2, eps=self.eps): + mean = (m * x).sum(axis) + std = paddle.sqrt((m * (x - mean.unsqueeze(axis)).pow(2)).sum(axis).clip(eps)) + return mean, std + + if lengths is None: + lengths = paddle.ones([x.shape[0]]) + + # Make binary mask of shape [N, 1, L] + mask = length_to_mask(lengths * L, max_len=L) + mask = mask.unsqueeze(1) + + # Expand the temporal context of the pooling layer by allowing the + # self-attention to look at global properties of the utterance. + if self.global_context: + total = mask.sum(axis=2, keepdim=True).astype('float32') + mean, std = _compute_statistics(x, mask / total) + mean = mean.unsqueeze(2).tile((1, 1, L)) + std = std.unsqueeze(2).tile((1, 1, L)) + attn = paddle.concat([x, mean, std], axis=1) + else: + attn = x + + # Apply layers + attn = self.conv(self.tanh(self.tdnn(attn))) + + # Filter out zero-paddings + attn = paddle.where(mask.tile((1, C, 1)) == 0, paddle.ones_like(attn) * float("-inf"), attn) + + attn = F.softmax(attn, axis=2) + mean, std = _compute_statistics(x, attn) + + # Append mean and std of the batch + pooled_stats = paddle.concat((mean, std), axis=1) + pooled_stats = pooled_stats.unsqueeze(2) + + return pooled_stats + + +class SERes2NetBlock(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + res2net_scale=8, + se_channels=128, + kernel_size=1, + dilation=1, + activation=nn.ReLU, + ): + super(SERes2NetBlock, self).__init__() + self.out_channels = out_channels + self.tdnn1 = TDNNBlock( + in_channels, + out_channels, + kernel_size=1, + dilation=1, + activation=activation, + ) + self.res2net_block = Res2NetBlock(out_channels, out_channels, res2net_scale, dilation) + self.tdnn2 = TDNNBlock( + out_channels, + out_channels, + kernel_size=1, + dilation=1, + activation=activation, + ) + self.se_block = SEBlock(out_channels, se_channels, out_channels) + + self.shortcut = None + if in_channels != out_channels: + self.shortcut = Conv1d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + ) + + def forward(self, x, lengths=None): + residual = x + if self.shortcut: + residual = self.shortcut(x) + + x = self.tdnn1(x) + x = self.res2net_block(x) + x = self.tdnn2(x) + x = self.se_block(x, lengths) + + return x + residual + + +class ECAPA_TDNN(nn.Layer): + def __init__( + self, + input_size, + lin_neurons=192, + activation=nn.ReLU, + channels=[512, 512, 512, 512, 1536], + kernel_sizes=[5, 3, 3, 3, 1], + dilations=[1, 2, 3, 4, 1], + attention_channels=128, + res2net_scale=8, + se_channels=128, + global_context=True, + ): + + super(ECAPA_TDNN, self).__init__() + assert len(channels) == len(kernel_sizes) + assert len(channels) == len(dilations) + self.channels = channels + self.blocks = nn.LayerList() + self.emb_size = lin_neurons + + # The initial TDNN layer + self.blocks.append(TDNNBlock( + input_size, + channels[0], + kernel_sizes[0], + dilations[0], + activation, + )) + + # SE-Res2Net layers + for i in range(1, len(channels) - 1): + self.blocks.append( + SERes2NetBlock( + channels[i - 1], + channels[i], + res2net_scale=res2net_scale, + se_channels=se_channels, + kernel_size=kernel_sizes[i], + dilation=dilations[i], + activation=activation, + )) + + # Multi-layer feature aggregation + self.mfa = TDNNBlock( + channels[-1], + channels[-1], + kernel_sizes[-1], + 
dilations[-1], + activation, + ) + + # Attentive Statistical Pooling + self.asp = AttentiveStatisticsPooling( + channels[-1], + attention_channels=attention_channels, + global_context=global_context, + ) + self.asp_bn = BatchNorm1d(input_size=channels[-1] * 2) + + # Final linear transformation + self.fc = Conv1d( + in_channels=channels[-1] * 2, + out_channels=self.emb_size, + kernel_size=1, + ) + + def forward(self, x, lengths=None): + xl = [] + for layer in self.blocks: + try: + x = layer(x, lengths=lengths) + except TypeError: + x = layer(x) + xl.append(x) + + # Multi-layer feature aggregation + x = paddle.concat(xl[1:], axis=1) + x = self.mfa(x) + + # Attentive Statistical Pooling + x = self.asp(x, lengths=lengths) + x = self.asp_bn(x) + + # Final linear transformation + x = self.fc(x) + + return x diff --git a/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/feature.py b/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/feature.py new file mode 100644 index 0000000000000000000000000000000000000000..09b930ebfd4cd56c9be1bc107f4ca6fc5f948027 --- /dev/null +++ b/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/feature.py @@ -0,0 +1,112 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
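+# Log-Mel filterbank front-end for the ECAPA-TDNN speaker embedding model:
+# compute_log_fbank() converts a batch of waveforms into log-Mel features using a
+# Spectrogram and the triangular filterbank from compute_fbank_matrix(), and
+# normalize() applies per-utterance (or provided global) mean/std normalization.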
+import paddle +import paddleaudio +from paddleaudio.features.spectrum import hz_to_mel +from paddleaudio.features.spectrum import mel_to_hz +from paddleaudio.features.spectrum import power_to_db +from paddleaudio.features.spectrum import Spectrogram +from paddleaudio.features.window import get_window + + +def compute_fbank_matrix(sample_rate: int = 16000, + n_fft: int = 400, + n_mels: int = 80, + f_min: int = 0.0, + f_max: int = 8000.0): + mel = paddle.linspace(hz_to_mel(f_min, htk=True), hz_to_mel(f_max, htk=True), n_mels + 2, dtype=paddle.float32) + hz = mel_to_hz(mel, htk=True) + + band = hz[1:] - hz[:-1] + band = band[:-1] + f_central = hz[1:-1] + + n_stft = n_fft // 2 + 1 + all_freqs = paddle.linspace(0, sample_rate // 2, n_stft) + all_freqs_mat = all_freqs.tile([f_central.shape[0], 1]) + + f_central_mat = f_central.tile([all_freqs_mat.shape[1], 1]).transpose([1, 0]) + band_mat = band.tile([all_freqs_mat.shape[1], 1]).transpose([1, 0]) + + slope = (all_freqs_mat - f_central_mat) / band_mat + left_side = slope + 1.0 + right_side = -slope + 1.0 + + fbank_matrix = paddle.maximum(paddle.zeros_like(left_side), paddle.minimum(left_side, right_side)) + + return fbank_matrix + + +def compute_log_fbank( + x: paddle.Tensor, + sample_rate: int = 16000, + n_fft: int = 400, + hop_length: int = 160, + win_length: int = 400, + n_mels: int = 80, + window: str = 'hamming', + center: bool = True, + pad_mode: str = 'constant', + f_min: float = 0.0, + f_max: float = None, + top_db: float = 80.0, +): + + if f_max is None: + f_max = sample_rate / 2 + + spect = Spectrogram( + n_fft=n_fft, hop_length=hop_length, win_length=win_length, window=window, center=center, pad_mode=pad_mode)(x) + + fbank_matrix = compute_fbank_matrix( + sample_rate=sample_rate, + n_fft=n_fft, + n_mels=n_mels, + f_min=f_min, + f_max=f_max, + ) + fbank = paddle.matmul(fbank_matrix, spect) + log_fbank = power_to_db(fbank, top_db=top_db).transpose([0, 2, 1]) + return log_fbank + + +def compute_stats(x: paddle.Tensor, mean_norm: bool = True, std_norm: bool = False, eps: float = 1e-10): + if mean_norm: + current_mean = paddle.mean(x, axis=0) + else: + current_mean = paddle.to_tensor([0.0]) + + if std_norm: + current_std = paddle.std(x, axis=0) + else: + current_std = paddle.to_tensor([1.0]) + + current_std = paddle.maximum(current_std, eps * paddle.ones_like(current_std)) + + return current_mean, current_std + + +def normalize( + x: paddle.Tensor, + global_mean: paddle.Tensor = None, + global_std: paddle.Tensor = None, +): + + for i in range(x.shape[0]): # (B, ...) + if global_mean is None and global_std is None: + mean, std = compute_stats(x[i]) + x[i] = (x[i] - mean) / std + else: + x[i] = (x[i] - global_mean) / global_std + return x diff --git a/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/module.py b/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/module.py new file mode 100644 index 0000000000000000000000000000000000000000..11f7121a5f0a7eb2b330ffeedec821171bb30bef --- /dev/null +++ b/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/module.py @@ -0,0 +1,93 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import re +from typing import List +from typing import Union + +import numpy as np +import paddle +import paddleaudio + +from .ecapa_tdnn import ECAPA_TDNN +from .feature import compute_log_fbank +from .feature import normalize +from paddlehub.module.module import moduleinfo +from paddlehub.utils.log import logger + + +@moduleinfo( + name="ecapa_tdnn_voxceleb", + version="1.0.0", + summary="", + author="paddlepaddle", + author_email="", + type="audio/speaker_recognition") +class SpeakerRecognition(paddle.nn.Layer): + def __init__(self, threshold=0.25): + super(SpeakerRecognition, self).__init__() + global_stats_path = os.path.join(self.directory, 'assets', 'global_embedding_stats.npy') + ckpt_path = os.path.join(self.directory, 'assets', 'model.pdparams') + + self.sr = 16000 + self.threshold = threshold + model_conf = { + 'input_size': 80, + 'channels': [1024, 1024, 1024, 1024, 3072], + 'kernel_sizes': [5, 3, 3, 3, 1], + 'dilations': [1, 2, 3, 4, 1], + 'attention_channels': 128, + 'lin_neurons': 192 + } + self.model = ECAPA_TDNN(**model_conf) + self.model.set_state_dict(paddle.load(ckpt_path)) + self.model.eval() + + global_embedding_stats = np.load(global_stats_path, allow_pickle=True) + self.global_emb_mean = paddle.to_tensor(global_embedding_stats.item().get('global_emb_mean')) + self.global_emb_std = paddle.to_tensor(global_embedding_stats.item().get('global_emb_std')) + + self.similarity = paddle.nn.CosineSimilarity(axis=-1, eps=1e-6) + + def load_audio(self, wav): + wav = os.path.abspath(os.path.expanduser(wav)) + assert os.path.isfile(wav), 'Please check wav file: {}'.format(wav) + waveform, _ = paddleaudio.load(wav, sr=self.sr, mono=True, normal=False) + return waveform + + def speaker_embedding(self, wav): + waveform = self.load_audio(wav) + embedding = self(paddle.to_tensor(waveform)).reshape([-1]) + return embedding.numpy() + + def speaker_verify(self, wav1, wav2): + waveform1 = self.load_audio(wav1) + embedding1 = self(paddle.to_tensor(waveform1)).reshape([-1]) + + waveform2 = self.load_audio(wav2) + embedding2 = self(paddle.to_tensor(waveform2)).reshape([-1]) + + score = self.similarity(embedding1, embedding2).numpy() + return score, score > self.threshold + + def forward(self, x): + if len(x.shape) == 1: + x = x.unsqueeze(0) + + fbank = compute_log_fbank(x) # x: waveform tensors with (B, T) shape + norm_fbank = normalize(fbank) + embedding = self.model(norm_fbank.transpose([0, 2, 1])).transpose([0, 2, 1]) + norm_embedding = normalize(x=embedding, global_mean=self.global_emb_mean, global_std=self.global_emb_std) + + return norm_embedding diff --git a/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/requirements.txt b/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..defe617fa36bc5ab7b72438034c785ee2b3ac3c9 --- /dev/null +++ b/modules/audio/speaker_recognition/ecapa_tdnn_voxceleb/requirements.txt @@ -0,0 +1 @@ +paddleaudio==0.1.0 diff --git a/modules/image/Image_editing/colorization/deoldify/README.md b/modules/image/Image_editing/colorization/deoldify/README.md 
index a181b89bdcc802fa5c6129d5d466472e80bfb258..c4303720a52c02215250d23158798053659014f1 100644 --- a/modules/image/Image_editing/colorization/deoldify/README.md +++ b/modules/image/Image_editing/colorization/deoldify/README.md @@ -53,14 +53,14 @@ ## 三、模型API预测 - - ### 1、代码示例 + - ### 1、预测代码示例 - ```python - import paddlehub as hub + - ```python + import paddlehub as hub - model = hub.Module(name='deoldify') - model.predict('/PATH/TO/IMAGE/OR/VIDEO') - ``` + model = hub.Module(name='deoldify') + model.predict('/PATH/TO/IMAGE/OR/VIDEO') + ``` - ### 2、API diff --git a/modules/image/Image_editing/colorization/deoldify/README_en.md b/modules/image/Image_editing/colorization/deoldify/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..cbfcd6078a00e7dbf81b07c5a527e494dcad6093 --- /dev/null +++ b/modules/image/Image_editing/colorization/deoldify/README_en.md @@ -0,0 +1,171 @@ +# deoldify + +| Module Name |deoldify| +| :--- | :---: | +|Category|Image editing| +|Network |NoGAN| +|Dataset|ILSVRC 2012| +|Fine-tuning supported or not |No| +|Module Size |834MB| +|Data indicators|-| +|Latest update date |2021-04-13| + + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +
+ +- ### Module Introduction + + - Deoldify is a color rendering model for images and videos, which can restore color for black and white photos and videos. + + - For more information, please refer to: [deoldify](https://github.com/jantic/DeOldify) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + + - NOTE: This Module relies on ffmpeg, Please install ffmpeg before using this Module. + + ```shell + $ conda install x264=='1!152.20180717' ffmpeg=4.0.2 -c conda-forge + ``` + + +- ### 2、Installation + - ```shell + $ hub install deoldify + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + + - ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + + model = hub.Module(name='deoldify') + model.predict('/PATH/TO/IMAGE/OR/VIDEO') + ``` + + - ### 2、API + + - ```python + def predict(self, input): + ``` + + - Prediction API. + + - **Parameter** + + - input (str): Image path. + + - **Return** + + - If input is image path, the output is: + - pred_img(np.ndarray): image data, ndarray.shape is in the format [H, W, C], BGR. + - out_path(str): save path of images. + + - If input is video path, the output is : + - frame_pattern_combined(str): save path of frames from output video. + - vid_out_path(str): save path of output video. + + - ```python + def run_image(self, img): + ``` + - Prediction API for image. + + - **Parameter** + + - img (str|np.ndarray): Image data, str or ndarray. ndarray.shape is in the format [H, W, C], BGR. + + - **Return** + + - pred_img(np.ndarray): Ndarray.shape is in the format [H, W, C], BGR. + + - ```python + def run_video(self, video): + ``` + - Prediction API for video. + + - **Parameter** + + - video(str): Video path. + + - **Return** + + - frame_pattern_combined(str): Save path of frames from output video. + - vid_out_path(str): Save path of output video. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of coloring old photos or videos. + + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m deoldify + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result. 
+ + - ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # Send an HTTP request + org_im = cv2.imread('/PATH/TO/ORIGIN/IMAGE') + data = {'images':cv2_to_base64(org_im)} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/deoldify" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + img = base64_to_cv2(r.json()["results"]) + cv2.imwrite('/PATH/TO/SAVE/IMAGE', img) + ``` + + +## V. Release Note + +- 1.0.0 + + First release + +- 1.0.1 + + Adapt to paddlehub2.0 diff --git a/modules/image/Image_editing/colorization/photo_restoration/README.md b/modules/image/Image_editing/colorization/photo_restoration/README.md index e3a2d5fd3459e07a4045ccfb3f20b5774826e773..fbb6332c95babf6d3ce3c43343af7711217fc59c 100644 --- a/modules/image/Image_editing/colorization/photo_restoration/README.md +++ b/modules/image/Image_editing/colorization/photo_restoration/README.md @@ -51,17 +51,17 @@ ## 三、模型API预测 - - ### 1、代码示例 + - ### 1、预测代码示例 - ```python - import cv2 - import paddlehub as hub + - ```python + import cv2 + import paddlehub as hub - model = hub.Module(name='photo_restoration', visualization=True) - im = cv2.imread('/PATH/TO/IMAGE') - res = model.run_image(im) + model = hub.Module(name='photo_restoration', visualization=True) + im = cv2.imread('/PATH/TO/IMAGE') + res = model.run_image(im) - ``` + ``` - ### 2、API diff --git a/modules/image/Image_editing/colorization/photo_restoration/README_en.md b/modules/image/Image_editing/colorization/photo_restoration/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..1ff585bddd0dc54768fb168999cfdddac266a6f8 --- /dev/null +++ b/modules/image/Image_editing/colorization/photo_restoration/README_en.md @@ -0,0 +1,151 @@ +# photo_restoration + +|Module Name|photo_restoration| +| :--- | :---: | +|Category|Image editing| +|Network|deoldify and realsr| +|Fine-tuning supported or not|No| +|Module Size |64MB+834MB| +|Data indicators|-| +|Latest update date|2021-08-19| + + + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +
+ + + +- ### Module Introduction + + - Photo_restoration can restore old photos. It mainly consists of two parts: coloring and super-resolution. The coloring model is deoldify + , and super resolution model is realsr. Therefore, when using this model, please install deoldify and realsr in advance. + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + + - NOTE: This Module relies on ffmpeg, Please install ffmpeg before using this Module. + + ```shell + $ conda install x264=='1!152.20180717' ffmpeg=4.0.2 -c conda-forge + ``` + +- ### 2、Installation + + - ```shell + $ hub install photo_restoration + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + + - ```python + import cv2 + import paddlehub as hub + + model = hub.Module(name='photo_restoration', visualization=True) + im = cv2.imread('/PATH/TO/IMAGE') + res = model.run_image(im) + + ``` +- ### 2、API + + + - ```python + def run_image(self, + input, + model_select= ['Colorization', 'SuperResolution'], + save_path = 'photo_restoration'): + ``` + + - Predicition API, produce repaired photos. + + - **Parameter** + + - input (numpy.ndarray|str): Image data,numpy.ndarray or str. ndarray.shape is in the format [H, W, C], BGR. + + - model_select (list\[str\]): Mode selection,\['Colorization'\] only colorize the input image, \['SuperResolution'\] only increase the image resolution; + default is \['Colorization', 'SuperResolution'\]。 + + - save_path (str): Save path, default is 'photo_restoration'. + + - **Return** + + - output (numpy.ndarray): Restoration result,ndarray.shape is in the format [H, W, C], BGR. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of photo restoration. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m photo_restoration + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # Send an HTTP request + org_im = cv2.imread('PATH/TO/IMAGE') + data = {'images':cv2_to_base64(org_im), 'model_select': ['Colorization', 'SuperResolution']} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/photo_restoration" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + img = base64_to_cv2(r.json()["results"]) + cv2.imwrite('PATH/TO/SAVE/IMAGE', img) + ``` + + +## V. 
Release Note + +- 1.0.0 + + First release + +- 1.0.1 + + Adapt to paddlehub2.0 + diff --git a/modules/image/Image_editing/colorization/user_guided_colorization/README.md b/modules/image/Image_editing/colorization/user_guided_colorization/README.md index 390f04e1500e1d3d0ae1215f798bb9f7902f1fdc..d5d13144eed22656bf7e5fd12343b2fec6cf7b34 100644 --- a/modules/image/Image_editing/colorization/user_guided_colorization/README.md +++ b/modules/image/Image_editing/colorization/user_guided_colorization/README.md @@ -22,7 +22,7 @@

-  - user_guided_colorization 是基于''Real-Time User-Guided Image Colorization with Learned Deep Priors"的着色模型,该模型利用预先提供的着色块对图像进行着色。
+  - user_guided_colorization 是基于"Real-Time User-Guided Image Colorization with Learned Deep Priors"的着色模型,该模型利用预先提供的着色块对图像进行着色。

 ## 二、安装
diff --git a/modules/image/Image_editing/colorization/user_guided_colorization/README_en.md b/modules/image/Image_editing/colorization/user_guided_colorization/README_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..8e17592c87ca4ee428e98afc8478411803471cd8
--- /dev/null
+++ b/modules/image/Image_editing/colorization/user_guided_colorization/README_en.md
@@ -0,0 +1,205 @@
+# user_guided_colorization
+
+|Module Name|user_guided_colorization|
+| :--- | :---: |
+|Category|Image editing|
+|Network|Local and Global Hints Network|
+|Dataset|ILSVRC 2012|
+|Fine-tuning supported or not|Yes|
+|Module Size|131MB|
+|Data indicators|-|
+|Latest update date|2021-02-26|
+
+
+## I. Basic Information
+
+
+- ### Application Effect Display
+
+  - Sample results:
+ +- ### Module Introduction + + - User_guided_colorization is a colorization model based on "Real-Time User-Guided Image Colorization with Learned Deep Priors",this model uses pre-supplied coloring blocks to color the gray image. + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install user_guided_colorization + ``` + + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III. Module API Prediction + +- ### 1、Command line Prediction + + ```shell + $ hub run user_guided_colorization --input_path "/PATH/TO/IMAGE" + ``` + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + ```python + import paddle + import paddlehub as hub + + if __name__ == '__main__': + + model = hub.Module(name='user_guided_colorization') + model.set_config(prob=0.1) + result = model.predict(images=['/PATH/TO/IMAGE']) + ``` +- ### 3.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the user_guided_colorization model to fine-tune datasets such as [Canvas](../../docs/reference/datasets.md#class-hubdatasetsCanvas) by executing `python train.py`. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + import paddlehub.vision.transforms as T + + transform = T.Compose([T.Resize((256, 256), interpolation='NEAREST'), + T.RandomPaddingCrop(crop_size=176), + T.RGB2LAB()], to_rgb=True) + ``` + + - `transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + - ```python + from paddlehub.datasets import Canvas + + color_set = Canvas(transform=transform, mode='train') + ``` + + * `transforms`: Data preprocessing methods. + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + * `hub.datasets.Canvas()`: The dataset will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + + - Step3: Load the pre-trained model + + - ```python + model = hub.Module(name='user_guided_colorization', load_checkpoint=None) + model.set_config(classification=True, prob=1) + ``` + * `name`: Model name. + * `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + * `classification`: The model is trained by two mode. At the beginning, `classification` is set to True, which is used for shallow network training. In the later stage of training, set `classification` to False, which is used to train the output layer of the network. + * `prob`: The probability that a priori color block is not added to each input image, the default is 1, that is, no prior color block is added. For example, when `prob` is set to 0.9, the probability that there are two a priori color blocks on a picture is(1-0.9)*(1-0.9)*0.9=0.009. 
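+
+      - A quick way to sanity-check the arithmetic above (an illustrative sketch only, not part of the module's API; the helper name below is invented for illustration): under the assumption that prior color blocks are drawn geometrically, i.e. each extra block is added with probability `1 - prob` and drawing stops with probability `prob`, the chance of exactly `k` prior blocks on an image is `(1 - prob) ** k * prob`.
+
+      - ```python
+        # Illustrative sketch: reproduces the 0.009 figure quoted above under the
+        # geometric-drawing assumption (add a block with probability 1 - prob, stop with prob).
+        def prob_of_k_prior_blocks(prob: float, k: int) -> float:
+            return (1 - prob) ** k * prob
+
+        print(prob_of_k_prior_blocks(prob=0.9, k=2))  # about 0.009, i.e. (1 - 0.9) * (1 - 0.9) * 0.9
+        ```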
+ + - Step4: Optimization strategy + + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_colorization_ckpt_cls_1') + trainer.train(color_set, epochs=201, batch_size=25, eval_dataset=color_set, log_interval=10, save_interval=10) + ``` + + + - Run configuration + + - `Trainer` mainly control the training of Fine-tune, including the following controllable parameters: + + * `model`: Optimized model. + * `optimizer`: Optimizer selection. + * `use_vdl`: Whether to use vdl to visualize the training process. + * `checkpoint_dir`: The storage address of the model parameters. + * `compare_metrics`: The measurement index of the optimal model. + + - `trainer.train` mainly control the specific training process, including the following controllable parameters: + + * `train_dataset`: Training dataset. + * `epochs`: Epochs of training process. + * `batch_size`: Batch size. + * `num_workers`: Number of workers. + * `eval_dataset`: Validation dataset. + * `log_interval`:The interval for printing logs. + * `save_interval`: The interval for saving model parameters. + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. The `predict.py` script is as follows: + + - ```python + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='user_guided_colorization', load_checkpoint='/PATH/TO/CHECKPOINT') + model.set_config(prob=0.1) + result = model.predict(images=['/PATH/TO/IMAGE']) + ``` + + + - **NOTE:** If you want to get the oil painting style, please download the parameter file [Canvas colorization](https://paddlehub.bj.bcebos.com/dygraph/models/canvas_rc.pdparams) + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of colorization. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m user_guided_colorization + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + - ```python + import requests + import json + import cv2 + import base64 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # Send an HTTP request + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/user_guided_colorization" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data = base64_to_cv2(r.json()["results"]['data'][0]['fake_reg']) + cv2.imwrite('color.png', data) + ``` + + +## V. 
Release Note
+
+* 1.0.0
+
+  First release
diff --git a/modules/image/Image_editing/super_resolution/dcscn/README_en.md b/modules/image/Image_editing/super_resolution/dcscn/README_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..098d03657369d534d4975e27fc19b3d120ff3d97
--- /dev/null
+++ b/modules/image/Image_editing/super_resolution/dcscn/README_en.md
@@ -0,0 +1,172 @@
+# dcscn
+
+|Module Name|dcscn|
+| :--- | :---: |
+|Category|Image editing|
+|Network|dcscn|
+|Dataset|DIV2k|
+|Fine-tuning supported or not|No|
+|Module Size|260KB|
+|Data indicators|PSNR37.63|
+|Latest update date|2021-02-26|
+
+
+## I. Basic Information
+
+- ### Application Effect Display
+
+  - Sample results:
+ + +- ### Module Introduction + + - DCSCN is a super resolution model based on 'Fast and Accurate Image Super Resolution by Deep CNN with Skip Connection and Network in Network'. The model uses residual structure and skip connections to extract local and global features. It uses a parallel 1*1 convolutional network to learn detailed features to improve model performance. This model provides super resolution result with scale factor x2. + + - For more information, please refer to: [dcscn](https://github.com/jiny2001/dcscn-super-resolution) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + + +- ### 2、Installation + + - ```shell + $ hub install dcscn + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ``` + $ hub run dcscn --input_path "/PATH/TO/IMAGE" + ``` + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + - ```python + import cv2 + import paddlehub as hub + + sr_model = hub.Module(name='dcscn') + im = cv2.imread('/PATH/TO/IMAGE').astype('float32') + res = sr_model.reconstruct(images=[im], visualization=True) + print(res[0]['data']) + sr_model.save_inference_model() + ``` + +- ### 3、API + + - ```python + def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="dcscn_output") + ``` + + - Prediction API. + + - **Parameter** + + * images (list\[numpy.ndarray\]): Image data,ndarray.shape is in the format \[H, W, C\],BGR. + * paths (list\[str\]): image path. + * use\_gpu (bool): Use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU**. + * visualization (bool): Whether to save the recognition results as picture files. + * output\_dir (str): Save path of images, "dcscn_output" by default. + + - **Return** + * res (list\[dict\]): The list of model results, where each element is dict and each field is: + * save\_path (str, optional): Save path of the result, save_path is '' if no image is saved. + * data (numpy.ndarray): Result of super resolution. + + - ```python + def save_inference_model(self, + dirname='dcscn_save_model', + model_filename=None, + params_filename=None, + combined=False) + ``` + + - Save the model to the specified path. + + - **Parameters** + + * dirname: Save path. + * model\_filename: Model file name,defalt is \_\_model\_\_ + * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) + * combined: Whether to save the parameters to a unified file. + + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of super resolution. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m dcscn + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
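+
+  - For example, assuming a machine where the GPU to be used has id 0 (adjust the id to your own setup), the variable can be exported before starting the service:
+
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m dcscn
+    ```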
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/dcscn" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + sr = np.expand_dims(cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY), axis=2) + shape =sr.shape + org_im = cv2.cvtColor(org_im, cv2.COLOR_BGR2YUV) + uv = cv2.resize(org_im[...,1:], (shape[1], shape[0]), interpolation=cv2.INTER_CUBIC) + combine_im = cv2.cvtColor(np.concatenate((sr, uv), axis=2), cv2.COLOR_YUV2BGR) + cv2.imwrite('dcscn_X2.png', combine_im) + print("save image as dcscn_X2.png") + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/Image_editing/super_resolution/falsr_a/README_en.md b/modules/image/Image_editing/super_resolution/falsr_a/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..aa677c6d5d5bbe480cad8049b7cad08e1ede441f --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_a/README_en.md @@ -0,0 +1,173 @@ +# falsr_a + +|Module Name|falsr_a| +| :--- | :---: | +|Category |Image editing| +|Network |falsr_a| +|Dataset|DIV2k| +|Fine-tuning supported or not|No| +|Module Size |8.9MB| +|Data indicators|PSNR37.82| +|Latest update date|2021-02-26| + + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +
+ + +- ### Module Introduction + + - Falsr_a is a lightweight super-resolution model based on "Accurate and Lightweight Super-Resolution with Neural Architecture Search". The model uses a multi-objective approach to deal with the over-segmentation problem, and uses an elastic search strategy based on a hybrid controller to improve the performance of the model. This model provides super resolution result with scale factor x2. + + - For more information, please refer to: [falsr_a](https://github.com/xiaomi-automl/FALSR) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + + +- ### 2、Installation + + - ```shell + $ hub install falsr_a + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ``` + $ hub run falsr_a --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import cv2 + import paddlehub as hub + + sr_model = hub.Module(name='falsr_a') + im = cv2.imread('/PATH/TO/IMAGE').astype('float32') + res = sr_model.reconstruct(images=[im], visualization=True) + print(res[0]['data']) + sr_model.save_inference_model() + ``` + +- ### 3、API + + - ```python + def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_a_output") + ``` + + - Prediction API. + + - **Parameter** + + * images (list\[numpy.ndarray\]): image data,ndarray.shape is in the format \[H, W, C\],BGR. + * paths (list\[str\]): image path. + * use\_gpu (bool): use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU**. + * visualization (bool): Whether to save the recognition results as picture files. + * output\_dir (str): save path of images, "dcscn_output" by default. + + - **Return** + * res (list\[dict\]): The list of model results, where each element is dict and each field is: + * save\_path (str, optional): Save path of the result, save_path is '' if no image is saved. + * data (numpy.ndarray): result of super resolution. + + - ```python + def save_inference_model(self, + dirname='falsr_a_save_model', + model_filename=None, + params_filename=None, + combined=False) + ``` + + - Save the model to the specified path. + + - **Parameters** + + * dirname: Save path. + * model\_filename: model file name,defalt is \_\_model\_\_ + * params\_filename: parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) + * combined: Whether to save the parameters to a unified file. + + + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of super resolution. + + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m falsr_a + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/falsr_a" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + sr = base64_to_cv2(r.json()["results"][0]['data']) + cv2.imwrite('falsr_a_X2.png', sr) + print("save image as falsr_a_X2.png") + ``` + + +## V. Release Note + +- 1.0.0 + + First release + + + diff --git a/modules/image/Image_editing/super_resolution/falsr_b/README_en.md b/modules/image/Image_editing/super_resolution/falsr_b/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..5507b2ac6de0a89ce0c061b8651dcc59752b7079 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_b/README_en.md @@ -0,0 +1,173 @@ +# falsr_b + +|Module Name|falsr_b| +| :--- | :---: | +|Category |Image editing| +|Network |falsr_b| +|Dataset|DIV2k| +|Fine-tuning supported or not|No| +|Module Size |4MB| +|Data indicators|PSNR37.61| +|Latest update date|2021-02-26| + + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +
+ + +- ### Module Introduction + + - Falsr_b is a lightweight super-resolution model based on "Accurate and Lightweight Super-Resolution with Neural Architecture Search". The model uses a multi-objective approach to deal with the over-segmentation problem, and uses an elastic search strategy based on a hybrid controller to improve the performance of the model. This model provides super resolution result with scale factor x2. + + - For more information, please refer to:[falsr_b](https://github.com/xiaomi-automl/FALSR) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + + +- ### 2、Installation + + - ```shell + $ hub install falsr_b + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ``` + $ hub run falsr_b --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + ```python + import cv2 + import paddlehub as hub + + sr_model = hub.Module(name='falsr_b') + im = cv2.imread('/PATH/TO/IMAGE').astype('float32') + res = sr_model.reconstruct(images=[im], visualization=True) + print(res[0]['data']) + sr_model.save_inference_model() + ``` + +- ### 3、API + + - ```python + def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_b_output") + ``` + + - Prediction API. + + - **Parameter** + + * images (list\[numpy.ndarray\]): Image data,ndarray.shape is in the format \[H, W, C\],BGR. + * paths (list\[str\]): Image path. + * use\_gpu (bool): Use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU**. + * visualization (bool): Whether to save the recognition results as picture files. + * output\_dir (str): Save path of images, "dcscn_output" by default. + + - **Return** + * res (list\[dict\]): The list of model results, where each element is dict and each field is: + * save\_path (str, optional): Save path of the result, save_path is '' if no image is saved. + * data (numpy.ndarray): Result of super resolution. + + - ```python + def save_inference_model(self, + dirname='falsr_b_save_model', + model_filename=None, + params_filename=None, + combined=False) + ``` + + - Save the model to the specified path. + + - **Parameters** + + * dirname: Save path. + * model\_filename: Model file name,defalt is \_\_model\_\_ + * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) + * combined: Whether to save the parameters to a unified file. + + + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of super resolution. + + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m falsr_b + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/falsr_b" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + sr = base64_to_cv2(r.json()["results"][0]['data']) + cv2.imwrite('falsr_b_X2.png', sr) + print("save image as falsr_b_X2.png") + ``` + + +## V. Release Note + +- 1.0.0 + + First release + + + diff --git a/modules/image/Image_editing/super_resolution/falsr_c/README.md b/modules/image/Image_editing/super_resolution/falsr_c/README.md index 3227847494d5b34867aa7ee36e91ff789ad80574..2e7d35bbea7cc2eff7ab40af558942a826412a3f 100644 --- a/modules/image/Image_editing/super_resolution/falsr_c/README.md +++ b/modules/image/Image_editing/super_resolution/falsr_c/README.md @@ -51,7 +51,7 @@ - ``` $ hub run falsr_c --input_path "/PATH/TO/IMAGE" ``` -- ### 代码示例 +- ### 2、预测代码示例 ```python import cv2 @@ -65,7 +65,7 @@ sr_model.save_inference_model() ``` -- ### 2、API +- ### 3、API - ```python def reconstruct(self, diff --git a/modules/image/Image_editing/super_resolution/falsr_c/README_en.md b/modules/image/Image_editing/super_resolution/falsr_c/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..5e651a7ea9393c68af8e24a9bb34a741287ffd46 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_c/README_en.md @@ -0,0 +1,173 @@ +# falsr_c + +|Module Name|falsr_c| +| :--- | :---: | +|Category |Image editing| +|Network |falsr_c| +|Dataset|DIV2k| +|Fine-tuning supported or not|No| +|Module Size |4.4MB| +|Data indicators|PSNR37.66| +|Latest update date|2021-02-26| + + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +
+ + +- ### Module Introduction + + - Falsr_c is a lightweight super-resolution model based on "Accurate and Lightweight Super-Resolution with Neural Architecture Search". The model uses a multi-objective approach to deal with the over-segmentation problem, and uses an elastic search strategy based on a hybrid controller to improve the performance of the model. This model provides super resolution result with scale factor x2. + + - For more information, please refer to:[falsr_c](https://github.com/xiaomi-automl/FALSR) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + + +- ### 2、Installation + + - ```shell + $ hub install falsr_c + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ``` + $ hub run falsr_c --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + ```python + import cv2 + import paddlehub as hub + + sr_model = hub.Module(name='falsr_c') + im = cv2.imread('/PATH/TO/IMAGE').astype('float32') + res = sr_model.reconstruct(images=[im], visualization=True) + print(res[0]['data']) + sr_model.save_inference_model() + ``` + +- ### 3、API + + - ```python + def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_c_output") + ``` + + - Prediction API. + + - **Parameter** + + * images (list\[numpy.ndarray\]): Image data,ndarray.shape is in the format \[H, W, C\],BGR. + * paths (list\[str\]): Image path. + * use\_gpu (bool): Use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU**. + * visualization (bool): Whether to save the recognition results as picture files. + * output\_dir (str): Save path of images, "dcscn_output" by default. + + - **Return** + * res (list\[dict\]): The list of model results, where each element is dict and each field is: + * save\_path (str, optional): Save path of the result, save_path is '' if no image is saved. + * data (numpy.ndarray): Result of super resolution. + + - ```python + def save_inference_model(self, + dirname='falsr_c_save_model', + model_filename=None, + params_filename=None, + combined=False) + ``` + + - Save the model to the specified path. + + - **Parameters** + + * dirname: Save path. + * model\_filename: Model file name,defalt is \_\_model\_\_ + * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) + * combined: Whether to save the parameters to a unified file. + + + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of super resolution. + + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m falsr_c + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/falsr_c" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + sr = base64_to_cv2(r.json()["results"][0]['data']) + cv2.imwrite('falsr_c_X2.png', sr) + print("save image as falsr_c_X2.png") + ``` + + +## V. Release Note + +- 1.0.0 + + First release + + + diff --git a/modules/image/Image_editing/super_resolution/realsr/README.md b/modules/image/Image_editing/super_resolution/realsr/README.md index 02e66678c5926f6f9e54344d6f74a1bf91304b39..e5eebce61099444691edd8c084572398ccc785cd 100644 --- a/modules/image/Image_editing/super_resolution/realsr/README.md +++ b/modules/image/Image_editing/super_resolution/realsr/README.md @@ -57,7 +57,7 @@ ## 三、模型API预测 - - ### 1、代码示例 + - ### 1、预测代码示例 ```python import paddlehub as hub diff --git a/modules/image/Image_editing/super_resolution/realsr/README_en.md b/modules/image/Image_editing/super_resolution/realsr/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..4e3eafba85acd21d185da2af622dffd82d7d09ee --- /dev/null +++ b/modules/image/Image_editing/super_resolution/realsr/README_en.md @@ -0,0 +1,174 @@ +# realsr + +|Module Name |reasr| +| :--- | :---: | +|Category |Image editing| +|Network|LP-KPN| +|Dataset |RealSR dataset| +|Fine-tuning supported or not|No| +|Module Size |64MB| +|Latest update date|2021-02-26| +|Data indicators |PSNR29.05| + + + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +
+ +- ### Module Introduction + + - Realsr is a super resolution model for image and video based on "Toward Real-World Single Image Super-Resolution: A New Benchmark and A New Mode". This model provides super resolution result with scale factor x4. + + - For more information, please refer to: [realsr](https://github.com/csjcai/RealSR) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + + - **NOTE**: This Module relies on ffmpeg, Please install ffmpeg before using this Module. + ```shell + $ conda install x264=='1!152.20180717' ffmpeg=4.0.2 -c conda-forge + ``` + +- ### 2、Installation + + - ```shell + $ hub install realsr + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + + +## III. Module API Prediction + + - ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + + model = hub.Module(name='realsr') + model.predict('/PATH/TO/IMAGE/OR/VIDEO') + ``` + - ### 2、API + + - ```python + def predict(self, input): + ``` + + - Prediction API. + + - **Parameter** + + - input (str): image path. + + - **Return** + + - If input is image path, the output is: + - pred_img(np.ndarray): image data, ndarray.shape is in the format [H, W, C], BGR. + - out_path(str): save path of images. + + - If input is video path, the output is : + - frame_pattern_combined(str): save path of frames from output video. + - vid_out_path(str): save path of output video. + + - ```python + def run_image(self, img): + ``` + - Prediction API for images. + + - **Parameter** + + - img (str|np.ndarray): Image data, str or ndarray. ndarray.shape is in the format [H, W, C], BGR. + + - **Return** + + - pred_img(np.ndarray): Prediction result, ndarray.shape is in the format [H, W, C], BGR. + + - ```python + def run_video(self, video): + ``` + - Prediction API for video. + + - **Parameter** + + - video(str): Video path. + + - **Return** + + - frame_pattern_combined(str): Save path of frames from output video. + - vid_out_path(str): Save path of output video. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image super resolution. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m realsr + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
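+
+  - If the default port 8866 is already taken, `hub serving start` can usually be pointed at another port via its `--port`/`-p` option (run `hub serving start --help` to confirm the options available in your PaddleHub version); the request URL in Step 2 must then be changed accordingly:
+
+  - ```shell
+    $ hub serving start -m realsr -p 8867
+    ```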
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':cv2_to_base64(org_im)} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/realsr" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + img = base64_to_cv2(r.json()["results"]) + cv2.imwrite('/PATH/TO/SAVE/IMAGE', img) + + ``` + + +## V. Release Note + + +- 1.0.0 + + First release + +* 1.0.1 + + Support paddlehub2.0 + diff --git a/modules/image/Image_gan/attgan_celeba/README_en.md b/modules/image/Image_gan/attgan_celeba/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..48808475316d209f437e12021881c61c48c32d7e --- /dev/null +++ b/modules/image/Image_gan/attgan_celeba/README_en.md @@ -0,0 +1,110 @@ +# attgan_celeba + +|Module Name|attgan_celeba| +| :--- | :---: | +|Category |image generation| +|Network |AttGAN| +|Dataset|Celeba| +|Fine-tuning supported or not |No| +|Module Size |167MB| +|Latest update date|2021-02-26| +|Data indicators |-| + + +## I. Basic Information + +- ### Application Effect Display + - Sample results: + +
+  The image attributes are: original image, Bald, Bangs, Black_Hair, Blond_Hair, Brown_Hair, Bushy_Eyebrows, Eyeglasses, Gender, Mouth_Slightly_Open, Mustache, No_Beard, Pale_Skin, Aged
+ + +- ### Module Introduction + + - AttGAN is a Generative Adversarial Network, which uses classification loss and reconstruction loss to train the network. The PaddleHub Module is trained one Celeba dataset and currently supports attributes of "Bald", "Bangs", "Black_Hair", "Blond_Hair", "Brown_Hair", "Bushy_Eyebrows", "Eyeglasses", "Gender", "Mouth_Slightly_Open", "Mustache", "No_Beard", "Pale_Skin", "Aged". + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.5.2 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install attgan_celeba==1.0.0 + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md). + + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run attgan_celeba --image "/PATH/TO/IMAGE" --style "target_attribute" + ``` + + - **Parameters** + + - image: Input image path. + + - style: Specify the attributes to be converted. The options are "Bald", "Bangs", "Black_Hair", "Blond_Hair", "Brown_Hair", "Bushy_Eyebrows", "Eyeglasses", "Gender", "Mouth_Slightly_Open", "Mustache", "No_Beard", "Pale_Skin", "Aged". You can choose one of the options. + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + + + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + + attgan = hub.Module(name="attgan_celeba") + + test_img_path = ["/PATH/TO/IMAGE"] + trans_attr = ["Bangs"] + + # set input dict + input_dict = {"image": test_img_path, "style": trans_attr} + + # execute predict and print the result + results = attgan.generate(data=input_dict) + print(results) + ``` + +- ### 3、API + + - ```python + def generate(data) + ``` + + - Style transfer API. + + - **Parameter** + + - data(list[dict]): Each element in the list is dict and each field is: + - image (list\[str\]): Each element in the list is the path of the image to be converted. + - style (list\[str\]): Each element in the list is a string, fill in the face attributes to be converted. + + - **Return** + - res (list\[str\]): Save path of the result. + + + +## IV. Release Note + +- 1.0.0 + + First release + + diff --git a/modules/image/Image_gan/cyclegan_cityscapes/README_en.md b/modules/image/Image_gan/cyclegan_cityscapes/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..dc310e8f1592773400e1d413df5425f82742ff00 --- /dev/null +++ b/modules/image/Image_gan/cyclegan_cityscapes/README_en.md @@ -0,0 +1,109 @@ +# cyclegan_cityscapes + +|Module Name|cyclegan_cityscapes| +| :--- | :---: | +|Category |Image generation| +|Network |CycleGAN| +|Dataset|Cityscapes| +|Fine-tuning supported or not |No| +|Module Size |33MB| +|Latest update date |2021-02-26| +|Data indicators|-| + + +## I. Basic Information + + +- ### Application Effect Display + + - Sample results: + +
+  Input image
+
+  Output image
+ + +- ### Module Introduction + + - CycleGAN belongs to Generative Adversarial Networks(GANs). Unlike traditional GANs that can only generate pictures in one direction, CycleGAN can simultaneously complete the style transfer of two domains. The PaddleHub Module is trained by Cityscapes dataset, and supports the conversion from real images to semantic segmentation results, and also supports conversion from semantic segmentation results to real images. + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.1.0 + +- ### 2、Installation + + - ```shell + $ hub install cyclegan_cityscapes==1.0.0 + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + - ```shell + $ hub run cyclegan_cityscapes --input_path "/PATH/TO/IMAGE" + ``` + + - **Parameters** + + - input_path: image path + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + + cyclegan = hub.Module(name="cyclegan_cityscapes") + + test_img_path = "/PATH/TO/IMAGE" + + # set input dict + input_dict = {"image": [test_img_path]} + + # execute predict and print the result + results = cyclegan.generate(data=input_dict) + print(results) + ``` + +- ### 3、API + + - ```python + def generate(data) + ``` + + - Style transfer API. + + - **Parameters** + + - data(list[dict]): Each element in the list is dict and each field is: + - image (list\[str\]): Image path. + + - **Return** + - res (list\[str\]): The list of style transfer results, where each element is dict and each field is: + - origin: Original input path. + - generated: Save path of images. + + + +## IV. Release Note + +* 1.0.0 + + First release + diff --git a/modules/image/Image_gan/gan/photopen/README.md b/modules/image/Image_gan/gan/photopen/README.md new file mode 100644 index 0000000000000000000000000000000000000000..73c80f9ad381b2adaeb7ab28d95c702b6cc55102 --- /dev/null +++ b/modules/image/Image_gan/gan/photopen/README.md @@ -0,0 +1,126 @@ +# photopen + +|模型名称|photopen| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|SPADEGenerator| +|数据集|coco_stuff| +|是否支持Fine-tuning|否| +|模型大小|74MB| +|最新更新日期|2021-12-14| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +
+ +- ### 模型介绍 + + - 本模块采用一个像素风格迁移网络 Pix2PixHD,能够根据输入的语义分割标签生成照片风格的图片。为了解决模型归一化层导致标签语义信息丢失的问题,向 Pix2PixHD 的生成器网络中添加了 SPADE(Spatially-Adaptive + Normalization)空间自适应归一化模块,通过两个卷积层保留了归一化时训练的缩放与偏置参数的空间维度,以增强生成图片的质量。语义风格标签图像可以参考[coco_stuff数据集](https://github.com/nightrome/cocostuff)获取, 也可以通过[PaddleGAN repo中的该项目](https://github.com/PaddlePaddle/PaddleGAN/blob/87537ad9d4eeda17eaa5916c6a585534ab989ea8/docs/zh_CN/tutorials/photopen.md)来自定义生成图像进行体验。 + + + +## 二、安装 + +- ### 1、环境依赖 + - ppgan + +- ### 2、安装 + + - ```shell + $ hub install photopen + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run photopen --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像生成模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="photopen") + input_path = ["/PATH/TO/IMAGE"] + # Read from a file + module.photo_transfer(paths=input_path, output_dir='./transfer_result/', use_gpu=True) + ``` + +- ### 3、API + + - ```python + photo_transfer(images=None, paths=None, output_dir='./transfer_result/', use_gpu=False, visualization=True): + ``` + - 图像转换生成API。 + + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
+ - paths (list\[str\]): 图片的路径;
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像转换生成服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m photopen + ``` + + - 这样就完成了一个图像转换生成的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/photopen" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install photopen==1.0.0 + ``` diff --git a/modules/image/Image_gan/gan/photopen/model.py b/modules/image/Image_gan/gan/photopen/model.py new file mode 100644 index 0000000000000000000000000000000000000000..4a0b0a4836b010ca4d72995c8857a8bb0ddd7aa2 --- /dev/null +++ b/modules/image/Image_gan/gan/photopen/model.py @@ -0,0 +1,62 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import cv2 +import numpy as np +import paddle +from PIL import Image +from PIL import ImageOps +from ppgan.models.generators import SPADEGenerator +from ppgan.utils.filesystem import load +from ppgan.utils.photopen import data_onehot_pro + + +class PhotoPenPredictor: + def __init__(self, weight_path, gen_cfg): + + # 初始化模型 + gen = SPADEGenerator( + gen_cfg.ngf, + gen_cfg.num_upsampling_layers, + gen_cfg.crop_size, + gen_cfg.aspect_ratio, + gen_cfg.norm_G, + gen_cfg.semantic_nc, + gen_cfg.use_vae, + gen_cfg.nef, + ) + gen.eval() + para = load(weight_path) + if 'net_gen' in para: + gen.set_state_dict(para['net_gen']) + else: + gen.set_state_dict(para) + + self.gen = gen + self.gen_cfg = gen_cfg + + def run(self, image): + sem = Image.fromarray(image).convert('L') + sem = sem.resize((self.gen_cfg.crop_size, self.gen_cfg.crop_size), Image.NEAREST) + sem = np.array(sem).astype('float32') + sem = paddle.to_tensor(sem) + sem = sem.reshape([1, 1, self.gen_cfg.crop_size, self.gen_cfg.crop_size]) + + one_hot = data_onehot_pro(sem, self.gen_cfg) + predicted = self.gen(one_hot) + pic = predicted.numpy()[0].reshape((3, 256, 256)).transpose((1, 2, 0)) + pic = ((pic + 1.) / 2. * 255).astype('uint8') + + return pic diff --git a/modules/image/Image_gan/gan/photopen/module.py b/modules/image/Image_gan/gan/photopen/module.py new file mode 100644 index 0000000000000000000000000000000000000000..f8a23e574c9823c52daf2e07a318e344b8220b70 --- /dev/null +++ b/modules/image/Image_gan/gan/photopen/module.py @@ -0,0 +1,133 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from ppgan.utils.config import get_config +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import PhotoPenPredictor +from .util import base64_to_cv2 +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo( + name="photopen", type="CV/style_transfer", author="paddlepaddle", author_email="", summary="", version="1.0.0") +class Photopen: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "photopen.pdparams") + cfg = get_config(os.path.join(self.directory, "photopen.yaml")) + self.network = PhotoPenPredictor(weight_path=self.pretrained_model, gen_cfg=cfg.predict) + + def photo_transfer(self, + images: list = None, + paths: list = None, + output_dir: str = './transfer_result/', + use_gpu: bool = False, + visualization: bool = True): + ''' + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR(read by cv2). + paths (list[str]): paths to images + + output_dir (str): the dir to save the results + use_gpu (bool): if True, use gpu to perform the computation, otherwise cpu. + visualization (bool): if True, save results in output_dir. + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + out = self.network.run(image) + results.append(out) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + out = self.network.run(image) + results.append(out) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + if out is not None: + cv2.imwrite(os.path.join(output_dir, 'output_{}.png'.format(i)), out[:, :, ::-1]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.photo_transfer( + paths=[self.args.input_path], + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.photo_transfer(images=images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='transfer_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/Image_gan/gan/photopen/photopen.yaml b/modules/image/Image_gan/gan/photopen/photopen.yaml new file mode 100644 index 0000000000000000000000000000000000000000..178f361736c06f1f816997dc4a52a9a6bd62bcc9 --- /dev/null +++ b/modules/image/Image_gan/gan/photopen/photopen.yaml @@ -0,0 +1,95 @@ +total_iters: 1 +output_dir: output_dir +checkpoints_dir: checkpoints + +model: + name: PhotoPenModel + generator: + name: SPADEGenerator + ngf: 24 + num_upsampling_layers: normal + crop_size: 256 + aspect_ratio: 1.0 + norm_G: spectralspadebatch3x3 + semantic_nc: 14 + use_vae: False + nef: 16 + discriminator: + name: MultiscaleDiscriminator + ndf: 128 + num_D: 4 + crop_size: 256 + label_nc: 12 + output_nc: 3 + contain_dontcare_label: True + no_instance: False + n_layers_D: 6 + criterion: + name: PhotoPenPerceptualLoss + crop_size: 224 + lambda_vgg: 1.6 + label_nc: 12 + contain_dontcare_label: True + batchSize: 1 + crop_size: 256 + lambda_feat: 10.0 + +dataset: + train: + name: PhotoPenDataset + content_root: test/coco_stuff + load_size: 286 + crop_size: 256 + num_workers: 0 + batch_size: 1 + test: + name: PhotoPenDataset_test + content_root: test/coco_stuff + load_size: 286 + crop_size: 256 + num_workers: 0 + batch_size: 1 + +lr_scheduler: # abundoned + name: LinearDecay + learning_rate: 0.0001 + start_epoch: 99999 + decay_epochs: 99999 + # will get from real dataset + iters_per_epoch: 1 + +optimizer: + lr: 0.0001 + optimG: + name: Adam + net_names: + - net_gen + beta1: 0.9 + beta2: 0.999 + optimD: + name: Adam + net_names: + - net_des + beta1: 0.9 + beta2: 0.999 + +log_config: + interval: 1 + visiual_interval: 1 + +snapshot_config: + interval: 1 + +predict: + name: SPADEGenerator + ngf: 24 + num_upsampling_layers: normal + crop_size: 256 + aspect_ratio: 1.0 + norm_G: spectralspadebatch3x3 + semantic_nc: 14 + use_vae: False + nef: 16 + contain_dontcare_label: True + label_nc: 12 + batchSize: 1 diff --git a/modules/image/Image_gan/gan/photopen/requirements.txt b/modules/image/Image_gan/gan/photopen/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..67e9bb6fa840355e9ed0d44b7134850f1fe22fe1 --- /dev/null +++ 
b/modules/image/Image_gan/gan/photopen/requirements.txt @@ -0,0 +1 @@ +ppgan diff --git a/modules/image/Image_gan/gan/photopen/util.py b/modules/image/Image_gan/gan/photopen/util.py new file mode 100644 index 0000000000000000000000000000000000000000..531a0ae0d487822a870ba7f09817e658967aff10 --- /dev/null +++ b/modules/image/Image_gan/gan/photopen/util.py @@ -0,0 +1,11 @@ +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/Image_gan/gan/stgan_bald/README.md b/modules/image/Image_gan/gan/stgan_bald/README.md index 45f23ae0dc22f917661ab82d2614dfcc106f10fd..1d504093bb716892f06bd162b9534da61c3d8aa5 100644 --- a/modules/image/Image_gan/gan/stgan_bald/README.md +++ b/modules/image/Image_gan/gan/stgan_bald/README.md @@ -25,8 +25,6 @@ - ### 1、环境依赖 - - paddlepaddle >= 1.8.2 - - paddlehub >= 1.8.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -38,7 +36,7 @@ | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/Image_gan/gan/stgan_bald/README_en.md b/modules/image/Image_gan/gan/stgan_bald/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..7146b1f5b7184a9969d07c31b9c94ee81f9f9094 --- /dev/null +++ b/modules/image/Image_gan/gan/stgan_bald/README_en.md @@ -0,0 +1,133 @@ +# stgan_bald + +|Module Name|stgan_bald| +| :--- | :---: | +|Category|image generation| +|Network|STGAN| +|Dataset|CelebA| +|Fine-tuning supported or not|No| +|Module Size|287MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Please refer to this [link](https://aistudio.baidu.com/aistudio/projectdetail/1145381) + +- ### Module Introduction + + - This module is based on STGAN model, trained on CelebA dataset, and can be used to predict bald appearance after 1, 3 and 5 years. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install stgan_bald + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + stgan_bald = hub.Module(name="stgan_bald") + result = stgan_bald.bald(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = stgan_bald.bald(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def bald(images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="bald_output") + ``` + + - Bald appearance generation API. 
+ + - **Parameters** + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - visualization (bool): Whether to save the results as picture files; + - output_dir (str): save path of images; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[numpy.ndarray\]): result list,ndarray.shape is \[H, W, C\] + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of bald appearance generation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m stgan_bald + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/stgan_bald" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # save results + one_year =cv2.cvtColor(base64_to_cv2(r.json()["results"]['data_0']), cv2.COLOR_RGB2BGR) + three_year =cv2.cvtColor(base64_to_cv2(r.json()["results"]['data_1']), cv2.COLOR_RGB2BGR) + five_year =cv2.cvtColor(base64_to_cv2(r.json()["results"]['data_2']), cv2.COLOR_RGB2BGR) + cv2.imwrite("stgan_bald_server.png", one_year) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + - ```shell + $ hub install stgan_bald==1.0.0 + ``` diff --git a/modules/image/Image_gan/gan/stgan_bald/requirements.txt b/modules/image/Image_gan/gan/stgan_bald/requirements.txt index 2d8443d02d090d830649fbfacbc11c8cebea8d34..00a00fcc8e48e65538cf8b73b2fd4e1157362f20 100644 --- a/modules/image/Image_gan/gan/stgan_bald/requirements.txt +++ b/modules/image/Image_gan/gan/stgan_bald/requirements.txt @@ -1,2 +1 @@ -paddlepaddle>=1.8.4 paddlehub>=1.8.0 diff --git a/modules/image/Image_gan/gan/styleganv2_mixing/README.md b/modules/image/Image_gan/gan/styleganv2_mixing/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6623f0f6f4d40962b41ef409e736bb230617a913 --- /dev/null +++ b/modules/image/Image_gan/gan/styleganv2_mixing/README.md @@ -0,0 +1,143 @@ +# styleganv2_mixing + +|模型名称|styleganv2_mixing| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|StyleGAN V2| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|190MB| +|最新更新日期|2021-12-23| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ +
+ 输入图像1 +
+ +
+ 输入图像2 +
+ +
+ 输出图像 +
+

+ +- ### 模型介绍 + + - StyleGAN V2 的任务是使用风格向量进行image generation,而Mixing模块则是利用其风格向量实现两张生成图像不同层次不同比例的混合。 + + + +## 二、安装 + +- ### 1、环境依赖 + - paddlepaddle >= 2.1.0 + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install styleganv2_mixing + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run styleganv2_mixing --image1 "/PATH/TO/IMAGE1" --image2 "/PATH/TO/IMAGE2" + ``` + - 通过命令行方式实现人脸融合模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="styleganv2_mixing") + # 每个元素为包含 image1、image2 两个关键字的 dict + input_path = [{'image1': '/PATH/TO/IMAGE1', 'image2': '/PATH/TO/IMAGE2'}] + module.generate(paths=input_path, weights=[0.5] * 18, output_dir='./mixing_result/', use_gpu=True) + ``` + +- ### 3、API + + - ```python + generate(self, images=None, paths=None, weights = [0.5] * 18, output_dir='./mixing_result/', use_gpu=False, visualization=True) + ``` + - 人脸融合生成API。 + + - **参数** + - images (list[dict]): data of images, 每一个元素都为一个 dict,有关键字 image1, image2, 相应取值为: + - image1 (numpy.ndarray): 待融合的图片1,shape 为 \[H, W, C\],BGR格式;<br/>
+ - image2 (numpy.ndarray) : 待融合的图片2,shape为 \[H, W, C\],BGR格式;
+ - paths (list[str]): paths to images, 每一个元素都为一个dict, 有关键字 image1, image2, 相应取值为: + - image1 (str): 待融合的图片1的路径;
+ - image2 (str) : 待融合的图片2的路径;
+ - weights (list(float)): 融合的权重,默认为 [0.5] * 18,对应 18 个层级的混合比例;<br/>
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线人脸融合服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m styleganv2_mixing + ``` + + - 这样就完成了一个人脸融合的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[{'image1': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE1")),'image2': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE2"))}]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/styleganv2_mixing" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install styleganv2_mixing==1.0.0 + ``` diff --git a/modules/image/Image_gan/gan/styleganv2_mixing/basemodel.py b/modules/image/Image_gan/gan/styleganv2_mixing/basemodel.py new file mode 100644 index 0000000000000000000000000000000000000000..37eca73d4e14965a1f69e818744aa435a7e3600f --- /dev/null +++ b/modules/image/Image_gan/gan/styleganv2_mixing/basemodel.py @@ -0,0 +1,140 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import random +import numpy as np +import paddle +from ppgan.models.generators import StyleGANv2Generator +from ppgan.utils.download import get_path_from_url +from ppgan.utils.visual import make_grid, tensor2img, save_image + +model_cfgs = { + 'ffhq-config-f': { + 'model_urls': 'https://paddlegan.bj.bcebos.com/models/stylegan2-ffhq-config-f.pdparams', + 'size': 1024, + 'style_dim': 512, + 'n_mlp': 8, + 'channel_multiplier': 2 + }, + 'animeface-512': { + 'model_urls': 'https://paddlegan.bj.bcebos.com/models/stylegan2-animeface-512.pdparams', + 'size': 512, + 'style_dim': 512, + 'n_mlp': 8, + 'channel_multiplier': 2 + } +} + + +@paddle.no_grad() +def get_mean_style(generator): + mean_style = None + + for i in range(10): + style = generator.mean_latent(1024) + + if mean_style is None: + mean_style = style + + else: + mean_style += style + + mean_style /= 10 + return mean_style + + +@paddle.no_grad() +def sample(generator, mean_style, n_sample): + image = generator( + [paddle.randn([n_sample, generator.style_dim])], + truncation=0.7, + truncation_latent=mean_style, + )[0] + + return image + + +@paddle.no_grad() +def style_mixing(generator, mean_style, n_source, n_target): + source_code = paddle.randn([n_source, generator.style_dim]) + target_code = paddle.randn([n_target, generator.style_dim]) + + resolution = 2**((generator.n_latent + 2) // 2) + + images = [paddle.ones([1, 3, resolution, resolution]) * -1] + + source_image = generator([source_code], truncation_latent=mean_style, truncation=0.7)[0] + target_image = generator([target_code], truncation_latent=mean_style, truncation=0.7)[0] + + images.append(source_image) + + for i in range(n_target): + image = generator( + [target_code[i].unsqueeze(0).tile([n_source, 1]), source_code], + truncation_latent=mean_style, + truncation=0.7, + )[0] + images.append(target_image[i].unsqueeze(0)) + images.append(image) + + images = paddle.concat(images, 0) + + return images + + +class StyleGANv2Predictor: + def __init__(self, + output_path='output_dir', + weight_path=None, + model_type=None, + seed=None, + size=1024, + style_dim=512, + n_mlp=8, + channel_multiplier=2): + self.output_path = output_path + + if weight_path is None: + if model_type in model_cfgs.keys(): + weight_path = get_path_from_url(model_cfgs[model_type]['model_urls']) + size = model_cfgs[model_type].get('size', size) + style_dim = model_cfgs[model_type].get('style_dim', style_dim) + n_mlp = model_cfgs[model_type].get('n_mlp', n_mlp) + channel_multiplier = model_cfgs[model_type].get('channel_multiplier', channel_multiplier) + checkpoint = paddle.load(weight_path) + else: + raise ValueError('Predictor need a weight path or a pretrained model type') + else: + checkpoint = paddle.load(weight_path) + + self.generator = StyleGANv2Generator(size, style_dim, n_mlp, channel_multiplier) + self.generator.set_state_dict(checkpoint) + self.generator.eval() + + if seed is not None: + paddle.seed(seed) + random.seed(seed) + np.random.seed(seed) + + def run(self, n_row=3, n_col=5): + os.makedirs(self.output_path, exist_ok=True) + mean_style = get_mean_style(self.generator) + + img = sample(self.generator, mean_style, n_row * n_col) + save_image(tensor2img(make_grid(img, nrow=n_col)), f'{self.output_path}/sample.png') + + for j in range(2): + img = style_mixing(self.generator, mean_style, n_col, n_row) + save_image(tensor2img(make_grid(img, nrow=n_col + 1)), f'{self.output_path}/sample_mixing_{j}.png') diff --git a/modules/image/Image_gan/gan/styleganv2_mixing/model.py 
b/modules/image/Image_gan/gan/styleganv2_mixing/model.py new file mode 100644 index 0000000000000000000000000000000000000000..5e2287df0c7bb22854e56a023f2278dd7981360c --- /dev/null +++ b/modules/image/Image_gan/gan/styleganv2_mixing/model.py @@ -0,0 +1,47 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import cv2 +import numpy as np +import paddle + +from .basemodel import StyleGANv2Predictor + + +def make_image(tensor): + return (((tensor.detach() + 1) / 2 * 255).clip(min=0, max=255).transpose((0, 2, 3, 1)).numpy().astype('uint8')) + + +class StyleGANv2MixingPredictor(StyleGANv2Predictor): + @paddle.no_grad() + def run(self, latent1, latent2, weights=[0.5] * 18): + + latent1 = paddle.to_tensor(latent1).unsqueeze(0) + latent2 = paddle.to_tensor(latent2).unsqueeze(0) + assert latent1.shape[1] == latent2.shape[1] == len( + weights), 'latents and their weights should have the same level nums.' + mix_latent = [] + for i, weight in enumerate(weights): + mix_latent.append(latent1[:, i:i + 1] * weight + latent2[:, i:i + 1] * (1 - weight)) + mix_latent = paddle.concat(mix_latent, 1) + latent_n = paddle.concat([latent1, latent2, mix_latent], 0) + generator = self.generator + img_gen, _ = generator([latent_n], input_is_latent=True, randomize_noise=False) + imgs = make_image(img_gen) + src_img1 = imgs[0] + src_img2 = imgs[1] + dst_img = imgs[2] + + return src_img1, src_img2, dst_img diff --git a/modules/image/Image_gan/gan/styleganv2_mixing/module.py b/modules/image/Image_gan/gan/styleganv2_mixing/module.py new file mode 100644 index 0000000000000000000000000000000000000000..fbc10091c3ef86676f520c20b2d1704294c36fe1 --- /dev/null +++ b/modules/image/Image_gan/gan/styleganv2_mixing/module.py @@ -0,0 +1,161 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
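+# --- Supplementary note (added for documentation; not part of the original module code) ---
+# The `weights` argument of generate() below corresponds to the per-level mixing performed in
+# model.py's StyleGANv2MixingPredictor.run():
+#     mix_latent[i] = latent1[i] * weights[i] + latent2[i] * (1 - weights[i])
+# for each of the 18 latent levels of the ffhq-config-f generator. The values here are purely
+# illustrative: weights = [1.0] * 9 + [0.0] * 9 would keep the first 9 (coarser, pose/shape-related)
+# levels from image1 and the last 9 (finer, texture/colour-related) levels from image2, while the
+# default [0.5] * 18 averages every level.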
+ +import os +import argparse +import copy + +import paddle +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable, serving +import numpy as np +import cv2 +from skimage.io import imread +from skimage.transform import rescale, resize + +from .model import StyleGANv2MixingPredictor +from .util import base64_to_cv2 + + +@moduleinfo( + name="styleganv2_mixing", + type="CV/style_transfer", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class styleganv2_mixing: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "stylegan2-ffhq-config-f.pdparams") + self.network = StyleGANv2MixingPredictor(weight_path=self.pretrained_model, model_type='ffhq-config-f') + self.pixel2style2pixel_module = hub.Module(name='pixel2style2pixel') + + def generate(self, + images=None, + paths=None, + weights=[0.5] * 18, + output_dir='./mixing_result/', + use_gpu=False, + visualization=True): + ''' + images (list[dict]): data of images, each element is a dict,the keys are as below: + - image1 (numpy.ndarray): image1 to be mixed,shape is \[H, W, C\],BGR format;
+ - image2 (numpy.ndarray) : image2 to be mixed,shape is \[H, W, C\],BGR format;
+ paths (list[str]): paths to images, each element is a dict,the keys are as below: + - image1 (str): path to image1;
+ - image2 (str) : path to image2;
+ weights (list(float)): weight for mixing + output_dir: the dir to save the results + use_gpu: if True, use gpu to perform the computation, otherwise cpu. + visualization: if True, save results in output_dir. + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + if images != None: + for image_dict in images: + image1 = image_dict['image1'][:, :, ::-1] + image2 = image_dict['image2'][:, :, ::-1] + _, latent1 = self.pixel2style2pixel_module.network.run(image1) + _, latent2 = self.pixel2style2pixel_module.network.run(image2) + results.append(self.network.run(latent1, latent2, weights)) + + if paths != None: + for path_dict in paths: + path1 = path_dict['image1'] + path2 = path_dict['image2'] + image1 = cv2.imread(path1)[:, :, ::-1] + image2 = cv2.imread(path2)[:, :, ::-1] + _, latent1 = self.pixel2style2pixel_module.network.run(image1) + _, latent2 = self.pixel2style2pixel_module.network.run(image2) + results.append(self.network.run(latent1, latent2, weights)) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + if out is not None: + cv2.imwrite(os.path.join(output_dir, 'src_{}_image1.png'.format(i)), out[0][:, :, ::-1]) + cv2.imwrite(os.path.join(output_dir, 'src_{}_image2.png'.format(i)), out[1][:, :, ::-1]) + cv2.imwrite(os.path.join(output_dir, 'dst_{}.png'.format(i)), out[2][:, :, ::-1]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.generate( + paths=[{ + 'image1': self.args.image1, + 'image2': self.args.image2 + }], + weights=self.args.weights, + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = copy.deepcopy(images) + for image in images_decode: + image['image1'] = base64_to_cv2(image['image1']) + image['image2'] = base64_to_cv2(image['image2']) + results = self.generate(images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='mixing_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--image1', type=str, help="path to input image1.") + self.arg_input_group.add_argument('--image2', type=str, help="path to input image2.") + self.arg_input_group.add_argument( + "--weights", + type=float, + nargs="+", + default=[0.5] * 18, + help="different weights at each level of two latent codes") diff --git a/modules/image/Image_gan/gan/styleganv2_mixing/requirements.txt b/modules/image/Image_gan/gan/styleganv2_mixing/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..67e9bb6fa840355e9ed0d44b7134850f1fe22fe1 --- /dev/null +++ b/modules/image/Image_gan/gan/styleganv2_mixing/requirements.txt @@ -0,0 +1 @@ +ppgan diff --git a/modules/image/Image_gan/gan/styleganv2_mixing/util.py b/modules/image/Image_gan/gan/styleganv2_mixing/util.py new file mode 100644 index 0000000000000000000000000000000000000000..b88ac3562b74cadc1d4d6459a56097ca4a938a0b --- /dev/null +++ b/modules/image/Image_gan/gan/styleganv2_mixing/util.py @@ -0,0 +1,10 @@ +import base64 +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/Image_gan/stargan_celeba/README_en.md b/modules/image/Image_gan/stargan_celeba/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..a79a091aa017e1caafae05d142bd48b29cf61aa1 --- /dev/null +++ b/modules/image/Image_gan/stargan_celeba/README_en.md @@ -0,0 +1,101 @@ +# stargan_celeba + +|Module Name|stargan_celeba| +| :--- | :---: | +|Category|image generation| +|Network|STGAN| +|Dataset|Celeba| +|Fine-tuning supported or not|No| +|Module Size |33MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I. Basic Information + +- ### Application Effect Display + - Sample results: + +

+
+ The image attributes are: original image, Black_Hair, Blond_Hair, Brown_Hair, Male, Aged<br/>
+

+ + +- ### Module Introduction + + - STGAN takes the original attribute and the target attribute as input, and proposes STUs (Selective transfer units) to select and modify features of the encoder. The PaddleHub Module is trained one Celeba dataset and currently supports attributes of "Black_Hair", "Blond_Hair", "Brown_Hair", "Female", "Male", "Aged". + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.5.2 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install stargan_celeba==1.0.0 + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run stargan_celeba --image "/PATH/TO/IMAGE" --style "target_attribute" + ``` + + - **Parameters** + + - image: image path + + - style: Specify the attributes to be converted. The options are "Black_Hair", "Blond_Hair", "Brown_Hair", "Female", "Male", "Aged". You can choose one of the options. + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + + stargan = hub.Module(name="stargan_celeba") + test_img_path = ["/PATH/TO/IMAGE"] + trans_attr = ["Blond_Hair"] + + # set input dict + input_dict = {"image": test_img_path, "style": trans_attr} + + # execute predict and print the result + results = stargan.generate(data=input_dict) + print(results) + ``` + +- ### 3、API + + - ```python + def generate(data) + ``` + + - Style transfer API. + + - **Parameter** + + - data(list[dict]): each element in the list is dict and each field is: + - image (list\[str\]): Each element in the list is the path of the image to be converted. + - style (list\[str\]): Each element in the list is a string, fill in the face attributes to be converted. + + - **Return** + - res (list\[str\]): Save path of the result. + +## IV. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/Image_gan/stgan_celeba/README_en.md b/modules/image/Image_gan/stgan_celeba/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..c48718c792d5678a2a1980d9502e8f813e896ed7 --- /dev/null +++ b/modules/image/Image_gan/stgan_celeba/README_en.md @@ -0,0 +1,105 @@ +# stgan_celeba + +|Module Name|stgan_celeba| +| :--- | :---: | +|Category|image generation| +|Network|STGAN| +|Dataset|Celeba| +|Fine-tuning supported or not|No| +|Module Size |287MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I. Basic Information + +- ### Application Effect Display + - Sample results: + +

+
+ The image attributes are: original image, Bald, Bangs, Black_Hair, Blond_Hair, Brown_Hair, Bushy_Eyebrows, Eyeglasses, Gender, Mouth_Slightly_Open, Mustache, No_Beard, Pale_Skin, Aged
+

+ + +- ### Module Introduction + + - STGAN takes the original attribute and the target attribute as input, and proposes STUs (Selective transfer units) to select and modify features of the encoder. The PaddleHub Module is trained one Celeba dataset and currently supports attributes of "Bald", "Bangs", "Black_Hair", "Blond_Hair", "Brown_Hair", "Bushy_Eyebrows", "Eyeglasses", "Gender", "Mouth_Slightly_Open", "Mustache", "No_Beard", "Pale_Skin", "Aged". + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.5.2 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install stgan_celeba==1.0.0 + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run stgan_celeba --image "/PATH/TO/IMAGE" --info "original_attributes" --style "target_attribute" + ``` + - **Parameters** + + - image: Image path + + - info: Attributes of original image, must fill in gender( "Male" or "Female").The options are "Bald", "Bangs", "Black_Hair", "Blond_Hair", "Brown_Hair", "Bushy_Eyebrows", "Eyeglasses", "Mouth_Slightly_Open", "Mustache", "No_Beard", "Pale_Skin", "Aged". For example, the input picture is a girl with black hair, then fill in as "Female,Black_Hair". + + - style: Specify the attributes to be converted. The options are "Bald", "Bangs", "Black_Hair", "Blond_Hair", "Brown_Hair", "Bushy_Eyebrows", "Eyeglasses", "Gender", "Mouth_Slightly_Open", "Mustache", "No_Beard", "Pale_Skin", "Aged". You can choose one of the options. + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + + stgan = hub.Module(name="stgan_celeba") + + test_img_path = ["/PATH/TO/IMAGE"] + org_info = ["Female,Black_Hair"] + trans_attr = ["Bangs"] + + # set input dict + input_dict = {"image": test_img_path, "style": trans_attr, "info": org_info} + + # execute predict and print the result + results = stgan.generate(data=input_dict) + print(results) + ``` + +- ### 3、API + + - ```python + def generate(data) + ``` + + - Style transfer API. + + - **Parameter** + + - data(list[dict]): Each element in the list is dict and each field is: + - image (list\[str\]): Each element in the list is the path of the image to be converted. + - style (list\[str\]): Each element in the list is a string, fill in the face attributes to be converted. + - info (list\[str\]): Represents the face attributes of the original image. Different attributes are separated by commas. + + + - **Return** + - res (list\[str\]): Save path of the result. + +## IV. 
Release Note + +- 1.0.0 + + First release diff --git a/modules/image/Image_gan/style_transfer/ID_Photo_GEN/README_en.md b/modules/image/Image_gan/style_transfer/ID_Photo_GEN/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..ba06c5e7bcf9b7b2291c48958756b84f4cc3234d --- /dev/null +++ b/modules/image/Image_gan/style_transfer/ID_Photo_GEN/README_en.md @@ -0,0 +1,98 @@ +# ID_Photo_GEN + +|Module Name |ID_Photo_GEN| +| :--- | :---: | +|Category|Image generation| +|Network|HRNet_W18| +|Dataset |-| +|Fine-tuning supported or not |No| +|Module Size|28KB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I. Basic Information + +- ### Application Effect Display + - Sample results: +

+ +

+ + +- ### Module Introduction + + - This model is based on face_landmark_localization and FCN_HRNet_W18_Face_Seg. It can generate ID photos with white, red and blue background + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install ID_Photo_GEN + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import cv2 + import paddlehub as hub + + model = hub.Module(name='ID_Photo_GEN') + + result = model.Photo_GEN( + images=[cv2.imread('/PATH/TO/IMAGE')], + paths=None, + batch_size=1, + output_dir='output', + visualization=True, + use_gpu=False) + ``` + +- ### 2、API + + - ```python + def Photo_GEN( + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + use_gpu=False): + ``` + + - Prediction API, generating ID photos. + + - **Parameter** + * images (list[np.ndarray]): Image data, ndarray.shape is in the format [H, W, C], BGR. + * paths (list[str]): Image path + * batch_size (int): Batch size + * output_dir (str): Save path of images, output by default. + * visualization (bool): Whether to save the recognition results as picture files. + * use_gpu (bool): Use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + + **NOTE:** Choose one of `paths` and `images` to provide input data. + + - **Return** + + * results (list[dict{"write":np.ndarray,"blue":np.ndarray,"red":np.ndarray}]): The list of generation results. + + +## IV. Release Note + +- 1.0.0 + + First release \ No newline at end of file diff --git a/modules/image/Image_gan/style_transfer/Photo2Cartoon/README.md b/modules/image/Image_gan/style_transfer/Photo2Cartoon/README.md index 59626d007fea4170afbf6df694c3f67e9d80cf19..160d9d3b3d87568d2f32851d931aa56bdce2d534 100644 --- a/modules/image/Image_gan/style_transfer/Photo2Cartoon/README.md +++ b/modules/image/Image_gan/style_transfer/Photo2Cartoon/README.md @@ -1,96 +1,96 @@ -# Photo2Cartoon - -|模型名称|Photo2Cartoon| -| :--- | :---: | -|类别|图像 - 图像生成| -|网络|U-GAT-IT| -|数据集|cartoon_data| -|是否支持Fine-tuning|否| -|模型大小|205MB| -|最新更新日期|2021-02-26| -|数据指标|-| - - -## 一、模型基本信息 - -- ### 应用效果展示 - - 样例结果示例: -

-
-

- - - -- ### 模型介绍 - - - 本模型封装自[小视科技photo2cartoon项目的paddlepaddle版本](https://github.com/minivision-ai/photo2cartoon-paddle)。 - - -## 二、安装 - -- ### 1、环境依赖 - - - paddlepaddle >= 2.0.0 - - - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - -- ### 2、安装 - - - ```shell - $ hub install Photo2Cartoon - ``` - - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) - | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) - -## 三、模型API预测 - -- ### 1、代码示例 - - - ```python - import paddlehub as hub - import cv2 - - model = hub.Module(name="Photo2Cartoon") - result = model.Cartoon_GEN(images=[cv2.imread('/PATH/TO/IMAGE')]) - # or - # result = model.Cartoon_GEN(paths=['/PATH/TO/IMAGE']) - ``` - -- ### 2、API - - - ```python - def Cartoon_GEN(images=None, - paths=None, - batch_size=1, - output_dir='output', - visualization=False, - use_gpu=False): - ``` - - - 人像卡通化图像生成API。 - - - **参数** - - - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
- - paths (list\[str\]): 输入图像路径;
- - output\_dir (str): 图片的保存路径,默认设为 output;
- - batch_size (int) : batch大小;
- - visualization (bool) : 是否将结果保存为图片文件;;
- - use_gpu (bool) : 是否使用 GPU 进行推理。 - - **NOTE:** paths和images两个参数选择其一进行提供数据 - - - **返回** - - res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\] - - - -## 四、更新历史 - -* 1.0.0 - - 初始发布 - - - ```shell - $ hub install Photo2Cartoon==1.0.0 +# Photo2Cartoon + +|模型名称|Photo2Cartoon| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|U-GAT-IT| +|数据集|cartoon_data| +|是否支持Fine-tuning|否| +|模型大小|205MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+
+

+ + + +- ### 模型介绍 + + - 本模型封装自[小视科技photo2cartoon项目的paddlepaddle版本](https://github.com/minivision-ai/photo2cartoon-paddle)。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install Photo2Cartoon + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="Photo2Cartoon") + result = model.Cartoon_GEN(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.Cartoon_GEN(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def Cartoon_GEN(images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + use_gpu=False): + ``` + + - 人像卡通化图像生成API。 + + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
+ - paths (list\[str\]): 输入图像路径;
+ - output\_dir (str): 图片的保存路径,默认设为 output;
+ - batch_size (int) : batch大小;
+ - visualization (bool) : 是否将结果保存为图片文件;<br/>
+ - use_gpu (bool) : 是否使用 GPU 进行推理。 + + **NOTE:** paths和images两个参数选择其一进行提供数据 + + - **返回** + - res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\] + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install Photo2Cartoon==1.0.0 ``` diff --git a/modules/image/Image_gan/style_transfer/Photo2Cartoon/README_en.md b/modules/image/Image_gan/style_transfer/Photo2Cartoon/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..db345676faba055bc12357b648f09c36a25dff81 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/Photo2Cartoon/README_en.md @@ -0,0 +1,95 @@ +# Photo2Cartoon + +|Module Name|Photo2Cartoon| +| :--- | :---: | +|Category|image generation| +|Network|U-GAT-IT| +|Dataset|cartoon_data| +|Fine-tuning supported or not|No| +|Module Size|205MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+
+

+ + + +- ### Module Introduction + + - This module encapsulates project [photo2cartoon](https://github.com/minivision-ai/photo2cartoon-paddle). + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install Photo2Cartoon + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="Photo2Cartoon") + result = model.Cartoon_GEN(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.Cartoon_GEN(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def Cartoon_GEN(images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + use_gpu=False): + ``` + + - Cartoon style generation API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - output_dir (str): save path of images; + - batch_size (int): the size of batch; + - visualization (bool): Whether to save the results as picture files; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + - res (list\[numpy.ndarray\]): result list,ndarray.shape is \[H, W, C\] + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install Photo2Cartoon==1.0.0 + ``` diff --git a/modules/image/Image_gan/style_transfer/U2Net_Portrait/README.md b/modules/image/Image_gan/style_transfer/U2Net_Portrait/README.md index 4175ec598c6e02a65a744a9b26dd7c00aa2efd43..c387e12f68ecfcbc5ba744690bc71f3220cf5469 100644 --- a/modules/image/Image_gan/style_transfer/U2Net_Portrait/README.md +++ b/modules/image/Image_gan/style_transfer/U2Net_Portrait/README.md @@ -1,103 +1,103 @@ -# U2Net_Portrait - -|模型名称|U2Net_Portrait| -| :--- | :---: | -|类别|图像 - 图像生成| -|网络|U^2Net| -|数据集|-| -|是否支持Fine-tuning|否| -|模型大小|254MB| -|最新更新日期|2021-02-26| -|数据指标|-| - - -## 一、模型基本信息 - -- ### 应用效果展示 - - 样例结果示例: -

- -
- 输入图像 -
- -
- 输出图像 -
-

- - -- ### 模型介绍 - - - U2Net_Portrait 可以用于提取人脸的素描结果。 - - -## 二、安装 - -- ### 1、环境依赖 - - - paddlepaddle >= 2.0.0 - - - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - -- ### 2、安装 - - - ```shell - $ hub install U2Net_Portrait - ``` - - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) - | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) - -## 三、模型API预测 - -- ### 1、预测代码示例 - - - ```python - import paddlehub as hub - import cv2 - - model = hub.Module(name="U2Net_Portrait") - result = model.Portrait_GEN(images=[cv2.imread('/PATH/TO/IMAGE')]) - # or - # result = model.Portrait_GEN(paths=['/PATH/TO/IMAGE']) - ``` - -- ### 2、API - - - ```python - def Portrait_GEN(images=None, - paths=None, - scale=1, - batch_size=1, - output_dir='output', - face_detection=True, - visualization=False): - ``` - - - 人脸画像生成API。 - - - **参数** - - - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
- - paths (list\[str\]): 输入图像路径;
- - scale (float) : 缩放因子(与face_detection相关联);
- - batch_size (int) : batch大小;
- - output\_dir (str): 图片的保存路径,默认设为 output;
- - visualization (bool) : 是否将结果保存为图片文件;;
- - **NOTE:** paths和images两个参数选择其一进行提供数据 - - - **返回** - - res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\] - - - -## 四、更新历史 - -* 1.0.0 - - 初始发布 - - - ```shell - $ hub install U2Net_Portrait==1.0.0 - ``` +# U2Net_Portrait + +|模型名称|U2Net_Portrait| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|U^2Net| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|254MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ +
+ 输入图像 +
+ +
+ 输出图像 +
+

+ + +- ### 模型介绍 + + - U2Net_Portrait 可以用于提取人脸的素描结果。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install U2Net_Portrait + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="U2Net_Portrait") + result = model.Portrait_GEN(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.Portrait_GEN(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def Portrait_GEN(images=None, + paths=None, + scale=1, + batch_size=1, + output_dir='output', + face_detection=True, + visualization=False): + ``` + + - 人脸画像生成API。 + + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
+ - paths (list\[str\]): 输入图像路径;
+ - scale (float) : 缩放因子(与face_detection相关联);
+ - batch_size (int) : batch大小;
+ - output\_dir (str): 图片的保存路径,默认设为 output;
+ - visualization (bool) : 是否将结果保存为图片文件;<br/>
+ + **NOTE:** paths和images两个参数选择其一进行提供数据 + + - **返回** + - res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\] + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install U2Net_Portrait==1.0.0 + ``` diff --git a/modules/image/Image_gan/style_transfer/U2Net_Portrait/README_en.md b/modules/image/Image_gan/style_transfer/U2Net_Portrait/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..dcd7d77290b4fe243cd02a7382a6c1da0f60566c --- /dev/null +++ b/modules/image/Image_gan/style_transfer/U2Net_Portrait/README_en.md @@ -0,0 +1,102 @@ +# U2Net_Portrait + +|Module Name|U2Net_Portrait| +| :--- | :---: | +|Category|image generation| +|Network|U^2Net| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size|254MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+ Input image +
+ +
+ Output image +
+

+ + +- ### Module Introduction + + - U2Net_Portrait can be used to create a face portrait. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install U2Net_Portrait + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="U2Net_Portrait") + result = model.Portrait_GEN(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.Portrait_GEN(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def Portrait_GEN(images=None, + paths=None, + scale=1, + batch_size=1, + output_dir='output', + face_detection=True, + visualization=False): + ``` + + - Portrait generation API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - scale (float) : scale for resizing image;
+ - batch_size (int): the size of batch; + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + - res (list\[numpy.ndarray\]): result list,ndarray.shape is \[H, W, C\] + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install U2Net_Portrait==1.0.0 + ``` diff --git a/modules/image/Image_gan/style_transfer/UGATIT_100w/README.md b/modules/image/Image_gan/style_transfer/UGATIT_100w/README.md index 939f2021f0fb8cc20edad0703bde17132201a7d5..0a3ebeabf8e2a35081102ea866db10d5b293e22d 100644 --- a/modules/image/Image_gan/style_transfer/UGATIT_100w/README.md +++ b/modules/image/Image_gan/style_transfer/UGATIT_100w/README.md @@ -50,7 +50,7 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/Image_gan/style_transfer/UGATIT_100w/README_en.md b/modules/image/Image_gan/style_transfer/UGATIT_100w/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..a4e1ee688fe4be12653dad6479cb302f2f027719 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_100w/README_en.md @@ -0,0 +1,139 @@ +# UGATIT_100w + +|Module Name|UGATIT_100w| +| :--- | :---: | +|Category|image generation| +|Network|U-GAT-IT| +|Dataset|selfie2anime| +|Fine-tuning supported or not|No| +|Module Size|41MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+ Input image +
+ +
+ Output image +
+

+ + +- ### Module Introduction + + - UGATIT is a model for style transfer. This module can be used to transfer a face image to cartoon style. For more information, please refer to [UGATIT-Paddle Project](https://github.com/miraiwk/UGATIT-paddle). + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install UGATIT_100w + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="UGATIT_100w") + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer(images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False) + ``` + + - Style transfer API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - visualization (bool): Whether to save the results as picture files; + - output_dir (str): save path of images; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + - res (list\[numpy.ndarray\]): result list,ndarray.shape is \[H, W, C\] + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of style transfer. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m UGATIT_100w + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/UGATIT_100w" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install UGATIT_100w==1.0.0 + ``` diff --git a/modules/image/Image_gan/style_transfer/UGATIT_83w/README_en.md b/modules/image/Image_gan/style_transfer/UGATIT_83w/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..b4afce178b13910e0de350bf2fd20c1532aad355 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_83w/README_en.md @@ -0,0 +1,134 @@ +# UGATIT_83w + +|Module Name|UGATIT_83w| +| :--- | :---: | +|Category|Image editing| +|Network |U-GAT-IT| +|Dataset|selfie2anime| +|Fine-tuning supported or not|No| +|Module Size|41MB| +|Latest update date |2021-02-26| +|Data indicators|-| + + +## I. Basic Information + +- ### Application Effect Display + - Sample results: +

+ +

+ + + +- ### Module Introduction + + - UGATIT can transfer the input face image into the anime style. + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.2 + + - paddlehub >= 1.8.0 + +- ### 2、Installation + + - ```shell + $ hub install UGATIT_83w + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import cv2 + import paddlehub as hub + + model = hub.Module(name='UGATIT_83w', use_gpu=False) + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False + ) + ``` + + - Style transfer API, convert the input face image into anime style. + + - **Parameters** + * images (list\[numpy.ndarray\]): Image data, ndarray.shape is in the format [H, W, C], BGR. + * paths (list\[str\]): image path,default is None; + * batch\_size (int): Batch size, default is 1; + * visualization (bool): Whether to save the recognition results as picture files, default is False. + * output\_dir (str): Save path of images, `output` by default. + + **NOTE:** Choose one of `paths` and `images` to provide data. + + - **Return** + + - res (list\[numpy.ndarray\]): Result, ndarray.shape is in the format [H, W, C]. + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of Style transfer task. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m UGATIT_83w + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/UGATIT_83w" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + +## V. Release Note + +- 1.0.0 + + First release \ No newline at end of file diff --git a/modules/image/Image_gan/style_transfer/UGATIT_92w/README_en.md b/modules/image/Image_gan/style_transfer/UGATIT_92w/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..ef7a22a493e58d3745b383b60e0dccc44ae1fdf9 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_92w/README_en.md @@ -0,0 +1,134 @@ +# UGATIT_92w + +|Module Name|UGATIT_92w| +| :--- | :---: | +|Category|Image editing| +|Network |U-GAT-IT| +|Dataset|selfie2anime| +|Fine-tuning supported or not|No| +|Module Size|41MB| +|Latest update date |2021-02-26| +|Data indicators|-| + + +## I. 
Basic Information + +- ### Application Effect Display + - Sample results: +

+ +

+ + + +- ### Module Introduction + + - UGATIT can transfer the input face image into the anime style. + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.2 + + - paddlehub >= 1.8.0 + +- ### 2、Installation + + - ```shell + $ hub install UGATIT_92w + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import cv2 + import paddlehub as hub + + model = hub.Module(name='UGATIT_92w', use_gpu=False) + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False + ) + ``` + + - Style transfer API, convert the input face image into anime style. + + - **Parameters** + * images (list\[numpy.ndarray\]): Image data, ndarray.shape is in the format [H, W, C], BGR. + * paths (list\[str\]): Image path,default is None; + * batch\_size (int): Batch size, default is 1; + * visualization (bool): Whether to save the recognition results as picture files, default is False. + * output\_dir (str): save path of images, `output` by default. + + **NOTE:** Choose one of `paths` and `images` to provide input data. + + - **Return** + + - res (list\[numpy.ndarray\]): Style tranfer result, ndarray.shape is in the format [H, W, C]. + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of Style transfer task. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m UGATIT_92w + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/UGATIT_92w" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + +## V. 
Release Note + +- 1.0.0 + + First release \ No newline at end of file diff --git a/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README.md b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README.md index 78393fe38c39f241b105272cb5cc1d827440e7c6..b4f1d91a522ea9325e7bb839d17a11c62e0443d9 100644 --- a/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README.md +++ b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README.md @@ -51,7 +51,7 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README_en.md b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..349aecfc2da96b73c638180e3bf41050d5126a29 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README_en.md @@ -0,0 +1,149 @@ +# animegan_v1_hayao_60 + +|Module Name|animegan_v1_hayao_60| +| :--- | :---: | +|Category|image generation| +|Network|AnimeGAN| +|Dataset|The Wind Rises| +|Fine-tuning supported or not|No| +|Module Size|18MB| +|Latest update date|2021-07-30| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+ Input Image +
+ +
+ Output Image +
+

+ + + +- ### Module Introduction + + - AnimeGAN V1 is a style transfer model, which can transfer a image style to Miyazaki carton style. For more information, please refer to [AnimeGAN V1 Project](https://github.com/TachibanaYoshino/AnimeGAN). + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install animegan_v1_hayao_60 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="animegan_v1_hayao_60") + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer(images=None, + paths=None, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024) + ``` + + - Style transfer API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + - min\_size (int): min size of image shape,default is 32; + - max\_size (int): max size of image shape,default is 1024. + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + - res (list\[numpy.ndarray\]): result list,ndarray.shape is \[H, W, C\] + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of style transfer. +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m animegan_v1_hayao_60 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
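+
+  - For example, to make a GPU visible to the service, export the variable before running the startup command (a minimal sketch; assumes a Linux shell and that GPU 0 is available):
+
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m animegan_v1_hayao_60
+    ```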
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/animegan_v1_hayao_60" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Adapt to paddlehub2.0 + +* 1.0.2 + + Delete optional parameter batch_size + + - ```shell + $ hub install animegan_v1_hayao_60==1.0.2 + ``` diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README.md index 8ea008138f591b03fac2e7a922578fcbc5b4c0cb..7b0a66a3c6086c24f5bb949952dd96faee57352c 100644 --- a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README.md +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README.md @@ -49,7 +49,7 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README_en.md b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..b5de4cb5e0a585e68482224372a1345601a41d37 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README_en.md @@ -0,0 +1,148 @@ +# animegan_v2_hayao_64 + +|Module Name|animegan_v2_hayao_64| +| :--- | :---: | +|Category|image generation| +|Network|AnimeGAN| +|Dataset|The Wind Rises| +|Fine-tuning supported or not|No| +|Module Size|9.4MB| +|Latest update date|2021-07-30| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+ Input image +
+ +
+ Output image +
+

+ +- ### Module Introduction + + - AnimeGAN V2 is a style transfer model, which can transfer a image style to Miyazaki carton style. For more information, please refer to [AnimeGAN V2 Project](https://github.com/TachibanaYoshino/AnimeGANv2). + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install animegan_v2_hayao_64 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="animegan_v2_hayao_64") + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def style_transfer(images=None, + paths=None, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024) + ``` + + - Style transfer API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + - min\_size (int): min size of image shape,default is 32; + - max\_size (int): max size of image shape,default is 1024. + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + - res (list\[numpy.ndarray\]): result list,ndarray.shape is \[H, W, C\] + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of style transfer. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m animegan_v2_hayao_64 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
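+
+  - If port 8866 is already taken, the service can be started on another port; the request URL used in Step 2 below must then point to that port. A sketch, assuming the `-p`/`--port` option of `hub serving start` is available in your PaddleHub version:
+
+  - ```shell
+    $ hub serving start -m animegan_v2_hayao_64 -p 8867
+    ```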
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/animegan_v2_hayao_64" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Adapt to paddlehub2.0 + +* 1.0.2 + + Delete optional parameter batch_size + + - ```shell + $ hub install animegan_v2_hayao_64==1.0.2 + ``` diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README.md index 4aa09781dcccfa0b56e33d7ad31c91dfa475c1a8..060a33f252e9fcafeec5706eb5bbb93cb06ae103 100644 --- a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README.md +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README.md @@ -51,7 +51,7 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README_en.md b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..91f959ff88edc0d7de9809b47b4fdcbbcff9f40d --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README_en.md @@ -0,0 +1,148 @@ +# animegan_v2_hayao_99 + +|Module Name|animegan_v2_hayao_99| +| :--- | :---: | +|Category|image generation| +|Network|AnimeGAN| +|Dataset|The Wind Rises| +|Fine-tuning supported or not|No| +|Module Size|9.4MB| +|Latest update date|2021-07-30| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+ Input image +
+ +
+ Output image +
+

+ + +- ### Module Introduction + + - AnimeGAN V2 is a style transfer model, which can transfer a image style to Miyazaki carton style. For more information, please refer to [AnimeGAN V2 Project](https://github.com/TachibanaYoshino/AnimeGANv2). + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install animegan_v2_hayao_99 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="animegan_v2_hayao_99") + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer(images=None, + paths=None, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024) + ``` + + - Style transfer API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + - min\_size (int): min size of image shape,default is 32; + - max\_size (int): max size of image shape,default is 1024. + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + - res (list\[numpy.ndarray\]): result list,ndarray.shape is \[H, W, C\] + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of style transfer. +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m animegan_v2_hayao_99 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
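+
+  - The request shown in Step 2 below prints `r.json()["results"]`. If the module returns the stylized images as nested pixel lists (as, for example, the face_parse module in this repository does via `result.tolist()`), they can be turned back into an image file roughly as follows. This is a hypothetical sketch and may not match this module's actual serving output; it assumes numpy and cv2 are installed:
+
+  - ```python
+    import numpy as np
+    import cv2
+
+    # r is the response object obtained from the request in Step 2 below
+    result = np.array(r.json()["results"][0], dtype=np.uint8)  # assumed to be an [H, W, C] pixel list
+    # channel order (RGB vs BGR) may need adjusting depending on the module
+    cv2.imwrite('stylized_output.jpg', result)
+    ```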
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/animegan_v2_hayao_99" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Adapt to paddlehub2.0 + +* 1.0.2 + + Delete optional parameter batch_size + + - ```shell + $ hub install animegan_v2_hayao_99==1.0.2 + ``` diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/README_en.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..77d724986f3442b9ba35ef789381a338c6208c28 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/README_en.md @@ -0,0 +1,151 @@ +# animegan_v2_paprika_54 + +|Module Name |animegan_v2_paprika_54| +| :--- | :---: | +|Category |Image generation| +|Network|AnimeGAN| +|Dataset|Paprika| +|Fine-tuning supported or not|No| +|Module Size|9.4MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +

+ +
+ Input image +
+ +
+ Output image +
+

+ + + +- ### Module Introduction + + - AnimeGAN V2 image style stransfer model, the model can convert the input image into red pepper anime style, the model weight is converted from[AnimeGAN V2 official repo](https://github.com/TachibanaYoshino/AnimeGAN)。 + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install animegan_v2_paprika_54 + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="animegan_v2_paprika_54") + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer(images=None, + paths=None, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024) + ``` + + - Style transfer API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): Image data, ndarray.shape is in the format [H, W, C], BGR. + - paths (list\[str\]): Image path. + - output\_dir (str): Save path of images, `output` by default. + - visualization (bool): Whether to save the results as picture files. + - min\_size (int): Minimum size, default is 32. + - max\_size (int): Maximum size, default is 1024. + + **NOTE:** Choose one of `paths` and `images` to provide input data. + + - **Return** + - res (list\[numpy.ndarray\]): The list of style transfer results,ndarray.shape is in the format [H, W, C]. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of style transfer. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m animegan_v2_paprika_54 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/animegan_v2_paprika_54" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V. Release Note + +- 1.0.0 + + First release. + +* 1.0.1 + + Support paddlehub2.0. + +* 1.0.2 + + Delete batch_size. 
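+
+  To pin this exact version at install time (mirroring the release notes of the other AnimeGAN modules in this repository; assumes 1.0.2 is the published version):
+
+  - ```shell
+    $ hub install animegan_v2_paprika_54==1.0.2
+    ```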
diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README.md index 627b5e57d1bec696920e8a334a8197e297efe650..d10dcbb2c21d2c74f27379dcdb10715f5bd8cf10 100644 --- a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README.md +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README.md @@ -50,7 +50,7 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README_en.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..1296fac7289e2b94002cee5fbbbf376ba70befde --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README_en.md @@ -0,0 +1,147 @@ +# animegan_v2_paprika_74 + +|Module Name|animegan_v2_paprika_74| +| :--- | :---: | +|Category|image generation| +|Network|AnimeGAN| +|Dataset|Paprika| +|Fine-tuning supported or not|No| +|Module Size|9.4MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+ Input Image +
+ +
+ Output Image +
+

+ + +- ### Module Introduction + + - AnimeGAN V2 is a style transfer model, which can transfer a image style to paprika carton style. For more information, please refer to [AnimeGAN V2 Project](https://github.com/TachibanaYoshino/AnimeGANv2). + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install animegan_v2_paprika_74 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="animegan_v2_paprika_74") + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer(images=None, + paths=None, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024) + ``` + + - Style transfer API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + - min\_size (int): min size of image shape,default is 32; + - max\_size (int): max size of image shape,default is 1024. + + - **Return** + - res (list\[numpy.ndarray\]): result list,ndarray.shape is \[H, W, C\] + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of style transfer. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m animegan_v2_paprika_74 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/animegan_v2_paprika_74" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Adapt to paddlehub2.0 + +* 1.0.2 + + Delete optional parameter batch_size + + - ```shell + $ hub install animegan_v2_paprika_74==1.0.2 + ``` diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/README_en.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..fa2a8953a76dab2cac7fef702977291cc5504303 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/README_en.md @@ -0,0 +1,151 @@ +# animegan_v2_paprika_97 + +|Module Name |animegan_v2_paprika_97| +| :--- | :---: | +|Category |Image generation| +|Network|AnimeGAN| +|Dataset|Paprika| +|Fine-tuning supported or not|No| +|Module Size|9.7MB| +|Latest update date|2021-07-30| +|Data indicators|-| + + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +

+ +
+ Input image +
+ +
+ Output image +
+

+ + + +- ### Module Introduction + + - AnimeGAN V2 image style stransfer model, the model can convert the input image into red pepper anime style, the model weight is converted from[AnimeGAN V2 official repo](https://github.com/TachibanaYoshino/AnimeGAN)。 + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install animegan_v2_paprika_97 + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="animegan_v2_paprika_97") + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer(images=None, + paths=None, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024) + ``` + + - Style transfer API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): Image data, ndarray.shape is in the format [H, W, C], BGR. + - paths (list\[str\]): Image path. + - output\_dir (str): Save path of images, `output` by default. + - visualization (bool): Whether to save the results as picture files. + - min\_size (int): Minimum size, default is 32. + - max\_size (int): Maximum size, default is 1024. + + **NOTE:** Choose one of `paths` and `images` to provide input data. + + - **Return** + - res (list\[numpy.ndarray\]): The list of style transfer results,ndarray.shape is in the format [H, W, C]. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of style transfer. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m animegan_v2_paprika_97 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/animegan_v2_paprika_97" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V. Release Note + +- 1.0.0 + + First release. + +* 1.0.1 + + Support paddlehub2.0. + +* 1.0.2 + + Delete batch_size. 
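+
+  To pin this exact version at install time (mirroring the release notes of the other AnimeGAN modules in this repository; assumes 1.0.2 is the published version):
+
+  - ```shell
+    $ hub install animegan_v2_paprika_97==1.0.2
+    ```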
diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README.md index e2c0567ed9bfdbd0a76be8e4dfb56e0629182a68..34fb9a67c882311709b87a8ca83f1077046c5de7 100644 --- a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README.md +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README.md @@ -50,7 +50,7 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README_en.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..27ea28710976b6ae4256ca7ffe36ff7f22a66106 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README_en.md @@ -0,0 +1,149 @@ +# animegan_v2_paprika_98 + +|Module Name|animegan_v2_paprika_98| +| :--- | :---: | +|Category|image generation| +|Network|AnimeGAN| +|Dataset|Paprika| +|Fine-tuning supported or not|No| +|Module Size|9.4MB| +|Latest update date|2021-07-30| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+ Input image +
+ +
+ Output image +
+

+ + +- ### Module Introduction + + - AnimeGAN V2 is a style transfer model, which can transfer a image style to paprika carton style. For more information, please refer to [AnimeGAN V2 Project](https://github.com/TachibanaYoshino/AnimeGANv2). + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install animegan_v2_paprika_98 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="animegan_v2_paprika_98") + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer(images=None, + paths=None, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024) + ``` + + - Style transfer API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + - min\_size (int): min size of image shape,default is 32; + - max\_size (int): max size of image shape,default is 1024. + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + - res (list\[numpy.ndarray\]): result list,ndarray.shape is \[H, W, C\] + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of style transfer. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m animegan_v2_paprika_98 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/animegan_v2_paprika_98" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Adapt to paddlehub2.0 + +* 1.0.2 + + Delete optional parameter batch_size + + - ```shell + $ hub install animegan_v2_paprika_98==1.0.2 + ``` diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README.md index 51e552a8006474d469973de30ecee164f1953e21..0aed09d52d08cc583abe267ede731f1903ec3d8d 100644 --- a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README.md +++ b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README.md @@ -50,7 +50,7 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README_en.md b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..22d50826fb9def6742f2836e1099002843ce7ed6 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README_en.md @@ -0,0 +1,150 @@ +# animegan_v2_shinkai_33 + +|Module Name|animegan_v2_shinkai_33| +| :--- | :---: | +|Category|image generation| +|Network|AnimeGAN| +|Dataset|Your Name, Weathering with you| +|Fine-tuning supported or not|No| +|Module Size|9.4MB| +|Latest update date|2021-07-30| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+ Input image +
+ +
+ Output image +
+

+ + +- ### Module Introduction + + - AnimeGAN V2 is a style transfer model, which can transfer a image style to Shinkai carton style. For more information, please refer to [AnimeGAN V2 Project](https://github.com/TachibanaYoshino/AnimeGANv2). + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install animegan_v2_shinkai_33 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="animegan_v2_shinkai_33") + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer(images=None, + paths=None, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024) + ``` + + - Style transfer API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + - min\_size (int): min size of image shape,default is 32; + - max\_size (int): max size of image shape,default is 1024. + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + - res (list\[numpy.ndarray\]): result list,ndarray.shape is \[H, W, C\] + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of style transfer. + + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m animegan_v2_shinkai_33 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
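+
+  - Equivalently, the environment variable can be set just for the startup command (a sketch; assumes a POSIX shell, and GPU 0 is only an example):
+
+  - ```shell
+    $ CUDA_VISIBLE_DEVICES=0 hub serving start -m animegan_v2_shinkai_33
+    ```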
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/animegan_v2_shinkai_33" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Adapt to paddlehub2.0 + +* 1.0.2 + + Delete optional parameter batch_size + + - ```shell + $ hub install animegan_v2_shinkai_33==1.0.2 + ``` diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README.md index bc8d5de7dbca4bff629ee395c149e11eac93e128..e8c04f4acae6d5dcc61b5455e7ad2684ef67eef2 100644 --- a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README.md +++ b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README.md @@ -50,7 +50,7 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README_en.md b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..88c5c6f8f9a7b1606b9966fbc54434a7e6a43eba --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README_en.md @@ -0,0 +1,149 @@ +# animegan_v2_shinkai_53 + +|Module Name|animegan_v2_shinkai_53| +| :--- | :---: | +|Category|image generation| +|Network|AnimeGAN| +|Dataset|Your Name, Weathering with you| +|Fine-tuning supported or not|No| +|Module Size|9.4MB| +|Latest update date|2021-07-30| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+ Input image +
+ +
+ Output image +
+

+ + +- ### Module Introduction + + - AnimeGAN V2 is a style transfer model, which can transfer a image style to Shinkai carton style. For more information, please refer to [AnimeGAN V2 Project](https://github.com/TachibanaYoshino/AnimeGANv2). + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install animegan_v2_shinkai_53 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="animegan_v2_shinkai_53") + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer(images=None, + paths=None, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024) + ``` + + - Style transfer API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + - min\_size (int): min size of image shape,default is 32; + - max\_size (int): max size of image shape,default is 1024. + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + - res (list\[numpy.ndarray\]): result list,ndarray.shape is \[H, W, C\] + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of style transfer. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m animegan_v2_shinkai_53 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
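+
+  - When the service is no longer needed, it can be shut down again (a sketch; assumes your PaddleHub version provides `hub serving stop` and accepts the port via `-p`):
+
+  - ```shell
+    $ hub serving stop -p 8866
+    ```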
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/animegan_v2_shinkai_53" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Adapt to paddlehub2.0 + +* 1.0.2 + + Delete optional parameter batch_size + + - ```shell + $ hub install animegan_v2_shinkai_53==1.0.2 + ``` diff --git a/modules/image/Image_gan/style_transfer/face_parse/README.md b/modules/image/Image_gan/style_transfer/face_parse/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d9716150c156912c42eebe67bf0cd38db9f2bcd --- /dev/null +++ b/modules/image/Image_gan/style_transfer/face_parse/README.md @@ -0,0 +1,133 @@ +# face_parse + +|模型名称|face_parse| +| :--- | :---: | +|类别|图像 - 人脸解析| +|网络|BiSeNet| +|数据集|COCO-Stuff| +|是否支持Fine-tuning|否| +|模型大小|77MB| +|最新更新日期|2021-12-07| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ +
+ 输入图像 +
+ +
+ 输出图像 +
+

+ +- ### 模型介绍 + + - 人脸解析是语义图像分割的一种特殊情况,人脸解析是计算人脸图像中不同语义成分(如头发、嘴唇、鼻子、眼睛等)的像素级标签映射。给定一个输入的人脸图像,人脸解析将为每个语义成分分配一个像素级标签。 + + + +## 二、安装 + +- ### 1、环境依赖 + - ppgan + - dlib + +- ### 2、安装 + + - ```shell + $ hub install face_parse + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run face_parse --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现人脸解析模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="face_parse") + input_path = ["/PATH/TO/IMAGE"] + # Read from a file + module.style_transfer(paths=input_path, output_dir='./transfer_result/', use_gpu=True) + ``` + +- ### 3、API + + - ```python + style_transfer(images=None, paths=None, output_dir='./transfer_result/', use_gpu=False, visualization=True): + ``` + - 人脸解析转换API。 + + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
+ - paths (list\[str\]): 图片的路径;
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线人脸解析转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m face_parse + ``` + + - 这样就完成了一个人脸解析转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/face_parse" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install face_parse==1.0.0 + ``` diff --git a/modules/image/Image_gan/style_transfer/face_parse/model.py b/modules/image/Image_gan/style_transfer/face_parse/model.py new file mode 100644 index 0000000000000000000000000000000000000000..c5df633416cd0ddc199bbb4bc7908e9dec008c58 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/face_parse/model.py @@ -0,0 +1,51 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import argparse + +from PIL import Image +import numpy as np +import cv2 + +import ppgan.faceutils as futils +from ppgan.utils.preprocess import * +from ppgan.utils.visual import mask2image + + +class FaceParsePredictor: + def __init__(self): + self.input_size = (512, 512) + self.up_ratio = 0.6 / 0.85 + self.down_ratio = 0.2 / 0.85 + self.width_ratio = 0.2 / 0.85 + self.face_parser = futils.mask.FaceParser() + + def run(self, image): + image = Image.fromarray(image) + face = futils.dlib.detect(image) + + if not face: + return + face_on_image = face[0] + image, face, crop_face = futils.dlib.crop(image, face_on_image, self.up_ratio, self.down_ratio, + self.width_ratio) + np_image = np.array(image) + mask = self.face_parser.parse(np.float32(cv2.resize(np_image, self.input_size))) + mask = cv2.resize(mask.numpy(), (256, 256)) + mask = mask.astype(np.uint8) + mask = mask2image(mask) + + return mask diff --git a/modules/image/Image_gan/style_transfer/face_parse/module.py b/modules/image/Image_gan/style_transfer/face_parse/module.py new file mode 100644 index 0000000000000000000000000000000000000000..f1985f9ba23faf68a74e07315d2dc766ffb4f0fc --- /dev/null +++ b/modules/image/Image_gan/style_transfer/face_parse/module.py @@ -0,0 +1,133 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import FaceParsePredictor +from .util import base64_to_cv2 +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo( + name="face_parse", type="CV/style_transfer", author="paddlepaddle", author_email="", summary="", version="1.0.0") +class Face_parse: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "bisenet.pdparams") + + self.network = FaceParsePredictor() + + def style_transfer(self, + images: list = None, + paths: list = None, + output_dir: str = './transfer_result/', + use_gpu: bool = False, + visualization: bool = True): + ''' + + + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR(read by cv2). + paths (list[str]): paths to images + output_dir (str): the dir to save the results + use_gpu (bool): if True, use gpu to perform the computation, otherwise cpu. + visualization (bool): if True, save results in output_dir. + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + out = self.network.run(image) + results.append(out) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + out = self.network.run(image) + results.append(out) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + if out is not None: + cv2.imwrite(os.path.join(output_dir, 'output_{}.png'.format(i)), out[:, :, ::-1]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.style_transfer( + paths=[self.args.input_path], + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. 
+ """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.style_transfer(images=images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='transfer_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/Image_gan/style_transfer/face_parse/requirements.txt b/modules/image/Image_gan/style_transfer/face_parse/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..d9bfc85782a3ee323241fe7beb87a9f281c120fe --- /dev/null +++ b/modules/image/Image_gan/style_transfer/face_parse/requirements.txt @@ -0,0 +1,2 @@ +ppgan +dlib diff --git a/modules/image/Image_gan/style_transfer/face_parse/util.py b/modules/image/Image_gan/style_transfer/face_parse/util.py new file mode 100644 index 0000000000000000000000000000000000000000..b88ac3562b74cadc1d4d6459a56097ca4a938a0b --- /dev/null +++ b/modules/image/Image_gan/style_transfer/face_parse/util.py @@ -0,0 +1,10 @@ +import base64 +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/Image_gan/style_transfer/lapstyle_circuit/README.md b/modules/image/Image_gan/style_transfer/lapstyle_circuit/README.md new file mode 100644 index 0000000000000000000000000000000000000000..39c3270adf3914cacd7c60f6b250be58b74188c1 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_circuit/README.md @@ -0,0 +1,142 @@ +# lapstyle_circuit + +|模型名称|lapstyle_circuit| +| :--- | :---: | +|类别|图像 - 风格迁移| +|网络|LapStyle| +|数据集|COCO| +|是否支持Fine-tuning|否| +|模型大小|121MB| +|最新更新日期|2021-12-07| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ +
+ 输入内容图形 +
+ +
+ 输入风格图形 +
+ +
+ 输出图像 +
+

+ +- ### 模型介绍 + + - LapStyle--拉普拉斯金字塔风格化网络,是一种能够生成高质量风格化图的快速前馈风格化网络,能渐进地生成复杂的纹理迁移效果,同时能够在512分辨率下达到100fps的速度。可实现多种不同艺术风格的快速迁移,在艺术图像生成、滤镜等领域有广泛的应用。 + + - 更多详情参考:[Drafting and Revision: Laplacian Pyramid Network for Fast High-Quality Artistic Style Transfer](https://arxiv.org/pdf/2104.05376.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + - ppgan + +- ### 2、安装 + + - ```shell + $ hub install lapstyle_circuit + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run lapstyle_circuit --content "/PATH/TO/IMAGE" --style "/PATH/TO/IMAGE1" + ``` + - 通过命令行方式实现风格转换模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="lapstyle_circuit") + content = cv2.imread("/PATH/TO/IMAGE") + style = cv2.imread("/PATH/TO/IMAGE1") + results = module.style_transfer(images=[{'content':content, 'style':style}], output_dir='./transfer_result', use_gpu=True) + ``` + +- ### 3、API + + - ```python + style_transfer(images=None, paths=None, output_dir='./transfer_result/', use_gpu=False, visualization=True) + ``` + - 风格转换API。 + + - **参数** + + - images (list[dict]): data of images, 每一个元素都为一个 dict,有关键字 content, style, 相应取值为: + - content (numpy.ndarray): 待转换的图片,shape 为 \[H, W, C\],BGR格式;
+ - style (numpy.ndarray) : 风格图像,shape为 \[H, W, C\],BGR格式;
+ - paths (list[str]): paths to images, 每一个元素都为一个dict, 有关键字 content, style, 相应取值为: + - content (str): 待转换的图片的路径;
+ - style (str) : 风格图像的路径;
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m lapstyle_circuit + ``` + + - 这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[{'content': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE")), 'style': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE1"))}]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/lapstyle_circuit" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install lapstyle_circuit==1.0.0 + ``` diff --git a/modules/image/Image_gan/style_transfer/lapstyle_circuit/model.py b/modules/image/Image_gan/style_transfer/lapstyle_circuit/model.py new file mode 100644 index 0000000000000000000000000000000000000000..d66c02322ecf630d643b23e193ac95b05d62a826 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_circuit/model.py @@ -0,0 +1,140 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import urllib.request + +import cv2 as cv +import numpy as np +import paddle +import paddle.nn.functional as F +from paddle.vision.transforms import functional +from PIL import Image +from ppgan.models.generators import DecoderNet +from ppgan.models.generators import Encoder +from ppgan.models.generators import RevisionNet +from ppgan.utils.visual import tensor2img + + +def img(img): + # some images have 4 channels + if img.shape[2] > 3: + img = img[:, :, :3] + # HWC to CHW + return img + + +def img_totensor(content_img, style_img): + if content_img.ndim == 2: + content_img = cv.cvtColor(content_img, cv.COLOR_GRAY2RGB) + else: + content_img = cv.cvtColor(content_img, cv.COLOR_BGR2RGB) + h, w, c = content_img.shape + content_img = Image.fromarray(content_img) + content_img = content_img.resize((512, 512), Image.BILINEAR) + content_img = np.array(content_img) + content_img = img(content_img) + content_img = functional.to_tensor(content_img) + + style_img = cv.cvtColor(style_img, cv.COLOR_BGR2RGB) + style_img = Image.fromarray(style_img) + style_img = style_img.resize((512, 512), Image.BILINEAR) + style_img = np.array(style_img) + style_img = img(style_img) + style_img = functional.to_tensor(style_img) + + content_img = paddle.unsqueeze(content_img, axis=0) + style_img = paddle.unsqueeze(style_img, axis=0) + return content_img, style_img, h, w + + +def tensor_resample(tensor, dst_size, mode='bilinear'): + return F.interpolate(tensor, dst_size, mode=mode, align_corners=False) + + +def laplacian(x): + """ + Laplacian + + return: + x - upsample(downsample(x)) + """ + return x - tensor_resample(tensor_resample(x, [x.shape[2] // 2, x.shape[3] // 2]), [x.shape[2], x.shape[3]]) + + +def make_laplace_pyramid(x, levels): + """ + Make Laplacian Pyramid + """ + pyramid = [] + current = x + for i in range(levels): + pyramid.append(laplacian(current)) + current = tensor_resample(current, (max(current.shape[2] // 2, 1), max(current.shape[3] // 2, 1))) + pyramid.append(current) + return pyramid + + +def fold_laplace_pyramid(pyramid): + """ + Fold Laplacian Pyramid + """ + current = pyramid[-1] + for i in range(len(pyramid) - 2, -1, -1): # iterate from len-2 to 0 + up_h, up_w = pyramid[i].shape[2], pyramid[i].shape[3] + current = pyramid[i] + tensor_resample(current, (up_h, up_w)) + return current + + +class LapStylePredictor: + def __init__(self, weight_path=None): + + self.net_enc = Encoder() + self.net_dec = DecoderNet() + self.net_rev = RevisionNet() + self.net_rev_2 = RevisionNet() + + self.net_enc.set_dict(paddle.load(weight_path)['net_enc']) + self.net_enc.eval() + self.net_dec.set_dict(paddle.load(weight_path)['net_dec']) + self.net_dec.eval() + self.net_rev.set_dict(paddle.load(weight_path)['net_rev']) + self.net_rev.eval() + self.net_rev_2.set_dict(paddle.load(weight_path)['net_rev_2']) + self.net_rev_2.eval() + + def run(self, content_img, style_image): + content_img, style_img, h, w = img_totensor(content_img, style_image) + pyr_ci = make_laplace_pyramid(content_img, 2) + pyr_si = make_laplace_pyramid(style_img, 2) + pyr_ci.append(content_img) + pyr_si.append(style_img) + cF = self.net_enc(pyr_ci[2]) + sF = self.net_enc(pyr_si[2]) + stylized_small = self.net_dec(cF, sF) + stylized_up = F.interpolate(stylized_small, scale_factor=2) + + revnet_input = paddle.concat(x=[pyr_ci[1], stylized_up], axis=1) + stylized_rev_lap = self.net_rev(revnet_input) + stylized_rev = fold_laplace_pyramid([stylized_rev_lap, stylized_small]) + + stylized_up = F.interpolate(stylized_rev, scale_factor=2) + + 
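+        # Second revision pass: upsample the 256x256 result to 512x512 and
+        # concatenate it with the finest Laplacian band of the content image
+        # (pyr_ci[0]); net_rev_2 then predicts the finest stylized band, and
+        # folding [finest band, previous band, 128x128 draft] gives the final
+        # 512x512 output.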
revnet_input = paddle.concat(x=[pyr_ci[0], stylized_up], axis=1) + stylized_rev_lap_second = self.net_rev_2(revnet_input) + stylized_rev_second = fold_laplace_pyramid([stylized_rev_lap_second, stylized_rev_lap, stylized_small]) + + stylized = stylized_rev_second + stylized_visual = tensor2img(stylized, min_max=(0., 1.)) + + return stylized_visual diff --git a/modules/image/Image_gan/style_transfer/lapstyle_circuit/module.py b/modules/image/Image_gan/style_transfer/lapstyle_circuit/module.py new file mode 100644 index 0000000000000000000000000000000000000000..6a4fbc67816660e202960828b2c4abd042e71a3c --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_circuit/module.py @@ -0,0 +1,150 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import LapStylePredictor +from .util import base64_to_cv2 +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo( + name="lapstyle_circuit", + type="CV/style_transfer", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class Lapstyle_circuit: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "lapstyle_circuit.pdparams") + + self.network = LapStylePredictor(weight_path=self.pretrained_model) + + def style_transfer(self, + images: list = None, + paths: list = None, + output_dir: str = './transfer_result/', + use_gpu: bool = False, + visualization: bool = True): + ''' + Transfer a image to circuit style. + + images (list[dict]): data of images, each element is a dict: + - content (numpy.ndarray): input image,shape is \[H, W, C\],BGR format;
+ - style (numpy.ndarray) : style image,shape is \[H, W, C\],BGR format;
+        paths (list[dict]): paths to images, each element is a dict:
+            - content (str): path to input image;
+ - style (str) : path to style image;
+ + output_dir (str): the dir to save the results + use_gpu (bool): if True, use gpu to perform the computation, otherwise cpu. + visualization (bool): if True, save results in output_dir. + + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image_dict in images: + content_img = image_dict['content'] + style_img = image_dict['style'] + results.append(self.network.run(content_img, style_img)) + + if paths != None: + for path_dict in paths: + content_img = cv2.imread(path_dict['content']) + style_img = cv2.imread(path_dict['style']) + results.append(self.network.run(content_img, style_img)) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + cv2.imwrite(os.path.join(output_dir, 'output_{}.png'.format(i)), out[:, :, ::-1]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + + self.style_transfer( + paths=[{ + 'content': self.args.content, + 'style': self.args.style + }], + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = copy.deepcopy(images) + for image in images_decode: + image['content'] = base64_to_cv2(image['content']) + image['style'] = base64_to_cv2(image['style']) + results = self.style_transfer(images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='transfer_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--content', type=str, help="path to content image.") + self.arg_input_group.add_argument('--style', type=str, help="path to style image.") diff --git a/modules/image/Image_gan/style_transfer/lapstyle_circuit/requirements.txt b/modules/image/Image_gan/style_transfer/lapstyle_circuit/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..67e9bb6fa840355e9ed0d44b7134850f1fe22fe1 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_circuit/requirements.txt @@ -0,0 +1 @@ +ppgan diff --git a/modules/image/Image_gan/style_transfer/lapstyle_circuit/util.py b/modules/image/Image_gan/style_transfer/lapstyle_circuit/util.py new file mode 100644 index 0000000000000000000000000000000000000000..531a0ae0d487822a870ba7f09817e658967aff10 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_circuit/util.py @@ -0,0 +1,11 @@ +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/Image_gan/style_transfer/lapstyle_ocean/README.md b/modules/image/Image_gan/style_transfer/lapstyle_ocean/README.md new file mode 100644 index 0000000000000000000000000000000000000000..497dba5af97ab602827ddf87e1749e8586b4a296 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_ocean/README.md @@ -0,0 +1,142 @@ +# lapstyle_ocean + +|模型名称|lapstyle_ocean| +| :--- | :---: | +|类别|图像 - 风格迁移| +|网络|LapStyle| +|数据集|COCO| +|是否支持Fine-tuning|否| +|模型大小|121MB| +|最新更新日期|2021-12-07| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+    (样例图:输入内容图形、输入风格图形、输出图像;图片略)

+ +- ### 模型介绍 + + - LapStyle--拉普拉斯金字塔风格化网络,是一种能够生成高质量风格化图的快速前馈风格化网络,能渐进地生成复杂的纹理迁移效果,同时能够在512分辨率下达到100fps的速度。可实现多种不同艺术风格的快速迁移,在艺术图像生成、滤镜等领域有广泛的应用。 + + - 更多详情参考:[Drafting and Revision: Laplacian Pyramid Network for Fast High-Quality Artistic Style Transfer](https://arxiv.org/pdf/2104.05376.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + - ppgan + +- ### 2、安装 + + - ```shell + $ hub install lapstyle_ocean + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run lapstyle_ocean --content "/PATH/TO/IMAGE" --style "/PATH/TO/IMAGE1" + ``` + - 通过命令行方式实现风格转换模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="lapstyle_ocean") + content = cv2.imread("/PATH/TO/IMAGE") + style = cv2.imread("/PATH/TO/IMAGE1") + results = module.style_transfer(images=[{'content':content, 'style':style}], output_dir='./transfer_result', use_gpu=True) + ``` + +- ### 3、API + + - ```python + style_transfer(images=None, paths=None, output_dir='./transfer_result/', use_gpu=False, visualization=True) + ``` + - 风格转换API。 + + - **参数** + + - images (list[dict]): data of images, 每一个元素都为一个 dict,有关键字 content, style, 相应取值为: + - content (numpy.ndarray): 待转换的图片,shape 为 \[H, W, C\],BGR格式;
+ - style (numpy.ndarray) : 风格图像,shape为 \[H, W, C\],BGR格式;
+      - paths (list[dict]): paths to images, 每一个元素都为一个dict, 有关键字 content, style, 相应取值为:
+        - content (str): 待转换的图片的路径;
+ - style (str) : 风格图像的路径;
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m lapstyle_ocean + ``` + + - 这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[{'content': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE")), 'style': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE1"))}]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/lapstyle_ocean" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install lapstyle_ocean==1.0.0 + ``` diff --git a/modules/image/Image_gan/style_transfer/lapstyle_ocean/model.py b/modules/image/Image_gan/style_transfer/lapstyle_ocean/model.py new file mode 100644 index 0000000000000000000000000000000000000000..d66c02322ecf630d643b23e193ac95b05d62a826 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_ocean/model.py @@ -0,0 +1,140 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
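+# Overview of the inference pipeline assembled below: both input images are
+# resized to 512x512 and decomposed into a two-level Laplacian pyramid
+# (fold_laplace_pyramid inverts make_laplace_pyramid up to floating-point
+# error). Encoder + DecoderNet stylize the coarse 128x128 residual, and two
+# RevisionNets progressively restore high-frequency detail at 256 and 512
+# resolution before tensor2img converts the result back to an image array.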
+import os +import urllib.request + +import cv2 as cv +import numpy as np +import paddle +import paddle.nn.functional as F +from paddle.vision.transforms import functional +from PIL import Image +from ppgan.models.generators import DecoderNet +from ppgan.models.generators import Encoder +from ppgan.models.generators import RevisionNet +from ppgan.utils.visual import tensor2img + + +def img(img): + # some images have 4 channels + if img.shape[2] > 3: + img = img[:, :, :3] + # HWC to CHW + return img + + +def img_totensor(content_img, style_img): + if content_img.ndim == 2: + content_img = cv.cvtColor(content_img, cv.COLOR_GRAY2RGB) + else: + content_img = cv.cvtColor(content_img, cv.COLOR_BGR2RGB) + h, w, c = content_img.shape + content_img = Image.fromarray(content_img) + content_img = content_img.resize((512, 512), Image.BILINEAR) + content_img = np.array(content_img) + content_img = img(content_img) + content_img = functional.to_tensor(content_img) + + style_img = cv.cvtColor(style_img, cv.COLOR_BGR2RGB) + style_img = Image.fromarray(style_img) + style_img = style_img.resize((512, 512), Image.BILINEAR) + style_img = np.array(style_img) + style_img = img(style_img) + style_img = functional.to_tensor(style_img) + + content_img = paddle.unsqueeze(content_img, axis=0) + style_img = paddle.unsqueeze(style_img, axis=0) + return content_img, style_img, h, w + + +def tensor_resample(tensor, dst_size, mode='bilinear'): + return F.interpolate(tensor, dst_size, mode=mode, align_corners=False) + + +def laplacian(x): + """ + Laplacian + + return: + x - upsample(downsample(x)) + """ + return x - tensor_resample(tensor_resample(x, [x.shape[2] // 2, x.shape[3] // 2]), [x.shape[2], x.shape[3]]) + + +def make_laplace_pyramid(x, levels): + """ + Make Laplacian Pyramid + """ + pyramid = [] + current = x + for i in range(levels): + pyramid.append(laplacian(current)) + current = tensor_resample(current, (max(current.shape[2] // 2, 1), max(current.shape[3] // 2, 1))) + pyramid.append(current) + return pyramid + + +def fold_laplace_pyramid(pyramid): + """ + Fold Laplacian Pyramid + """ + current = pyramid[-1] + for i in range(len(pyramid) - 2, -1, -1): # iterate from len-2 to 0 + up_h, up_w = pyramid[i].shape[2], pyramid[i].shape[3] + current = pyramid[i] + tensor_resample(current, (up_h, up_w)) + return current + + +class LapStylePredictor: + def __init__(self, weight_path=None): + + self.net_enc = Encoder() + self.net_dec = DecoderNet() + self.net_rev = RevisionNet() + self.net_rev_2 = RevisionNet() + + self.net_enc.set_dict(paddle.load(weight_path)['net_enc']) + self.net_enc.eval() + self.net_dec.set_dict(paddle.load(weight_path)['net_dec']) + self.net_dec.eval() + self.net_rev.set_dict(paddle.load(weight_path)['net_rev']) + self.net_rev.eval() + self.net_rev_2.set_dict(paddle.load(weight_path)['net_rev_2']) + self.net_rev_2.eval() + + def run(self, content_img, style_image): + content_img, style_img, h, w = img_totensor(content_img, style_image) + pyr_ci = make_laplace_pyramid(content_img, 2) + pyr_si = make_laplace_pyramid(style_img, 2) + pyr_ci.append(content_img) + pyr_si.append(style_img) + cF = self.net_enc(pyr_ci[2]) + sF = self.net_enc(pyr_si[2]) + stylized_small = self.net_dec(cF, sF) + stylized_up = F.interpolate(stylized_small, scale_factor=2) + + revnet_input = paddle.concat(x=[pyr_ci[1], stylized_up], axis=1) + stylized_rev_lap = self.net_rev(revnet_input) + stylized_rev = fold_laplace_pyramid([stylized_rev_lap, stylized_small]) + + stylized_up = F.interpolate(stylized_rev, scale_factor=2) + + 
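+        # Second revision pass: upsample the 256x256 result to 512x512 and
+        # concatenate it with the finest Laplacian band of the content image
+        # (pyr_ci[0]); net_rev_2 then predicts the finest stylized band, and
+        # folding [finest band, previous band, 128x128 draft] gives the final
+        # 512x512 output.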
revnet_input = paddle.concat(x=[pyr_ci[0], stylized_up], axis=1) + stylized_rev_lap_second = self.net_rev_2(revnet_input) + stylized_rev_second = fold_laplace_pyramid([stylized_rev_lap_second, stylized_rev_lap, stylized_small]) + + stylized = stylized_rev_second + stylized_visual = tensor2img(stylized, min_max=(0., 1.)) + + return stylized_visual diff --git a/modules/image/Image_gan/style_transfer/lapstyle_ocean/module.py b/modules/image/Image_gan/style_transfer/lapstyle_ocean/module.py new file mode 100644 index 0000000000000000000000000000000000000000..18534a3756805db51d33e9ff4bbb59bcf76d0dc7 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_ocean/module.py @@ -0,0 +1,149 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import LapStylePredictor +from .util import base64_to_cv2 +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo( + name="lapstyle_ocean", + type="CV/style_transfer", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class Lapstyle_ocean: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "lapstyle_ocean.pdparams") + + self.network = LapStylePredictor(weight_path=self.pretrained_model) + + def style_transfer(self, + images: list = None, + paths: list = None, + output_dir: str = './transfer_result/', + use_gpu: bool = False, + visualization: bool = True): + ''' + Transfer a image to ocean style. + + images (list[dict]): data of images, each element is a dict: + - content (numpy.ndarray): input image,shape is \[H, W, C\],BGR format;
+ - style (numpy.ndarray) : style image,shape is \[H, W, C\],BGR format;
+        paths (list[dict]): paths to images, each element is a dict:
+            - content (str): path to input image;
+ - style (str) : path to style image;
+ + output_dir (str): the dir to save the results + use_gpu (bool): if True, use gpu to perform the computation, otherwise cpu. + visualization (bool): if True, save results in output_dir. + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image_dict in images: + content_img = image_dict['content'] + style_img = image_dict['style'] + results.append(self.network.run(content_img, style_img)) + + if paths != None: + for path_dict in paths: + content_img = cv2.imread(path_dict['content']) + style_img = cv2.imread(path_dict['style']) + results.append(self.network.run(content_img, style_img)) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + cv2.imwrite(os.path.join(output_dir, 'output_{}.png'.format(i)), out[:, :, ::-1]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + + self.style_transfer( + paths=[{ + 'content': self.args.content, + 'style': self.args.style + }], + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = copy.deepcopy(images) + for image in images_decode: + image['content'] = base64_to_cv2(image['content']) + image['style'] = base64_to_cv2(image['style']) + results = self.style_transfer(images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='transfer_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--content', type=str, help="path to content image.") + self.arg_input_group.add_argument('--style', type=str, help="path to style image.") diff --git a/modules/image/Image_gan/style_transfer/lapstyle_ocean/requirements.txt b/modules/image/Image_gan/style_transfer/lapstyle_ocean/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..67e9bb6fa840355e9ed0d44b7134850f1fe22fe1 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_ocean/requirements.txt @@ -0,0 +1 @@ +ppgan diff --git a/modules/image/Image_gan/style_transfer/lapstyle_ocean/util.py b/modules/image/Image_gan/style_transfer/lapstyle_ocean/util.py new file mode 100644 index 0000000000000000000000000000000000000000..531a0ae0d487822a870ba7f09817e658967aff10 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_ocean/util.py @@ -0,0 +1,11 @@ +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/Image_gan/style_transfer/lapstyle_starrynew/README.md b/modules/image/Image_gan/style_transfer/lapstyle_starrynew/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4219317c3239d0083413bad47f645aebccd4aa23 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_starrynew/README.md @@ -0,0 +1,142 @@ +# lapstyle_starrynew + +|模型名称|lapstyle_starrynew| +| :--- | :---: | +|类别|图像 - 风格迁移| +|网络|LapStyle| +|数据集|COCO| +|是否支持Fine-tuning|否| +|模型大小|121MB| +|最新更新日期|2021-12-07| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+    (样例图:输入内容图形、输入风格图形、输出图像;图片略)

+ +- ### 模型介绍 + + - LapStyle--拉普拉斯金字塔风格化网络,是一种能够生成高质量风格化图的快速前馈风格化网络,能渐进地生成复杂的纹理迁移效果,同时能够在512分辨率下达到100fps的速度。可实现多种不同艺术风格的快速迁移,在艺术图像生成、滤镜等领域有广泛的应用。 + + - 更多详情参考:[Drafting and Revision: Laplacian Pyramid Network for Fast High-Quality Artistic Style Transfer](https://arxiv.org/pdf/2104.05376.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + - ppgan + +- ### 2、安装 + + - ```shell + $ hub install lapstyle_starrynew + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run lapstyle_starrynew --content "/PATH/TO/IMAGE" --style "/PATH/TO/IMAGE1" + ``` + - 通过命令行方式实现风格转换模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="lapstyle_starrynew") + content = cv2.imread("/PATH/TO/IMAGE") + style = cv2.imread("/PATH/TO/IMAGE1") + results = module.style_transfer(images=[{'content':content, 'style':style}], output_dir='./transfer_result', use_gpu=True) + ``` + +- ### 3、API + + - ```python + style_transfer(images=None, paths=None, output_dir='./transfer_result/', use_gpu=False, visualization=True) + ``` + - 风格转换API。 + + - **参数** + + - images (list[dict]): data of images, 每一个元素都为一个 dict,有关键字 content, style, 相应取值为: + - content (numpy.ndarray): 待转换的图片,shape 为 \[H, W, C\],BGR格式;
+ - style (numpy.ndarray) : 风格图像,shape为 \[H, W, C\],BGR格式;
+      - paths (list[dict]): paths to images, 每一个元素都为一个dict, 有关键字 content, style, 相应取值为:
+        - content (str): 待转换的图片的路径;
+ - style (str) : 风格图像的路径;
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m lapstyle_starrynew + ``` + + - 这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[{'content': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE")), 'style': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE1"))}]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/lapstyle_starrynew" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install lapstyle_starrynew==1.0.0 + ``` diff --git a/modules/image/Image_gan/style_transfer/lapstyle_starrynew/model.py b/modules/image/Image_gan/style_transfer/lapstyle_starrynew/model.py new file mode 100644 index 0000000000000000000000000000000000000000..d66c02322ecf630d643b23e193ac95b05d62a826 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_starrynew/model.py @@ -0,0 +1,140 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
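+# Overview of the inference pipeline assembled below: both input images are
+# resized to 512x512 and decomposed into a two-level Laplacian pyramid
+# (fold_laplace_pyramid inverts make_laplace_pyramid up to floating-point
+# error). Encoder + DecoderNet stylize the coarse 128x128 residual, and two
+# RevisionNets progressively restore high-frequency detail at 256 and 512
+# resolution before tensor2img converts the result back to an image array.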
+import os +import urllib.request + +import cv2 as cv +import numpy as np +import paddle +import paddle.nn.functional as F +from paddle.vision.transforms import functional +from PIL import Image +from ppgan.models.generators import DecoderNet +from ppgan.models.generators import Encoder +from ppgan.models.generators import RevisionNet +from ppgan.utils.visual import tensor2img + + +def img(img): + # some images have 4 channels + if img.shape[2] > 3: + img = img[:, :, :3] + # HWC to CHW + return img + + +def img_totensor(content_img, style_img): + if content_img.ndim == 2: + content_img = cv.cvtColor(content_img, cv.COLOR_GRAY2RGB) + else: + content_img = cv.cvtColor(content_img, cv.COLOR_BGR2RGB) + h, w, c = content_img.shape + content_img = Image.fromarray(content_img) + content_img = content_img.resize((512, 512), Image.BILINEAR) + content_img = np.array(content_img) + content_img = img(content_img) + content_img = functional.to_tensor(content_img) + + style_img = cv.cvtColor(style_img, cv.COLOR_BGR2RGB) + style_img = Image.fromarray(style_img) + style_img = style_img.resize((512, 512), Image.BILINEAR) + style_img = np.array(style_img) + style_img = img(style_img) + style_img = functional.to_tensor(style_img) + + content_img = paddle.unsqueeze(content_img, axis=0) + style_img = paddle.unsqueeze(style_img, axis=0) + return content_img, style_img, h, w + + +def tensor_resample(tensor, dst_size, mode='bilinear'): + return F.interpolate(tensor, dst_size, mode=mode, align_corners=False) + + +def laplacian(x): + """ + Laplacian + + return: + x - upsample(downsample(x)) + """ + return x - tensor_resample(tensor_resample(x, [x.shape[2] // 2, x.shape[3] // 2]), [x.shape[2], x.shape[3]]) + + +def make_laplace_pyramid(x, levels): + """ + Make Laplacian Pyramid + """ + pyramid = [] + current = x + for i in range(levels): + pyramid.append(laplacian(current)) + current = tensor_resample(current, (max(current.shape[2] // 2, 1), max(current.shape[3] // 2, 1))) + pyramid.append(current) + return pyramid + + +def fold_laplace_pyramid(pyramid): + """ + Fold Laplacian Pyramid + """ + current = pyramid[-1] + for i in range(len(pyramid) - 2, -1, -1): # iterate from len-2 to 0 + up_h, up_w = pyramid[i].shape[2], pyramid[i].shape[3] + current = pyramid[i] + tensor_resample(current, (up_h, up_w)) + return current + + +class LapStylePredictor: + def __init__(self, weight_path=None): + + self.net_enc = Encoder() + self.net_dec = DecoderNet() + self.net_rev = RevisionNet() + self.net_rev_2 = RevisionNet() + + self.net_enc.set_dict(paddle.load(weight_path)['net_enc']) + self.net_enc.eval() + self.net_dec.set_dict(paddle.load(weight_path)['net_dec']) + self.net_dec.eval() + self.net_rev.set_dict(paddle.load(weight_path)['net_rev']) + self.net_rev.eval() + self.net_rev_2.set_dict(paddle.load(weight_path)['net_rev_2']) + self.net_rev_2.eval() + + def run(self, content_img, style_image): + content_img, style_img, h, w = img_totensor(content_img, style_image) + pyr_ci = make_laplace_pyramid(content_img, 2) + pyr_si = make_laplace_pyramid(style_img, 2) + pyr_ci.append(content_img) + pyr_si.append(style_img) + cF = self.net_enc(pyr_ci[2]) + sF = self.net_enc(pyr_si[2]) + stylized_small = self.net_dec(cF, sF) + stylized_up = F.interpolate(stylized_small, scale_factor=2) + + revnet_input = paddle.concat(x=[pyr_ci[1], stylized_up], axis=1) + stylized_rev_lap = self.net_rev(revnet_input) + stylized_rev = fold_laplace_pyramid([stylized_rev_lap, stylized_small]) + + stylized_up = F.interpolate(stylized_rev, scale_factor=2) + + 
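+        # Second revision pass: upsample the 256x256 result to 512x512 and
+        # concatenate it with the finest Laplacian band of the content image
+        # (pyr_ci[0]); net_rev_2 then predicts the finest stylized band, and
+        # folding [finest band, previous band, 128x128 draft] gives the final
+        # 512x512 output.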
revnet_input = paddle.concat(x=[pyr_ci[0], stylized_up], axis=1) + stylized_rev_lap_second = self.net_rev_2(revnet_input) + stylized_rev_second = fold_laplace_pyramid([stylized_rev_lap_second, stylized_rev_lap, stylized_small]) + + stylized = stylized_rev_second + stylized_visual = tensor2img(stylized, min_max=(0., 1.)) + + return stylized_visual diff --git a/modules/image/Image_gan/style_transfer/lapstyle_starrynew/module.py b/modules/image/Image_gan/style_transfer/lapstyle_starrynew/module.py new file mode 100644 index 0000000000000000000000000000000000000000..b6cdab72eb2d4c89bd53c5ba3a63adcbc061acc3 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_starrynew/module.py @@ -0,0 +1,148 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import LapStylePredictor +from .util import base64_to_cv2 +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo( + name="lapstyle_starrynew", + type="CV/style_transfer", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class Lapstyle_starrynew: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "lapstyle_starrynew.pdparams") + + self.network = LapStylePredictor(weight_path=self.pretrained_model) + + def style_transfer(self, + images: list = None, + paths: list = None, + output_dir: str = './transfer_result/', + use_gpu: bool = False, + visualization: bool = True): + ''' + Transfer a image to starrynew style. + + images (list[dict]): data of images, each element is a dict: + - content (numpy.ndarray): input image,shape is \[H, W, C\],BGR format;
+ - style (numpy.ndarray) : style image,shape is \[H, W, C\],BGR format;
+        paths (list[dict]): paths to images, each element is a dict:
+            - content (str): path to input image;
+ - style (str) : path to style image;
+ output_dir (str): the dir to save the results + use_gpu (bool): if True, use gpu to perform the computation, otherwise cpu. + visualization (bool): if True, save results in output_dir. + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image_dict in images: + content_img = image_dict['content'] + style_img = image_dict['style'] + results.append(self.network.run(content_img, style_img)) + + if paths != None: + for path_dict in paths: + content_img = cv2.imread(path_dict['content']) + style_img = cv2.imread(path_dict['style']) + results.append(self.network.run(content_img, style_img)) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + cv2.imwrite(os.path.join(output_dir, 'output_{}.png'.format(i)), out[:, :, ::-1]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + + self.style_transfer( + paths=[{ + 'content': self.args.content, + 'style': self.args.style + }], + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = copy.deepcopy(images) + for image in images_decode: + image['content'] = base64_to_cv2(image['content']) + image['style'] = base64_to_cv2(image['style']) + results = self.style_transfer(images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='transfer_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--content', type=str, help="path to content image.") + self.arg_input_group.add_argument('--style', type=str, help="path to style image.") diff --git a/modules/image/Image_gan/style_transfer/lapstyle_starrynew/requirements.txt b/modules/image/Image_gan/style_transfer/lapstyle_starrynew/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..67e9bb6fa840355e9ed0d44b7134850f1fe22fe1 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_starrynew/requirements.txt @@ -0,0 +1 @@ +ppgan diff --git a/modules/image/Image_gan/style_transfer/lapstyle_starrynew/util.py b/modules/image/Image_gan/style_transfer/lapstyle_starrynew/util.py new file mode 100644 index 0000000000000000000000000000000000000000..531a0ae0d487822a870ba7f09817e658967aff10 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_starrynew/util.py @@ -0,0 +1,11 @@ +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/Image_gan/style_transfer/lapstyle_stars/README.md b/modules/image/Image_gan/style_transfer/lapstyle_stars/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a2e1abca2ec904df927ee6c594df09cfb40f0b9e --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_stars/README.md @@ -0,0 +1,142 @@ +# lapstyle_stars + +|模型名称|lapstyle_stars| +| :--- | :---: | +|类别|图像 - 风格迁移| +|网络|LapStyle| +|数据集|COCO| +|是否支持Fine-tuning|否| +|模型大小|121MB| +|最新更新日期|2021-12-07| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+    (样例图:输入内容图形、输入风格图形、输出图像;图片略)

+ +- ### 模型介绍 + + - LapStyle--拉普拉斯金字塔风格化网络,是一种能够生成高质量风格化图的快速前馈风格化网络,能渐进地生成复杂的纹理迁移效果,同时能够在512分辨率下达到100fps的速度。可实现多种不同艺术风格的快速迁移,在艺术图像生成、滤镜等领域有广泛的应用。 + + - 更多详情参考:[Drafting and Revision: Laplacian Pyramid Network for Fast High-Quality Artistic Style Transfer](https://arxiv.org/pdf/2104.05376.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + - ppgan + +- ### 2、安装 + + - ```shell + $ hub install lapstyle_stars + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run lapstyle_stars --content "/PATH/TO/IMAGE" --style "/PATH/TO/IMAGE1" + ``` + - 通过命令行方式实现风格转换模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="lapstyle_stars") + content = cv2.imread("/PATH/TO/IMAGE") + style = cv2.imread("/PATH/TO/IMAGE1") + results = module.style_transfer(images=[{'content':content, 'style':style}], output_dir='./transfer_result', use_gpu=True) + ``` + +- ### 3、API + + - ```python + style_transfer(images=None, paths=None, output_dir='./transfer_result/', use_gpu=False, visualization=True) + ``` + - 风格转换API。 + + - **参数** + + - images (list[dict]): data of images, 每一个元素都为一个 dict,有关键字 content, style, 相应取值为: + - content (numpy.ndarray): 待转换的图片,shape 为 \[H, W, C\],BGR格式;
+ - style (numpy.ndarray) : 风格图像,shape为 \[H, W, C\],BGR格式;
+      - paths (list[dict]): paths to images, 每一个元素都为一个dict, 有关键字 content, style, 相应取值为:
+        - content (str): 待转换的图片的路径;
+ - style (str) : 风格图像的路径;
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m lapstyle_stars + ``` + + - 这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[{'content': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE")), 'style': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE1"))}]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/lapstyle_stars" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install lapstyle_stars==1.0.0 + ``` diff --git a/modules/image/Image_gan/style_transfer/lapstyle_stars/model.py b/modules/image/Image_gan/style_transfer/lapstyle_stars/model.py new file mode 100644 index 0000000000000000000000000000000000000000..d66c02322ecf630d643b23e193ac95b05d62a826 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_stars/model.py @@ -0,0 +1,140 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
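+# Overview of the inference pipeline assembled below: both input images are
+# resized to 512x512 and decomposed into a two-level Laplacian pyramid
+# (fold_laplace_pyramid inverts make_laplace_pyramid up to floating-point
+# error). Encoder + DecoderNet stylize the coarse 128x128 residual, and two
+# RevisionNets progressively restore high-frequency detail at 256 and 512
+# resolution before tensor2img converts the result back to an image array.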
+import os +import urllib.request + +import cv2 as cv +import numpy as np +import paddle +import paddle.nn.functional as F +from paddle.vision.transforms import functional +from PIL import Image +from ppgan.models.generators import DecoderNet +from ppgan.models.generators import Encoder +from ppgan.models.generators import RevisionNet +from ppgan.utils.visual import tensor2img + + +def img(img): + # some images have 4 channels + if img.shape[2] > 3: + img = img[:, :, :3] + # HWC to CHW + return img + + +def img_totensor(content_img, style_img): + if content_img.ndim == 2: + content_img = cv.cvtColor(content_img, cv.COLOR_GRAY2RGB) + else: + content_img = cv.cvtColor(content_img, cv.COLOR_BGR2RGB) + h, w, c = content_img.shape + content_img = Image.fromarray(content_img) + content_img = content_img.resize((512, 512), Image.BILINEAR) + content_img = np.array(content_img) + content_img = img(content_img) + content_img = functional.to_tensor(content_img) + + style_img = cv.cvtColor(style_img, cv.COLOR_BGR2RGB) + style_img = Image.fromarray(style_img) + style_img = style_img.resize((512, 512), Image.BILINEAR) + style_img = np.array(style_img) + style_img = img(style_img) + style_img = functional.to_tensor(style_img) + + content_img = paddle.unsqueeze(content_img, axis=0) + style_img = paddle.unsqueeze(style_img, axis=0) + return content_img, style_img, h, w + + +def tensor_resample(tensor, dst_size, mode='bilinear'): + return F.interpolate(tensor, dst_size, mode=mode, align_corners=False) + + +def laplacian(x): + """ + Laplacian + + return: + x - upsample(downsample(x)) + """ + return x - tensor_resample(tensor_resample(x, [x.shape[2] // 2, x.shape[3] // 2]), [x.shape[2], x.shape[3]]) + + +def make_laplace_pyramid(x, levels): + """ + Make Laplacian Pyramid + """ + pyramid = [] + current = x + for i in range(levels): + pyramid.append(laplacian(current)) + current = tensor_resample(current, (max(current.shape[2] // 2, 1), max(current.shape[3] // 2, 1))) + pyramid.append(current) + return pyramid + + +def fold_laplace_pyramid(pyramid): + """ + Fold Laplacian Pyramid + """ + current = pyramid[-1] + for i in range(len(pyramid) - 2, -1, -1): # iterate from len-2 to 0 + up_h, up_w = pyramid[i].shape[2], pyramid[i].shape[3] + current = pyramid[i] + tensor_resample(current, (up_h, up_w)) + return current + + +class LapStylePredictor: + def __init__(self, weight_path=None): + + self.net_enc = Encoder() + self.net_dec = DecoderNet() + self.net_rev = RevisionNet() + self.net_rev_2 = RevisionNet() + + self.net_enc.set_dict(paddle.load(weight_path)['net_enc']) + self.net_enc.eval() + self.net_dec.set_dict(paddle.load(weight_path)['net_dec']) + self.net_dec.eval() + self.net_rev.set_dict(paddle.load(weight_path)['net_rev']) + self.net_rev.eval() + self.net_rev_2.set_dict(paddle.load(weight_path)['net_rev_2']) + self.net_rev_2.eval() + + def run(self, content_img, style_image): + content_img, style_img, h, w = img_totensor(content_img, style_image) + pyr_ci = make_laplace_pyramid(content_img, 2) + pyr_si = make_laplace_pyramid(style_img, 2) + pyr_ci.append(content_img) + pyr_si.append(style_img) + cF = self.net_enc(pyr_ci[2]) + sF = self.net_enc(pyr_si[2]) + stylized_small = self.net_dec(cF, sF) + stylized_up = F.interpolate(stylized_small, scale_factor=2) + + revnet_input = paddle.concat(x=[pyr_ci[1], stylized_up], axis=1) + stylized_rev_lap = self.net_rev(revnet_input) + stylized_rev = fold_laplace_pyramid([stylized_rev_lap, stylized_small]) + + stylized_up = F.interpolate(stylized_rev, scale_factor=2) + + 
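+        # Second revision pass: upsample the 256x256 result to 512x512 and
+        # concatenate it with the finest Laplacian band of the content image
+        # (pyr_ci[0]); net_rev_2 then predicts the finest stylized band, and
+        # folding [finest band, previous band, 128x128 draft] gives the final
+        # 512x512 output.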
revnet_input = paddle.concat(x=[pyr_ci[0], stylized_up], axis=1) + stylized_rev_lap_second = self.net_rev_2(revnet_input) + stylized_rev_second = fold_laplace_pyramid([stylized_rev_lap_second, stylized_rev_lap, stylized_small]) + + stylized = stylized_rev_second + stylized_visual = tensor2img(stylized, min_max=(0., 1.)) + + return stylized_visual diff --git a/modules/image/Image_gan/style_transfer/lapstyle_stars/module.py b/modules/image/Image_gan/style_transfer/lapstyle_stars/module.py new file mode 100644 index 0000000000000000000000000000000000000000..7dc3700eda1db2356cc439edeaa0e34723b8cecc --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_stars/module.py @@ -0,0 +1,149 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import LapStylePredictor +from .util import base64_to_cv2 +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo( + name="lapstyle_stars", + type="CV/style_transfer", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class Lapstyle_stars: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "lapstyle_stars.pdparams") + + self.network = LapStylePredictor(weight_path=self.pretrained_model) + + def style_transfer(self, + images: list = None, + paths: list = None, + output_dir: str = './transfer_result/', + use_gpu: bool = False, + visualization: bool = True): + ''' + Transfer a image to stars style. + + images (list[dict]): data of images, each element is a dict: + - content (numpy.ndarray): input image,shape is \[H, W, C\],BGR format;
+ - style (numpy.ndarray) : style image,shape is \[H, W, C\],BGR format;
+        paths (list[dict]): paths to images, each element is a dict:
+            - content (str): path to input image;
+ - style (str) : path to style image;
+ + output_dir (str): the dir to save the results + use_gpu (bool): if True, use gpu to perform the computation, otherwise cpu. + visualization (bool): if True, save results in output_dir. + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image_dict in images: + content_img = image_dict['content'] + style_img = image_dict['style'] + results.append(self.network.run(content_img, style_img)) + + if paths != None: + for path_dict in paths: + content_img = cv2.imread(path_dict['content']) + style_img = cv2.imread(path_dict['style']) + results.append(self.network.run(content_img, style_img)) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + cv2.imwrite(os.path.join(output_dir, 'output_{}.png'.format(i)), out[:, :, ::-1]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + + self.style_transfer( + paths=[{ + 'content': self.args.content, + 'style': self.args.style + }], + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = copy.deepcopy(images) + for image in images_decode: + image['content'] = base64_to_cv2(image['content']) + image['style'] = base64_to_cv2(image['style']) + results = self.style_transfer(images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='transfer_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--content', type=str, help="path to content image.") + self.arg_input_group.add_argument('--style', type=str, help="path to style image.") diff --git a/modules/image/Image_gan/style_transfer/lapstyle_stars/requirements.txt b/modules/image/Image_gan/style_transfer/lapstyle_stars/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..67e9bb6fa840355e9ed0d44b7134850f1fe22fe1 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_stars/requirements.txt @@ -0,0 +1 @@ +ppgan diff --git a/modules/image/Image_gan/style_transfer/lapstyle_stars/util.py b/modules/image/Image_gan/style_transfer/lapstyle_stars/util.py new file mode 100644 index 0000000000000000000000000000000000000000..531a0ae0d487822a870ba7f09817e658967aff10 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/lapstyle_stars/util.py @@ -0,0 +1,11 @@ +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/Image_gan/style_transfer/msgnet/README.md b/modules/image/Image_gan/style_transfer/msgnet/README.md index b2ead3a2a4c3e185ef2edf31c8b0e8ceac817451..8314a252f61cb92a8d121d129c6ee47ea9f8ad65 100644 --- a/modules/image/Image_gan/style_transfer/msgnet/README.md +++ b/modules/image/Image_gan/style_transfer/msgnet/README.md @@ -50,13 +50,14 @@ $ hub run msgnet --input_path "/PATH/TO/ORIGIN/IMAGE" --style_path "/PATH/TO/STY - ### 2.预测代码示例 + ```python import paddle import paddlehub as hub if __name__ == '__main__': model = hub.Module(name='msgnet') - result = model.predict(origin=["venice-boat.jpg"], style="candy.jpg", visualization=True, save_path ='style_tranfer') + result = model.predict(origin=["/PATH/TO/ORIGIN/IMAGE"], style="/PATH/TO/STYLE/IMAGE", visualization=True, save_path ="/PATH/TO/SAVE/IMAGE") ``` @@ -86,7 +87,7 @@ if __name__ == '__main__': - `transforms`: 数据预处理方式。 - `mode`: 选择数据模式,可选项有 `train`, `test`, 默认为`train`。 - - 数据集的准备代码可以参考 [minicoco.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.MiniCOCO()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + - 数据集的准备代码可以参考 [minicoco.py](../../paddlehub/datasets/minicoco.py)。`hub.datasets.MiniCOCO()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 - Step3: 加载预训练模型 @@ -117,7 +118,7 @@ if __name__ == '__main__': if __name__ == '__main__': model = hub.Module(name='msgnet', load_checkpoint="/PATH/TO/CHECKPOINT") - result = model.predict(origin=["venice-boat.jpg"], style="candy.jpg", visualization=True, save_path ='style_tranfer') + result = model.predict(origin=["/PATH/TO/ORIGIN/IMAGE"], style="/PATH/TO/STYLE/IMAGE", visualization=True, save_path ="/PATH/TO/SAVE/IMAGE") ``` - 参数配置正确后,请执行脚本`python predict.py`, 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 diff --git a/modules/image/Image_gan/style_transfer/msgnet/README_en.md b/modules/image/Image_gan/style_transfer/msgnet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..30d978b85d329b6fe64d2f86d3b868486e56af95 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/msgnet/README_en.md @@ -0,0 +1,185 @@ +# msgnet + +|Module Name|msgnet| +| :--- | :---: | +|Category|Image editing| +|Network|msgnet| +|Dataset|COCO2014| +|Fine-tuning supported or not|Yes| +|Module Size|68MB| +|Data indicators|-| +|Latest update date|2021-07-29| + + +## I. 
Basic Information + +- ### Application Effect Display + - Sample results: +


+ +- ### Module Introduction + + - Msgnet is a style transfer model. We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to [msgnet](https://github.com/zhanghang1989/PyTorch-Multi-Style-Transfer) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install msgnet + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ``` + $ hub run msgnet --input_path "/PATH/TO/ORIGIN/IMAGE" --style_path "/PATH/TO/STYLE/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + + +- ### 2、Prediction Code Example + + - ```python + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='msgnet') + result = model.predict(origin=["/PATH/TO/ORIGIN/IMAGE"], style="/PATH/TO/STYLE/IMAGE", visualization=True, save_path ="/PATH/TO/SAVE/IMAGE") + ``` + +- ### 3.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the msgnet model to fine-tune datasets such as [MiniCOCO](../../docs/reference/datasets.md#class-hubdatasetsMiniCOCO) by executing `python train.py`. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + import paddlehub.vision.transforms as T + + transform = T.Compose([T.Resize((256, 256), interpolation='LINEAR')]) + ``` + + - `transforms` The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + - ```python + from paddlehub.datasets.minicoco import MiniCOCO + + styledata = MiniCOCO(transform=transform, mode='train') + + ``` + * `transforms`: data preprocessing methods. + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + - Dataset preparation can be referred to [minicoco.py](../../paddlehub/datasets/minicoco.py). `hub.datasets.MiniCOCO()` will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + model = hub.Module(name='msgnet', load_checkpoint=None) + ``` + * `name`: model name. + * `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_style_ckpt') + trainer.train(styledata, epochs=101, batch_size=4, eval_dataset=styledata, log_interval=10, save_interval=10) + ``` + + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + - ```python + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='msgnet', load_checkpoint="/PATH/TO/CHECKPOINT") + result = model.predict(origin=["/PATH/TO/ORIGIN/IMAGE"], style="/PATH/TO/STYLE/IMAGE", visualization=True, save_path ="/PATH/TO/SAVE/IMAGE") + ``` + + - **Parameters** + * `origin`: Image path or ndarray data with format [H, W, C], BGR. + * `style`: Style image path. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'style_tranfer'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of style transfer. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m msgnet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + - ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # Send an HTTP request + org_im = cv2.imread('/PATH/TO/ORIGIN/IMAGE') + style_im = cv2.imread('/PATH/TO/STYLE/IMAGE') + data = {'images':[[cv2_to_base64(org_im)], cv2_to_base64(style_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/msgnet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data = base64_to_cv2(r.json()["results"]['data'][0]) + cv2.imwrite('style.png', data) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/Image_gan/style_transfer/paint_transformer/README.md b/modules/image/Image_gan/style_transfer/paint_transformer/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ca1309b8cb9d03c87bcd2ce67151f3f5c59bf60a --- /dev/null +++ b/modules/image/Image_gan/style_transfer/paint_transformer/README.md @@ -0,0 +1,134 @@ +# paint_transformer + +|模型名称|paint_transformer| +| :--- | :---: | +|类别|图像 - 风格转换| +|网络|Paint Transformer| +|数据集|百度自建数据集| +|是否支持Fine-tuning|否| +|模型大小|77MB| +|最新更新日期|2021-12-07| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ +
+ 输入图像 +
+ +
+ 输出图像 +
+

+ +- ### 模型介绍 + + - 该模型可以实现图像油画风格的转换。 + - 更多详情参考:[Paint Transformer: Feed Forward Neural Painting with Stroke Prediction](https://github.com/wzmsltw/PaintTransformer) + + + +## 二、安装 + +- ### 1、环境依赖 + - ppgan + +- ### 2、安装 + + - ```shell + $ hub install paint_transformer + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run paint_transformer --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现风格转换模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="paint_transformer") + input_path = ["/PATH/TO/IMAGE"] + # Read from a file + module.style_transfer(paths=input_path, output_dir='./transfer_result/', use_gpu=True) + ``` + +- ### 3、API + + - ```python + style_transfer(images=None, paths=None, output_dir='./transfer_result/', use_gpu=False, need_animation=False, visualization=True): + ``` + - 油画风格转换API。 + + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
+ - paths (list\[str\]): 图片的路径;
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - need_animation(bool): 是否保存中间结果形成动画 + - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线油画风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m paint_transformer + ``` + + - 这样就完成了一个油画风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/paint_transformer" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install paint_transformer==1.0.0 + ``` diff --git a/modules/image/Image_gan/style_transfer/paint_transformer/inference.py b/modules/image/Image_gan/style_transfer/paint_transformer/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..5bd2c1113549ceb7c74ab1445c0d39a92a475842 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/paint_transformer/inference.py @@ -0,0 +1,72 @@ +import numpy as np +from PIL import Image +import network +import os +import math +import render_utils +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import cv2 +import render_parallel +import render_serial + + +def main(input_path, model_path, output_dir, need_animation=False, resize_h=None, resize_w=None, serial=False): + if not os.path.exists(output_dir): + os.mkdir(output_dir) + input_name = os.path.basename(input_path) + output_path = os.path.join(output_dir, input_name) + frame_dir = None + if need_animation: + if not serial: + print('It must be under serial mode if animation results are required, so serial flag is set to True!') + serial = True + frame_dir = os.path.join(output_dir, input_name[:input_name.find('.')]) + if not os.path.exists(frame_dir): + os.mkdir(frame_dir) + stroke_num = 8 + + #* ----- load model ----- *# + paddle.set_device('gpu') + net_g = network.Painter(5, stroke_num, 256, 8, 3, 3) + net_g.set_state_dict(paddle.load(model_path)) + net_g.eval() + for param in net_g.parameters(): + param.stop_gradient = True + + #* ----- load brush ----- *# + brush_large_vertical = render_utils.read_img('brush/brush_large_vertical.png', 'L') + brush_large_horizontal = render_utils.read_img('brush/brush_large_horizontal.png', 'L') + meta_brushes = paddle.concat([brush_large_vertical, brush_large_horizontal], axis=0) + + import time + t0 = time.time() + + original_img = render_utils.read_img(input_path, 'RGB', resize_h, resize_w) + if serial: + final_result_list = render_serial.render_serial(original_img, net_g, meta_brushes) + if need_animation: + + print("total frame:", len(final_result_list)) + for idx, frame in enumerate(final_result_list): + cv2.imwrite(os.path.join(frame_dir, '%03d.png' % idx), frame) + else: + cv2.imwrite(output_path, final_result_list[-1]) + else: + final_result = render_parallel.render_parallel(original_img, net_g, meta_brushes) + cv2.imwrite(output_path, final_result) + + print("total infer time:", time.time() - t0) + + +if __name__ == '__main__': + + main( + input_path='input/chicago.jpg', + model_path='paint_best.pdparams', + output_dir='output/', + 
need_animation=True, # whether need intermediate results for animation. + resize_h=512, # resize original input to this size. None means do not resize. + resize_w=512, # resize original input to this size. None means do not resize. + serial=True) # if need animation, serial must be True. diff --git a/modules/image/Image_gan/style_transfer/paint_transformer/model.py b/modules/image/Image_gan/style_transfer/paint_transformer/model.py new file mode 100644 index 0000000000000000000000000000000000000000..b9f40a3ec0210a961fd90191e228f83712fd5781 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/paint_transformer/model.py @@ -0,0 +1,68 @@ +import paddle +import paddle.nn as nn +import math + + +class Painter(nn.Layer): + """ + network architecture written in paddle. + """ + + def __init__(self, param_per_stroke, total_strokes, hidden_dim, n_heads=8, n_enc_layers=3, n_dec_layers=3): + super().__init__() + self.enc_img = nn.Sequential( + nn.Pad2D([1, 1, 1, 1], 'reflect'), + nn.Conv2D(3, 32, 3, 1), + nn.BatchNorm2D(32), + nn.ReLU(), # maybe replace with the inplace version + nn.Pad2D([1, 1, 1, 1], 'reflect'), + nn.Conv2D(32, 64, 3, 2), + nn.BatchNorm2D(64), + nn.ReLU(), + nn.Pad2D([1, 1, 1, 1], 'reflect'), + nn.Conv2D(64, 128, 3, 2), + nn.BatchNorm2D(128), + nn.ReLU()) + self.enc_canvas = nn.Sequential( + nn.Pad2D([1, 1, 1, 1], 'reflect'), nn.Conv2D(3, 32, 3, 1), nn.BatchNorm2D(32), nn.ReLU(), + nn.Pad2D([1, 1, 1, 1], 'reflect'), nn.Conv2D(32, 64, 3, 2), nn.BatchNorm2D(64), nn.ReLU(), + nn.Pad2D([1, 1, 1, 1], 'reflect'), nn.Conv2D(64, 128, 3, 2), nn.BatchNorm2D(128), nn.ReLU()) + self.conv = nn.Conv2D(128 * 2, hidden_dim, 1) + self.transformer = nn.Transformer(hidden_dim, n_heads, n_enc_layers, n_dec_layers) + self.linear_param = nn.Sequential( + nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), + nn.Linear(hidden_dim, param_per_stroke)) + self.linear_decider = nn.Linear(hidden_dim, 1) + self.query_pos = paddle.static.create_parameter([total_strokes, hidden_dim], + dtype='float32', + default_initializer=nn.initializer.Uniform(0, 1)) + self.row_embed = paddle.static.create_parameter([8, hidden_dim // 2], + dtype='float32', + default_initializer=nn.initializer.Uniform(0, 1)) + self.col_embed = paddle.static.create_parameter([8, hidden_dim // 2], + dtype='float32', + default_initializer=nn.initializer.Uniform(0, 1)) + + def forward(self, img, canvas): + """ + prediction + """ + b, _, H, W = img.shape + img_feat = self.enc_img(img) + canvas_feat = self.enc_canvas(canvas) + h, w = img_feat.shape[-2:] + feat = paddle.concat([img_feat, canvas_feat], axis=1) + feat_conv = self.conv(feat) + + pos_embed = paddle.concat([ + self.col_embed[:w].unsqueeze(0).tile([h, 1, 1]), + self.row_embed[:h].unsqueeze(1).tile([1, w, 1]), + ], + axis=-1).flatten(0, 1).unsqueeze(1) + + hidden_state = self.transformer((pos_embed + feat_conv.flatten(2).transpose([2, 0, 1])).transpose([1, 0, 2]), + self.query_pos.unsqueeze(1).tile([1, b, 1]).transpose([1, 0, 2])) + + param = self.linear_param(hidden_state) + decision = self.linear_decider(hidden_state) + return param, decision diff --git a/modules/image/Image_gan/style_transfer/paint_transformer/module.py b/modules/image/Image_gan/style_transfer/paint_transformer/module.py new file mode 100644 index 0000000000000000000000000000000000000000..d77f8e06025fe281b43d771d36354dc9bd38db2a --- /dev/null +++ b/modules/image/Image_gan/style_transfer/paint_transformer/module.py @@ -0,0 +1,160 @@ +# Copyright (c) 2021 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse +import copy + +import paddle +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable, serving +import numpy as np +import cv2 +from skimage.io import imread +from skimage.transform import rescale, resize + +from .model import Painter +from .render_utils import totensor, read_img +from .render_serial import render_serial +from .util import base64_to_cv2 + + +@moduleinfo( + name="paint_transformer", + type="CV/style_transfer", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class paint_transformer: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "paint_best.pdparams") + + self.network = Painter(5, 8, 256, 8, 3, 3) + self.network.set_state_dict(paddle.load(self.pretrained_model)) + self.network.eval() + for param in self.network.parameters(): + param.stop_gradient = True + #* ----- load brush ----- *# + brush_large_vertical = read_img(os.path.join(self.directory, 'brush/brush_large_vertical.png'), 'L') + brush_large_horizontal = read_img(os.path.join(self.directory, 'brush/brush_large_horizontal.png'), 'L') + self.meta_brushes = paddle.concat([brush_large_vertical, brush_large_horizontal], axis=0) + + def style_transfer(self, + images: list = None, + paths: list = None, + output_dir: str = './transfer_result/', + use_gpu: bool = False, + need_animation: bool = False, + visualization: bool = True): + ''' + + + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR(read by cv2). + paths (list[str]): paths to images + output_dir (str): the dir to save the results + use_gpu (bool): if True, use gpu to perform the computation, otherwise cpu. + need_animation (bool): if True, save every frame to show the process of painting. + visualization (bool): if True, save results in output_dir. + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. 
Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + image = totensor(image) + final_result_list = render_serial(image, self.network, self.meta_brushes) + results.append(final_result_list) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + image = totensor(image) + final_result_list = render_serial(image, self.network, self.meta_brushes) + results.append(final_result_list) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + if out: + if need_animation: + curoutputdir = os.path.join(output_dir, 'output_{}'.format(i)) + if not os.path.exists(curoutputdir): + os.makedirs(curoutputdir, exist_ok=True) + for j, outimg in enumerate(out): + cv2.imwrite(os.path.join(curoutputdir, 'frame_{}.png'.format(j)), outimg) + else: + cv2.imwrite(os.path.join(output_dir, 'output_{}.png'.format(i)), out[-1]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.style_transfer( + paths=[self.args.input_path], + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + need_animation=self.args.need_animation, + visualization=self.args.visualization) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.style_transfer(images=images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='transfer_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + self.arg_config_group.add_argument( + '--need_animation', type=bool, default=False, help='save intermediate results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/Image_gan/style_transfer/paint_transformer/render_parallel.py b/modules/image/Image_gan/style_transfer/paint_transformer/render_parallel.py new file mode 100644 index 0000000000000000000000000000000000000000..a58ebec4bdae82881c8339dd6cae81ddc11407c2 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/paint_transformer/render_parallel.py @@ -0,0 +1,247 @@ +import render_utils +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import numpy as np +import math + + +def crop(img, h, w): + H, W = img.shape[-2:] + pad_h = (H - h) // 2 + pad_w = (W - w) // 2 + remainder_h = (H - h) % 2 + remainder_w = (W - w) % 2 + img = img[:, :, pad_h:H - pad_h - remainder_h, pad_w:W - pad_w - remainder_w] + return img + + +def stroke_net_predict(img_patch, result_patch, patch_size, net_g, stroke_num, patch_num): + """ + stroke_net_predict + """ + img_patch = img_patch.transpose([0, 2, 1]).reshape([-1, 3, patch_size, patch_size]) + result_patch = result_patch.transpose([0, 2, 1]).reshape([-1, 3, patch_size, patch_size]) + #*----- Stroke Predictor -----*# + shape_param, stroke_decision = net_g(img_patch, result_patch) + stroke_decision = (stroke_decision > 0).astype('float32') + #*----- sampling color -----*# + grid = shape_param[:, :, :2].reshape([img_patch.shape[0] * stroke_num, 1, 1, 2]) + img_temp = img_patch.unsqueeze(1).tile([1, stroke_num, 1, 1, + 1]).reshape([img_patch.shape[0] * stroke_num, 3, patch_size, patch_size]) + color = nn.functional.grid_sample( + img_temp, 2 * grid - 1, align_corners=False).reshape([img_patch.shape[0], stroke_num, 3]) + param = paddle.concat([shape_param, color], axis=-1) + + param = param.reshape([-1, 8]) + param[:, :2] = param[:, :2] / 2 + 0.25 + param[:, 2:4] = param[:, 2:4] / 2 + param = param.reshape([1, patch_num, patch_num, stroke_num, 8]) + decision = stroke_decision.reshape([1, patch_num, patch_num, stroke_num]) #.astype('bool') + return param, decision + + +def param2img_parallel(param, decision, meta_brushes, cur_canvas, stroke_num=8): + """ + Input stroke parameters and decisions for each patch, meta brushes, current canvas, frame directory, + and whether there is a border (if intermediate painting results are required). + Output the painting results of adding the corresponding strokes on the current canvas. + Args: + param: a tensor with shape batch size x patch along height dimension x patch along width dimension + x n_stroke_per_patch x n_param_per_stroke + decision: a 01 tensor with shape batch size x patch along height dimension x patch along width dimension + x n_stroke_per_patch + meta_brushes: a tensor with shape 2 x 3 x meta_brush_height x meta_brush_width. + The first slice on the batch dimension denotes vertical brush and the second one denotes horizontal brush. + cur_canvas: a tensor with shape batch size x 3 x H x W, + where H and W denote height and width of padded results of original images. + + Returns: + cur_canvas: a tensor with shape batch size x 3 x H x W, denoting painting results. 
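+
+    Note:
+        Patches are composited in four interleaved passes over the patch grid
+        (even-even, odd-odd, odd-even and even-odd positions), so the render
+        windows selected within a single pass do not overlap one another.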
+ """ + # param: b, h, w, stroke_per_patch, param_per_stroke + # decision: b, h, w, stroke_per_patch + b, h, w, s, p = param.shape + h, w = int(h), int(w) + param = param.reshape([-1, 8]) + decision = decision.reshape([-1, 8]) + + H, W = cur_canvas.shape[-2:] + is_odd_y = h % 2 == 1 + is_odd_x = w % 2 == 1 + render_size_y = 2 * H // h + render_size_x = 2 * W // w + + even_idx_y = paddle.arange(0, h, 2) + even_idx_x = paddle.arange(0, w, 2) + if h > 1: + odd_idx_y = paddle.arange(1, h, 2) + if w > 1: + odd_idx_x = paddle.arange(1, w, 2) + + cur_canvas = F.pad(cur_canvas, [render_size_x // 4, render_size_x // 4, render_size_y // 4, render_size_y // 4]) + + valid_foregrounds = render_utils.param2stroke(param, render_size_y, render_size_x, meta_brushes) + + #* ----- load dilation/erosion ---- *# + dilation = render_utils.Dilation2d(m=1) + erosion = render_utils.Erosion2d(m=1) + + #* ----- generate alphas ----- *# + valid_alphas = (valid_foregrounds > 0).astype('float32') + valid_foregrounds = valid_foregrounds.reshape([-1, stroke_num, 1, render_size_y, render_size_x]) + valid_alphas = valid_alphas.reshape([-1, stroke_num, 1, render_size_y, render_size_x]) + + temp = [dilation(valid_foregrounds[:, i, :, :, :]) for i in range(stroke_num)] + valid_foregrounds = paddle.stack(temp, axis=1) + valid_foregrounds = valid_foregrounds.reshape([-1, 1, render_size_y, render_size_x]) + + temp = [erosion(valid_alphas[:, i, :, :, :]) for i in range(stroke_num)] + valid_alphas = paddle.stack(temp, axis=1) + valid_alphas = valid_alphas.reshape([-1, 1, render_size_y, render_size_x]) + + foregrounds = valid_foregrounds.reshape([-1, h, w, stroke_num, 1, render_size_y, render_size_x]) + alphas = valid_alphas.reshape([-1, h, w, stroke_num, 1, render_size_y, render_size_x]) + decision = decision.reshape([-1, h, w, stroke_num, 1, 1, 1]) + param = param.reshape([-1, h, w, stroke_num, 8]) + + def partial_render(this_canvas, patch_coord_y, patch_coord_x): + canvas_patch = F.unfold( + this_canvas, [render_size_y, render_size_x], strides=[render_size_y // 2, render_size_x // 2]) + # canvas_patch: b, 3 * py * px, h * w + canvas_patch = canvas_patch.reshape([b, 3, render_size_y, render_size_x, h, w]) + canvas_patch = canvas_patch.transpose([0, 4, 5, 1, 2, 3]) + selected_canvas_patch = paddle.gather(canvas_patch, patch_coord_y, 1) + selected_canvas_patch = paddle.gather(selected_canvas_patch, patch_coord_x, 2) + selected_canvas_patch = selected_canvas_patch.reshape([0, 0, 0, 1, 3, render_size_y, render_size_x]) + selected_foregrounds = paddle.gather(foregrounds, patch_coord_y, 1) + selected_foregrounds = paddle.gather(selected_foregrounds, patch_coord_x, 2) + selected_alphas = paddle.gather(alphas, patch_coord_y, 1) + selected_alphas = paddle.gather(selected_alphas, patch_coord_x, 2) + selected_decisions = paddle.gather(decision, patch_coord_y, 1) + selected_decisions = paddle.gather(selected_decisions, patch_coord_x, 2) + selected_color = paddle.gather(param, patch_coord_y, 1) + selected_color = paddle.gather(selected_color, patch_coord_x, 2) + selected_color = paddle.gather(selected_color, paddle.to_tensor([5, 6, 7]), 4) + selected_color = selected_color.reshape([0, 0, 0, stroke_num, 3, 1, 1]) + + for i in range(stroke_num): + i = paddle.to_tensor(i) + + cur_foreground = paddle.gather(selected_foregrounds, i, 3) + cur_alpha = paddle.gather(selected_alphas, i, 3) + cur_decision = paddle.gather(selected_decisions, i, 3) + cur_color = paddle.gather(selected_color, i, 3) + cur_foreground = cur_foreground * cur_color + 
selected_canvas_patch = cur_foreground * cur_alpha * cur_decision + selected_canvas_patch * ( + 1 - cur_alpha * cur_decision) + + selected_canvas_patch = selected_canvas_patch.reshape([0, 0, 0, 3, render_size_y, render_size_x]) + this_canvas = selected_canvas_patch.transpose([0, 3, 1, 4, 2, 5]) + + # this_canvas: b, 3, h_half, py, w_half, px + h_half = this_canvas.shape[2] + w_half = this_canvas.shape[4] + this_canvas = this_canvas.reshape([b, 3, h_half * render_size_y, w_half * render_size_x]) + # this_canvas: b, 3, h_half * py, w_half * px + return this_canvas + + # even - even area + # 1 | 0 + # 0 | 0 + canvas = partial_render(cur_canvas, even_idx_y, even_idx_x) + if not is_odd_y: + canvas = paddle.concat([canvas, cur_canvas[:, :, -render_size_y // 2:, :canvas.shape[3]]], axis=2) + if not is_odd_x: + canvas = paddle.concat([canvas, cur_canvas[:, :, :canvas.shape[2], -render_size_x // 2:]], axis=3) + cur_canvas = canvas + + # odd - odd area + # 0 | 0 + # 0 | 1 + if h > 1 and w > 1: + canvas = partial_render(cur_canvas, odd_idx_y, odd_idx_x) + canvas = paddle.concat([cur_canvas[:, :, :render_size_y // 2, -canvas.shape[3]:], canvas], axis=2) + canvas = paddle.concat([cur_canvas[:, :, -canvas.shape[2]:, :render_size_x // 2], canvas], axis=3) + if is_odd_y: + canvas = paddle.concat([canvas, cur_canvas[:, :, -render_size_y // 2:, :canvas.shape[3]]], axis=2) + if is_odd_x: + canvas = paddle.concat([canvas, cur_canvas[:, :, :canvas.shape[2], -render_size_x // 2:]], axis=3) + cur_canvas = canvas + + # odd - even area + # 0 | 0 + # 1 | 0 + if h > 1: + canvas = partial_render(cur_canvas, odd_idx_y, even_idx_x) + canvas = paddle.concat([cur_canvas[:, :, :render_size_y // 2, :canvas.shape[3]], canvas], axis=2) + if is_odd_y: + canvas = paddle.concat([canvas, cur_canvas[:, :, -render_size_y // 2:, :canvas.shape[3]]], axis=2) + if not is_odd_x: + canvas = paddle.concat([canvas, cur_canvas[:, :, :canvas.shape[2], -render_size_x // 2:]], axis=3) + cur_canvas = canvas + + # odd - even area + # 0 | 1 + # 0 | 0 + if w > 1: + canvas = partial_render(cur_canvas, even_idx_y, odd_idx_x) + canvas = paddle.concat([cur_canvas[:, :, :canvas.shape[2], :render_size_x // 2], canvas], axis=3) + if not is_odd_y: + canvas = paddle.concat([canvas, cur_canvas[:, :, -render_size_y // 2:, -canvas.shape[3]:]], axis=2) + if is_odd_x: + canvas = paddle.concat([canvas, cur_canvas[:, :, :canvas.shape[2], -render_size_x // 2:]], axis=3) + cur_canvas = canvas + + cur_canvas = cur_canvas[:, :, render_size_y // 4:-render_size_y // 4, render_size_x // 4:-render_size_x // 4] + + return cur_canvas + + +def render_parallel(original_img, net_g, meta_brushes): + + patch_size = 32 + stroke_num = 8 + + with paddle.no_grad(): + + original_h, original_w = original_img.shape[-2:] + K = max(math.ceil(math.log2(max(original_h, original_w) / patch_size)), 0) + original_img_pad_size = patch_size * (2**K) + original_img_pad = render_utils.pad(original_img, original_img_pad_size, original_img_pad_size) + final_result = paddle.zeros_like(original_img) + + for layer in range(0, K + 1): + layer_size = patch_size * (2**layer) + + img = F.interpolate(original_img_pad, (layer_size, layer_size)) + result = F.interpolate(final_result, (layer_size, layer_size)) + img_patch = F.unfold(img, [patch_size, patch_size], strides=[patch_size, patch_size]) + result_patch = F.unfold(result, [patch_size, patch_size], strides=[patch_size, patch_size]) + + # There are patch_num * patch_num patches in total + patch_num = (layer_size - patch_size) // patch_size + 1 + 
param, decision = stroke_net_predict(img_patch, result_patch, patch_size, net_g, stroke_num, patch_num) + + #print(param.shape, decision.shape) + final_result = param2img_parallel(param, decision, meta_brushes, final_result) + + # paint another time for last layer + border_size = original_img_pad_size // (2 * patch_num) + img = F.interpolate(original_img_pad, (layer_size, layer_size)) + result = F.interpolate(final_result, (layer_size, layer_size)) + img = F.pad(img, [patch_size // 2, patch_size // 2, patch_size // 2, patch_size // 2]) + result = F.pad(result, [patch_size // 2, patch_size // 2, patch_size // 2, patch_size // 2]) + img_patch = F.unfold(img, [patch_size, patch_size], strides=[patch_size, patch_size]) + result_patch = F.unfold(result, [patch_size, patch_size], strides=[patch_size, patch_size]) + final_result = F.pad(final_result, [border_size, border_size, border_size, border_size]) + patch_num = (img.shape[2] - patch_size) // patch_size + 1 + #w = (img.shape[3] - patch_size) // patch_size + 1 + + param, decision = stroke_net_predict(img_patch, result_patch, patch_size, net_g, stroke_num, patch_num) + + final_result = param2img_parallel(param, decision, meta_brushes, final_result) + + final_result = final_result[:, :, border_size:-border_size, border_size:-border_size] + final_result = (final_result.numpy().squeeze().transpose([1, 2, 0])[:, :, ::-1] * 255).astype(np.uint8) + return final_result diff --git a/modules/image/Image_gan/style_transfer/paint_transformer/render_serial.py b/modules/image/Image_gan/style_transfer/paint_transformer/render_serial.py new file mode 100644 index 0000000000000000000000000000000000000000..b3170a29a174bc03593f44ec5d248299724c253f --- /dev/null +++ b/modules/image/Image_gan/style_transfer/paint_transformer/render_serial.py @@ -0,0 +1,280 @@ +# !/usr/bin/env python3 +""" +codes for oilpainting style transfer. 
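+
+Overview: stroke_net_predict predicts stroke parameters and on/off decisions for each
+patch, get_single_layer_lists scores every accepted stroke against the target image,
+and render_serial sorts the strokes by error and composites them one by one, collecting
+intermediate canvases per layer so the painting process can be exported as an animation.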
+""" +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import numpy as np +from PIL import Image +import math +import cv2 +import time +from .render_utils import param2stroke, Dilation2d, Erosion2d + + +def get_single_layer_lists(param, decision, ori_img, render_size_x, render_size_y, h, w, meta_brushes, dilation, + erosion, stroke_num): + """ + get_single_layer_lists + """ + valid_foregrounds = param2stroke(param[:, :], render_size_y, render_size_x, meta_brushes) + + valid_alphas = (valid_foregrounds > 0).astype('float32') + valid_foregrounds = valid_foregrounds.reshape([-1, stroke_num, 1, render_size_y, render_size_x]) + valid_alphas = valid_alphas.reshape([-1, stroke_num, 1, render_size_y, render_size_x]) + + temp = [dilation(valid_foregrounds[:, i, :, :, :]) for i in range(stroke_num)] + valid_foregrounds = paddle.stack(temp, axis=1) + valid_foregrounds = valid_foregrounds.reshape([-1, 1, render_size_y, render_size_x]) + + temp = [erosion(valid_alphas[:, i, :, :, :]) for i in range(stroke_num)] + valid_alphas = paddle.stack(temp, axis=1) + valid_alphas = valid_alphas.reshape([-1, 1, render_size_y, render_size_x]) + + patch_y = 4 * render_size_y // 5 + patch_x = 4 * render_size_x // 5 + + img_patch = ori_img.reshape([1, 3, h, ori_img.shape[2] // h, w, ori_img.shape[3] // w]) + img_patch = img_patch.transpose([0, 2, 4, 1, 3, 5])[0] + + xid_list = [] + yid_list = [] + error_list = [] + + for flag_idx, flag in enumerate(decision.cpu().numpy()): + if flag: + flag_idx = flag_idx // stroke_num + x_id = flag_idx % w + flag_idx = flag_idx // w + y_id = flag_idx % h + xid_list.append(x_id) + yid_list.append(y_id) + + inner_fores = valid_foregrounds[:, :, render_size_y // 10:9 * render_size_y // 10, render_size_x // 10:9 * + render_size_x // 10] + inner_alpha = valid_alphas[:, :, render_size_y // 10:9 * render_size_y // 10, render_size_x // 10:9 * + render_size_x // 10] + inner_fores = inner_fores.reshape([h * w, stroke_num, 1, patch_y, patch_x]) + inner_alpha = inner_alpha.reshape([h * w, stroke_num, 1, patch_y, patch_x]) + inner_real = img_patch.reshape([h * w, 3, patch_y, patch_x]).unsqueeze(1) + + R = param[:, 5] + G = param[:, 6] + B = param[:, 7] #, G, B = param[5:] + R = R.reshape([-1, stroke_num]).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) + G = G.reshape([-1, stroke_num]).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) + B = B.reshape([-1, stroke_num]).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) + error_R = R * inner_fores - inner_real[:, :, 0:1, :, :] + error_G = G * inner_fores - inner_real[:, :, 1:2, :, :] + error_B = B * inner_fores - inner_real[:, :, 2:3, :, :] + error = paddle.abs(error_R) + paddle.abs(error_G) + paddle.abs(error_B) + + error = error * inner_alpha + error = paddle.sum(error, axis=(2, 3, 4)) / paddle.sum(inner_alpha, axis=(2, 3, 4)) + error_list = error.reshape([-1]).numpy()[decision.numpy()] + error_list = list(error_list) + + valid_foregrounds = paddle.to_tensor(valid_foregrounds.numpy()[decision.numpy()]) + valid_alphas = paddle.to_tensor(valid_alphas.numpy()[decision.numpy()]) + + selected_param = paddle.to_tensor(param.numpy()[decision.numpy()]) + return xid_list, yid_list, valid_foregrounds, valid_alphas, error_list, selected_param + + +def get_single_stroke_on_full_image_A(x_id, y_id, valid_foregrounds, valid_alphas, param, original_img, render_size_x, + render_size_y, patch_x, patch_y): + """ + get_single_stroke_on_full_image_A + """ + tmp_foreground = paddle.zeros_like(original_img) + + patch_y_num = original_img.shape[2] // patch_y + 
patch_x_num = original_img.shape[3] // patch_x + + brush = valid_foregrounds.unsqueeze(0) + color_map = param[5:] + brush = brush.tile([1, 3, 1, 1]) + color_map = color_map.unsqueeze(-1).unsqueeze(-1).unsqueeze(0) #.repeat(1, 1, H, W) + brush = brush * color_map + + pad_l = x_id * patch_x + pad_r = (patch_x_num - x_id - 1) * patch_x + pad_t = y_id * patch_y + pad_b = (patch_y_num - y_id - 1) * patch_y + tmp_foreground = nn.functional.pad(brush, [pad_l, pad_r, pad_t, pad_b]) + tmp_foreground = tmp_foreground[:, :, render_size_y // 10:-render_size_y // 10, render_size_x // + 10:-render_size_x // 10] + + tmp_alpha = nn.functional.pad(valid_alphas.unsqueeze(0), [pad_l, pad_r, pad_t, pad_b]) + tmp_alpha = tmp_alpha[:, :, render_size_y // 10:-render_size_y // 10, render_size_x // 10:-render_size_x // 10] + return tmp_foreground, tmp_alpha + + +def get_single_stroke_on_full_image_B(x_id, y_id, valid_foregrounds, valid_alphas, param, original_img, render_size_x, + render_size_y, patch_x, patch_y): + """ + get_single_stroke_on_full_image_B + """ + x_expand = patch_x // 2 + render_size_x // 10 + y_expand = patch_y // 2 + render_size_y // 10 + + pad_l = x_id * patch_x + pad_r = original_img.shape[3] + 2 * x_expand - (x_id * patch_x + render_size_x) + pad_t = y_id * patch_y + pad_b = original_img.shape[2] + 2 * y_expand - (y_id * patch_y + render_size_y) + + brush = valid_foregrounds.unsqueeze(0) + color_map = param[5:] + brush = brush.tile([1, 3, 1, 1]) + color_map = color_map.unsqueeze(-1).unsqueeze(-1).unsqueeze(0) #.repeat(1, 1, H, W) + brush = brush * color_map + + tmp_foreground = nn.functional.pad(brush, [pad_l, pad_r, pad_t, pad_b]) + + tmp_foreground = tmp_foreground[:, :, y_expand:-y_expand, x_expand:-x_expand] + tmp_alpha = nn.functional.pad(valid_alphas.unsqueeze(0), [pad_l, pad_r, pad_t, pad_b]) + tmp_alpha = tmp_alpha[:, :, y_expand:-y_expand, x_expand:-x_expand] + return tmp_foreground, tmp_alpha + + +def stroke_net_predict(img_patch, result_patch, patch_size, net_g, stroke_num): + """ + stroke_net_predict + """ + img_patch = img_patch.transpose([0, 2, 1]).reshape([-1, 3, patch_size, patch_size]) + result_patch = result_patch.transpose([0, 2, 1]).reshape([-1, 3, patch_size, patch_size]) + #*----- Stroke Predictor -----*# + shape_param, stroke_decision = net_g(img_patch, result_patch) + stroke_decision = (stroke_decision > 0).astype('float32') + #*----- sampling color -----*# + grid = shape_param[:, :, :2].reshape([img_patch.shape[0] * stroke_num, 1, 1, 2]) + img_temp = img_patch.unsqueeze(1).tile([1, stroke_num, 1, 1, + 1]).reshape([img_patch.shape[0] * stroke_num, 3, patch_size, patch_size]) + color = nn.functional.grid_sample( + img_temp, 2 * grid - 1, align_corners=False).reshape([img_patch.shape[0], stroke_num, 3]) + stroke_param = paddle.concat([shape_param, color], axis=-1) + + param = stroke_param.reshape([-1, 8]) + decision = stroke_decision.reshape([-1]).astype('bool') + param[:, :2] = param[:, :2] / 1.25 + 0.1 + param[:, 2:4] = param[:, 2:4] / 1.25 + return param, decision + + +def sort_strokes(params, decision, scores): + """ + sort_strokes + """ + sorted_scores, sorted_index = paddle.sort(scores, axis=1, descending=False) + sorted_params = [] + for idx in range(8): + tmp_pick_params = paddle.gather(params[:, :, idx], axis=1, index=sorted_index) + sorted_params.append(tmp_pick_params) + sorted_params = paddle.stack(sorted_params, axis=2) + sorted_decison = paddle.gather(decision.squeeze(2), axis=1, index=sorted_index) + return sorted_params, sorted_decison + + +def 
render_serial(original_img, net_g, meta_brushes): + + patch_size = 32 + stroke_num = 8 + H, W = original_img.shape[-2:] + K = max(math.ceil(math.log2(max(H, W) / patch_size)), 0) + + dilation = Dilation2d(m=1) + erosion = Erosion2d(m=1) + frames_per_layer = [20, 20, 30, 40, 60] + final_frame_list = [] + + with paddle.no_grad(): + #* ----- read in image and init canvas ----- *# + final_result = paddle.zeros_like(original_img) + + for layer in range(0, K + 1): + t0 = time.time() + layer_size = patch_size * (2**layer) + + img = nn.functional.interpolate(original_img, (layer_size, layer_size)) + result = nn.functional.interpolate(final_result, (layer_size, layer_size)) + img_patch = nn.functional.unfold(img, [patch_size, patch_size], strides=[patch_size, patch_size]) + result_patch = nn.functional.unfold(result, [patch_size, patch_size], strides=[patch_size, patch_size]) + h = (img.shape[2] - patch_size) // patch_size + 1 + w = (img.shape[3] - patch_size) // patch_size + 1 + render_size_y = int(1.25 * H // h) + render_size_x = int(1.25 * W // w) + + #* -------------------------------------------------------------*# + #* -------------generate strokes on window type A---------------*# + #* -------------------------------------------------------------*# + param, decision = stroke_net_predict(img_patch, result_patch, patch_size, net_g, stroke_num) + expand_img = original_img + wA_xid_list, wA_yid_list, wA_fore_list, wA_alpha_list, wA_error_list, wA_params = \ + get_single_layer_lists(param, decision, original_img, render_size_x, render_size_y, h, w, + meta_brushes, dilation, erosion, stroke_num) + + #* -------------------------------------------------------------*# + #* -------------generate strokes on window type B---------------*# + #* -------------------------------------------------------------*# + #*----- generate input canvas and target patches -----*# + wB_error_list = [] + + img = nn.functional.pad(img, [patch_size // 2, patch_size // 2, patch_size // 2, patch_size // 2]) + result = nn.functional.pad(result, [patch_size // 2, patch_size // 2, patch_size // 2, patch_size // 2]) + img_patch = nn.functional.unfold(img, [patch_size, patch_size], strides=[patch_size, patch_size]) + result_patch = nn.functional.unfold(result, [patch_size, patch_size], strides=[patch_size, patch_size]) + h += 1 + w += 1 + + param, decision = stroke_net_predict(img_patch, result_patch, patch_size, net_g, stroke_num) + + patch_y = 4 * render_size_y // 5 + patch_x = 4 * render_size_x // 5 + expand_img = nn.functional.pad(original_img, [patch_x // 2, patch_x // 2, patch_y // 2, patch_y // 2]) + wB_xid_list, wB_yid_list, wB_fore_list, wB_alpha_list, wB_error_list, wB_params = \ + get_single_layer_lists(param, decision, expand_img, render_size_x, render_size_y, h, w, + meta_brushes, dilation, erosion, stroke_num) + #* -------------------------------------------------------------*# + #* -------------rank strokes and plot stroke one by one---------*# + #* -------------------------------------------------------------*# + numA = len(wA_error_list) + numB = len(wB_error_list) + total_error_list = wA_error_list + wB_error_list + sort_list = list(np.argsort(total_error_list)) + + sample = 0 + samples = np.linspace(0, len(sort_list) - 2, frames_per_layer[layer]).astype(int) + for ii in sort_list: + ii = int(ii) + if ii < numA: + x_id = wA_xid_list[ii] + y_id = wA_yid_list[ii] + valid_foregrounds = wA_fore_list[ii] + valid_alphas = wA_alpha_list[ii] + sparam = wA_params[ii] + tmp_foreground, tmp_alpha = 
get_single_stroke_on_full_image_A( + x_id, y_id, valid_foregrounds, valid_alphas, sparam, original_img, render_size_x, render_size_y, + patch_x, patch_y) + else: + x_id = wB_xid_list[ii - numA] + y_id = wB_yid_list[ii - numA] + valid_foregrounds = wB_fore_list[ii - numA] + valid_alphas = wB_alpha_list[ii - numA] + sparam = wB_params[ii - numA] + tmp_foreground, tmp_alpha = get_single_stroke_on_full_image_B( + x_id, y_id, valid_foregrounds, valid_alphas, sparam, original_img, render_size_x, render_size_y, + patch_x, patch_y) + + final_result = tmp_foreground * tmp_alpha + (1 - tmp_alpha) * final_result + if sample in samples: + saveframe = (final_result.numpy().squeeze().transpose([1, 2, 0])[:, :, ::-1] * 255).astype(np.uint8) + final_frame_list.append(saveframe) + #saveframe = cv2.resize(saveframe, (ow, oh)) + + sample += 1 + print("layer %d cost: %.02f" % (layer, time.time() - t0)) + + saveframe = (final_result.numpy().squeeze().transpose([1, 2, 0])[:, :, ::-1] * 255).astype(np.uint8) + final_frame_list.append(saveframe) + return final_frame_list diff --git a/modules/image/Image_gan/style_transfer/paint_transformer/render_utils.py b/modules/image/Image_gan/style_transfer/paint_transformer/render_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..735ac983a343961939fe333b06ac2b1fec01654f --- /dev/null +++ b/modules/image/Image_gan/style_transfer/paint_transformer/render_utils.py @@ -0,0 +1,111 @@ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import cv2 +import numpy as np +from PIL import Image +import math + + +class Erosion2d(nn.Layer): + """ + Erosion2d + """ + + def __init__(self, m=1): + super(Erosion2d, self).__init__() + self.m = m + self.pad = [m, m, m, m] + + def forward(self, x): + batch_size, c, h, w = x.shape + x_pad = F.pad(x, pad=self.pad, mode='constant', value=1e9) + channel = nn.functional.unfold(x_pad, 2 * self.m + 1, strides=1, paddings=0).reshape([batch_size, c, -1, h, w]) + result = paddle.min(channel, axis=2) + return result + + +class Dilation2d(nn.Layer): + """ + Dilation2d + """ + + def __init__(self, m=1): + super(Dilation2d, self).__init__() + self.m = m + self.pad = [m, m, m, m] + + def forward(self, x): + batch_size, c, h, w = x.shape + x_pad = F.pad(x, pad=self.pad, mode='constant', value=-1e9) + channel = nn.functional.unfold(x_pad, 2 * self.m + 1, strides=1, paddings=0).reshape([batch_size, c, -1, h, w]) + result = paddle.max(channel, axis=2) + return result + + +def param2stroke(param, H, W, meta_brushes): + """ + param2stroke + """ + b = param.shape[0] + param_list = paddle.split(param, 8, axis=1) + x0, y0, w, h, theta = [item.squeeze(-1) for item in param_list[:5]] + sin_theta = paddle.sin(math.pi * theta) + cos_theta = paddle.cos(math.pi * theta) + index = paddle.full((b, ), -1, dtype='int64').numpy() + + index[(h > w).numpy()] = 0 + index[(h <= w).numpy()] = 1 + meta_brushes_resize = F.interpolate(meta_brushes, (H, W)).numpy() + brush = paddle.to_tensor(meta_brushes_resize[index]) + + warp_00 = cos_theta / w + warp_01 = sin_theta * H / (W * w) + warp_02 = (1 - 2 * x0) * cos_theta / w + (1 - 2 * y0) * sin_theta * H / (W * w) + warp_10 = -sin_theta * W / (H * h) + warp_11 = cos_theta / h + warp_12 = (1 - 2 * y0) * cos_theta / h - (1 - 2 * x0) * sin_theta * W / (H * h) + warp_0 = paddle.stack([warp_00, warp_01, warp_02], axis=1) + warp_1 = paddle.stack([warp_10, warp_11, warp_12], axis=1) + warp = paddle.stack([warp_0, warp_1], axis=1) + grid = nn.functional.affine_grid(warp, [b, 3, H, W]) # 
paddle和torch默认值是反过来的 + brush = nn.functional.grid_sample(brush, grid) + return brush + + +def read_img(img_path, img_type='RGB', h=None, w=None): + """ + read img + """ + img = Image.open(img_path).convert(img_type) + if h is not None and w is not None: + img = img.resize((w, h), resample=Image.NEAREST) + img = np.array(img) + if img.ndim == 2: + img = np.expand_dims(img, axis=-1) + img = img.transpose((2, 0, 1)) + img = paddle.to_tensor(img).unsqueeze(0).astype('float32') / 255. + return img + + +def preprocess(img, w=512, h=512): + image = cv2.resize(img, (w, h), cv2.INTER_NEAREST) + image = image.transpose((2, 0, 1)) + image = paddle.to_tensor(image).unsqueeze(0).astype('float32') / 255. + return image + + +def totensor(img): + image = img.transpose((2, 0, 1)) + image = paddle.to_tensor(image).unsqueeze(0).astype('float32') / 255. + return image + + +def pad(img, H, W): + b, c, h, w = img.shape + pad_h = (H - h) // 2 + pad_w = (W - w) // 2 + remainder_h = (H - h) % 2 + remainder_w = (W - w) % 2 + expand_img = nn.functional.pad(img, [pad_w, pad_w + remainder_w, pad_h, pad_h + remainder_h]) + return expand_img diff --git a/modules/image/Image_gan/style_transfer/paint_transformer/requirements.txt b/modules/image/Image_gan/style_transfer/paint_transformer/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..67e9bb6fa840355e9ed0d44b7134850f1fe22fe1 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/paint_transformer/requirements.txt @@ -0,0 +1 @@ +ppgan diff --git a/modules/image/Image_gan/style_transfer/paint_transformer/util.py b/modules/image/Image_gan/style_transfer/paint_transformer/util.py new file mode 100644 index 0000000000000000000000000000000000000000..b88ac3562b74cadc1d4d6459a56097ca4a938a0b --- /dev/null +++ b/modules/image/Image_gan/style_transfer/paint_transformer/util.py @@ -0,0 +1,10 @@ +import base64 +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/Image_gan/style_transfer/psgan/README.md b/modules/image/Image_gan/style_transfer/psgan/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3d0b63dc1558f861d13b801c58a8a8206eac10ea --- /dev/null +++ b/modules/image/Image_gan/style_transfer/psgan/README.md @@ -0,0 +1,143 @@ +# psgan + +|模型名称|psgan| +| :--- | :---: | +|类别|图像 - 妆容迁移| +|网络|PSGAN| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|121MB| +|最新更新日期|2021-12-07| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ +
+ 输入内容图像
+ +
+ 输入妆容图像
+ +
+ 输出图像 +
+

+ +- ### 模型介绍 + + - PSGAN模型的任务是妆容迁移, 即将任意参照图像上的妆容迁移到不带妆容的源图像上。很多人像美化应用都需要这种技术。 + + - 更多详情参考:[PSGAN: Pose and Expression Robust Spatial-Aware GAN for Customizable Makeup Transfer](https://arxiv.org/pdf/1909.06956.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + - ppgan + - dlib + +- ### 2、安装 + + - ```shell + $ hub install psgan + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run psgan --content "/PATH/TO/IMAGE" --style "/PATH/TO/IMAGE1" + ``` + - 通过命令行方式实现妆容转换模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="psgan") + content = cv2.imread("/PATH/TO/IMAGE") + style = cv2.imread("/PATH/TO/IMAGE1") + results = module.makeup_transfer(images=[{'content':content, 'style':style}], output_dir='./transfer_result', use_gpu=True) + ``` + +- ### 3、API + + - ```python + makeup_transfer(images=None, paths=None, output_dir='./transfer_result/', use_gpu=False, visualization=True) + ``` + - 妆容风格转换API。 + + - **参数** + + - images (list[dict]): data of images, 每一个元素都为一个 dict,有关键字 content, style, 相应取值为: + - content (numpy.ndarray): 待转换的图片,shape 为 \[H, W, C\],BGR格式;
+ - style (numpy.ndarray) : 风格图像,shape为 \[H, W, C\],BGR格式;
+ - paths (list[str]): paths to images, 每一个元素都为一个dict, 有关键字 content, style, 相应取值为: + - content (str): 待转换的图片的路径;
+ - style (str) : 风格图像的路径;
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线妆容风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m psgan + ``` + + - 这样就完成了一个妆容风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[{'content': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE")), 'style': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE1"))}]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/psgan" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install psgan==1.0.0 + ``` diff --git a/modules/image/Image_gan/style_transfer/psgan/makeup.yaml b/modules/image/Image_gan/style_transfer/psgan/makeup.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05723e02b4c96c460e18affbb8774b36c5c6b532 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/psgan/makeup.yaml @@ -0,0 +1,76 @@ +epochs: 100 +output_dir: tmp +checkpoints_dir: checkpoints +find_unused_parameters: True + +model: + name: MakeupModel + generator: + name: GeneratorPSGANAttention + conv_dim: 64 + repeat_num: 6 + discriminator: + name: NLayerDiscriminator + ndf: 64 + n_layers: 3 + input_nc: 3 + norm_type: spectral + cycle_criterion: + name: L1Loss + idt_criterion: + name: L1Loss + loss_weight: 0.5 + l1_criterion: + name: L1Loss + l2_criterion: + name: MSELoss + gan_criterion: + name: GANLoss + gan_mode: lsgan + +dataset: + train: + name: MakeupDataset + trans_size: 256 + dataroot: data/MT-Dataset + cls_list: [non-makeup, makeup] + phase: train + test: + name: MakeupDataset + trans_size: 256 + dataroot: data/MT-Dataset + cls_list: [non-makeup, makeup] + phase: test + + +lr_scheduler: + name: LinearDecay + learning_rate: 0.0002 + start_epoch: 100 + decay_epochs: 100 + # will get from real dataset + iters_per_epoch: 1 + +optimizer: + optimizer_G: + name: Adam + net_names: + - netG + beta1: 0.5 + optimizer_DA: + name: Adam + net_names: + - netD_A + beta1: 0.5 + optimizer_DB: + name: Adam + net_names: + - netD_B + beta1: 0.5 + +log_config: + interval: 10 + visiual_interval: 500 + +snapshot_config: + interval: 5 diff --git a/modules/image/Image_gan/style_transfer/psgan/model.py b/modules/image/Image_gan/style_transfer/psgan/model.py new file mode 100644 index 0000000000000000000000000000000000000000..c4dcf64157b1a3a3d5a55da56cd5c82d49c13ce6 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/psgan/model.py @@ -0,0 +1,170 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
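+
+# Inference pipeline for PSGAN makeup transfer: PreProcess performs dlib face
+# detection, face parsing and landmark extraction; Inference runs the preprocessed
+# source/reference pair through the generator loaded from `model_path`; PostProcess
+# upsamples the transferred result back to the source crop size, restores
+# high-frequency detail from the source image and optionally denoises it.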
+import argparse +import os +import sys +from pathlib import Path + +import numpy as np +import paddle +import paddle.vision.transforms as T +import ppgan.faceutils as futils +from paddle.utils.download import get_weights_path_from_url +from PIL import Image +from ppgan.models.builder import build_model +from ppgan.utils.config import get_config +from ppgan.utils.filesystem import load +from ppgan.utils.options import parse_args +from ppgan.utils.preprocess import * + + +def toImage(net_output): + img = net_output.squeeze(0).transpose((1, 2, 0)).numpy() # [1,c,h,w]->[h,w,c] + img = (img * 255.0).clip(0, 255) + img = np.uint8(img) + img = Image.fromarray(img, mode='RGB') + return img + + +PS_WEIGHT_URL = "https://paddlegan.bj.bcebos.com/models/psgan_weight.pdparams" + + +class PreProcess: + def __init__(self, config, need_parser=True): + self.img_size = 256 + self.transform = transform = T.Compose([ + T.Resize(size=256), + T.ToTensor(), + ]) + self.norm = T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) + if need_parser: + self.face_parser = futils.mask.FaceParser() + self.up_ratio = 0.6 / 0.85 + self.down_ratio = 0.2 / 0.85 + self.width_ratio = 0.2 / 0.85 + + def __call__(self, image): + face = futils.dlib.detect(image) + + if not face: + return + face_on_image = face[0] + image, face, crop_face = futils.dlib.crop(image, face_on_image, self.up_ratio, self.down_ratio, + self.width_ratio) + np_image = np.array(image) + image_trans = self.transform(np_image) + mask = self.face_parser.parse(np.float32(cv2.resize(np_image, (512, 512)))) + mask = cv2.resize(mask.numpy(), (self.img_size, self.img_size), interpolation=cv2.INTER_NEAREST) + mask = mask.astype(np.uint8) + mask_tensor = paddle.to_tensor(mask) + + lms = futils.dlib.landmarks(image, face) / image_trans.shape[:2] * self.img_size + lms = lms.round() + + P_np = generate_P_from_lmks(lms, self.img_size, self.img_size, self.img_size) + + mask_aug = generate_mask_aug(mask, lms) + + return [self.norm(image_trans).unsqueeze(0), + np.float32(mask_aug), + np.float32(P_np), + np.float32(mask)], face_on_image, crop_face + + +class PostProcess: + def __init__(self, config): + self.denoise = True + self.img_size = 256 + + def __call__(self, source: Image, result: Image): + # TODO: Refract -> name, resize + source = np.array(source) + result = np.array(result) + + height, width = source.shape[:2] + small_source = cv2.resize(source, (self.img_size, self.img_size)) + laplacian_diff = source.astype(np.float) - cv2.resize(small_source, (width, height)).astype(np.float) + result = (cv2.resize(result, (width, height)) + laplacian_diff).round().clip(0, 255).astype(np.uint8) + if self.denoise: + result = cv2.fastNlMeansDenoisingColored(result) + result = Image.fromarray(result).convert('RGB') + return result + + +class Inference: + def __init__(self, config, model_path=''): + self.model = build_model(config.model) + self.preprocess = PreProcess(config) + self.model_path = model_path + + def transfer(self, source, reference, with_face=False): + source_input, face, crop_face = self.preprocess(source) + reference_input, face, crop_face = self.preprocess(reference) + + consis_mask = np.float32(calculate_consis_mask(source_input[1], reference_input[1])) + consis_mask = paddle.to_tensor(np.expand_dims(consis_mask, 0)) + + if not (source_input and reference_input): + if with_face: + return None, None + return + + for i in range(1, len(source_input) - 1): + source_input[i] = paddle.to_tensor(np.expand_dims(source_input[i], 0)) + + for i in range(1, len(reference_input) - 
1): + reference_input[i] = paddle.to_tensor(np.expand_dims(reference_input[i], 0)) + + input_data = { + 'image_A': source_input[0], + 'image_B': reference_input[0], + 'mask_A_aug': source_input[1], + 'mask_B_aug': reference_input[1], + 'P_A': source_input[2], + 'P_B': reference_input[2], + 'consis_mask': consis_mask + } + + state_dicts = load(self.model_path) + for net_name, net in self.model.nets.items(): + net.set_state_dict(state_dicts[net_name]) + result, _ = self.model.test(input_data) + min_, max_ = result.min(), result.max() + result += -min_ + result = paddle.divide(result, max_ - min_ + 1e-5) + img = toImage(result) + + if with_face: + return img, crop_face + + return img + + +class PSGANPredictor: + def __init__(self, cfg, weight_path): + self.cfg = cfg + self.weight_path = weight_path + + def run(self, source, reference): + source = Image.fromarray(source) + reference = Image.fromarray(reference) + inference = Inference(self.cfg, self.weight_path) + postprocess = PostProcess(self.cfg) + + # Transfer the psgan from reference to source. + image, face = inference.transfer(source, reference, with_face=True) + source_crop = source.crop((face.left(), face.top(), face.right(), face.bottom())) + image = postprocess(source_crop, image) + image = np.array(image) + return image diff --git a/modules/image/Image_gan/style_transfer/psgan/module.py b/modules/image/Image_gan/style_transfer/psgan/module.py new file mode 100644 index 0000000000000000000000000000000000000000..754af703458578fbda1e06e623b5ae91d3c807c0 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/psgan/module.py @@ -0,0 +1,144 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from ppgan.utils.config import get_config +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import PSGANPredictor +from .util import base64_to_cv2 +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="psgan", type="CV/gan", author="paddlepaddle", author_email="", summary="", version="1.0.0") +class psgan: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "psgan_weight.pdparams") + cfg = get_config(os.path.join(self.directory, 'makeup.yaml')) + self.network = PSGANPredictor(cfg, self.pretrained_model) + + def makeup_transfer(self, + images=None, + paths=None, + output_dir='./transfer_result/', + use_gpu=False, + visualization=True): + ''' + Transfer a image to stars style. + + images (list[dict]): data of images, 每一个元素都为一个 dict,有关键字 content, style, 相应取值为: + - content (numpy.ndarray): 待转换的图片,shape 为 \[H, W, C\],BGR格式;
+ - style (numpy.ndarray) : makeup (style) image, shape is \[H, W, C\], BGR format;<br/>
+ paths (list[dict]): paths to images, each element is a dict with keys content and style, where: + - content (str): path of the image to be transferred;<br/>
+ - style (str) : path of the makeup (style) image;<br/>
+ + output_dir: the dir to save the results + use_gpu: if True, use gpu to perform the computation, otherwise cpu. + visualization: if True, save results in output_dir. + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image_dict in images: + content_img = image_dict['content'][:, :, ::-1] + style_img = image_dict['style'][:, :, ::-1] + results.append(self.network.run(content_img, style_img)) + + if paths != None: + for path_dict in paths: + content_img = cv2.imread(path_dict['content'])[:, :, ::-1] + style_img = cv2.imread(path_dict['style'])[:, :, ::-1] + results.append(self.network.run(content_img, style_img)) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + cv2.imwrite(os.path.join(output_dir, 'output_{}.png'.format(i)), out[:, :, ::-1]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + + self.makeup_transfer( + paths=[{ + 'content': self.args.content, + 'style': self.args.style + }], + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = copy.deepcopy(images) + for image in images_decode: + image['content'] = base64_to_cv2(image['content']) + image['style'] = base64_to_cv2(image['style']) + results = self.makeup_transfer(images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='transfer_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--content', type=str, help="path to content image.") + self.arg_input_group.add_argument('--style', type=str, help="path to style image.") diff --git a/modules/image/Image_gan/style_transfer/psgan/requirements.txt b/modules/image/Image_gan/style_transfer/psgan/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..d9bfc85782a3ee323241fe7beb87a9f281c120fe --- /dev/null +++ b/modules/image/Image_gan/style_transfer/psgan/requirements.txt @@ -0,0 +1,2 @@ +ppgan +dlib diff --git a/modules/image/Image_gan/style_transfer/psgan/util.py b/modules/image/Image_gan/style_transfer/psgan/util.py new file mode 100644 index 0000000000000000000000000000000000000000..531a0ae0d487822a870ba7f09817e658967aff10 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/psgan/util.py @@ -0,0 +1,11 @@ +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/Image_gan/style_transfer/stylepro_artistic/README.md b/modules/image/Image_gan/style_transfer/stylepro_artistic/README.md index 38f44af58a5f86c009648cf3abfac54822f33c7c..2d44dae432bdb5ce20c932d5d3e5022cfdbe27cc 100644 --- a/modules/image/Image_gan/style_transfer/stylepro_artistic/README.md +++ b/modules/image/Image_gan/style_transfer/stylepro_artistic/README.md @@ -48,7 +48,7 @@ $ hub run stylepro_artistic --input_path "/PATH/TO/IMAGE" ``` - 通过命令行方式实现风格转换模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/Image_gan/style_transfer/stylepro_artistic/README_en.md b/modules/image/Image_gan/style_transfer/stylepro_artistic/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..95484165c8def005b9b87b0a1a773210764078c6 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/stylepro_artistic/README_en.md @@ -0,0 +1,186 @@ +# stylepro_artistic + +|Module Name|stylepro_artistic| +| :--- | :---: | +|Category|image generation| +|Network|StyleProNet| +|Dataset|MS-COCO + WikiArt| +|Fine-tuning supported or not|No| +|Module Size|28MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+
+

+ +- ### Module Introduction + + - StyleProNet is a model for style transfer, which is light-weight and responds quickly. This module is based on StyleProNet, trained on WikiArt(MS-COCO) and WikiArt(style) datasets, and can be used for style transfer. For more information, please refer to [StyleProNet](https://arxiv.org/abs/2003.07694). + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install stylepro_artistic + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run stylepro_artistic --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + stylepro_artistic = hub.Module(name="stylepro_artistic") + result = stylepro_artistic.style_transfer( + images=[{ + 'content': cv2.imread('/PATH/TO/CONTENT_IMAGE'), + 'styles': [cv2.imread('/PATH/TO/STYLE_IMAGE')] + }]) + + # or + # result = stylepro_artistic.style_transfer( + # paths=[{ + # 'content': '/PATH/TO/CONTENT_IMAGE', + # 'styles': ['/PATH/TO/STYLE_IMAGE'] + # }]) + ``` + +- ### 3、API + + - ```python + def style_transfer(images=None, + paths=None, + alpha=1, + use_gpu=False, + visualization=False, + output_dir='transfer_result') + ``` + + - Style transfer API. + + - **Parameters** + - images (list\[dict\]): each element is a dict,includes: + - content (numpy.ndarray): input image array,shape is \[H, W, C\],BGR format;
+ - styles (list\[numpy.ndarray\]) : list of style image arrays, shape is \[H, W, C\], BGR format;<br/>
+ - weights (list\[float\], optional) : weight for each style, if not set, each style has the same weight;<br/>
+ - paths (list\[dict\]): each element is a dict, includes: + - content (str): path for input image;<br/>
+ - styles (list\[str\]) : paths for style images;<br/>
+ - weights (list\[float\], optional) : weight for each style, if not set, each style has the same weight;<br/>
+ - alpha (float) : alpha value, in range \[0, 1\], default is 1;<br/>
+ - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - visualization (bool): Whether to save the results as picture files; + - output_dir (str): save path of images; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - path (str): path for input image + - data (numpy.ndarray): output image + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of style transfer. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m stylepro_artistic + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # Send an HTTP request + data = {'images':[ + { + 'content':cv2_to_base64(cv2.imread('/PATH/TO/CONTENT_IMAGE')), + 'styles':[cv2_to_base64(cv2.imread('/PATH/TO/STYLE_IMAGE'))] + } + ]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/stylepro_artistic" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(base64_to_cv2(r.json()["results"][0]['data'])) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + - ```shell + $ hub install stylepro_artistic==1.0.1 + ``` diff --git a/modules/image/classification/DriverStatusRecognition/README_en.md b/modules/image/classification/DriverStatusRecognition/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..47c5b48b419821be70785e298df9f8d98070bb3e --- /dev/null +++ b/modules/image/classification/DriverStatusRecognition/README_en.md @@ -0,0 +1,89 @@ +# DriverStatusRecognition + +|Module Name|DriverStatusRecognition| +| :--- | :---: | +|Category|image classification| +|Network|MobileNetV3_small_ssld| +|Dataset|Distractible Driver Dataset| +|Fine-tuning supported or not|No| +|Module Size|6MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - This module can be used for recognizing distractible drivers by analysing the expression on the face. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + - paddlex >= 1.3.7 + + +- ### 2、Installation + + - ```shell + $ hub install DriverStatusRecognition + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +- ### 3、Online experience + [AI Studio](https://aistudio.baidu.com/aistudio/projectdetail/1649513) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run DriverStatusRecognition --input_path /PATH/TO/IMAGE + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="DriverStatusRecognition") + images = [cv2.imread('/PATH/TO/IMAGE')] + results = classifier.predict(images=images) + for result in results: + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - classification API. + - **Parameters** + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install DriverStatusRecognition==1.0.0 + ``` diff --git a/modules/image/classification/SnakeIdentification/README_en.md b/modules/image/classification/SnakeIdentification/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..c97d45c7c022f2c4e2342eb0697b9221915c7752 --- /dev/null +++ b/modules/image/classification/SnakeIdentification/README_en.md @@ -0,0 +1,89 @@ +# SnakeIdentification + +|Module Name|SnakeIdentification| +| :--- | :---: | +|Category|image classification| +|Network|ResNet50_vd_ssld| +|Dataset|Snake Dataset| +|Fine-tuning supported or not|No| +|Module Size|84MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - This module can be used to identify the kind of snake, and judge the toxicity. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + - paddlex >= 1.3.7 + + +- ### 2、Installation + + - ```shell + $ hub install SnakeIdentification + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +- ### 3、Online experience + [AI Studio](https://aistudio.baidu.com/aistudio/projectdetail/1646951) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run SnakeIdentification --input_path /PATH/TO/IMAGE + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="SnakeIdentification") + images = [cv2.imread('/PATH/TO/IMAGE')] + results = classifier.predict(images=images) + for result in results: + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - classification API. + - **Parameters** + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install SnakeIdentification==1.0.0 + ``` diff --git a/modules/image/classification/alexnet_imagenet/README_en.md b/modules/image/classification/alexnet_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..08c3b378441d2a377cafbd5e2b9b2f7936385f62 --- /dev/null +++ b/modules/image/classification/alexnet_imagenet/README_en.md @@ -0,0 +1,83 @@ +# alexnet_imagenet + +|Module Name|alexnet_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|AlexNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|234MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - AlexNet was a classification model proposed by Alex Krizhevsky in 2012, and gained the champion of ILSVRC 2012. This module is based on AlexNet, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install alexnet_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run alexnet_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="alexnet_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install alexnet_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/darknet53_imagenet/README_en.md b/modules/image/classification/darknet53_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..eb06fe36d007432c852c0ff609415aa6d9933ef7 --- /dev/null +++ b/modules/image/classification/darknet53_imagenet/README_en.md @@ -0,0 +1,83 @@ +# darknet53_imagenet + +|Module Name|darknet53_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|DarkNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|160MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - DarkNet is a classification model proposed by Joseph Redmon, which uses Yolov3 as backbone to extract features. This module is based on darknet53, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install darknet53_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run darknet53_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="darknet53_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install darknet53_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/densenet121_imagenet/README_en.md b/modules/image/classification/densenet121_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..a07b4e877bec9778fc3fb985540165011ef22c7f --- /dev/null +++ b/modules/image/classification/densenet121_imagenet/README_en.md @@ -0,0 +1,83 @@ +# densenet121_imagenet + +|Module Name|densenet121_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|DenseNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|34MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - DenseNet is the model in CVPR2017 best paper. Every layer outputs its result as input for the layer after it, and forms the dense connection topology. The dense connection ease the probblem of vanishing gradient and improve the information flow. This module is based on DenseNet121, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install densenet121_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run densenet121_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="densenet121_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install densenet121_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/densenet161_imagenet/README_en.md b/modules/image/classification/densenet161_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..40391e2bd0b152eced1d01bc49aec61e08113165 --- /dev/null +++ b/modules/image/classification/densenet161_imagenet/README_en.md @@ -0,0 +1,83 @@ +# densenet161_imagenet + +|Module Name|densenet161_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|DenseNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|114MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - DenseNet is the model in CVPR2017 best paper. Every layer outputs its result as input for the layer after it, and forms the dense connection topology. The dense connection ease the probblem of vanishing gradient and improve the information flow. This module is based on DenseNet161, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install densenet161_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run densenet161_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="densenet161_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install densenet161_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/densenet169_imagenet/README_en.md b/modules/image/classification/densenet169_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..78c6f11334d642f4e9c8c9f8047369952e950c5e --- /dev/null +++ b/modules/image/classification/densenet169_imagenet/README_en.md @@ -0,0 +1,83 @@ +# densenet169_imagenet + +|Module Name|densenet169_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|DenseNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|59MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - DenseNet is the model in CVPR2017 best paper. Every layer outputs its result as input for the layer after it, and forms the dense connection topology. The dense connection ease the probblem of vanishing gradient and improve the information flow. This module is based on DenseNet169, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install densenet169_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run densenet169_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="densenet169_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install densenet169_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/densenet201_imagenet/README_en.md b/modules/image/classification/densenet201_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..42f8430c66c9dc623e051524951ff0e566f344ac --- /dev/null +++ b/modules/image/classification/densenet201_imagenet/README_en.md @@ -0,0 +1,83 @@ +# densenet201_imagenet + +|Module Name|densenet201_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|DenseNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|82MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - DenseNet is the model in CVPR2017 best paper. Every layer outputs its result as input for the layer after it, and forms the dense connection topology. The dense connection ease the probblem of vanishing gradient and improve the information flow. This module is based on DenseNet201, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install densenet201_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run densenet201_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="densenet201_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install densenet201_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/densenet264_imagenet/README_en.md b/modules/image/classification/densenet264_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..02940f020856fe502096b6e6fb59dbb0bd2fa21f --- /dev/null +++ b/modules/image/classification/densenet264_imagenet/README_en.md @@ -0,0 +1,83 @@ +# densenet264_imagenet + +|Module Name|densenet264_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|DenseNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|135MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - DenseNet is the model in CVPR2017 best paper. Every layer outputs its result as input for the layer after it, and forms the dense connection topology. The dense connection ease the probblem of vanishing gradient and improve the information flow. This module is based on DenseNet264, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install densenet264_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run densenet264_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="densenet264_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install densenet264_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/dpn107_imagenet/README_en.md b/modules/image/classification/dpn107_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..20d21fdc1ba7d77c4801142b31ffe45389ce3944 --- /dev/null +++ b/modules/image/classification/dpn107_imagenet/README_en.md @@ -0,0 +1,84 @@ +# dpn107_imagenet + +|Module Name|dpn107_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|DPN| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|335MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - DPN(Dual Path Networks) is the champion of ILSVRC2017 in Object Localization Task. This module is based on DPN107, trained on ImageNet-2012, can predict an image of size 224*224*3. 
+ + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install dpn107_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run dpn107_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="dpn107_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install dpn107_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/dpn131_imagenet/README_en.md b/modules/image/classification/dpn131_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..cabfb0ce9516a2a5ed69a00a2e780e489ecc6e8b --- /dev/null +++ b/modules/image/classification/dpn131_imagenet/README_en.md @@ -0,0 +1,84 @@ +# dpn131_imagenet + +|Module Name|dpn131_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|DPN| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|306MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - DPN(Dual Path Networks) is the champion of ILSVRC2017 in Object Localization Task. This module is based on DPN131, trained on ImageNet-2012, can predict an image of size 224*224*3. 
+ + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install dpn131_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run dpn131_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="dpn131_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install dpn131_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/dpn68_imagenet/README_en.md b/modules/image/classification/dpn68_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..456da156172689ad6f0a0d804a2c96787a33935e --- /dev/null +++ b/modules/image/classification/dpn68_imagenet/README_en.md @@ -0,0 +1,83 @@ +# dpn68_imagenet + +|Module Name|dpn68_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|DPN| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|50MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - DPN(Dual Path Networks) is the champion of ILSVRC2017 in Object Localization Task. This module is based on DPN68, trained on ImageNet-2012, can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install dpn68_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run dpn68_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="dpn68_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install dpn68_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/dpn92_imagenet/README_en.md b/modules/image/classification/dpn92_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..e64de3694484d190373be57055d717d6aa976eda --- /dev/null +++ b/modules/image/classification/dpn92_imagenet/README_en.md @@ -0,0 +1,84 @@ +# dpn92_imagenet + +|Module Name|dpn92_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|DPN| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|146MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - DPN(Dual Path Networks) is the champion of ILSVRC2017 in Object Localization Task. This module is based on DPN92, trained on ImageNet-2012, can predict an image of size 224*224*3. 
+ + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install dpn92_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run dpn92_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="dpn92_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install dpn92_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/dpn98_imagenet/README_en.md b/modules/image/classification/dpn98_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..5faa94815944f99040bb8310951cc900d8d8919e --- /dev/null +++ b/modules/image/classification/dpn98_imagenet/README_en.md @@ -0,0 +1,85 @@ +# dpn98_imagenet + +|Module Name|dpn98_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|DPN| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|238MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - DPN(Dual Path Networks) is the champion of ILSVRC2017 in Object Localization Task. This module is based on DPN98, trained on ImageNet-2012, can predict an image of size 224*224*3. 
+ + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install dpn98_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run dpn98_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="dpn98_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install dpn98_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/efficientnetb0_imagenet/README_en.md b/modules/image/classification/efficientnetb0_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..b9e7f8d6f7fd19aac2503d53bcf5ed59fc388fca --- /dev/null +++ b/modules/image/classification/efficientnetb0_imagenet/README_en.md @@ -0,0 +1,136 @@ +# efficientnetb0_imagenet + +|Module Name|efficientnetb0_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|EfficientNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|22MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - EfficientNet is a light-weight model proposed by google, which consists of MBConv, and takes advantage of squeeze-and-excitation operation. This module is based on EfficientNetB0, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. 
+ + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install efficientnetb0_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run efficientnetb0_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb0_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m efficientnetb0_imagenet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
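+
+  - For example, a minimal GPU setup might look like this (the device id `0` is an assumption; use whichever card is available):
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m efficientnetb0_imagenet
+    ```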
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb0_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.1.0 + + Improve the prediction performance and users' experience + - ```shell + $ hub install efficientnetb0_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb0_small_imagenet/README_en.md b/modules/image/classification/efficientnetb0_small_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..6aba803d3d5920e6a6f87131aab5839c27c7e203 --- /dev/null +++ b/modules/image/classification/efficientnetb0_small_imagenet/README_en.md @@ -0,0 +1,135 @@ +# efficientnetb0_small_imagenet + +|Module Name|efficientnetb0_small_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|EfficientNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|20MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - EfficientNet is a light-weight model proposed by google, which consists of MBConv, and takes advantage of squeeze-and-excitation operation. This module is based on EfficientNetB0, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install efficientnetb0_small_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run efficientnetb0_small_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb0_small_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. 
+ - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m efficientnetb0_small_imagenet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb0_small_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install efficientnetb0_small_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/efficientnetb1_imagenet/README_en.md b/modules/image/classification/efficientnetb1_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..e578354f2def3d71197306700a341b4a6132cfb0 --- /dev/null +++ b/modules/image/classification/efficientnetb1_imagenet/README_en.md @@ -0,0 +1,134 @@ +# efficientnetb1_imagenet + +|Module Name|efficientnetb1_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|EfficientNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|33MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - EfficientNet is a light-weight model proposed by google, which consists of MBConv, and takes advantage of squeeze-and-excitation operation. This module is based on EfficientNetB1, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install efficientnetb1_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run efficientnetb1_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb1_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m efficientnetb1_imagenet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
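+
+  - For example, a minimal GPU setup might look like this (the device id `0` is an assumption; use whichever card is available):
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m efficientnetb1_imagenet
+    ```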
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb1_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.1.0 + + Improve the prediction performance and users' experience + - ```shell + $ hub install efficientnetb1_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb2_imagenet/README_en.md b/modules/image/classification/efficientnetb2_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..01f5180bd30dcb846b1b73dfdc434e42be1a9a4b --- /dev/null +++ b/modules/image/classification/efficientnetb2_imagenet/README_en.md @@ -0,0 +1,135 @@ +# efficientnetb2_imagenet + +|Module Name|efficientnetb2_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|EfficientNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|38MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - EfficientNet is a light-weight model proposed by google, which consists of MBConv, and takes advantage of squeeze-and-excitation operation. This module is based on EfficientNetB2, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install efficientnetb2_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run efficientnetb2_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb2_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. 
+ - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m efficientnetb2_imagenet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb2_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.1.0 + + Improve the prediction performance and users' experience + - ```shell + $ hub install efficientnetb2_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb3_imagenet/README_en.md b/modules/image/classification/efficientnetb3_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..d305e4c75fc38e15ed76f03f1bee463bdb7a2119 --- /dev/null +++ b/modules/image/classification/efficientnetb3_imagenet/README_en.md @@ -0,0 +1,134 @@ +# efficientnetb3_imagenet + +|Module Name|efficientnetb3_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|EfficientNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|51MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - EfficientNet is a light-weight model proposed by google, which consists of MBConv, and takes advantage of squeeze-and-excitation operation. This module is based on EfficientNetB3, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. 
+## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install efficientnetb3_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run efficientnetb3_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb3_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m efficientnetb3_imagenet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
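+
+  - For example (a sketch assuming a Linux/macOS shell and a single GPU with index 0):
+
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m efficientnetb3_imagenet
+    ```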
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb3_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.1.0 + + Improve the prediction performance and users' experience + - ```shell + $ hub install efficientnetb3_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb4_imagenet/README_en.md b/modules/image/classification/efficientnetb4_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..e04148127a2b6adadc6871ef95f2efc2335f2e3d --- /dev/null +++ b/modules/image/classification/efficientnetb4_imagenet/README_en.md @@ -0,0 +1,136 @@ +# efficientnetb4_imagenet + +|Module Name|efficientnetb4_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|EfficientNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|77MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - EfficientNet is a light-weight model proposed by google, which consists of MBConv, and takes advantage of squeeze-and-excitation operation. This module is based on EfficientNetB4, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install efficientnetb4_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run efficientnetb4_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb4_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. 
+ - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m efficientnetb4_imagenet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb4_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.1.0 + + Improve the prediction performance and users' experience + - ```shell + $ hub install efficientnetb4_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb5_imagenet/README_en.md b/modules/image/classification/efficientnetb5_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..2562ba1338b5f9ffe9525b1f2596de0ce7a7f1ba --- /dev/null +++ b/modules/image/classification/efficientnetb5_imagenet/README_en.md @@ -0,0 +1,136 @@ +# efficientnetb5_imagenet + +|Module Name|efficientnetb5_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|EfficientNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|121MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - EfficientNet is a light-weight model proposed by google, which consists of MBConv, and takes advantage of squeeze-and-excitation operation. This module is based on EfficientNetB5, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. 
+ + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install efficientnetb5_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run efficientnetb5_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb5_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m efficientnetb5_imagenet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
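+
+  - A minimal sketch, assuming GPU 0 should be visible to the service (omit the export to predict on CPU):
+
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m efficientnetb5_imagenet
+    ```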
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb5_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.1.0 + + Improve the prediction performance and users' experience + - ```shell + $ hub install efficientnetb5_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb6_imagenet/README_en.md b/modules/image/classification/efficientnetb6_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..cc10c72624a3b4914a59b8c6e617fa258d2241e2 --- /dev/null +++ b/modules/image/classification/efficientnetb6_imagenet/README_en.md @@ -0,0 +1,135 @@ +# efficientnetb6_imagenet + +|Module Name|efficientnetb6_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|EfficientNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|170MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - EfficientNet is a light-weight model proposed by google, which consists of MBConv, and takes advantage of squeeze-and-excitation operation. This module is based on EfficientNetB6, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install efficientnetb6_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run efficientnetb6_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb6_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. 
+ - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m efficientnetb6_imagenet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb6_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.1.0 + + Improve the prediction performance and users' experience + - ```shell + $ hub install efficientnetb6_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb7_imagenet/README_en.md b/modules/image/classification/efficientnetb7_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..d61af6696f0642ac92dff68ec9a8bb2f2a87e1c0 --- /dev/null +++ b/modules/image/classification/efficientnetb7_imagenet/README_en.md @@ -0,0 +1,136 @@ +# efficientnetb7_imagenet + +|Module Name|efficientnetb7_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|EfficientNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|260MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - EfficientNet is a light-weight model proposed by google, which consists of MBConv, and takes advantage of squeeze-and-excitation operation. This module is based on EfficientNetB7, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. 
+ + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install efficientnetb7_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run efficientnetb7_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb7_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m efficientnetb7_imagenet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
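+
+  - For example, assuming a Linux/macOS shell and that device 0 is the GPU to be used:
+
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m efficientnetb7_imagenet
+    ```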
+
+- ### Step 2: Send a predictive request
+
+  - With a configured server, use the following lines of code to send the prediction request and obtain the result
+
+  - ```python
+    import requests
+    import json
+    import cv2
+    import base64
+
+    def cv2_to_base64(image):
+        data = cv2.imencode('.jpg', image)[1]
+        return base64.b64encode(data.tobytes()).decode('utf8')
+
+    # Send an HTTP request
+    data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
+    headers = {"Content-type": "application/json"}
+    url = "http://127.0.0.1:8866/predict/efficientnetb7_imagenet"
+    r = requests.post(url=url, headers=headers, data=json.dumps(data))
+
+    # print prediction results
+    print(r.json()["results"])
+    ```
+
+
+## V.Release Note
+
+* 1.0.0
+
+  First release
+
+* 1.1.0
+
+  Improve the prediction performance and users' experience
+  - ```shell
+    $ hub install efficientnetb7_imagenet==1.1.0
+    ```
diff --git a/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/README_en.md b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/README_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..2634e00ee0194aff5c603a1b6b05e90ffacb8224
--- /dev/null
+++ b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/README_en.md
@@ -0,0 +1,134 @@
+# fix_resnext101_32x48d_wsl_imagenet
+
+|Module Name|fix_resnext101_32x48d_wsl_imagenet|
+| :--- | :---: |
+|Category|image classification|
+|Network|ResNeXt|
+|Dataset|ImageNet-2012|
+|Fine-tuning supported or not|No|
+|Module Size|3.1GB|
+|Latest update date|-|
+|Data indicators|-|
+
+
+## I.Basic Information
+
+
+
+- ### Module Introduction
+
+  - ResNeXt was proposed by UC San Diego and Facebook AI Research in 2017. This module is based on the ResNeXt model. It was trained with weak supervision on billions of social images, finetuned on the ImageNet-2012 dataset, and can predict an image of size 224*224*3.
+
+
+
+## II.Installation
+
+- ### 1、Environmental Dependence
+
+  - paddlepaddle >= 1.6.2
+
+  - paddlehub >= 1.6.0  | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst)
+
+
+- ### 2、Installation
+
+  - ```shell
+    $ hub install fix_resnext101_32x48d_wsl_imagenet
+    ```
+  - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md)
+
+## III.Module API Prediction
+
+- ### 1、Command line Prediction
+
+  - ```shell
+    $ hub run fix_resnext101_32x48d_wsl_imagenet --input_path "/PATH/TO/IMAGE"
+    ```
+  - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst)
+
+- ### 2、Prediction Code Example
+
+  - ```python
+    import paddlehub as hub
+    import cv2
+
+    classifier = hub.Module(name="fix_resnext101_32x48d_wsl_imagenet")
+    result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')])
+    # or
+    # result = classifier.classification(paths=['/PATH/TO/IMAGE'])
+    ```
+
+- ### 3、API
+
+
+  - ```python
+    def classification(images=None,
+                       paths=None,
+                       batch_size=1,
+                       use_gpu=False,
+                       top_k=1):
+    ```
+  - classification API.
+ - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m fix_resnext101_32x48d_wsl_imagenet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/fix_resnext101_32x48d_wsl_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + - ```shell + $ hub install fix_resnext101_32x48d_wsl_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/food_classification/README.md b/modules/image/classification/food_classification/README.md index 01f910138e18aee8c45d1a2f56f493d547988d50..c711de3a8029d6385a41d38cadce788ccd839886 100644 --- a/modules/image/classification/food_classification/README.md +++ b/modules/image/classification/food_classification/README.md @@ -23,8 +23,6 @@ - ### 1、环境依赖 - - paddlepaddle >= 2.0.0 - - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - paddlex >= 1.3.7 diff --git a/modules/image/classification/food_classification/README_en.md b/modules/image/classification/food_classification/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..672e967fcdb42ba672232e711de677b1e90ac450 --- /dev/null +++ b/modules/image/classification/food_classification/README_en.md @@ -0,0 +1,87 @@ +# food_classification + +|Module Name|food_classification| +| :--- | :---: | +|Category|image classification| +|Network|ResNet50_vd_ssld| +|Dataset|Food Dataset| +|Fine-tuning supported or not|No| +|Module Size|91MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - This module can be used for food classification. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + - paddlex >= 1.3.7 + + +- ### 2、Installation + + - ```shell + $ hub install food_classification + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run food_classification --input_path /PATH/TO/IMAGE + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="food_classification") + images = [cv2.imread('/PATH/TO/IMAGE')] + results = classifier.predict(images=images) + for result in results: + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - classification API. + - **Parameters** + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + - category_id (int): category id; + - category(str): category name; + - score(float): probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install food_classification==1.0.0 + ``` diff --git a/modules/image/classification/food_classification/requirements.txt b/modules/image/classification/food_classification/requirements.txt index f3c5b8fb12473794251e0a4669dac313cb93eff4..ac9e21c6cb27f4666c3899eb813db94629a79085 100644 --- a/modules/image/classification/food_classification/requirements.txt +++ b/modules/image/classification/food_classification/requirements.txt @@ -1,3 +1 @@ -paddlepaddle >= 2.0.0 -paddlehub >= 2.0.0 paddlex == 1.3.7 diff --git a/modules/image/classification/googlenet_imagenet/README_en.md b/modules/image/classification/googlenet_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..56e61c0ab2c0101722fa3c536ccdddd5518d9286 --- /dev/null +++ b/modules/image/classification/googlenet_imagenet/README_en.md @@ -0,0 +1,83 @@ +# googlenet_imagenet + +|Module Name|googlenet_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|GoogleNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|28MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - GoogleNet was proposed by Christian Szegedy in 2014 and gained the champion of ILSVRC 2014. This module is based on GoogleNet, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+
+## II.Installation
+
+- ### 1、Environmental Dependence
+
+  - paddlepaddle >= 1.4.0
+
+  - paddlehub >= 1.0.0  | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst)
+
+
+- ### 2、Installation
+
+  - ```shell
+    $ hub install googlenet_imagenet
+    ```
+  - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md)
+
+## III.Module API Prediction
+
+- ### 1、Command line Prediction
+
+  - ```shell
+    $ hub run googlenet_imagenet --input_path "/PATH/TO/IMAGE"
+    ```
+  - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst)
+
+- ### 2、Prediction Code Example
+
+  - ```python
+    import paddlehub as hub
+    import cv2
+
+    classifier = hub.Module(name="googlenet_imagenet")
+    test_img_path = "/PATH/TO/IMAGE"
+    input_dict = {"image": [test_img_path]}
+    result = classifier.classification(data=input_dict)
+    ```
+
+- ### 3、API
+
+  - ```python
+    def classification(data)
+    ```
+  - classification API.
+  - **Parameters**
+    - data (dict): key is "image", value is a list of image paths
+
+  - **Return**
+    - result (list[dict]): classification results; each element in the list is a dict whose key is the label name and whose value is the corresponding probability
+
+
+
+
+
+## IV.Release Note
+
+* 1.0.0
+
+  First release
+
+  - ```shell
+    $ hub install googlenet_imagenet==1.0.0
+    ```
diff --git a/modules/image/classification/inception_v4_imagenet/README_en.md b/modules/image/classification/inception_v4_imagenet/README_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..c8dd3f6b30ca0827e46da28f36d35f3d8212cb5e
--- /dev/null
+++ b/modules/image/classification/inception_v4_imagenet/README_en.md
@@ -0,0 +1,82 @@
+# inception_v4_imagenet
+
+|Module Name|inception_v4_imagenet|
+| :--- | :---: |
+|Category|image classification|
+|Network|Inception_V4|
+|Dataset|ImageNet-2012|
+|Fine-tuning supported or not|No|
+|Module Size|167MB|
+|Latest update date|-|
+|Data indicators|-|
+
+
+## I.Basic Information
+
+
+
+- ### Module Introduction
+  - The Inception structure was first introduced in GoogLeNet, so GoogLeNet is also known as Inception-v1. Inception-v4 is an improvement on it, which takes advantage of several useful strategies such as batch normalization and residual learning. This module is based on Inception-v4, trained on ImageNet-2012, and can predict an image of size 224*224*3.
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install inception_v4_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run inception_v4_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="inception_v4_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install inception_v4_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/marine_biometrics/README_en.md b/modules/image/classification/marine_biometrics/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..76fd189dd19e4655c8e0cc513240ee721f23de20 --- /dev/null +++ b/modules/image/classification/marine_biometrics/README_en.md @@ -0,0 +1,84 @@ +# marine_biometrics + +|Module Name|marine_biometrics| +| :--- | :---: | +|Category|image classification| +|Network|ResNet50_vd_ssld| +|Dataset|Fish4Knowledge| +|Fine-tuning supported or not|No| +|Module Size|84MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - This module can be used to classify marine biometrics. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install marine_biometrics + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run marine_biometrics --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="marine_biometrics") + images = [cv2.imread('/PATH/TO/IMAGE')] + results = classifier.predict(images=images) + for result in results: + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - classification API. + - **Parameters** + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install marine_biometrics==1.0.0 + ``` diff --git a/modules/image/classification/mobilenet_v2_animals/README_en.md b/modules/image/classification/mobilenet_v2_animals/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..9f0835948e21c3bfbcb7cea3f3a758ef69d50052 --- /dev/null +++ b/modules/image/classification/mobilenet_v2_animals/README_en.md @@ -0,0 +1,135 @@ +# mobilenet_v2_animals + +|Module Name|mobilenet_v2_animals| +| :--- | :---: | +|Category|image classification| +|Network|MobileNet_v2| +|Dataset|Baidu Animal Dataset| +|Fine-tuning supported or not|No| +|Module Size|50MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - MobileNet is a light-weight convolution network. This module is trained on Baidu animal dataset, and can classify 7978 kinds of animals. 
+ - For more information, please refer to:[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/pdf/1801.04381.pdf) + + + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install mobilenet_v2_animals + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run mobilenet_v2_animals --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="mobilenet_v2_animals") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m mobilenet_v2_animals + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
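+
+  - For example, assuming a Linux/macOS shell and that GPU 0 is the device to expose (unset the variable for CPU-only prediction):
+
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m mobilenet_v2_animals
+    ```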
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/mobilenet_v2_animals" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + - ```shell + $ hub install mobilenet_v2_animals==1.0.0 + ``` diff --git a/modules/image/classification/mobilenet_v2_dishes/README_en.md b/modules/image/classification/mobilenet_v2_dishes/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..70d3f41a629557222b5c62d403077fdb45e46e08 --- /dev/null +++ b/modules/image/classification/mobilenet_v2_dishes/README_en.md @@ -0,0 +1,138 @@ +# mobilenet_v2_dishes + +|Module Name|mobilenet_v2_dishes| +| :--- | :---: | +|Category|image classification| +|Network|MobileNet_v2| +|Dataset|Baidu food Dataset| +|Fine-tuning supported or not|No| +|Module Size|52MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - MobileNet is a light-weight convolution network. This module is trained on Baidu food dataset, and can classify 8416 kinds of food. + +
+ + - For more information, please refer to:[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/pdf/1801.04381.pdf) + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install mobilenet_v2_dishes + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run mobilenet_v2_dishes --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="mobilenet_v2_dishes") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m mobilenet_v2_dishes + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
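+
+  - A minimal sketch, assuming a Linux/macOS shell and a single GPU with index 0:
+
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m mobilenet_v2_dishes
+    ```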
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/mobilenet_v2_dishes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install mobilenet_v2_dishes==1.0.0 + ``` diff --git a/modules/image/classification/mobilenet_v2_imagenet/README_en.md b/modules/image/classification/mobilenet_v2_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..5f73d8f142391743b2b360547e75a7a05b79febd --- /dev/null +++ b/modules/image/classification/mobilenet_v2_imagenet/README_en.md @@ -0,0 +1,87 @@ +# mobilenet_v2_imagenet + +|Module Name|mobilenet_v2_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|Mobilenet_v2| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|15MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - MobileNet V2 is an image classification model proposed by Mark Sandler, Andrew Howard et al. in 2018. This model is a light-weight model for mobile and embedded device, and can reach high accurary with a few parameters. This module is based on MobileNet V2, trained on ImageNet-2012, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install mobilenet_v2_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run mobilenet_v2_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="mobilenet_v2_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Fix the problem of encoding in python2 + + - ```shell + $ hub install mobilenet_v2_imagenet==1.0.1 + ``` diff --git a/modules/image/classification/mobilenet_v2_imagenet_ssld/README_en.md b/modules/image/classification/mobilenet_v2_imagenet_ssld/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..ef9d8988b610331a07af12fd367689de447a9192 --- /dev/null +++ b/modules/image/classification/mobilenet_v2_imagenet_ssld/README_en.md @@ -0,0 +1,132 @@ +# mobilenet_v2_imagenet_ssld + +|Module Name|mobilenet_v2_imagenet_ssld| +| :--- | :---: | +|Category|image classification| +|Network|Mobilenet_v2| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|15MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - MobileNet V2 is an image classification model proposed by Mark Sandler, Andrew Howard et al. in 2018. This model is a light-weight model for mobile and embedded device, and can reach high accurary with a few parameters. This module is based on MobileNet V2, trained on ImageNet-2012 with SSLD distillation strategy, and can predict an image of size 224*224*3. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install mobilenet_v2_imagenet_ssld + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run mobilenet_v2_imagenet_ssld --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="mobilenet_v2_imagenet_ssld") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. 
+ +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m mobilenet_v2_imagenet_ssld + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/mobilenet_v2_imagenet_ssld" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install mobilenet_v2_imagenet_ssld==1.0.0 + ``` diff --git a/modules/image/classification/mobilenet_v3_large_imagenet_ssld/README_en.md b/modules/image/classification/mobilenet_v3_large_imagenet_ssld/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..ec1b3b31e048a9556dd662804eb4991aec113258 --- /dev/null +++ b/modules/image/classification/mobilenet_v3_large_imagenet_ssld/README_en.md @@ -0,0 +1,133 @@ +# mobilenet_v3_large_imagenet_ssld + +|Module Name|mobilenet_v3_large_imagenet_ssld| +| :--- | :---: | +|Category|image classification| +|Network|Mobilenet_v3_large| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|23MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - MobileNetV3 is an image classification model proposed by Google in 2019. The authors proposed to search the network architecture by combination of NAS and NetAdapt, and provide two versions of this model, i.e. Large and Small version. This module is based on MobileNetV3 Large, trained on ImageNet-2012 with SSLD distillation strategy, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install mobilenet_v3_large_imagenet_ssld + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run mobilenet_v3_large_imagenet_ssld --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="mobilenet_v3_large_imagenet_ssld") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m mobilenet_v3_large_imagenet_ssld + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
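+  - For example, on a machine with GPUs you could expose one device and then launch the service (a minimal sketch; the device id `0` is only an example):
+
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m mobilenet_v3_large_imagenet_ssld
+    ```
+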
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/mobilenet_v3_large_imagenet_ssld" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install mobilenet_v3_large_imagenet_ssld==1.0.0 + ``` diff --git a/modules/image/classification/mobilenet_v3_small_imagenet_ssld/README_en.md b/modules/image/classification/mobilenet_v3_small_imagenet_ssld/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..0a77131a44f4a4fe54557b1acc07f67efaf4376b --- /dev/null +++ b/modules/image/classification/mobilenet_v3_small_imagenet_ssld/README_en.md @@ -0,0 +1,134 @@ +# mobilenet_v3_small_imagenet_ssld + +|Module Name|mobilenet_v3_small_imagenet_ssld| +| :--- | :---: | +|Category|image classification| +|Network|Mobilenet_v3_Small| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|13MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - MobileNetV3 is an image classification model proposed by Google in 2019. The authors proposed to search the network architecture by combination of NAS and NetAdapt, and provide two versions of this model, i.e. Large and Small version. This module is based on MobileNetV3 Small, trained on ImageNet-2012 with SSLD distillation strategy, and can predict an image of size 224*224*3. + + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install mobilenet_v3_small_imagenet_ssld + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run mobilenet_v3_small_imagenet_ssld --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="mobilenet_v3_small_imagenet_ssld") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. 
+ - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m mobilenet_v3_small_imagenet_ssld + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/mobilenet_v3_small_imagenet_ssld" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install mobilenet_v3_small_imagenet_ssld==1.0.0 + ``` diff --git a/modules/image/classification/nasnet_imagenet/README_en.md b/modules/image/classification/nasnet_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..ba0f84ae8082843ce1ab670d11d7366b42664714 --- /dev/null +++ b/modules/image/classification/nasnet_imagenet/README_en.md @@ -0,0 +1,85 @@ +# nasnet_imagenet + +|Module Name|nasnet_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|NASNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|345MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + - NASNet is proposed by Google, which is trained by AutoML. This module is based on NASNet, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install nasnet_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run nasnet_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="nasnet_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Fix the problem of encoding in python2 + - ```shell + $ hub install nasnet_imagenet==1.0.1 + ``` diff --git a/modules/image/classification/pnasnet_imagenet/README_en.md b/modules/image/classification/pnasnet_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..7454af0216bb65efc0f935186b87727903172675 --- /dev/null +++ b/modules/image/classification/pnasnet_imagenet/README_en.md @@ -0,0 +1,87 @@ +# pnasnet_imagenet + +|Module Name|pnasnet_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|PNASNet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|333MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - PNASNet is proposed by Google, which is trained by AutoML. This module is based on PNASNet, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install pnasnet_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run pnasnet_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="pnasnet_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Fix the problem of encoding in python2 + - ```shell + $ hub install pnasnet_imagenet==1.0.1 + ``` diff --git a/modules/image/classification/res2net101_vd_26w_4s_imagenet/README_en.md b/modules/image/classification/res2net101_vd_26w_4s_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..d7f7f442acb2b04e706f2559a0b6e590fd62cd87 --- /dev/null +++ b/modules/image/classification/res2net101_vd_26w_4s_imagenet/README_en.md @@ -0,0 +1,133 @@ +# res2net101_vd_26w_4s_imagenet + +|Module Name|res2net101_vd_26w_4s_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|Res2Net| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|179MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - Res2Net is an improvement on ResNet, which can improve performance without increasing computation. This module is based on Res2Net, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install res2net101_vd_26w_4s_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run res2net101_vd_26w_4s_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="res2net101_vd_26w_4s_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m res2net101_vd_26w_4s_imagenet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
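+  - Depending on your PaddleHub version, the service can later be shut down with the stop command (a hedged sketch; `8866` is the default port used above):
+
+  - ```shell
+    $ hub serving stop -p 8866
+    ```
+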
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/res2net101_vd_26w_4s_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install res2net101_vd_26w_4s_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnet18_vd_imagenet/README_en.md b/modules/image/classification/resnet18_vd_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..6ce03023c18bd92b7962437893511cc240e083d8 --- /dev/null +++ b/modules/image/classification/resnet18_vd_imagenet/README_en.md @@ -0,0 +1,136 @@ +# resnet18_vd_imagenet + +|Module Name|resnet18_vd_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNet_vd| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|46MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNet proposed a residual unit to solve the problem of training an extremely deep network, and improved the prediction accuracy of models. ResNet-vd is a variant of ResNet. This module is based on ResNet_vd, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnet18_vd_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnet18_vd_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet18_vd_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. 
+ - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m resnet18_vd_imagenet + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/resnet18_vd_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnet18_vd_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnet50_vd_10w/README_en.md b/modules/image/classification/resnet50_vd_10w/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..2f7bbb8c1f2e4d94fbcfb56bf2284c7a7179f643 --- /dev/null +++ b/modules/image/classification/resnet50_vd_10w/README_en.md @@ -0,0 +1,94 @@ +# resnet50_vd_10w + +|Module Name|resnet50_vd_10w| +| :--- | :---: | +|Category|image classification| +|Network|ResNet_vd| +|Dataset|Baidu Dataset| +|Fine-tuning supported or not|No| +|Module Size|92MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNet proposed a residual unit to solve the problem of training an extremely deep network, and improved the prediction accuracy of models. ResNet-vd is a variant of ResNet. This module is based on ResNet_vd, trained on Baidu dataset(consists of 100 thousand classes, 40 million pairs of data), and can predict an image of size 224*224*3. 
+ + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnet50_vd_10w + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet50_vd_10w") + input_dict, output_dict, program = classifier.context(trainable=True) + ``` + +- ### 2、API + + - ```python + def context(trainable=True, pretrained=True) + ``` + - **Parameters** + - trainable (bool): whether parameters are trainable;
+ - pretrained (bool): whether to load the pre-trained model. + + - **Return** + - inputs (dict): model inputs, key is 'image', value is the image tensor;
+ - outputs (dict): model outputs, keys are 'classification' and 'feature_map', values are: + - classification (paddle.fluid.framework.Variable): classification result; + - feature\_map (paddle.fluid.framework.Variable): feature map extracted by the model. + - context\_prog (fluid.Program): computation graph, used for transfer learning. + + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - **Parameters** + - dirname: output directory for saving the model;
+ - model_filename: filename of the saved model, default is \_\_model\_\_;
+ - params_filename: filename of the saved parameters, default is \_\_params\_\_ (only effective when `combined` is True);
+ - combined: whether save parameters into one file + + + + + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnet50_vd_10w==1.0.0 + ``` diff --git a/modules/image/classification/resnet50_vd_animals/README.md b/modules/image/classification/resnet50_vd_animals/README.md index a42168e27330a2e66d93a463ca8ce87553c2a2c8..0b7deba6c890ab1fd3a9d57d9c28afddb01b8940 100644 --- a/modules/image/classification/resnet50_vd_animals/README.md +++ b/modules/image/classification/resnet50_vd_animals/README.md @@ -44,7 +44,7 @@ hub run resnet50_vd_animals --input_path "/PATH/TO/IMAGE" ``` -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/classification/resnet50_vd_animals/README_en.md b/modules/image/classification/resnet50_vd_animals/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..031f469fc7025996491dcd5613a1d5cc7bd8f817 --- /dev/null +++ b/modules/image/classification/resnet50_vd_animals/README_en.md @@ -0,0 +1,173 @@ +# resnet50_vd_animals + +|Module Name|resnet50_vd_animals| +| :--- | :---: | +|Category |Image classification| +|Network|ResNet50_vd| +|Dataset|Baidu self-built dataset| +|Fine-tuning supported or not|No| +|Module Size|154MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I. Basic Information + +- ### Application Effect Display + + - ResNet-vd is a variant of ResNet, which can be used for image classification and feature extraction. This module is trained by Baidu self-built animal data set and supports the classification and recognition of 7,978 animal species. + - For more information, please refer to [ResNet-vd](https://arxiv.org/pdf/1812.01187.pdf) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + + +- ### 2、Installation + + - ```shell + $ hub install resnet50_vd_animals + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnet50_vd_animals --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet50_vd_animals") + + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def get_expected_image_width() + ``` + + - Returns the preprocessed image width, which is 224. + + - ```python + def get_expected_image_height() + ``` + + - Returns the preprocessed image height, which is 224. + + - ```python + def get_pretrained_images_mean() + ``` + + - Returns the mean value of the preprocessed image, which is \[0.485, 0.456, 0.406\]. + + - ```python + def get_pretrained_images_std() + ``` + + - Return the standard deviation of the preprocessed image, which is \[0.229, 0.224, 0.225\]. 
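+
+    - The four helpers above describe the module's own preprocessing constants. As a rough, illustrative sketch (not part of the module API), they can be combined to normalize an image manually; the BGR-to-RGB flip is an assumption based on the usual ImageNet convention:
+
+  - ```python
+    import cv2
+    import numpy as np
+    import paddlehub as hub
+
+    classifier = hub.Module(name="resnet50_vd_animals")
+
+    # Resize to the expected input size, scale to [0, 1], then normalize with the reported mean/std.
+    size = (classifier.get_expected_image_width(), classifier.get_expected_image_height())
+    img = cv2.imread('/PATH/TO/IMAGE')                     # BGR, uint8
+    img = cv2.resize(img, size).astype('float32') / 255.0
+    img = img[:, :, ::-1]                                  # BGR -> RGB (assumed convention)
+
+    mean = np.array(classifier.get_pretrained_images_mean())
+    std = np.array(classifier.get_pretrained_images_std())
+    normalized = (img - mean) / std                        # broadcasts over H x W x 3
+    ```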
+ + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + + - **Parameter** + + * images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + * paths (list\[str\]): image path; + * batch\_size (int): batch size; + * use\_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + * top\_k (int): return the top k prediction results. + + - **Return** + + - res (list\[dict\]): the list of classification results,key is the prediction label, value is the corresponding confidence. + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + + - Save the model to the specified path. + + - **Parameters** + * dirname: Save path. + * model\_filename: model file name,defalt is \_\_model\_\_ + * params\_filename: parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) + * combined: Whether to save the parameters to a unified file. + + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of animal classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m resnet50_vd_animals + ``` + + - The servitization API is now deployed and the default port number is 8866. + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/resnet50_vd_animals" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V. Release Note + +- 1.0.0 + + First release + diff --git a/modules/image/classification/resnet50_vd_dishes/README_en.md b/modules/image/classification/resnet50_vd_dishes/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..2526c1a8c3c83f7d1ff589f8d3d475dcf421d0da --- /dev/null +++ b/modules/image/classification/resnet50_vd_dishes/README_en.md @@ -0,0 +1,139 @@ +# resnet50_vd_dishes + +|Module Name|resnet50_vd_dishes| +| :--- | :---: | +|Category|image classification| +|Network|ResNet50_vd| +|Dataset|Baidu Food Dataset| +|Fine-tuning supported or not|No| +|Module Size|158MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNet proposed a residual unit to solve the problem of training an extremely deep network, and improved the prediction accuracy of models. ResNet-vd is a variant of ResNet. This module is based on ResNet-vd and can classify 8416 kinds of food. + +

+
+

+ + - For more information, please refer to:[Bag of Tricks for Image Classification with Convolutional Neural Networks](https://arxiv.org/pdf/1812.01187.pdf) + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnet50_vd_dishes + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnet50_vd_dishes --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet50_vd_dishes") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m resnet50_vd_dishes + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
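+  - If port 8866 is already in use, a different one can be chosen with the `-p`/`--port` option (a sketch; `8867` is only an example, and the request URL in the next step must then be adjusted to match):
+
+  - ```shell
+    $ hub serving start -m resnet50_vd_dishes -p 8867
+    ```
+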
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/resnet50_vd_dishes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnet50_vd_dishes==1.0.0 + ``` diff --git a/modules/image/classification/resnet50_vd_imagenet_ssld/README.md b/modules/image/classification/resnet50_vd_imagenet_ssld/README.md index 229e5d0c8400152d73f354f13dab546a3f8b749c..7563ae023257a077ef302d8992ee51307246e3c4 100644 --- a/modules/image/classification/resnet50_vd_imagenet_ssld/README.md +++ b/modules/image/classification/resnet50_vd_imagenet_ssld/README.md @@ -50,7 +50,7 @@ if __name__ == '__main__': model = hub.Module(name='resnet50_vd_imagenet_ssld') - result = model.predict(['flower.jpg']) + result = model.predict(['/PATH/TO/IMAGE']) ``` - ### 3.如何开始Fine-tune @@ -134,7 +134,7 @@ if __name__ == '__main__': model = hub.Module(name='resnet50_vd_imagenet_ssld', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) + result = model.predict(['/PATH/TO/IMAGE']) ``` diff --git a/modules/image/classification/resnet50_vd_imagenet_ssld/README_en.md b/modules/image/classification/resnet50_vd_imagenet_ssld/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..9cf41b043fd11c4e30181dfccafe04d39533d083 --- /dev/null +++ b/modules/image/classification/resnet50_vd_imagenet_ssld/README_en.md @@ -0,0 +1,198 @@ +# resnet50_vd_imagenet_ssld + +|Module Name|resnet50_vd_imagenet_ssld| +| :--- | :---: | +|Category |Image classification| +|Network|ResNet_vd| +|Dataset|ImageNet-2012| +|Fine-tuning supported or notFine-tuning|Yes| +|Module Size|148MB| +|Data indicators|-| +|Latest update date|2021-02-26| + + +## I. Basic Information + +- ### Module Introduction + + - ResNet-vd is a variant of ResNet, which can be used for image classification and feature extraction. + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install resnet50_vd_imagenet_ssld + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III. 
Module API Prediction + +- ### 1、Command line Prediction + + ```shell + $ hub run resnet50_vd_imagenet_ssld --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2、Prediction Code Example + + ```python + import paddle + import paddlehub as hub + + if __name__ == '__main__': + + model = hub.Module(name='resnet50_vd_imagenet_ssld') + result = model.predict(['/PATH/TO/IMAGE']) + ``` +- ### 3.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the user_guided_colorization model to fine-tune datasets such as [Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers) by excuting `python train.py`. + + - Steps: + + - Step1: Define the data preprocessing method + - ```python + import paddlehub.vision.transforms as T + + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` + + - `transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import Flowers + + flowers = Flowers(transforms) + + flowers_validate = Flowers(transforms, mode='val') + ``` + + * `transforms`: data preprocessing methods. + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + * `hub.datasets.Flowers()` will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + model = hub.Module(name="resnet50_vd_imagenet_ssld", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: model name. + * `label_list`: set the output classification category. Default is Imagenet2012 category. + + - Step4: Optimization strategy + + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` + + + - Run configuration + + - `Trainer` mainly control the training of Fine-tune, including the following controllable parameters: + + * `model`: Optimized model. + * `optimizer`: Optimizer selection. + * `use_vdl`: Whether to use vdl to visualize the training process. + * `checkpoint_dir`: The storage address of the model parameters. + * `compare_metrics`: The measurement index of the optimal model. + + - `trainer.train` mainly control the specific training process, including the following controllable parameters: + + * `train_dataset`: Training dataset. + * `epochs`: Epochs of training process. + * `batch_size`: Batch size. + * `num_workers`: Number of workers. + * `eval_dataset`: Validation dataset. + * `log_interval`:The interval for printing logs. + * `save_interval`: The interval for saving model parameters. + + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + - ```python + import paddle + import paddlehub as hub + + if __name__ == '__main__': + + model = hub.Module(name='resnet50_vd_imagenet_ssld', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['/PATH/TO/IMAGE']) + ``` + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m resnet50_vd_imagenet_ssld + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # Send an HTTP request + org_im = cv2.imread('/PATH/TO/IMAGE') + + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/resnet50_vd_imagenet_ssld" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## V. Release Note + +* 1.0.0 + + First release + +* 1.1.0 + + Upgrade to dynamic version diff --git a/modules/image/classification/resnet50_vd_wildanimals/README_en.md b/modules/image/classification/resnet50_vd_wildanimals/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..9a526d581511fd56250d9b4d4fe490349b367c2a --- /dev/null +++ b/modules/image/classification/resnet50_vd_wildanimals/README_en.md @@ -0,0 +1,134 @@ +# resnet50_vd_wildanimals + +|Module Name|resnet50_vd_wildanimals| +| :--- | :---: | +|Category|image classification| +|Network|ResNet_vd| +|Dataset|IFAW Wild Animal Dataset| +|Fine-tuning supported or not|No| +|Module Size|92MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNet proposed a residual unit to solve the problem of training an extremely deep network, and improved the prediction accuracy of models. ResNet-vd is a variant of ResNet. This module is based on ResNet_vd, trained on IFAW Wild Animal dataset, and can predict ten kinds of wild animal components. 
+ + + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnet50_vd_wildanimals + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnet50_vd_wildanimals --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet50_vd_wildanimals") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - classification API. + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - top\_k (int): return the first k results + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of image classification. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m resnet50_vd_wildanimals + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/resnet50_vd_wildanimals" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + - ```shell + $ hub install resnet50_vd_wildanimals==1.0.0 + ``` diff --git a/modules/image/classification/resnet_v2_101_imagenet/README_en.md b/modules/image/classification/resnet_v2_101_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..2d39b5fa10c99180b39dcac8360da2f18a6ab6ad --- /dev/null +++ b/modules/image/classification/resnet_v2_101_imagenet/README_en.md @@ -0,0 +1,85 @@ +# resnet_v2_101_imagenet + +|Module Name|resnet_v2_101_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNet V2 101| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|173MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNet proposed a residual unit to solve the problem of training an extremely deep network, and improved the prediction accuracy of models. This module is based on ResNet101, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnet_v2_101_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnet_v2_101_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet_v2_101_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + Fix the problem of encoding in python2 + - ```shell + $ hub install resnet_v2_101_imagenet==1.0.1 + ``` diff --git a/modules/image/classification/resnet_v2_152_imagenet/README_en.md b/modules/image/classification/resnet_v2_152_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..1fc4640be72f744b2dad4bf8239af25556ab319d --- /dev/null +++ b/modules/image/classification/resnet_v2_152_imagenet/README_en.md @@ -0,0 +1,85 @@ +# resnet_v2_152_imagenet + +|Module Name|resnet_v2_152_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNet V2| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|234MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNet proposed a residual unit to solve the problem of training an extremely deep network, and improved the prediction accuracy of models. This module is based on ResNet152, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnet_v2_152_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnet_v2_152_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet_v2_152_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + Fix the problem of encoding in python2 + - ```shell + $ hub install resnet_v2_152_imagenet==1.0.1 + ``` diff --git a/modules/image/classification/resnet_v2_18_imagenet/README_en.md b/modules/image/classification/resnet_v2_18_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..233bc1ad7c61e8b08f7bf42737a6c9b23464c7bf --- /dev/null +++ b/modules/image/classification/resnet_v2_18_imagenet/README_en.md @@ -0,0 +1,82 @@ +# resnet_v2_18_imagenet + +|Module Name|resnet_v2_18_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNet V2| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|46MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + - ResNet proposed a residual unit to solve the problem of training an extremely deep network, and improved the prediction accuracy of models. This module is based on ResNet18, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnet_v2_18_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnet_v2_18_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet_v2_18_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnet_v2_18_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnet_v2_34_imagenet/README_en.md b/modules/image/classification/resnet_v2_34_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..c5cdbd51bb275fd089176be21ba7b1dabf18c4d2 --- /dev/null +++ b/modules/image/classification/resnet_v2_34_imagenet/README_en.md @@ -0,0 +1,83 @@ +# resnet_v2_34_imagenet + +|Module Name|resnet_v2_34_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNet V2| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|85MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNet proposed a residual unit to solve the problem of training an extremely deep network, and improved the prediction accuracy of models. This module is based on ResNet34, trained on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnet_v2_34_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnet_v2_34_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet_v2_34_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnet_v2_34_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnet_v2_50_imagenet/README_en.md b/modules/image/classification/resnet_v2_50_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..63fc42ed66a723115e5d97597b7032eedfc1ab87 --- /dev/null +++ b/modules/image/classification/resnet_v2_50_imagenet/README_en.md @@ -0,0 +1,88 @@ +# resnet_v2_50_imagenet + +|Module Name|resnet_v2_50_imagenet| +| :--- | :---: | +|Category |Image classification| +|Network|ResNet V2| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|99MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I. Basic Information + +- ### Application Effect Display + + - This module utilizes ResNet50 structure and it is trained on ImageNet-2012. + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnet_v2_50_imagenet + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnet_v2_50_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet_v2_50_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - Prediction API for classification. + + - **Parameter** + - data (dict): Key is 'image',value is the list of image path. + + - **Return** + - result (list[dict]): The list of classification results,key is the prediction label, value is the corresponding confidence. + + + + + +## IV. 
+ +- 1.0.0 + + First release + +- 1.0.1 + + Fix encoding problem in Python 2 + + - ```shell + $ hub install resnet_v2_50_imagenet==1.0.1 + ``` diff --git a/modules/image/classification/resnext101_32x16d_wsl/README_en.md b/modules/image/classification/resnext101_32x16d_wsl/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..7317b4464320600e0312dc660ac74cca26699653 --- /dev/null +++ b/modules/image/classification/resnext101_32x16d_wsl/README_en.md @@ -0,0 +1,83 @@ +# resnext101_32x16d_wsl + +|Module Name|resnext101_32x16d_wsl| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt_wsl| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|744MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - Since the scale of manually annotated datasets is close to its limit, researchers at Facebook adopted a new weakly supervised transfer-learning approach: images are labeled with hashtags, and the network is pre-trained on billions of such social images before being transferred to the target task. The top-1 accuracy of ResNeXt101_32x16d_wsl on ImageNet reaches 84.24%. This module is based on ResNeXt101_32x16d_wsl and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext101_32x16d_wsl + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext101_32x16d_wsl --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_32x16d_wsl") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API.
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnext101_32x16d_wsl==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_32x32d_wsl/README_en.md b/modules/image/classification/resnext101_32x32d_wsl/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..c9ded02a5db8cc834c68635edb567bf270ecfe84 --- /dev/null +++ b/modules/image/classification/resnext101_32x32d_wsl/README_en.md @@ -0,0 +1,83 @@ +# resnext101_32x32d_wsl + +|Module Name|resnext101_32x32d_wsl| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt_wsl| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|1.8GB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - The scale of dataset annotated by people is close to limit, researchers in Facebook adopt a new method of transfer learning to train the network. They use hashtag to annotate images, and trained on billions of social images, then transfer to weakly supervised learning. The top-1 accuracy of ResNeXt101_32x32d_wsl on ImageNet reaches 84.97%. This module is based on ResNeXt101_32x32d_wsl, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext101_32x32d_wsl + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext101_32x32d_wsl --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_32x32d_wsl") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnext101_32x32d_wsl==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_32x48d_wsl/README_en.md b/modules/image/classification/resnext101_32x48d_wsl/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..bf0541f2b8c9481bdbbceda4c162b90d7af329ec --- /dev/null +++ b/modules/image/classification/resnext101_32x48d_wsl/README_en.md @@ -0,0 +1,83 @@ +# resnext101_32x48d_wsl + +|Module Name|resnext101_32x48d_wsl| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt_wsl| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|342MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - The scale of dataset annotated by people is close to limit, researchers in Facebook adopt a new method of transfer learning to train the network. They use hashtag to annotate images, and trained on billions of social images, then transfer to weakly supervised learning. The top-1 accuracy of ResNeXt101_32x48d_wsl on ImageNet reaches 85.4%. This module is based on ResNeXt101_32x48d_wsl, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext101_32x48d_wsl + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext101_32x48d_wsl --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_32x48d_wsl") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnext101_32x48d_wsl==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_32x4d_imagenet/README_en.md b/modules/image/classification/resnext101_32x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..811a6c726f1949410a6a816a0c03a4b9c4346bc5 --- /dev/null +++ b/modules/image/classification/resnext101_32x4d_imagenet/README_en.md @@ -0,0 +1,84 @@ +# resnext101_32x4d_imagenet + +|Module Name|resnext101_32x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|172MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNeXt is proposed by UC San Diego and Facebook AI Research in 2017. This module is based on resnext101_32x4d, which denotes 101 layers ,32 branches,and the number of input and output branch channels is 4 in the network. It is weak-supervised trained on billions of socail images, finetuned on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext101_32x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext101_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnext101_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_32x8d_wsl/README_en.md b/modules/image/classification/resnext101_32x8d_wsl/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..14494475ecaed061dba5cd639ef8994f02a9d28d --- /dev/null +++ b/modules/image/classification/resnext101_32x8d_wsl/README_en.md @@ -0,0 +1,83 @@ +# resnext101_32x8d_wsl + +|Module Name|resnext101_32x8d_wsl| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt_wsl| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|317MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - The scale of dataset annotated by people is close to limit, researchers in Facebook adopt a new method of transfer learning to train the network. They use hashtag to annotate images, and trained on billions of social images, then transfer to weakly supervised learning. The top-1 accuracy of ResNeXt101_32x8d_wsl on ImageNet reaches 82.55%. This module is based on ResNeXt101_32x8d_wsl, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext101_32x8d_wsl + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext101_32x8d_wsl --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_32x8d_wsl") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnext101_32x8d_wsl==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_64x4d_imagenet/README_en.md b/modules/image/classification/resnext101_64x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..7c12f4336f440cf21215792808a11289b9cc3300 --- /dev/null +++ b/modules/image/classification/resnext101_64x4d_imagenet/README_en.md @@ -0,0 +1,83 @@ +# resnext101_64x4d_imagenet + +|Module Name|resnext101_64x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|322MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNeXt is proposed by UC San Diego and Facebook AI Research in 2017. This module is based on resnext101_64x4d, which denotes 101 layers ,64 branches,and the number of input and output branch channels is 4 in the network. It is weak-supervised trained on billions of socail images, finetuned on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext101_64x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext101_64x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_64x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnext101_64x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_vd_32x4d_imagenet/README_en.md b/modules/image/classification/resnext101_vd_32x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..3c6bcbf70785bc86d3056866ae25c578c05f9c33 --- /dev/null +++ b/modules/image/classification/resnext101_vd_32x4d_imagenet/README_en.md @@ -0,0 +1,82 @@ +# resnext101_vd_32x4d_imagenet + +|Module Name|resnext101_vd_32x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|172MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNeXt is proposed by UC San Diego and Facebook AI Research in 2017. This module is based on resnext101_vd_32x4d, which denotes 101 layers ,32 branches,and the number of input and output branch channels is 4 in the network. It is weak-supervised trained on billions of socail images, finetuned on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext101_vd_32x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext101_vd_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_vd_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + - ```shell + $ hub install resnext101_vd_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_vd_64x4d_imagenet/README_en.md b/modules/image/classification/resnext101_vd_64x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..4fa61f9c87968d7227b3c88511bd1696a5fc8574 --- /dev/null +++ b/modules/image/classification/resnext101_vd_64x4d_imagenet/README_en.md @@ -0,0 +1,82 @@ +# resnext101_vd_64x4d_imagenet + +|Module Name|resnext101_vd_64x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt_vd| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|172MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNeXt is proposed by UC San Diego and Facebook AI Research in 2017. This module is based on resnext101_vd_64x4d_imagenet, which denotes 101 layers ,64 branches,and the number of input and output branch channels is 4 in the network. It is weak-supervised trained on billions of socail images, finetuned on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext101_vd_64x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext101_vd_64x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_vd_64x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + - ```shell + $ hub install resnext101_vd_64x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext152_32x4d_imagenet/README_en.md b/modules/image/classification/resnext152_32x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..90423f00f48734f9ec545e1fbc2bf87dee4d5462 --- /dev/null +++ b/modules/image/classification/resnext152_32x4d_imagenet/README_en.md @@ -0,0 +1,83 @@ +# resnext152_32x4d_imagenet + +|Module Name|resnext152_32x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|233MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNeXt is proposed by UC San Diego and Facebook AI Research in 2017. This module is based on resnext152_32x4d_imagenet, which denotes 152 layers ,32 branches,and the number of input and output branch channels is 4 in the network. It is weak-supervised trained on billions of socail images, finetuned on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext152_32x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext152_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext152_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnext152_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext152_64x4d_imagenet/README_en.md b/modules/image/classification/resnext152_64x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..3805380bdaf39ad8a904042443cfe2426190366e --- /dev/null +++ b/modules/image/classification/resnext152_64x4d_imagenet/README_en.md @@ -0,0 +1,84 @@ +# resnext152_64x4d_imagenet + +|Module Name|resnext152_64x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|444MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNeXt is proposed by UC San Diego and Facebook AI Research in 2017. This module is based on resnext152_64x4d_imagenet, which denotes 152 layers ,64 branches,and the number of input and output branch channels is 4 in the network. It is weak-supervised trained on billions of socail images, finetuned on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext152_64x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext152_64x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext152_64x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnext152_64x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext152_vd_64x4d_imagenet/README_en.md b/modules/image/classification/resnext152_vd_64x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..059333b582c4a87d84b100a87a7ddca50090eede --- /dev/null +++ b/modules/image/classification/resnext152_vd_64x4d_imagenet/README_en.md @@ -0,0 +1,84 @@ +# resnext152_vd_64x4d_imagenet + +|Module Name|resnext152_vd_64x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt_vd| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|444MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNeXt is proposed by UC San Diego and Facebook AI Research in 2017. This module is based on resnext152_vd_64x4d_imagenet, which denotes 152 layers ,64 branches,and the number of input and output branch channels is 4 in the network. It is weak-supervised trained on billions of socail images, finetuned on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext152_vd_64x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext152_vd_64x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext152_vd_64x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnext152_vd_64x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext50_32x4d_imagenet/README_en.md b/modules/image/classification/resnext50_32x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..d696f4a455752e97740424798d79e56db5e37cf3 --- /dev/null +++ b/modules/image/classification/resnext50_32x4d_imagenet/README_en.md @@ -0,0 +1,82 @@ +# resnext50_32x4d_imagenet + +|Module Name|resnext50_32x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|97MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + - ResNeXt is proposed by UC San Diego and Facebook AI Research in 2017. This module is based on resnext50_32x4d, which denotes 50 layers ,32 branches,and the number of input and output branch channels is 4 in the network. It is weak-supervised trained on billions of socail images, finetuned on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext50_32x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext50_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext50_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classification results; each element in the list is a dict whose key is the label name and whose value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnext50_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext50_64x4d_imagenet/README_en.md b/modules/image/classification/resnext50_64x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..b9708b3f4461b340bc59d94d359566e2d1340237 --- /dev/null +++ b/modules/image/classification/resnext50_64x4d_imagenet/README_en.md @@ -0,0 +1,83 @@ +# resnext50_64x4d_imagenet + +|Module Name|resnext50_64x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|174MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNeXt was proposed by UC San Diego and Facebook AI Research in 2017. This module is based on resnext50_64x4d_imagenet, which denotes a network with 50 layers, 64 branches, and 4 input and output channels per branch. It was trained with weak supervision on billions of social images, fine-tuned on the ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext50_64x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext50_64x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext50_64x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API.
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnext50_64x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext50_vd_32x4d_imagenet/README_en.md b/modules/image/classification/resnext50_vd_32x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..1d5aae6deafcb19a511d64665b8aed5a6ba85014 --- /dev/null +++ b/modules/image/classification/resnext50_vd_32x4d_imagenet/README_en.md @@ -0,0 +1,83 @@ +# resnext50_vd_32x4d_imagenet + +|Module Name|resnext50_vd_32x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt_vd| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|98MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNeXt is proposed by UC San Diego and Facebook AI Research in 2017. This module is based on resnext50_vd_32x4d, which denotes 50 layers ,32 branches,and the number of input and output branch channels is 4 in the network. It is weak-supervised trained on billions of socail images, finetuned on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext50_vd_32x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext50_vd_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext50_vd_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install resnext50_vd_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext50_vd_64x4d_imagenet/README_en.md b/modules/image/classification/resnext50_vd_64x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..fe746d989e649f5a6af01e6a3976ba9f974973af --- /dev/null +++ b/modules/image/classification/resnext50_vd_64x4d_imagenet/README_en.md @@ -0,0 +1,82 @@ +# resnext50_vd_64x4d_imagenet + +|Module Name|resnext50_vd_64x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ResNeXt_vd| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|175MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ResNeXt is proposed by UC San Diego and Facebook AI Research in 2017. This module is based on resnext50_vd_64x4d_imagenet, which denotes 50 layers ,64 branches,and the number of input and output branch channels is 4 in the network. It is weak-supervised trained on billions of socail images, finetuned on ImageNet-2012 dataset, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install resnext50_vd_64x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run resnext50_vd_64x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext50_vd_64x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + - ```shell + $ hub install resnext50_vd_64x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/se_resnext101_32x4d_imagenet/README_en.md b/modules/image/classification/se_resnext101_32x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..a3308b7fdfdfcca610dcf25e8aee63f12de8151e --- /dev/null +++ b/modules/image/classification/se_resnext101_32x4d_imagenet/README_en.md @@ -0,0 +1,83 @@ +# se_resnext101_32x4d_imagenet + +|Module Name|se_resnext101_32x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|SE_ResNeXt| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|191MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - Squeeze-and-Excitation Network is proposed by Momenta in 2017. This model learns the weight to strengthen important channels of features and improves classification accuracy, which is the champion of ILSVR 2017. This module is based on se_resnext101_32x4d, trained on ImageNet-2012, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install se_resnext101_32x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run se_resnext101_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="se_resnext101_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install se_resnext101_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/se_resnext50_32x4d_imagenet/README_en.md b/modules/image/classification/se_resnext50_32x4d_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..9bf94e300d87f87d4b6e7061b73c28ce42107e3a --- /dev/null +++ b/modules/image/classification/se_resnext50_32x4d_imagenet/README_en.md @@ -0,0 +1,83 @@ +# se_resnext50_32x4d_imagenet + +|Module Name|se_resnext50_32x4d_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|SE_ResNeXt| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|107MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - Squeeze-and-Excitation Network is proposed by Momenta in 2017. This model learns the weight to strengthen important channels of features and improves classification accuracy, which is the champion of ILSVR 2017. This module is based on SE_ResNeXt50_32x4d, trained on ImageNet-2012, and can predict an image of size 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install se_resnext50_32x4d_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run se_resnext50_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="se_resnext50_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install se_resnext50_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/shufflenet_v2_imagenet/README_en.md b/modules/image/classification/shufflenet_v2_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..e2c6dfc1b54693ec5c7d1a25460b3bf35377d8bf --- /dev/null +++ b/modules/image/classification/shufflenet_v2_imagenet/README_en.md @@ -0,0 +1,83 @@ +# shufflenet_v2_imagenet + +|Module Name|shufflenet_v2_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|ShuffleNet V2| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|11MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - ShuffleNet V2 is a light-weight model proposed by MEGVII in 2018. This model proposed pointwise group convolution and channel shuffle to keep accurary and reduce the amount of computation. This module is based on ShuffleNet V2, trained on ImageNet-2012, and can predict an image of 224*224*3. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install shufflenet_v2_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run shufflenet_v2_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="shufflenet_v2_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. 
+ - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install shufflenet_v2_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/spinalnet_res101_gemstone/README_en.md b/modules/image/classification/spinalnet_res101_gemstone/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..123f84b5c27a77b74da74a409c9a502c99a0cae3 --- /dev/null +++ b/modules/image/classification/spinalnet_res101_gemstone/README_en.md @@ -0,0 +1,80 @@ +# spinalnet_res101_gemstone + +|Module Name|spinalnet_res101_gemstone| +| :--- | :---: | +|Category|image classification| +|Network|resnet101| +|Dataset|gemstone| +|Fine-tuning supported or not|No| +|Module Size|246MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - This module is based on SpinalNet trained on gemstone dataset, and can be used to classify a gemstone. +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install spinalnet_res101_gemstone + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run spinalnet_res101_gemstone --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="spinalnet_res101_gemstone") + result = classifier.predict(['/PATH/TO/IMAGE']) + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - classification API. + - **Parameters** + - images(list[numpy.ndarray]): image data. + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + - ```shell + $ hub install spinalnet_res101_gemstone==1.0.0 + ``` diff --git a/modules/image/classification/spinalnet_res50_gemstone/README_en.md b/modules/image/classification/spinalnet_res50_gemstone/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..1af1f9f06761211ed1bd58e3b74709d3b61ea546 --- /dev/null +++ b/modules/image/classification/spinalnet_res50_gemstone/README_en.md @@ -0,0 +1,80 @@ +# spinalnet_res50_gemstone + +|Module Name|spinalnet_res50_gemstone| +| :--- | :---: | +|Category|image classification| +|Network|resnet50| +|Dataset|gemstone| +|Fine-tuning supported or not|No| +|Module Size|137MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - This module is based on SpinalNet trained on gemstone dataset, and can be used to classify a gemstone. 
+## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install spinalnet_res50_gemstone + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run spinalnet_res50_gemstone --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="spinalnet_res50_gemstone") + result = classifier.predict(['/PATH/TO/IMAGE']) + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - classification API. + - **Parameters** + - images (list): images to be predicted. + + - **Return** + - result(list[dict]): classification results; each element in the list is a dict whose key is the label name and whose value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + - ```shell + $ hub install spinalnet_res50_gemstone==1.0.0 + ``` diff --git a/modules/image/classification/spinalnet_vgg16_gemstone/README.md b/modules/image/classification/spinalnet_vgg16_gemstone/README.md index 5ca6eacd550179c5cb0c838d0c2451eb3d61f02f..34de23e6766539360c28826fbe3f7e3528a15b55 100644 --- a/modules/image/classification/spinalnet_vgg16_gemstone/README.md +++ b/modules/image/classification/spinalnet_vgg16_gemstone/README.md @@ -18,6 +18,7 @@ - ### 模型介绍 - 使用PaddleHub的SpinalNet预训练模型进行宝石识别或finetune并完成宝石的预测任务。 + ## 二、安装 - ### 1、环境依赖 diff --git a/modules/image/classification/spinalnet_vgg16_gemstone/README_en.md b/modules/image/classification/spinalnet_vgg16_gemstone/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..a36834a516b7b913eb44121accab4705cb3413d0 --- /dev/null +++ b/modules/image/classification/spinalnet_vgg16_gemstone/README_en.md @@ -0,0 +1,81 @@ +# spinalnet_vgg16_gemstone + +|Module Name|spinalnet_vgg16_gemstone| +| :--- | :---: | +|Category|image classification| +|Network|vgg16| +|Dataset|gemstone| +|Fine-tuning supported or not|No| +|Module Size|1.5GB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - This module is based on SpinalNet, trained on a gemstone dataset, and can be used to classify gemstones.
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install spinalnet_vgg16_gemstone + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run spinalnet_vgg16_gemstone --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="spinalnet_vgg16_gemstone") + result = classifier.predict(['/PATH/TO/IMAGE']) + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - classification API. + - **Parameters** + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + - ```shell + $ hub install spinalnet_vgg16_gemstone==1.0.0 + ``` diff --git a/modules/image/classification/vgg11_imagenet/README_en.md b/modules/image/classification/vgg11_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..40e8c02941e2059e7e59355887b64c26e6377daf --- /dev/null +++ b/modules/image/classification/vgg11_imagenet/README_en.md @@ -0,0 +1,83 @@ +# vgg11_imagenet + +|Module Name|vgg11_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|VGG| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|507MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - VGG is a serial of models for image classification proposed by university of Oxford and DeepMind. The serial models demonstrate 'the deeper the network is, the better the performance is'. And VGG is used for feature extraction as the backbone by most image classification tasks. This module is based on VGG11, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install vgg11_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run vgg11_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="vgg11_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install vgg11_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/vgg13_imagenet/README_en.md b/modules/image/classification/vgg13_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..25ef73852cde16f854eab48ffe3e5081a5d414f9 --- /dev/null +++ b/modules/image/classification/vgg13_imagenet/README_en.md @@ -0,0 +1,83 @@ +# vgg13_imagenet + +|Module Name|vgg13_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|VGG| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|508MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - VGG is a serial of models for image classification proposed by university of Oxford and DeepMind. The serial models demonstrate 'the deeper the network is, the better the performance is'. And VGG is used for feature extraction as the backbone by most image classification tasks. This module is based on VGG13, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install vgg13_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run vgg13_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="vgg13_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install vgg13_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/vgg16_imagenet/README_en.md b/modules/image/classification/vgg16_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..01cf0d8a2f13e24461168abf94cec79d321fbcb6 --- /dev/null +++ b/modules/image/classification/vgg16_imagenet/README_en.md @@ -0,0 +1,83 @@ +# vgg16_imagenet + +|Module Name|vgg16_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|VGG| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|528MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - VGG is a serial of models for image classification proposed by university of Oxford and DeepMind. The serial models demonstrate 'the deeper the network is, the better the performance is'. And VGG is used for feature extraction as the backbone by most image classification tasks. This module is based on VGG16, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install vgg16_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run vgg16_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="vgg16_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install vgg16_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/vgg19_imagenet/README_en.md b/modules/image/classification/vgg19_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..894ba47a8658ae7370b9641a342a75065c53eec8 --- /dev/null +++ b/modules/image/classification/vgg19_imagenet/README_en.md @@ -0,0 +1,82 @@ +# vgg19_imagenet + +|Module Name|vgg19_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|vgg19_imagenet| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|549MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + - VGG is a serial of models for image classification proposed by university of Oxford and DeepMind. The serial models demonstrate 'the deeper the network is, the better the performance is'. And VGG is used for feature extraction as the backbone by most image classification tasks. This module is based on VGG19, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install vgg19_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run vgg19_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="vgg19_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install vgg19_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/xception41_imagenet/README_en.md b/modules/image/classification/xception41_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..9835e3db48d476695a6b7a6d7bc41316c298471e --- /dev/null +++ b/modules/image/classification/xception41_imagenet/README_en.md @@ -0,0 +1,83 @@ +# xception41_imagenet + +|Module Name|xception41_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|Xception| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - Xception is a model proposed by Google in 2016, which is an improvement on Inception V3. This module is based on Xception41, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install xception41_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run xception41_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="xception41_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install xception41_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/xception65_imagenet/README_en.md b/modules/image/classification/xception65_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..db81a7b5d3cac0ff16c235b1e9f33916f4456cc9 --- /dev/null +++ b/modules/image/classification/xception65_imagenet/README_en.md @@ -0,0 +1,83 @@ +# xception65_imagenet + +|Module Name|xception65_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|Xception| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|140MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - Xception is a model proposed by Google in 2016, which is an improvement on Inception V3. This module is based on Xception65, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install xception65_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run xception65_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="xception65_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install xception65_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/xception71_imagenet/README_en.md b/modules/image/classification/xception71_imagenet/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..118ddc704b15f64defad8e15d08d37df3a3a8edd --- /dev/null +++ b/modules/image/classification/xception71_imagenet/README_en.md @@ -0,0 +1,83 @@ +# xception71_imagenet + +|Module Name|xception71_imagenet| +| :--- | :---: | +|Category|image classification| +|Network|Xception| +|Dataset|ImageNet-2012| +|Fine-tuning supported or not|No| +|Module Size|147MB| +|Latest update date|-| +|Data indicators|-| + + +## I.Basic Information + + + +- ### Module Introduction + + - Xception is a model proposed by Google in 2016, which is an improvement on Inception V3. This module is based on Xception71, trained on ImageNet-2012, and can predict an image of size 224*224*3. 
+ +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + +- ### 2、Installation + + - ```shell + $ hub install xception71_imagenet + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run xception71_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="xception71_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - classification API. + - **Parameters** + - data (dict): key is "image", value is a list of image paths + + - **Return** + - result(list[dict]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install xception71_imagenet==1.0.0 + ``` diff --git a/modules/image/depth_estimation/MiDaS_Large/README.md b/modules/image/depth_estimation/MiDaS_Large/README.md index ce6d1028e2a82e05a53377a6d115a7425fae84c1..73e22792ebef4eaec9ad387cb223b9f1b241f826 100644 --- a/modules/image/depth_estimation/MiDaS_Large/README.md +++ b/modules/image/depth_estimation/MiDaS_Large/README.md @@ -1,92 +1,92 @@ -# MiDaS_Large - -|模型名称|MiDaS_Large| -| :--- | :---: | -|类别|图像 - 深度估计| -|网络|-| -|数据集|3D Movies, WSVD, ReDWeb, MegaDepth| -|是否支持Fine-tuning|否| -|模型大小|399MB| -|最新更新日期|2021-02-26| -|数据指标|-| - - -## 一、模型基本信息 - -- ### 应用效果展示 - - 样例结果示例: -

- - -- ### 模型介绍 - - - MiDaS_Large是一个单目深度估计模型,模型可通过输入图像估计其中的深度信息。 - - -## 二、安装 - -- ### 1、环境依赖 - - - paddlepaddle >= 2.0.0 - - - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - -- ### 2、安装 - - - ```shell - $ hub install MiDaS_Large - ``` - - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) - | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) - -## 三、模型API预测 - -- ### 1、代码示例 - - - ```python - import paddlehub as hub - import cv2 - - model = hub.Module(name="MiDaS_Large") - result = model.depth_estimation(images=[cv2.imread('/PATH/TO/IMAGE')]) - # or - # result = model.depth_estimation(paths=['/PATH/TO/IMAGE']) - ``` - -- ### 2、API - - - ```python - def depth_estimation(images=None, - paths=None, - batch_size=1, - output_dir='output', - visualization=False): - ``` - - - 深度估计API。 - - - **参数** - - - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
- - paths (list\[str\]): 图片的路径;
- - batch_size (int) : batch 的大小;
- - output\_dir (str): 图片的保存路径,默认设为 output;
- - visualization (bool) : 是否将结果保存为图片文件。 - - **NOTE:** paths和images两个参数选择其一进行提供数据 - - - **返回** - - res (list\[numpy.ndarray\]): 图像深度数据,ndarray.shape 为 \[H, W\] - - -## 四、更新历史 - -* 1.0.0 - - 初始发布 - - - ```shell - $ hub install MiDaS_Large==1.0.0 +# MiDaS_Large + +|模型名称|MiDaS_Large| +| :--- | :---: | +|类别|图像 - 深度估计| +|网络|-| +|数据集|3D Movies, WSVD, ReDWeb, MegaDepth| +|是否支持Fine-tuning|否| +|模型大小|399MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ + +- ### 模型介绍 + + - MiDaS_Large是一个单目深度估计模型,模型可通过输入图像估计其中的深度信息。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install MiDaS_Large + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="MiDaS_Large") + result = model.depth_estimation(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.depth_estimation(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def depth_estimation(images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False): + ``` + + - 深度估计API。 + + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
+ - paths (list\[str\]): 图片的路径;
+ - batch_size (int) : batch 的大小;
+ - output\_dir (str): 图片的保存路径,默认设为 output;
+ - visualization (bool) : 是否将结果保存为图片文件。 + + **NOTE:** paths和images两个参数选择其一进行提供数据 + + - **返回** + - res (list\[numpy.ndarray\]): 图像深度数据,ndarray.shape 为 \[H, W\] + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install MiDaS_Large==1.0.0 ``` diff --git a/modules/image/depth_estimation/MiDaS_Large/README_en.md b/modules/image/depth_estimation/MiDaS_Large/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..5aa893fcb8765059a287ce5c4538e092343f658a --- /dev/null +++ b/modules/image/depth_estimation/MiDaS_Large/README_en.md @@ -0,0 +1,91 @@ +# MiDaS_Large + +|Module Name|MiDaS_Large| +| :--- | :---: | +|Category|depth estimation| +|Network|-| +|Dataset|3D Movies, WSVD, ReDWeb, MegaDepth| +|Fine-tuning supported or not|No| +|Module Size|399MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ + +- ### Module Introduction + + - MiDas_Large module is used for monocular depth estimation. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install MiDaS_Large + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="MiDaS_Large") + result = model.depth_estimation(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.depth_estimation(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def depth_estimation(images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False): + ``` + + - depth estimation API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + - res (list\[numpy.ndarray\]): depth data,ndarray.shape is \[H, W\] + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install MiDaS_Large==1.0.0 + ``` diff --git a/modules/image/face_detection/pyramidbox_face_detection/README.md b/modules/image/face_detection/pyramidbox_face_detection/README.md index ded72af898a2e0167e54986f0ee700b8b0d378c2..d7c26e9b27b317a23879ed35eb0593e9844317f9 100644 --- a/modules/image/face_detection/pyramidbox_face_detection/README.md +++ b/modules/image/face_detection/pyramidbox_face_detection/README.md @@ -51,7 +51,7 @@ ``` - 通过命令行方式实现人脸检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/face_detection/pyramidbox_face_detection/README_en.md b/modules/image/face_detection/pyramidbox_face_detection/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..5f12c1def5e9d756f229cca2f8dc327aabb27fdc --- /dev/null +++ b/modules/image/face_detection/pyramidbox_face_detection/README_en.md @@ -0,0 +1,169 @@ +# pyramidbox_face_detection + +|Module Name|pyramidbox_face_detection| +| :--- | :---: | +|Category|face detection| +|Network|PyramidBox| +|Dataset|WIDER FACEDataset| +|Fine-tuning supported or not|No| +|Module Size|220MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ + +- ### Module Introduction + + - PyramidBox is a one-stage face detector based on SSD. It can redict results across six scale levels of feature maps. This module is based on PyramidBox, trained on WIDER FACE Dataset, and supports face detection. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install pyramidbox_face_detection + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run pyramidbox_face_detection --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + face_detector = hub.Module(name="pyramidbox_face_detection") + result = face_detector.face_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = face_detector.face_detection(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def face_detection(images=None, + paths=None, + use_gpu=False, + output_dir='detection_result', + visualization=False, + score_thresh=0.15) + ``` + + - Detect all faces in image + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + - score_thresh (float): the confidence threshold + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - path (str): path for input image + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of face detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m pyramidbox_face_detection + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
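+
+  - For example, a GPU-backed service might be started as below (a minimal sketch: the card index `0` and the port `8866` are placeholder values, adjust them to your environment; `-p` only makes the default port explicit):
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m pyramidbox_face_detection -p 8866
+    ```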
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/pyramidbox_face_detection" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.1.0 + + Fix the problem of reading numpy + - ```shell + $ hub install pyramidbox_face_detection==1.1.0 + ``` diff --git a/modules/image/face_detection/pyramidbox_lite_mobile/README.md b/modules/image/face_detection/pyramidbox_lite_mobile/README.md index d4d9b218272d32c8cdb53546c305ec1015bd3dfe..7a1f984f4bf97268c2427c02cff45fb000cfc486 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile/README.md +++ b/modules/image/face_detection/pyramidbox_lite_mobile/README.md @@ -50,7 +50,7 @@ ``` - 通过命令行方式实现人脸检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/face_detection/pyramidbox_lite_mobile/README_en.md b/modules/image/face_detection/pyramidbox_lite_mobile/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..088fd4725d3babbab8716f38015eb65a898df7cb --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_mobile/README_en.md @@ -0,0 +1,169 @@ +# pyramidbox_lite_mobile + +|Module Name|pyramidbox_lite_mobile| +| :--- | :---: | +|Category|face detection| +|Network|PyramidBox| +|Dataset|WIDER FACEDataset + Baidu Face Dataset| +|Fine-tuning supported or not|No| +|Module Size|7.3MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +- ### Module Introduction + + - PyramidBox-Lite is a light-weight model based on PyramidBox proposed by Baidu in ECCV 2018. This model has solid robustness against interferences such as light and scale variation. This module is optimized for mobile device, based on PyramidBox, trained on WIDER FACE Dataset and Baidu Face Dataset, and can be used for face detection. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install pyramidbox_lite_mobile + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run pyramidbox_lite_mobile --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + face_detector = hub.Module(name="pyramidbox_lite_mobile") + result = face_detector.face_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = face_detector.face_detection(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def face_detection(images=None, + paths=None, + use_gpu=False, + output_dir='detection_result', + visualization=False, + shrink=0.5, + confs_threshold=0.6) + ``` + + - Detect all faces in image + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + - shrink (float): the scale to resize image + - confs\_threshold (float): the confidence threshold + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - path (str): path for input image + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of face detection. 
+ +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m pyramidbox_lite_mobile + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/pyramidbox_lite_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.2.0 + + - ```shell + $ hub install pyramidbox_lite_mobile==1.2.0 + ``` diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md index 10e431c32f9216c59f7c090306d4f5a2136871f2..434d01b1960abdd2940fdd4f5febe480012586e4 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md @@ -50,7 +50,7 @@ ``` - 通过命令行方式实现人脸检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/README_en.md b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..56661fd6f1eb02024cd1197a3ab4e0577b525e29 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README_en.md @@ -0,0 +1,190 @@ +# pyramidbox_lite_mobile_mask + +|Module Name|pyramidbox_lite_mobile_mask| +| :--- | :---: | +|Category|face detection| +|Network|PyramidBox| +|Dataset|WIDER FACEDataset + Baidu Face Dataset| +|Fine-tuning supported or not|No| +|Module Size|1.2MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +- ### Module Introduction + + - PyramidBox-Lite is a light-weight model based on PyramidBox proposed by Baidu in ECCV 2018. This model has solid robustness against interferences such as light and scale variation. This module is optimized for mobile device, based on PyramidBox, trained on WIDER FACE Dataset and Baidu Face Dataset, and can be used for mask detection. + + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install pyramidbox_lite_mobile_mask + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run pyramidbox_lite_mobile_mask --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + mask_detector = hub.Module(name="pyramidbox_lite_mobile_mask") + result = mask_detector.face_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = mask_detector.face_detection(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def face_detection(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='detection_result', + use_multi_scale=False, + shrink=0.5, + confs_threshold=0.6) + ``` + + - Detect all faces in image, and judge the existence of mask. 
+ + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - visualization (bool): Whether to save the results as picture files; + - output_dir (str): save path of images; + - use\_multi\_scale (bool) : whether to detect across multiple scales; + - shrink (float): the scale to resize image + - confs\_threshold (float): the confidence threshold + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - path (str): path for input image + - data (list): detection results, each element in the list is dict + - label (str): 'NO MASK' or 'MASK'; + - confidence (float): the confidence of the result + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of face detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m pyramidbox_lite_mobile_mask + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/pyramidbox_lite_mobile_mask" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` +## V.Paddle Lite Deployment +- ### Save model demo + - ```python + import paddlehub as hub + pyramidbox_lite_mobile_mask = hub.Module(name="pyramidbox_lite_mobile_mask") + + # save model in directory named test_program + pyramidbox_lite_mobile_mask.save_inference_model(dirname="test_program") + ``` + +- ### transform model + + - The model downloaded from paddlehub is a prediction model. If we want to deploy it in mobile device, we can use OPT tool provided by PaddleLite to transform the model. 
For more information, please refer to [OPT tool](https://paddle-lite.readthedocs.io/zh/latest/user_guides/model_optimize_tool.html)) + +- ### Deploy the model with Paddle Lite + - Please refer to[Paddle-Lite mask detection model deployment demo](https://github.com/PaddlePaddle/Paddle-Lite/tree/develop/lite/demo/cxx) + +## V.Release Note + +* 1.0.0 + + First release + +* 1.3.0 + - ```shell + $ hub install pyramidbox_lite_mobile_mask==1.3.0 + ``` diff --git a/modules/image/face_detection/pyramidbox_lite_server/README.md b/modules/image/face_detection/pyramidbox_lite_server/README.md index db16223634f549da69e4d09b1ae9b6e7d4311498..b52fb6a74d0d51619c99181d154bad2038cbbc9a 100644 --- a/modules/image/face_detection/pyramidbox_lite_server/README.md +++ b/modules/image/face_detection/pyramidbox_lite_server/README.md @@ -50,7 +50,7 @@ ``` - 通过命令行方式实现人脸检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/face_detection/pyramidbox_lite_server/README_en.md b/modules/image/face_detection/pyramidbox_lite_server/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..92818bf93f9f2ba964205fa198093475b871efae --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_server/README_en.md @@ -0,0 +1,171 @@ +# pyramidbox_lite_server + +|Module Name|pyramidbox_lite_server| +| :--- | :---: | +|Category|face detection| +|Network|PyramidBox| +|Dataset|WIDER FACEDataset + Baidu Face Dataset| +|Fine-tuning supported or not|No| +|Module Size|8MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +- ### Module Introduction + + - PyramidBox-Lite is a light-weight model based on PyramidBox proposed by Baidu in ECCV 2018. This model has solid robustness against interferences such as light and scale variation. This module is based on PyramidBox, trained on WIDER FACE Dataset and Baidu Face Dataset, and can be used for face detection. + + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install pyramidbox_lite_server + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run pyramidbox_lite_server --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + face_detector = hub.Module(name="pyramidbox_lite_server") + result = face_detector.face_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = face_detector.face_detection(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def face_detection(images=None, + paths=None, + use_gpu=False, + output_dir='detection_result', + visualization=False, + shrink=0.5, + confs_threshold=0.6) + ``` + + - Detect all faces in image + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + - shrink (float): the scale to resize image + - confs\_threshold (float): the confidence threshold + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - path (str): path for input image + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of face detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m pyramidbox_lite_server + ``` + + - The servitization API is now deployed and the default port number is 8866. 
+ + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/pyramidbox_lite_server" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.2.0 + + Fix the problem of reading numpy + - ```shell + $ hub install pyramidbox_lite_server==1.2.0 + ``` diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/README.md b/modules/image/face_detection/pyramidbox_lite_server_mask/README.md index d1e6a1769c008aa6cf2d4ed97a9637a90af47281..1c49c750b09ff29afedeaa921b85c04690a9ae0b 100644 --- a/modules/image/face_detection/pyramidbox_lite_server_mask/README.md +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/README.md @@ -50,7 +50,7 @@ ``` - 通过命令行方式实现人脸检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/README_en.md b/modules/image/face_detection/pyramidbox_lite_server_mask/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..7efc0d92286eec292aecce3197f09cde3c3754e7 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/README_en.md @@ -0,0 +1,191 @@ +# pyramidbox_lite_server_mask + +|Module Name|pyramidbox_lite_server_mask| +| :--- | :---: | +|Category|face detection| +|Network|PyramidBox| +|Dataset|WIDER FACEDataset + Baidu Face Dataset| +|Fine-tuning supported or not|No| +|Module Size|1.2MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +- ### Module Introduction + + - PyramidBox-Lite is a light-weight model based on PyramidBox proposed by Baidu in ECCV 2018. This model has solid robustness against interferences such as light and scale variation. This module is based on PyramidBox, trained on WIDER FACE Dataset and Baidu Face Dataset, and can be used for mask detection. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install pyramidbox_lite_server_mask + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run pyramidbox_lite_server_mask --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + mask_detector = hub.Module(name="pyramidbox_lite_server_mask") + result = mask_detector.face_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = mask_detector.face_detection(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def face_detection(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='detection_result', + use_multi_scale=False, + shrink=0.5, + confs_threshold=0.6) + ``` + + - Detect all faces in image, and judge the existence of mask. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - visualization (bool): Whether to save the results as picture files; + - output_dir (str): save path of images; + - use\_multi\_scale (bool) : whether to detect across multiple scales; + - shrink (float): the scale to resize image + - confs\_threshold (float): the confidence threshold + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - path (str): path for input image + - data (list): detection results, each element in the list is dict + - label (str): 'NO MASK' or 'MASK'; + - confidence (float): the confidence of the result + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of face detection. 
+ +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m pyramidbox_lite_server_mask + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/pyramidbox_lite_server_mask" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` +## V.Paddle Lite Deployment +- ### Save model demo + - ```python + import paddlehub as hub + pyramidbox_lite_server_mask = hub.Module(name="pyramidbox_lite_server_mask") + + # save model in directory named test_program + pyramidbox_lite_server_mask.save_inference_model(dirname="test_program") + ``` + + +- ### transform model + + - The model downloaded from paddlehub is a prediction model. If we want to deploy it in mobile device, we can use OPT tool provided by PaddleLite to transform the model. For more information, please refer to [OPT tool](https://paddle-lite.readthedocs.io/zh/latest/user_guides/model_optimize_tool.html)) + +- ### Deploy the model with Paddle Lite + - Please refer to[Paddle-Lite mask detection model deployment demo](https://github.com/PaddlePaddle/Paddle-Lite/tree/develop/lite/demo/cxx) + +## V.Release Note + +* 1.0.0 + + First release + +* 1.3.1 + - ```shell + $ hub install pyramidbox_lite_server_mask==1.3.1 + ``` diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md index 6931f9d4e8fb0a41bdaf4933d0b539bbbfb71062..ea00f987851f19c6f5ff5356df20e3796abe3e87 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md @@ -50,7 +50,7 @@ ``` - 通过命令行方式实现人脸检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README_en.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..31d0758cbe5850d260e79993a78b5e88b5321321 --- /dev/null +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README_en.md @@ -0,0 +1,169 @@ +# ultra_light_fast_generic_face_detector_1mb_320 + +|Module Name|ultra_light_fast_generic_face_detector_1mb_320| +| :--- | :---: | +|Category|face detection| +|Network|Ultra-Light-Fast-Generic-Face-Detector-1MB| +|Dataset|WIDER FACEDataset| +|Fine-tuning supported or not|No| +|Module Size|2.6MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +- ### Module Introduction + + - Ultra-Light-Fast-Generic-Face-Detector-1MB is an extreme light-weight model for real-time face detection in low computation power devices. This module is based on Ultra-Light-Fast-Generic-Face-Detector-1MB, trained on WIDER FACEDataset, and can be used for face detection. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install ultra_light_fast_generic_face_detector_1mb_320 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run ultra_light_fast_generic_face_detector_1mb_320 --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + face_detector = hub.Module(name="ultra_light_fast_generic_face_detector_1mb_320") + result = face_detector.face_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = face_detector.face_detection(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def face_detection(images=None, + paths=None, + batch\_size=1, + use_gpu=False, + output_dir='face_detector_640_predict_output', + visualization=False, + confs_threshold=0.5) + ``` + + - Detect all faces in image + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + - confs\_threshold (float): the confidence threshold + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - path (str): path for input image + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str): path for saving output image + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of face detection. 
+ +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m ultra_light_fast_generic_face_detector_1mb_320 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ultra_light_fast_generic_face_detector_1mb_320" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.1.2 + - ```shell + $ hub install ultra_light_fast_generic_face_detector_1mb_320==1.1.2 + ``` diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md index 2b1da1cc58c7f42461d9536f18c061224e278cc2..afa5ad3e3e2b4bc30c4a15f7310779ec08f202c8 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md @@ -50,7 +50,7 @@ ``` - 通过命令行方式实现人脸检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README_en.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..6eeed2c2c16313bb9d14e71583d6f3c2d084a0a3 --- /dev/null +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README_en.md @@ -0,0 +1,169 @@ +# ultra_light_fast_generic_face_detector_1mb_640 + +|Module Name|ultra_light_fast_generic_face_detector_1mb_640| +| :--- | :---: | +|Category|face detection| +|Network|Ultra-Light-Fast-Generic-Face-Detector-1MB| +|Dataset|WIDER FACEDataset| +|Fine-tuning supported or not|No| +|Module Size|2.9MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +


+ +- ### Module Introduction + + - Ultra-Light-Fast-Generic-Face-Detector-1MB is an extreme light-weight model for real-time face detection in low computation power devices. This module is based on Ultra-Light-Fast-Generic-Face-Detector-1MB, trained on WIDER FACEDataset, and can be used for face detection. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install ultra_light_fast_generic_face_detector_1mb_640 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run ultra_light_fast_generic_face_detector_1mb_640 --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + face_detector = hub.Module(name="ultra_light_fast_generic_face_detector_1mb_640") + result = face_detector.face_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = face_detector.face_detection(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def face_detection(images=None, + paths=None, + batch\_size=1, + use_gpu=False, + output_dir='face_detector_640_predict_output', + visualization=False, + confs_threshold=0.5) + ``` + + - Detect all faces in image + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + - confs\_threshold (float): the confidence threshold + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - path (str): path for input image + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str): path for saving output image + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of face detection. 
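- Besides the HTTP service described below, the module can be exported for offline deployment through the `save_inference_model` API documented in Section III. A minimal sketch (the output directory name is arbitrary):

  - ```python
    import paddlehub as hub

    face_detector = hub.Module(name="ultra_light_fast_generic_face_detector_1mb_640")
    # combined=True writes the parameters into a single file inside the chosen directory.
    face_detector.save_inference_model(dirname="ultra_light_640_inference", combined=True)
    ```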
+ +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m ultra_light_fast_generic_face_detector_1mb_640 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ultra_light_fast_generic_face_detector_1mb_640" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.1.2 + - ```shell + $ hub install ultra_light_fast_generic_face_detector_1mb_640==1.1.2 + ``` diff --git a/modules/image/image_processing/enlightengan/README.md b/modules/image/image_processing/enlightengan/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ef46cb940029b07ca2a2d5594c831815962b1be9 --- /dev/null +++ b/modules/image/image_processing/enlightengan/README.md @@ -0,0 +1,137 @@ +# enlightengan + +|模型名称|enlightengan| +| :--- | :---: | +|类别|图像 - 暗光增强| +|网络|EnlightenGAN| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|83MB| +|最新更新日期|2021-11-04| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ 输入图像
+ 输出图像

+ +- ### 模型介绍 + + - EnlightenGAN使用非成对的数据进行训练,通过设计自特征保留损失函数和自约束注意力机制,训练的网络可以应用到多种场景下的暗光增强中。 + + - 更多详情参考:[EnlightenGAN: Deep Light Enhancement without Paired Supervision](https://arxiv.org/abs/1906.06972) + + + +## 二、安装 + +- ### 1、环境依赖 + - onnxruntime + - x2paddle + - pillow + +- ### 2、安装 + + - ```shell + $ hub install enlightengan + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run enlightengan --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现暗光增强模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + enlightener = hub.Module(name="enlightengan") + input_path = ["/PATH/TO/IMAGE"] + # Read from a file + enlightener.enlightening(paths=input_path, output_dir='./enlightening_result/', use_gpu=True) + ``` + +- ### 3、API + + - ```python + def enlightening(images=None, paths=None, output_dir='./enlightening_result/', use_gpu=False, visualization=True) + ``` + - 暗光增强API。 + + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
+ - paths (list\[str\]): 图片的路径;
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m enlightengan + ``` + + - 这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/enlightengan" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install enlightengan==1.0.0 + ``` diff --git a/modules/image/image_processing/enlightengan/enlighten_inference/pd_model/x2paddle_code.py b/modules/image/image_processing/enlightengan/enlighten_inference/pd_model/x2paddle_code.py new file mode 100755 index 0000000000000000000000000000000000000000..d211efac274f7d6be42ee8a765726526d9e51888 --- /dev/null +++ b/modules/image/image_processing/enlightengan/enlighten_inference/pd_model/x2paddle_code.py @@ -0,0 +1,201 @@ +import paddle +import math + + +class ONNXModel(paddle.nn.Layer): + def __init__(self): + super(ONNXModel, self).__init__() + self.conv0 = paddle.nn.Conv2D(in_channels=3, out_channels=3, kernel_size=[1, 1], groups=3) + self.pool0 = paddle.nn.MaxPool2D(kernel_size=[2, 2], stride=2) + self.pool1 = paddle.nn.MaxPool2D(kernel_size=[2, 2], stride=2) + self.conv1 = paddle.nn.Conv2D(in_channels=4, out_channels=32, kernel_size=[3, 3], padding=1) + self.pool2 = paddle.nn.MaxPool2D(kernel_size=[2, 2], stride=2) + self.leakyrelu0 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.pool3 = paddle.nn.MaxPool2D(kernel_size=[2, 2], stride=2) + self.batchnorm0 = paddle.nn.BatchNorm( + num_channels=32, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv2 = paddle.nn.Conv2D(in_channels=32, out_channels=32, kernel_size=[3, 3], padding=1) + self.leakyrelu1 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm1 = paddle.nn.BatchNorm( + num_channels=32, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.pool4 = paddle.nn.MaxPool2D(kernel_size=[2, 2], stride=2) + self.conv3 = paddle.nn.Conv2D(in_channels=32, out_channels=64, kernel_size=[3, 3], padding=1) + self.leakyrelu2 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm2 = paddle.nn.BatchNorm( + num_channels=64, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv4 = paddle.nn.Conv2D(in_channels=64, out_channels=64, kernel_size=[3, 3], padding=1) + self.leakyrelu3 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm3 = paddle.nn.BatchNorm( + num_channels=64, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.pool5 = paddle.nn.MaxPool2D(kernel_size=[2, 2], stride=2) + self.conv5 = paddle.nn.Conv2D(in_channels=64, out_channels=128, kernel_size=[3, 3], padding=1) + self.leakyrelu4 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm4 = paddle.nn.BatchNorm( + num_channels=128, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv6 
= paddle.nn.Conv2D(in_channels=128, out_channels=128, kernel_size=[3, 3], padding=1) + self.leakyrelu5 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm5 = paddle.nn.BatchNorm( + num_channels=128, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.pool6 = paddle.nn.MaxPool2D(kernel_size=[2, 2], stride=2) + self.conv7 = paddle.nn.Conv2D(in_channels=128, out_channels=256, kernel_size=[3, 3], padding=1) + self.leakyrelu6 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm6 = paddle.nn.BatchNorm( + num_channels=256, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv8 = paddle.nn.Conv2D(in_channels=256, out_channels=256, kernel_size=[3, 3], padding=1) + self.leakyrelu7 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm7 = paddle.nn.BatchNorm( + num_channels=256, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.pool7 = paddle.nn.MaxPool2D(kernel_size=[2, 2], stride=2) + self.conv9 = paddle.nn.Conv2D(in_channels=256, out_channels=512, kernel_size=[3, 3], padding=1) + self.leakyrelu8 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm8 = paddle.nn.BatchNorm( + num_channels=512, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv10 = paddle.nn.Conv2D(in_channels=512, out_channels=512, kernel_size=[3, 3], padding=1) + self.leakyrelu9 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm9 = paddle.nn.BatchNorm( + num_channels=512, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv11 = paddle.nn.Conv2D(in_channels=512, out_channels=256, kernel_size=[3, 3], padding=1) + self.conv12 = paddle.nn.Conv2D(in_channels=512, out_channels=256, kernel_size=[3, 3], padding=1) + self.leakyrelu10 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm10 = paddle.nn.BatchNorm( + num_channels=256, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv13 = paddle.nn.Conv2D(in_channels=256, out_channels=256, kernel_size=[3, 3], padding=1) + self.leakyrelu11 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm11 = paddle.nn.BatchNorm( + num_channels=256, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv14 = paddle.nn.Conv2D(in_channels=256, out_channels=128, kernel_size=[3, 3], padding=1) + self.conv15 = paddle.nn.Conv2D(in_channels=256, out_channels=128, kernel_size=[3, 3], padding=1) + self.leakyrelu12 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm12 = paddle.nn.BatchNorm( + num_channels=128, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv16 = paddle.nn.Conv2D(in_channels=128, out_channels=128, kernel_size=[3, 3], padding=1) + self.leakyrelu13 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm13 = paddle.nn.BatchNorm( + num_channels=128, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv17 = paddle.nn.Conv2D(in_channels=128, out_channels=64, kernel_size=[3, 3], padding=1) + self.conv18 = paddle.nn.Conv2D(in_channels=128, out_channels=64, kernel_size=[3, 3], padding=1) + self.leakyrelu14 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm14 = paddle.nn.BatchNorm( + num_channels=64, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv19 = 
paddle.nn.Conv2D(in_channels=64, out_channels=64, kernel_size=[3, 3], padding=1) + self.leakyrelu15 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm15 = paddle.nn.BatchNorm( + num_channels=64, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv20 = paddle.nn.Conv2D(in_channels=64, out_channels=32, kernel_size=[3, 3], padding=1) + self.conv21 = paddle.nn.Conv2D(in_channels=64, out_channels=32, kernel_size=[3, 3], padding=1) + self.leakyrelu16 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.batchnorm16 = paddle.nn.BatchNorm( + num_channels=32, momentum=0.8999999761581421, epsilon=9.999999747378752e-06, is_test=True) + self.conv22 = paddle.nn.Conv2D(in_channels=32, out_channels=32, kernel_size=[3, 3], padding=1) + self.leakyrelu17 = paddle.nn.LeakyReLU(negative_slope=0.20000000298023224) + self.conv23 = paddle.nn.Conv2D(in_channels=32, out_channels=3, kernel_size=[1, 1]) + + def forward(self, x2paddle_input): + x2paddle_137 = paddle.full(dtype='float32', shape=[1], fill_value=1.0) + x2paddle_145 = paddle.full(dtype='float32', shape=[1], fill_value=0.29899999499320984) + x2paddle_147 = paddle.full(dtype='float32', shape=[1], fill_value=0.5870000123977661) + x2paddle_150 = paddle.full(dtype='float32', shape=[1], fill_value=0.11400000005960464) + x2paddle_153 = paddle.full(dtype='float32', shape=[1], fill_value=2.0) + x2paddle_155 = paddle.full(dtype='float32', shape=[1], fill_value=1.0) + x2paddle_256 = paddle.full(dtype='float32', shape=[1], fill_value=1.0) + x2paddle_134 = self.conv0(x2paddle_input) + x2paddle_135, = paddle.split(x=x2paddle_134, num_or_sections=[1]) + x2paddle_257 = paddle.multiply(x=x2paddle_134, y=x2paddle_256) + x2paddle_136 = paddle.squeeze(x=x2paddle_135, axis=[0]) + x2paddle_138 = paddle.add(x=x2paddle_136, y=x2paddle_137) + x2paddle_139_p0, x2paddle_139_p1, x2paddle_139_p2 = paddle.split(x=x2paddle_138, num_or_sections=[1, 1, 1]) + x2paddle_142 = paddle.squeeze(x=x2paddle_139_p0, axis=[0]) + x2paddle_143 = paddle.squeeze(x=x2paddle_139_p1, axis=[0]) + x2paddle_144 = paddle.squeeze(x=x2paddle_139_p2, axis=[0]) + x2paddle_146 = paddle.multiply(x=x2paddle_142, y=x2paddle_145) + x2paddle_148 = paddle.multiply(x=x2paddle_143, y=x2paddle_147) + x2paddle_151 = paddle.multiply(x=x2paddle_144, y=x2paddle_150) + x2paddle_149 = paddle.add(x=x2paddle_146, y=x2paddle_148) + x2paddle_152 = paddle.add(x=x2paddle_149, y=x2paddle_151) + x2paddle_154 = paddle.divide(x=x2paddle_152, y=x2paddle_153) + x2paddle_156 = paddle.subtract(x=x2paddle_155, y=x2paddle_154) + x2paddle_157 = paddle.unsqueeze(x=x2paddle_156, axis=[0]) + x2paddle_158 = paddle.unsqueeze(x=x2paddle_157, axis=[0]) + x2paddle_159 = self.pool0(x2paddle_158) + x2paddle_163 = paddle.concat(x=[x2paddle_134, x2paddle_158], axis=1) + x2paddle_160 = self.pool1(x2paddle_159) + x2paddle_164 = self.conv1(x2paddle_163) + x2paddle_161 = self.pool2(x2paddle_160) + x2paddle_165 = self.leakyrelu0(x2paddle_164) + x2paddle_162 = self.pool3(x2paddle_161) + x2paddle_166 = self.batchnorm0(x2paddle_165) + x2paddle_167 = self.conv2(x2paddle_166) + x2paddle_168 = self.leakyrelu1(x2paddle_167) + x2paddle_169 = self.batchnorm1(x2paddle_168) + x2paddle_170 = self.pool4(x2paddle_169) + x2paddle_246 = paddle.multiply(x=x2paddle_169, y=x2paddle_158) + x2paddle_171 = self.conv3(x2paddle_170) + x2paddle_172 = self.leakyrelu2(x2paddle_171) + x2paddle_173 = self.batchnorm2(x2paddle_172) + x2paddle_174 = self.conv4(x2paddle_173) + x2paddle_175 = self.leakyrelu3(x2paddle_174) + 
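        # Added note: x2paddle_176 below is the output of this encoder block; it is max-pooled
        # into the next stage and, further down, multiplied by the downsampled attention map
        # x2paddle_159 to form x2paddle_232, the skip feature that the decoder concatenates.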
x2paddle_176 = self.batchnorm3(x2paddle_175) + x2paddle_177 = self.pool5(x2paddle_176) + x2paddle_232 = paddle.multiply(x=x2paddle_176, y=x2paddle_159) + x2paddle_178 = self.conv5(x2paddle_177) + x2paddle_179 = self.leakyrelu4(x2paddle_178) + x2paddle_180 = self.batchnorm4(x2paddle_179) + x2paddle_181 = self.conv6(x2paddle_180) + x2paddle_182 = self.leakyrelu5(x2paddle_181) + x2paddle_183 = self.batchnorm5(x2paddle_182) + x2paddle_184 = self.pool6(x2paddle_183) + x2paddle_218 = paddle.multiply(x=x2paddle_183, y=x2paddle_160) + x2paddle_185 = self.conv7(x2paddle_184) + x2paddle_186 = self.leakyrelu6(x2paddle_185) + x2paddle_187 = self.batchnorm6(x2paddle_186) + x2paddle_188 = self.conv8(x2paddle_187) + x2paddle_189 = self.leakyrelu7(x2paddle_188) + x2paddle_190 = self.batchnorm7(x2paddle_189) + x2paddle_191 = self.pool7(x2paddle_190) + x2paddle_204 = paddle.multiply(x=x2paddle_190, y=x2paddle_161) + x2paddle_192 = self.conv9(x2paddle_191) + x2paddle_193 = self.leakyrelu8(x2paddle_192) + x2paddle_194 = self.batchnorm8(x2paddle_193) + x2paddle_195 = paddle.multiply(x=x2paddle_194, y=x2paddle_162) + x2paddle_196 = self.conv10(x2paddle_195) + x2paddle_197 = self.leakyrelu9(x2paddle_196) + x2paddle_198 = self.batchnorm9(x2paddle_197) + x2paddle_203 = paddle.nn.functional.interpolate(x=x2paddle_198, scale_factor=[2.0, 2.0], mode='bilinear') + x2paddle_205 = self.conv11(x2paddle_203) + x2paddle_206 = paddle.concat(x=[x2paddle_205, x2paddle_204], axis=1) + x2paddle_207 = self.conv12(x2paddle_206) + x2paddle_208 = self.leakyrelu10(x2paddle_207) + x2paddle_209 = self.batchnorm10(x2paddle_208) + x2paddle_210 = self.conv13(x2paddle_209) + x2paddle_211 = self.leakyrelu11(x2paddle_210) + x2paddle_212 = self.batchnorm11(x2paddle_211) + x2paddle_217 = paddle.nn.functional.interpolate(x=x2paddle_212, scale_factor=[2.0, 2.0], mode='bilinear') + x2paddle_219 = self.conv14(x2paddle_217) + x2paddle_220 = paddle.concat(x=[x2paddle_219, x2paddle_218], axis=1) + x2paddle_221 = self.conv15(x2paddle_220) + x2paddle_222 = self.leakyrelu12(x2paddle_221) + x2paddle_223 = self.batchnorm12(x2paddle_222) + x2paddle_224 = self.conv16(x2paddle_223) + x2paddle_225 = self.leakyrelu13(x2paddle_224) + x2paddle_226 = self.batchnorm13(x2paddle_225) + x2paddle_231 = paddle.nn.functional.interpolate(x=x2paddle_226, scale_factor=[2.0, 2.0], mode='bilinear') + x2paddle_233 = self.conv17(x2paddle_231) + x2paddle_234 = paddle.concat(x=[x2paddle_233, x2paddle_232], axis=1) + x2paddle_235 = self.conv18(x2paddle_234) + x2paddle_236 = self.leakyrelu14(x2paddle_235) + x2paddle_237 = self.batchnorm14(x2paddle_236) + x2paddle_238 = self.conv19(x2paddle_237) + x2paddle_239 = self.leakyrelu15(x2paddle_238) + x2paddle_240 = self.batchnorm15(x2paddle_239) + x2paddle_245 = paddle.nn.functional.interpolate(x=x2paddle_240, scale_factor=[2.0, 2.0], mode='bilinear') + x2paddle_247 = self.conv20(x2paddle_245) + x2paddle_248 = paddle.concat(x=[x2paddle_247, x2paddle_246], axis=1) + x2paddle_249 = self.conv21(x2paddle_248) + x2paddle_250 = self.leakyrelu16(x2paddle_249) + x2paddle_251 = self.batchnorm16(x2paddle_250) + x2paddle_252 = self.conv22(x2paddle_251) + x2paddle_253 = self.leakyrelu17(x2paddle_252) + x2paddle_254 = self.conv23(x2paddle_253) + x2paddle_255 = paddle.multiply(x=x2paddle_254, y=x2paddle_158) + x2paddle_output = paddle.add(x=x2paddle_255, y=x2paddle_257) + return x2paddle_output, x2paddle_255 diff --git a/modules/image/image_processing/enlightengan/module.py b/modules/image/image_processing/enlightengan/module.py new file mode 100644 
index 0000000000000000000000000000000000000000..0c8f441c55c32c364112d3b3121183cb3964596f --- /dev/null +++ b/modules/image/image_processing/enlightengan/module.py @@ -0,0 +1,147 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import os + +import cv2 +import numpy as np +import paddle + +import paddlehub as hub +from .enlighten_inference.pd_model.x2paddle_code import ONNXModel +from .util import base64_to_cv2 +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="enlightengan", + type="CV/enlighten", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class EnlightenGAN: + + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "enlighten_inference/pd_model") + self.model = ONNXModel() + params = paddle.load(os.path.join(self.pretrained_model, 'model.pdparams')) + self.model.set_dict(params, use_structured_name=True) + + def enlightening(self, + images: list = None, + paths: list = None, + output_dir: str = './enlightening_result/', + use_gpu: bool = False, + visualization: bool = True): + ''' + enlighten images in the low-light scene. + + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR(read by cv2). + paths (list[str]): paths to images + output_dir (str): the dir to save the results + use_gpu (bool): if True, use gpu to perform the computation, otherwise cpu. + visualization (bool): if True, save results in output_dir. + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + self.model.eval() + + if images != None: + for image in images: + image = image[:, :, ::-1] + image = np.expand_dims(np.transpose(image, (2, 0, 1)).astype(np.float32) / 255., 0) + inputtensor = paddle.to_tensor(image) + out, out1 = self.model(inputtensor) + out = out.numpy()[0] + out = (np.transpose(out, (1, 2, 0)) + 1) / 2.0 * 255.0 + out = np.clip(out, 0, 255) + out = out.astype('uint8') + results.append(out) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + image = np.expand_dims(np.transpose(image, (2, 0, 1)).astype(np.float32) / 255., 0) + inputtensor = paddle.to_tensor(image) + out, out1 = self.model(inputtensor) + out = out.numpy()[0] + out = (np.transpose(out, (1, 2, 0)) + 1) / 2.0 * 255.0 + out = np.clip(out, 0, 255) + out = out.astype('uint8') + results.append(out) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + cv2.imwrite(os.path.join(output_dir, 'output_{}.png'.format(i)), out[:, :, ::-1]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. 
+ """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.enlightening(paths=[self.args.input_path], + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.enlightening(images=images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--output_dir', + type=str, + default='enlightening_result', + help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/image_processing/enlightengan/util.py b/modules/image/image_processing/enlightengan/util.py new file mode 100644 index 0000000000000000000000000000000000000000..531a0ae0d487822a870ba7f09817e658967aff10 --- /dev/null +++ b/modules/image/image_processing/enlightengan/util.py @@ -0,0 +1,11 @@ +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/image_processing/prnet/README.md b/modules/image/image_processing/prnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..742e9c265c96bc651e0f20aa794057a1b30051b6 --- /dev/null +++ b/modules/image/image_processing/prnet/README.md @@ -0,0 +1,152 @@ +# prnet + +|模型名称|prnet| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|PRN| +|数据集|300W-LP| +|是否支持Fine-tuning|否| +|模型大小|154MB| +|最新更新日期|2021-11-20| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ 输入原图像
+ 输入参考图像
+ 输出图像

+ +- ### 模型介绍 + + - PRNet提出一种方法同时重建3D的脸部结构和脸部对齐,可应用于脸部对齐、3D脸重建、脸部纹理编辑等任务。该模块引入了脸部纹理编辑的功能,可以将参考图像的脸部纹理转移到原图像上。 + + - 更多详情参考:[Joint 3D Face Reconstruction and Dense Alignment with Position Map Regression Network](https://arxiv.org/pdf/1803.07835.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + - dlib + - scikit-image + +- ### 2、安装 + + - ```shell + $ hub install prnet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run prnet --source "/PATH/TO/IMAGE1" --ref "/PATH/TO/IMAGE2" + ``` + - 通过命令行方式实现脸部纹理编辑的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + module = hub.Module(name="prnet") + source_path = "/PATH/TO/IMAGE1" + ref_path = "/PATH/TO/IMAGE2" + module.face_swap(paths=[{'source':input_path, 'ref':ref_path}], + mode = 0, + output_dir='./swapping_result/', + use_gpu=True, + visualization=True) + ``` + +- ### 3、API + + - ```python + def face_swap(self, + images=None, + paths=None, + mode = 0, + output_dir='./swapping_result/', + use_gpu=False, + visualization=True): + ``` + - 脸部纹理编辑API,将参考图像的脸部纹理转移到原图像上。 + + - **参数** + - images (list[dict]): data of images, 每一个元素都为一个 dict,有关键字 source, ref, 相应取值为: + - source (numpy.ndarray): 待转换的图片,shape 为 \[H, W, C\],BGR格式;
+ - ref (numpy.ndarray): 参考图像,shape 为 \[H, W, C\],BGR格式;
+ - paths (list[dict]): 图片的路径,每一个元素都为一个 dict,有关键字 source, ref,相应取值为: + - source (str): 待转换的图片的路径;
+ - ref (str): 参考图像的路径;
+ - mode (int): 可选项,0 表示改变局部纹理,1 表示改变整个脸;
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m prnet + ``` + + - 这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import rawpy + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[{'source': cv2_to_base64(cv2.imread("/PATH/TO/IMAGE1")), 'ref':cv2_to_base64(cv2.imread("/PATH/TO/IMAGE2"))}]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/prnet/" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install prnet==1.0.0 + ``` diff --git a/modules/image/image_processing/prnet/api.py b/modules/image/image_processing/prnet/api.py new file mode 100644 index 0000000000000000000000000000000000000000..2593a4c4ef9d1ff9cce2eb5d6f5053fab052c628 --- /dev/null +++ b/modules/image/image_processing/prnet/api.py @@ -0,0 +1,203 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from time import time + +import numpy as np +import paddle +from skimage.io import imread +from skimage.io import imsave +from skimage.transform import estimate_transform +from skimage.transform import warp + +from .predictor import PosPrediction + + +class PRN: + ''' Joint 3D Face Reconstruction and Dense Alignment with Position Map Regression Network + Args: + is_dlib(bool, optional): If true, dlib is used for detecting faces. + prefix(str, optional): If run at another folder, the absolute path is needed to load the data. + ''' + + def __init__(self, is_dlib=False, prefix='.'): + + # resolution of input and output image size. 
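        # Added note: the pipeline below assumes 256x256 throughout -- process() warps each face
        # crop to resolution_inp before inference, and the regressed position map has shape
        # (resolution_op, resolution_op, 3).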
+ self.resolution_inp = 256 + self.resolution_op = 256 + + #---- load detectors + if is_dlib: + import dlib + detector_path = os.path.join(prefix, 'Data/net-data/mmod_human_face_detector.dat') + self.face_detector = dlib.cnn_face_detection_model_v1(detector_path) + + #---- load PRN + params = paddle.load(os.path.join(prefix, "pd_model/model.pdparams")) + self.pos_predictor = PosPrediction(params, self.resolution_inp, self.resolution_op) + + # uv file + self.uv_kpt_ind = np.loadtxt(os.path.join(prefix, + 'Data/uv-data/uv_kpt_ind.txt')).astype(np.int32) # 2 x 68 get kpt + self.face_ind = np.loadtxt(os.path.join(prefix, 'Data/uv-data/face_ind.txt')).astype( + np.int32) # get valid vertices in the pos map + self.triangles = np.loadtxt(os.path.join(prefix, 'Data/uv-data/triangles.txt')).astype(np.int32) # ntri x 3 + + self.uv_coords = self.generate_uv_coords() + + def generate_uv_coords(self): + resolution = self.resolution_op + uv_coords = np.meshgrid(range(resolution), range(resolution)) + uv_coords = np.transpose(np.array(uv_coords), [1, 2, 0]) + uv_coords = np.reshape(uv_coords, [resolution**2, -1]) + uv_coords = uv_coords[self.face_ind, :] + uv_coords = np.hstack((uv_coords[:, :2], np.zeros([uv_coords.shape[0], 1]))) + return uv_coords + + def dlib_detect(self, image): + return self.face_detector(image, 1) + + def net_forward(self, image): + ''' The core of out method: regress the position map of a given image. + Args: + image: (256,256,3) array. value range: 0~1 + Returns: + pos: the 3D position map. (256, 256, 3) array. + ''' + return self.pos_predictor.predict(image) + + def process(self, input, image_info=None): + ''' process image with crop operation. + Args: + input: (h,w,3) array or str(image path). image value range:1~255. + image_info(optional): the bounding box information of faces. if None, will use dlib to detect face. + + Returns: + pos: the 3D position map. (256, 256, 3). 
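            Returns None if the image file cannot be read or, when dlib detection is used, no face is found.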
+ ''' + if isinstance(input, str): + try: + image = imread(input) + except IOError: + print("error opening file: ", input) + return None + else: + image = input + + if image.ndim < 3: + image = np.tile(image[:, :, np.newaxis], [1, 1, 3]) + + if image_info is not None: + if np.max(image_info.shape) > 4: # key points to get bounding box + kpt = image_info + if kpt.shape[0] > 3: + kpt = kpt.T + left = np.min(kpt[0, :]) + right = np.max(kpt[0, :]) + top = np.min(kpt[1, :]) + bottom = np.max(kpt[1, :]) + else: # bounding box + bbox = image_info + left = bbox[0] + right = bbox[1] + top = bbox[2] + bottom = bbox[3] + old_size = (right - left + bottom - top) / 2 + center = np.array([right - (right - left) / 2.0, bottom - (bottom - top) / 2.0]) + size = int(old_size * 1.6) + else: + detected_faces = self.dlib_detect(image) + if len(detected_faces) == 0: + print('warning: no detected face') + return None + + d = detected_faces[ + 0].rect ## only use the first detected face (assume that each input image only contains one face) + left = d.left() + right = d.right() + top = d.top() + bottom = d.bottom() + old_size = (right - left + bottom - top) / 2 + center = np.array([right - (right - left) / 2.0, bottom - (bottom - top) / 2.0 + old_size * 0.14]) + size = int(old_size * 1.58) + + # crop image + src_pts = np.array([[center[0] - size / 2, center[1] - size / 2], [center[0] - size / 2, center[1] + size / 2], + [center[0] + size / 2, center[1] - size / 2]]) + DST_PTS = np.array([[0, 0], [0, self.resolution_inp - 1], [self.resolution_inp - 1, 0]]) + tform = estimate_transform('similarity', src_pts, DST_PTS) + + image = image / 255. + cropped_image = warp(image, tform.inverse, output_shape=(self.resolution_inp, self.resolution_inp)) + + cropped_pos = self.net_forward(cropped_image) + + # restore + cropped_vertices = np.reshape(cropped_pos, [-1, 3]).T + z = cropped_vertices[2, :].copy() / tform.params[0, 0] + cropped_vertices[2, :] = 1 + vertices = np.dot(np.linalg.inv(tform.params), cropped_vertices) + vertices = np.vstack((vertices[:2, :], z)) + pos = np.reshape(vertices.T, [self.resolution_op, self.resolution_op, 3]) + + return pos + + def get_landmarks(self, pos): + ''' + Args: + pos: the 3D position map. shape = (256, 256, 3). + Returns: + kpt: 68 3D landmarks. shape = (68, 3). + ''' + kpt = pos[self.uv_kpt_ind[1, :], self.uv_kpt_ind[0, :], :] + return kpt + + def get_vertices(self, pos): + ''' + Args: + pos: the 3D position map. shape = (256, 256, 3). + Returns: + vertices: the vertices(point cloud). shape = (num of points, 3). n is about 40K here. + ''' + all_vertices = np.reshape(pos, [self.resolution_op**2, -1]) + vertices = all_vertices[self.face_ind, :] + + return vertices + + def get_colors_from_texture(self, texture): + ''' + Args: + texture: the texture map. shape = (256, 256, 3). + Returns: + colors: the corresponding colors of vertices. shape = (num of points, 3). n is 45128 here. + ''' + all_colors = np.reshape(texture, [self.resolution_op**2, -1]) + colors = all_colors[self.face_ind, :] + + return colors + + def get_colors(self, image, vertices): + ''' + Args: + pos: the 3D position map. shape = (256, 256, 3). + Returns: + colors: the corresponding colors of vertices. shape = (num of points, 3). n is 45128 here. 
+ ''' + [h, w, _] = image.shape + vertices[:, 0] = np.minimum(np.maximum(vertices[:, 0], 0), w - 1) # x + vertices[:, 1] = np.minimum(np.maximum(vertices[:, 1], 0), h - 1) # y + ind = np.round(vertices).astype(np.int32) + colors = image[ind[:, 1], ind[:, 0], :] # n x 3 + + return colors diff --git a/modules/image/image_processing/prnet/module.py b/modules/image/image_processing/prnet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..8f074541689b4091cf764f3c47a2bbed4aa46c7e --- /dev/null +++ b/modules/image/image_processing/prnet/module.py @@ -0,0 +1,228 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .api import PRN +from .predictor import PosPrediction +from .util import base64_to_cv2 +from .utils.render import render_texture +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="prnet", type="CV/", author="paddlepaddle", author_email="", summary="", version="1.0.0") +class PRNet: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "pd_model/model.pdparams") + self.network = PRN(is_dlib=True, prefix=self.directory) + + def face_swap(self, + images: list = None, + paths: list = None, + mode: int = 0, + output_dir: str = './swapping_result/', + use_gpu: bool = False, + visualization: bool = True): + ''' + Denoise a raw image in the low-light scene. + + images (list[dict]): data of images, each element is a dict: + - source (numpy.ndarray): input image,shape is \[H, W, C\],BGR format;
+ - ref (numpy.ndarray): style image, shape is \[H, W, C\], BGR format;
+ paths (list[dict]): paths to images, each element is a dict: + - source (str): path to input image;
+ - ref (str): path to reference image;
+ mode (int): option, 0 for change part of texture, 1 for change whole face + output_dir (str): the dir to save the results + use_gpu (bool): if True, use gpu to perform the computation, otherwise cpu. + visualization (bool): if True, save results in output_dir. + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image_dict in images: + source_img = image_dict['source'][:, :, ::-1] + ref_img = image_dict['ref'][:, :, ::-1] + results.append(self.texture_editing(source_img, ref_img, mode)) + + if paths != None: + for path_dict in paths: + source_img = cv2.imread(path_dict['source'])[:, :, ::-1] + ref_img = cv2.imread(path_dict['ref'])[:, :, ::-1] + results.append(self.texture_editing(source_img, ref_img, mode)) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + cv2.imwrite(os.path.join(output_dir, 'output_{}.png'.format(i)), out[:, :, ::-1]) + + return results + + def texture_editing(self, source_img, ref_img, mode): + # read image + image = source_img + [h, w, _] = image.shape + prn = self.network + #-- 1. 3d reconstruction -> get texture. + pos = prn.process(image) + vertices = prn.get_vertices(pos) + image = image / 255. + texture = cv2.remap( + image, + pos[:, :, :2].astype(np.float32), + None, + interpolation=cv2.INTER_NEAREST, + borderMode=cv2.BORDER_CONSTANT, + borderValue=(0)) + + #-- 2. Texture Editing + Mode = mode + # change part of texture(for data augumentation/selfie editing. Here modify eyes for example) + if Mode == 0: + # load eye mask + uv_face_eye = imread(os.path.join(self.directory, 'Data/uv-data/uv_face_eyes.png'), as_gray=True) / 255. + uv_face = imread(os.path.join(self.directory, 'Data/uv-data/uv_face.png'), as_gray=True) / 255. + eye_mask = (abs(uv_face_eye - uv_face) > 0).astype(np.float32) + + # texture from another image or a processed texture + ref_image = ref_img + ref_pos = prn.process(ref_image) + ref_image = ref_image / 255. + ref_texture = cv2.remap( + ref_image, + ref_pos[:, :, :2].astype(np.float32), + None, + interpolation=cv2.INTER_NEAREST, + borderMode=cv2.BORDER_CONSTANT, + borderValue=(0)) + + # modify texture + new_texture = texture * (1 - eye_mask[:, :, np.newaxis]) + ref_texture * eye_mask[:, :, np.newaxis] + + # change whole face(face swap) + elif Mode == 1: + # texture from another image or a processed texture + ref_image = ref_img + ref_pos = prn.process(ref_image) + ref_image = ref_image / 255. + ref_texture = cv2.remap( + ref_image, + ref_pos[:, :, :2].astype(np.float32), + None, + interpolation=cv2.INTER_NEAREST, + borderMode=cv2.BORDER_CONSTANT, + borderValue=(0)) + ref_vertices = prn.get_vertices(ref_pos) + new_texture = ref_texture #(texture + ref_texture)/2. + + else: + print('Wrong Mode! Mode should be 0 or 1.') + exit() + + #-- 3. 
remap to input image.(render) + vis_colors = np.ones((vertices.shape[0], 1)) + face_mask = render_texture(vertices.T, vis_colors.T, prn.triangles.T, h, w, c=1) + face_mask = np.squeeze(face_mask > 0).astype(np.float32) + + new_colors = prn.get_colors_from_texture(new_texture) + new_image = render_texture(vertices.T, new_colors.T, prn.triangles.T, h, w, c=3) + new_image = image * (1 - face_mask[:, :, np.newaxis]) + new_image * face_mask[:, :, np.newaxis] + + # Possion Editing for blending image + vis_ind = np.argwhere(face_mask > 0) + vis_min = np.min(vis_ind, 0) + vis_max = np.max(vis_ind, 0) + center = (int((vis_min[1] + vis_max[1]) / 2 + 0.5), int((vis_min[0] + vis_max[0]) / 2 + 0.5)) + output = cv2.seamlessClone((new_image * 255).astype(np.uint8), (image * 255).astype(np.uint8), + (face_mask * 255).astype(np.uint8), center, cv2.NORMAL_CLONE) + + return output + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + + self.face_swap( + paths=[{ + 'source': self.args.source, + 'ref': self.args.ref + }], + mode=self.args.mode, + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = copy.deepcopy(images) + for image in images_decode: + image['source'] = base64_to_cv2(image['source']) + image['ref'] = base64_to_cv2(image['ref']) + results = self.face_swap(images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument( + '--mode', type=int, default=0, help='process option, 0 for part texture, 1 for whole face.', choices=[0, 1]) + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='swapping_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--source', type=str, help="path to source image.") + self.arg_input_group.add_argument('--ref', type=str, help="path to reference image.") diff --git a/modules/image/image_processing/prnet/pd_model/x2paddle_code.py b/modules/image/image_processing/prnet/pd_model/x2paddle_code.py new file mode 100755 index 0000000000000000000000000000000000000000..c1a3e9af6f8f4f2d05459d734bd63fb11965307d --- /dev/null +++ b/modules/image/image_processing/prnet/pd_model/x2paddle_code.py @@ -0,0 +1,1547 @@ +import paddle +import math + + +class TFModel(paddle.nn.Layer): + def __init__(self): + super(TFModel, self).__init__() + self.conv0 = paddle.nn.Conv2D( + weight_attr='conv0.weight', + bias_attr=False, + in_channels=3, + out_channels=16, + kernel_size=[4, 4], + padding='SAME') + self.bn0 = paddle.nn.BatchNorm( + num_channels=16, + epsilon=0.0010000000474974513, + param_attr='resfcn256_Conv_BatchNorm_FusedBatchNorm_resfcn256_Conv_BatchNorm_gamma', + bias_attr='resfcn256_Conv_BatchNorm_FusedBatchNorm_resfcn256_Conv_BatchNorm_beta', + moving_mean_name='resfcn256_Conv_BatchNorm_FusedBatchNorm_resfcn256_Conv_BatchNorm_moving_mean', + moving_variance_name='resfcn256_Conv_BatchNorm_FusedBatchNorm_resfcn256_Conv_BatchNorm_moving_variance', + is_test=True) + self.relu0 = paddle.nn.ReLU() + self.conv1 = paddle.nn.Conv2D( + weight_attr='conv1.weight', + bias_attr=False, + in_channels=16, + out_channels=32, + kernel_size=[1, 1], + stride=2, + padding='SAME') + self.conv2 = paddle.nn.Conv2D( + weight_attr='conv2.weight', + bias_attr=False, + in_channels=16, + out_channels=16, + kernel_size=[1, 1], + padding='SAME') + self.bn1 = paddle.nn.BatchNorm( + num_channels=16, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_Conv_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_Conv_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_Conv_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_Conv_BatchNorm_moving_variance', + is_test=True) + self.relu1 = paddle.nn.ReLU() + self.conv3 = paddle.nn.Conv2D( + weight_attr='conv3.weight', + bias_attr=False, + in_channels=16, + out_channels=16, + kernel_size=[4, 4], + stride=2, + padding='SAME') + self.bn2 = paddle.nn.BatchNorm( + num_channels=16, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_Conv_1_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_Conv_1_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_Conv_1_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_Conv_1_BatchNorm_moving_variance', + is_test=True) + self.relu2 = paddle.nn.ReLU() + self.conv4 = paddle.nn.Conv2D( + weight_attr='conv4.weight', + bias_attr=False, + in_channels=16, + out_channels=32, + kernel_size=[1, 1], + padding='SAME') + self.bn3 = paddle.nn.BatchNorm( + num_channels=32, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_BatchNorm_FusedBatchNorm_resfcn256_resBlock_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_BatchNorm_FusedBatchNorm_resfcn256_resBlock_BatchNorm_beta', + 
moving_mean_name='resfcn256_resBlock_BatchNorm_FusedBatchNorm_resfcn256_resBlock_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_BatchNorm_FusedBatchNorm_resfcn256_resBlock_BatchNorm_moving_variance', + is_test=True) + self.relu3 = paddle.nn.ReLU() + self.conv5 = paddle.nn.Conv2D( + weight_attr='conv5.weight', + bias_attr=False, + in_channels=32, + out_channels=16, + kernel_size=[1, 1], + padding='SAME') + self.bn4 = paddle.nn.BatchNorm( + num_channels=16, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_1_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_1_Conv_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_1_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_1_Conv_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_1_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_1_Conv_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_1_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_1_Conv_BatchNorm_moving_variance', + is_test=True) + self.relu4 = paddle.nn.ReLU() + self.conv6 = paddle.nn.Conv2D( + weight_attr='conv6.weight', + bias_attr=False, + in_channels=16, + out_channels=16, + kernel_size=[4, 4], + padding='SAME') + self.bn5 = paddle.nn.BatchNorm( + num_channels=16, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_resBlock_1_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_1_Conv_1_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_1_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_1_Conv_1_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_1_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_1_Conv_1_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_1_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_1_Conv_1_BatchNorm_moving_variance', + is_test=True) + self.relu5 = paddle.nn.ReLU() + self.conv7 = paddle.nn.Conv2D( + weight_attr='conv7.weight', + bias_attr=False, + in_channels=16, + out_channels=32, + kernel_size=[1, 1], + padding='SAME') + self.bn6 = paddle.nn.BatchNorm( + num_channels=32, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_1_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_1_BatchNorm_beta', + moving_mean_name='resfcn256_resBlock_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_1_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_1_BatchNorm_moving_variance', + is_test=True) + self.relu6 = paddle.nn.ReLU() + self.conv8 = paddle.nn.Conv2D( + weight_attr='conv8.weight', + bias_attr=False, + in_channels=32, + out_channels=64, + kernel_size=[1, 1], + stride=2, + padding='SAME') + self.conv9 = paddle.nn.Conv2D( + weight_attr='conv9.weight', + bias_attr=False, + in_channels=32, + out_channels=32, + kernel_size=[1, 1], + padding='SAME') + self.bn7 = paddle.nn.BatchNorm( + num_channels=32, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_2_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_2_Conv_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_2_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_2_Conv_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_2_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_2_Conv_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_2_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_2_Conv_BatchNorm_moving_variance', + is_test=True) + self.relu7 = paddle.nn.ReLU() + self.conv10 = paddle.nn.Conv2D( + weight_attr='conv10.weight', + 
bias_attr=False, + in_channels=32, + out_channels=32, + kernel_size=[4, 4], + stride=2, + padding='SAME') + self.bn8 = paddle.nn.BatchNorm( + num_channels=32, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_resBlock_2_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_2_Conv_1_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_2_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_2_Conv_1_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_2_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_2_Conv_1_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_2_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_2_Conv_1_BatchNorm_moving_variance', + is_test=True) + self.relu8 = paddle.nn.ReLU() + self.conv11 = paddle.nn.Conv2D( + weight_attr='conv11.weight', + bias_attr=False, + in_channels=32, + out_channels=64, + kernel_size=[1, 1], + padding='SAME') + self.bn9 = paddle.nn.BatchNorm( + num_channels=64, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_2_BatchNorm_FusedBatchNorm_resfcn256_resBlock_2_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_2_BatchNorm_FusedBatchNorm_resfcn256_resBlock_2_BatchNorm_beta', + moving_mean_name='resfcn256_resBlock_2_BatchNorm_FusedBatchNorm_resfcn256_resBlock_2_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_2_BatchNorm_FusedBatchNorm_resfcn256_resBlock_2_BatchNorm_moving_variance', + is_test=True) + self.relu9 = paddle.nn.ReLU() + self.conv12 = paddle.nn.Conv2D( + weight_attr='conv12.weight', + bias_attr=False, + in_channels=64, + out_channels=32, + kernel_size=[1, 1], + padding='SAME') + self.bn10 = paddle.nn.BatchNorm( + num_channels=32, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_3_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_3_Conv_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_3_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_3_Conv_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_3_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_3_Conv_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_3_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_3_Conv_BatchNorm_moving_variance', + is_test=True) + self.relu10 = paddle.nn.ReLU() + self.conv13 = paddle.nn.Conv2D( + weight_attr='conv13.weight', + bias_attr=False, + in_channels=32, + out_channels=32, + kernel_size=[4, 4], + padding='SAME') + self.bn11 = paddle.nn.BatchNorm( + num_channels=32, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_resBlock_3_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_3_Conv_1_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_3_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_3_Conv_1_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_3_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_3_Conv_1_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_3_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_3_Conv_1_BatchNorm_moving_variance', + is_test=True) + self.relu11 = paddle.nn.ReLU() + self.conv14 = paddle.nn.Conv2D( + weight_attr='conv14.weight', + bias_attr=False, + in_channels=32, + out_channels=64, + kernel_size=[1, 1], + padding='SAME') + self.bn12 = paddle.nn.BatchNorm( + num_channels=64, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_3_BatchNorm_FusedBatchNorm_resfcn256_resBlock_3_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_3_BatchNorm_FusedBatchNorm_resfcn256_resBlock_3_BatchNorm_beta', + 
moving_mean_name='resfcn256_resBlock_3_BatchNorm_FusedBatchNorm_resfcn256_resBlock_3_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_3_BatchNorm_FusedBatchNorm_resfcn256_resBlock_3_BatchNorm_moving_variance', + is_test=True) + self.relu12 = paddle.nn.ReLU() + self.conv15 = paddle.nn.Conv2D( + weight_attr='conv15.weight', + bias_attr=False, + in_channels=64, + out_channels=128, + kernel_size=[1, 1], + stride=2, + padding='SAME') + self.conv16 = paddle.nn.Conv2D( + weight_attr='conv16.weight', + bias_attr=False, + in_channels=64, + out_channels=64, + kernel_size=[1, 1], + padding='SAME') + self.bn13 = paddle.nn.BatchNorm( + num_channels=64, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_4_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_4_Conv_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_4_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_4_Conv_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_4_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_4_Conv_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_4_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_4_Conv_BatchNorm_moving_variance', + is_test=True) + self.relu13 = paddle.nn.ReLU() + self.conv17 = paddle.nn.Conv2D( + weight_attr='conv17.weight', + bias_attr=False, + in_channels=64, + out_channels=64, + kernel_size=[4, 4], + stride=2, + padding='SAME') + self.bn14 = paddle.nn.BatchNorm( + num_channels=64, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_resBlock_4_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_4_Conv_1_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_4_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_4_Conv_1_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_4_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_4_Conv_1_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_4_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_4_Conv_1_BatchNorm_moving_variance', + is_test=True) + self.relu14 = paddle.nn.ReLU() + self.conv18 = paddle.nn.Conv2D( + weight_attr='conv18.weight', + bias_attr=False, + in_channels=64, + out_channels=128, + kernel_size=[1, 1], + padding='SAME') + self.bn15 = paddle.nn.BatchNorm( + num_channels=128, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_4_BatchNorm_FusedBatchNorm_resfcn256_resBlock_4_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_4_BatchNorm_FusedBatchNorm_resfcn256_resBlock_4_BatchNorm_beta', + moving_mean_name='resfcn256_resBlock_4_BatchNorm_FusedBatchNorm_resfcn256_resBlock_4_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_4_BatchNorm_FusedBatchNorm_resfcn256_resBlock_4_BatchNorm_moving_variance', + is_test=True) + self.relu15 = paddle.nn.ReLU() + self.conv19 = paddle.nn.Conv2D( + weight_attr='conv19.weight', + bias_attr=False, + in_channels=128, + out_channels=64, + kernel_size=[1, 1], + padding='SAME') + self.bn16 = paddle.nn.BatchNorm( + num_channels=64, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_5_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_5_Conv_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_5_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_5_Conv_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_5_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_5_Conv_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_5_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_5_Conv_BatchNorm_moving_variance', + is_test=True) + self.relu16 = paddle.nn.ReLU() + self.conv20 = paddle.nn.Conv2D( 
+ weight_attr='conv20.weight', + bias_attr=False, + in_channels=64, + out_channels=64, + kernel_size=[4, 4], + padding='SAME') + self.bn17 = paddle.nn.BatchNorm( + num_channels=64, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_resBlock_5_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_5_Conv_1_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_5_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_5_Conv_1_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_5_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_5_Conv_1_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_5_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_5_Conv_1_BatchNorm_moving_variance', + is_test=True) + self.relu17 = paddle.nn.ReLU() + self.conv21 = paddle.nn.Conv2D( + weight_attr='conv21.weight', + bias_attr=False, + in_channels=64, + out_channels=128, + kernel_size=[1, 1], + padding='SAME') + self.bn18 = paddle.nn.BatchNorm( + num_channels=128, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_5_BatchNorm_FusedBatchNorm_resfcn256_resBlock_5_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_5_BatchNorm_FusedBatchNorm_resfcn256_resBlock_5_BatchNorm_beta', + moving_mean_name='resfcn256_resBlock_5_BatchNorm_FusedBatchNorm_resfcn256_resBlock_5_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_5_BatchNorm_FusedBatchNorm_resfcn256_resBlock_5_BatchNorm_moving_variance', + is_test=True) + self.relu18 = paddle.nn.ReLU() + self.conv22 = paddle.nn.Conv2D( + weight_attr='conv22.weight', + bias_attr=False, + in_channels=128, + out_channels=256, + kernel_size=[1, 1], + stride=2, + padding='SAME') + self.conv23 = paddle.nn.Conv2D( + weight_attr='conv23.weight', + bias_attr=False, + in_channels=128, + out_channels=128, + kernel_size=[1, 1], + padding='SAME') + self.bn19 = paddle.nn.BatchNorm( + num_channels=128, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_6_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_6_Conv_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_6_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_6_Conv_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_6_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_6_Conv_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_6_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_6_Conv_BatchNorm_moving_variance', + is_test=True) + self.relu19 = paddle.nn.ReLU() + self.conv24 = paddle.nn.Conv2D( + weight_attr='conv24.weight', + bias_attr=False, + in_channels=128, + out_channels=128, + kernel_size=[4, 4], + stride=2, + padding='SAME') + self.bn20 = paddle.nn.BatchNorm( + num_channels=128, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_resBlock_6_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_6_Conv_1_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_6_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_6_Conv_1_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_6_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_6_Conv_1_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_6_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_6_Conv_1_BatchNorm_moving_variance', + is_test=True) + self.relu20 = paddle.nn.ReLU() + self.conv25 = paddle.nn.Conv2D( + weight_attr='conv25.weight', + bias_attr=False, + in_channels=128, + out_channels=256, + kernel_size=[1, 1], + padding='SAME') + self.bn21 = paddle.nn.BatchNorm( + num_channels=256, + epsilon=0.0010000000474974513, + 
param_attr='resfcn256_resBlock_6_BatchNorm_FusedBatchNorm_resfcn256_resBlock_6_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_6_BatchNorm_FusedBatchNorm_resfcn256_resBlock_6_BatchNorm_beta', + moving_mean_name='resfcn256_resBlock_6_BatchNorm_FusedBatchNorm_resfcn256_resBlock_6_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_6_BatchNorm_FusedBatchNorm_resfcn256_resBlock_6_BatchNorm_moving_variance', + is_test=True) + self.relu21 = paddle.nn.ReLU() + self.conv26 = paddle.nn.Conv2D( + weight_attr='conv26.weight', + bias_attr=False, + in_channels=256, + out_channels=128, + kernel_size=[1, 1], + padding='SAME') + self.bn22 = paddle.nn.BatchNorm( + num_channels=128, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_7_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_7_Conv_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_7_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_7_Conv_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_7_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_7_Conv_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_7_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_7_Conv_BatchNorm_moving_variance', + is_test=True) + self.relu22 = paddle.nn.ReLU() + self.conv27 = paddle.nn.Conv2D( + weight_attr='conv27.weight', + bias_attr=False, + in_channels=128, + out_channels=128, + kernel_size=[4, 4], + padding='SAME') + self.bn23 = paddle.nn.BatchNorm( + num_channels=128, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_resBlock_7_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_7_Conv_1_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_7_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_7_Conv_1_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_7_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_7_Conv_1_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_7_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_7_Conv_1_BatchNorm_moving_variance', + is_test=True) + self.relu23 = paddle.nn.ReLU() + self.conv28 = paddle.nn.Conv2D( + weight_attr='conv28.weight', + bias_attr=False, + in_channels=128, + out_channels=256, + kernel_size=[1, 1], + padding='SAME') + self.bn24 = paddle.nn.BatchNorm( + num_channels=256, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_7_BatchNorm_FusedBatchNorm_resfcn256_resBlock_7_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_7_BatchNorm_FusedBatchNorm_resfcn256_resBlock_7_BatchNorm_beta', + moving_mean_name='resfcn256_resBlock_7_BatchNorm_FusedBatchNorm_resfcn256_resBlock_7_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_7_BatchNorm_FusedBatchNorm_resfcn256_resBlock_7_BatchNorm_moving_variance', + is_test=True) + self.relu24 = paddle.nn.ReLU() + self.conv29 = paddle.nn.Conv2D( + weight_attr='conv29.weight', + bias_attr=False, + in_channels=256, + out_channels=512, + kernel_size=[1, 1], + stride=2, + padding='SAME') + self.conv30 = paddle.nn.Conv2D( + weight_attr='conv30.weight', + bias_attr=False, + in_channels=256, + out_channels=256, + kernel_size=[1, 1], + padding='SAME') + self.bn25 = paddle.nn.BatchNorm( + num_channels=256, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_8_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_8_Conv_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_8_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_8_Conv_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_8_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_8_Conv_BatchNorm_moving_mean', + 
moving_variance_name= + 'resfcn256_resBlock_8_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_8_Conv_BatchNorm_moving_variance', + is_test=True) + self.relu25 = paddle.nn.ReLU() + self.conv31 = paddle.nn.Conv2D( + weight_attr='conv31.weight', + bias_attr=False, + in_channels=256, + out_channels=256, + kernel_size=[4, 4], + stride=2, + padding='SAME') + self.bn26 = paddle.nn.BatchNorm( + num_channels=256, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_resBlock_8_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_8_Conv_1_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_8_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_8_Conv_1_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_8_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_8_Conv_1_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_8_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_8_Conv_1_BatchNorm_moving_variance', + is_test=True) + self.relu26 = paddle.nn.ReLU() + self.conv32 = paddle.nn.Conv2D( + weight_attr='conv32.weight', + bias_attr=False, + in_channels=256, + out_channels=512, + kernel_size=[1, 1], + padding='SAME') + self.bn27 = paddle.nn.BatchNorm( + num_channels=512, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_8_BatchNorm_FusedBatchNorm_resfcn256_resBlock_8_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_8_BatchNorm_FusedBatchNorm_resfcn256_resBlock_8_BatchNorm_beta', + moving_mean_name='resfcn256_resBlock_8_BatchNorm_FusedBatchNorm_resfcn256_resBlock_8_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_8_BatchNorm_FusedBatchNorm_resfcn256_resBlock_8_BatchNorm_moving_variance', + is_test=True) + self.relu27 = paddle.nn.ReLU() + self.conv33 = paddle.nn.Conv2D( + weight_attr='conv33.weight', + bias_attr=False, + in_channels=512, + out_channels=256, + kernel_size=[1, 1], + padding='SAME') + self.bn28 = paddle.nn.BatchNorm( + num_channels=256, + epsilon=0.0010000000474974513, + param_attr='resfcn256_resBlock_9_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_9_Conv_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_9_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_9_Conv_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_9_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_9_Conv_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_9_Conv_BatchNorm_FusedBatchNorm_resfcn256_resBlock_9_Conv_BatchNorm_moving_variance', + is_test=True) + self.relu28 = paddle.nn.ReLU() + self.conv34 = paddle.nn.Conv2D( + weight_attr='conv34.weight', + bias_attr=False, + in_channels=256, + out_channels=256, + kernel_size=[4, 4], + padding='SAME') + self.bn29 = paddle.nn.BatchNorm( + num_channels=256, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_resBlock_9_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_9_Conv_1_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_9_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_9_Conv_1_BatchNorm_beta', + moving_mean_name= + 'resfcn256_resBlock_9_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_9_Conv_1_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_9_Conv_1_BatchNorm_FusedBatchNorm_resfcn256_resBlock_9_Conv_1_BatchNorm_moving_variance', + is_test=True) + self.relu29 = paddle.nn.ReLU() + self.conv35 = paddle.nn.Conv2D( + weight_attr='conv35.weight', + bias_attr=False, + in_channels=256, + out_channels=512, + kernel_size=[1, 1], + padding='SAME') + self.bn30 = paddle.nn.BatchNorm( + num_channels=512, + epsilon=0.0010000000474974513, + 
param_attr='resfcn256_resBlock_9_BatchNorm_FusedBatchNorm_resfcn256_resBlock_9_BatchNorm_gamma', + bias_attr='resfcn256_resBlock_9_BatchNorm_FusedBatchNorm_resfcn256_resBlock_9_BatchNorm_beta', + moving_mean_name='resfcn256_resBlock_9_BatchNorm_FusedBatchNorm_resfcn256_resBlock_9_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_resBlock_9_BatchNorm_FusedBatchNorm_resfcn256_resBlock_9_BatchNorm_moving_variance', + is_test=True) + self.relu30 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_conv2d_transpose_conv36_weight = self.create_parameter( + shape=(512, 512, 4, 4), attr='conv36.weight') + self.bn31 = paddle.nn.BatchNorm( + num_channels=512, + epsilon=0.0010000000474974513, + param_attr='resfcn256_Conv2d_transpose_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_BatchNorm_gamma', + bias_attr='resfcn256_Conv2d_transpose_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_BatchNorm_moving_variance', + is_test=True) + self.relu31 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_1_conv2d_transpose_conv37_weight = self.create_parameter( + shape=(512, 256, 4, 4), attr='conv37.weight') + self.bn32 = paddle.nn.BatchNorm( + num_channels=256, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_1_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_1_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_1_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_1_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_1_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_1_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_1_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_1_BatchNorm_moving_variance', + is_test=True) + self.relu32 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_2_conv2d_transpose_conv38_weight = self.create_parameter( + shape=(256, 256, 4, 4), attr='conv38.weight') + self.bn33 = paddle.nn.BatchNorm( + num_channels=256, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_2_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_2_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_2_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_2_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_2_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_2_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_2_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_2_BatchNorm_moving_variance', + is_test=True) + self.relu33 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_3_conv2d_transpose_conv39_weight = self.create_parameter( + shape=(256, 256, 4, 4), attr='conv39.weight') + self.bn34 = paddle.nn.BatchNorm( + num_channels=256, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_3_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_3_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_3_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_3_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_3_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_3_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_3_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_3_BatchNorm_moving_variance', + is_test=True) + self.relu34 = 
paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_4_conv2d_transpose_conv40_weight = self.create_parameter( + shape=(256, 128, 4, 4), attr='conv40.weight') + self.bn35 = paddle.nn.BatchNorm( + num_channels=128, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_4_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_4_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_4_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_4_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_4_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_4_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_4_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_4_BatchNorm_moving_variance', + is_test=True) + self.relu35 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_5_conv2d_transpose_conv41_weight = self.create_parameter( + shape=(128, 128, 4, 4), attr='conv41.weight') + self.bn36 = paddle.nn.BatchNorm( + num_channels=128, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_5_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_5_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_5_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_5_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_5_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_5_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_5_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_5_BatchNorm_moving_variance', + is_test=True) + self.relu36 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_6_conv2d_transpose_conv42_weight = self.create_parameter( + shape=(128, 128, 4, 4), attr='conv42.weight') + self.bn37 = paddle.nn.BatchNorm( + num_channels=128, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_6_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_6_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_6_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_6_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_6_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_6_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_6_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_6_BatchNorm_moving_variance', + is_test=True) + self.relu37 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_7_conv2d_transpose_conv43_weight = self.create_parameter( + shape=(128, 64, 4, 4), attr='conv43.weight') + self.bn38 = paddle.nn.BatchNorm( + num_channels=64, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_7_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_7_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_7_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_7_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_7_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_7_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_7_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_7_BatchNorm_moving_variance', + is_test=True) + self.relu38 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_8_conv2d_transpose_conv44_weight = self.create_parameter( + shape=(64, 64, 4, 4), attr='conv44.weight') + self.bn39 = paddle.nn.BatchNorm( + num_channels=64, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_8_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_8_BatchNorm_gamma', + bias_attr= + 
'resfcn256_Conv2d_transpose_8_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_8_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_8_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_8_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_8_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_8_BatchNorm_moving_variance', + is_test=True) + self.relu39 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_9_conv2d_transpose_conv45_weight = self.create_parameter( + shape=(64, 64, 4, 4), attr='conv45.weight') + self.bn40 = paddle.nn.BatchNorm( + num_channels=64, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_9_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_9_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_9_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_9_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_9_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_9_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_9_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_9_BatchNorm_moving_variance', + is_test=True) + self.relu40 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_10_conv2d_transpose_conv46_weight = self.create_parameter( + shape=(64, 32, 4, 4), attr='conv46.weight') + self.bn41 = paddle.nn.BatchNorm( + num_channels=32, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_10_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_10_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_10_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_10_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_10_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_10_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_10_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_10_BatchNorm_moving_variance', + is_test=True) + self.relu41 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_11_conv2d_transpose_conv47_weight = self.create_parameter( + shape=(32, 32, 4, 4), attr='conv47.weight') + self.bn42 = paddle.nn.BatchNorm( + num_channels=32, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_11_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_11_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_11_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_11_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_11_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_11_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_11_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_11_BatchNorm_moving_variance', + is_test=True) + self.relu42 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_12_conv2d_transpose_conv48_weight = self.create_parameter( + shape=(32, 16, 4, 4), attr='conv48.weight') + self.bn43 = paddle.nn.BatchNorm( + num_channels=16, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_12_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_12_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_12_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_12_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_12_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_12_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_12_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_12_BatchNorm_moving_variance', + is_test=True) + self.relu43 = paddle.nn.ReLU() + 
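+        # Remaining decoder parameters: transpose_13 keeps 16 channels, transpose_14 maps 16 -> 3,
+        # and transpose_15/16 stay at 3 channels before the final sigmoid output.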
self.resfcn256_Conv2d_transpose_13_conv2d_transpose_conv49_weight = self.create_parameter( + shape=(16, 16, 4, 4), attr='conv49.weight') + self.bn44 = paddle.nn.BatchNorm( + num_channels=16, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_13_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_13_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_13_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_13_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_13_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_13_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_13_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_13_BatchNorm_moving_variance', + is_test=True) + self.relu44 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_14_conv2d_transpose_conv50_weight = self.create_parameter( + shape=(16, 3, 4, 4), attr='conv50.weight') + self.bn45 = paddle.nn.BatchNorm( + num_channels=3, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_14_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_14_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_14_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_14_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_14_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_14_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_14_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_14_BatchNorm_moving_variance', + is_test=True) + self.relu45 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_15_conv2d_transpose_conv51_weight = self.create_parameter( + shape=(3, 3, 4, 4), attr='conv51.weight') + self.bn46 = paddle.nn.BatchNorm( + num_channels=3, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_15_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_15_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_15_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_15_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_15_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_15_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_15_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_15_BatchNorm_moving_variance', + is_test=True) + self.relu46 = paddle.nn.ReLU() + self.resfcn256_Conv2d_transpose_16_conv2d_transpose_conv52_weight = self.create_parameter( + shape=(3, 3, 4, 4), attr='conv52.weight') + self.bn47 = paddle.nn.BatchNorm( + num_channels=3, + epsilon=0.0010000000474974513, + param_attr= + 'resfcn256_Conv2d_transpose_16_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_16_BatchNorm_gamma', + bias_attr= + 'resfcn256_Conv2d_transpose_16_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_16_BatchNorm_beta', + moving_mean_name= + 'resfcn256_Conv2d_transpose_16_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_16_BatchNorm_moving_mean', + moving_variance_name= + 'resfcn256_Conv2d_transpose_16_BatchNorm_FusedBatchNorm_resfcn256_Conv2d_transpose_16_BatchNorm_moving_variance', + is_test=True) + self.sigmoid0 = paddle.nn.Sigmoid() + + def forward(self, Placeholder): + resfcn256_Conv2d_transpose_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=512) + resfcn256_Conv2d_transpose_1_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=2) + 
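+        # The *_mul_y / *_mul_1_y constants in this block hold each transposed-conv stage's spatial scale factor
+        # (2 for stride-2 stages, 1 otherwise), and the *_stack_3 constants hold that stage's output channel count.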
resfcn256_Conv2d_transpose_1_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=2) + resfcn256_Conv2d_transpose_1_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=256) + resfcn256_Conv2d_transpose_2_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_2_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_2_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=256) + resfcn256_Conv2d_transpose_3_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_3_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_3_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=256) + resfcn256_Conv2d_transpose_4_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=2) + resfcn256_Conv2d_transpose_4_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=2) + resfcn256_Conv2d_transpose_4_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=128) + resfcn256_Conv2d_transpose_5_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_5_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_5_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=128) + resfcn256_Conv2d_transpose_6_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_6_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_6_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=128) + resfcn256_Conv2d_transpose_7_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=2) + resfcn256_Conv2d_transpose_7_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=2) + resfcn256_Conv2d_transpose_7_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=64) + resfcn256_Conv2d_transpose_8_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_8_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_8_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=64) + resfcn256_Conv2d_transpose_9_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_9_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_9_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=64) + resfcn256_Conv2d_transpose_10_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=2) + resfcn256_Conv2d_transpose_10_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=2) + resfcn256_Conv2d_transpose_10_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=32) + resfcn256_Conv2d_transpose_11_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_11_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_11_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=32) + resfcn256_Conv2d_transpose_12_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=2) + resfcn256_Conv2d_transpose_12_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=2) + resfcn256_Conv2d_transpose_12_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=16) + resfcn256_Conv2d_transpose_13_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_13_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_13_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=16) + 
resfcn256_Conv2d_transpose_14_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_14_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_14_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=3) + resfcn256_Conv2d_transpose_15_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_15_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_15_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=3) + resfcn256_Conv2d_transpose_16_mul_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_16_mul_1_y = paddle.full(dtype='int32', shape=[1], fill_value=1) + resfcn256_Conv2d_transpose_16_stack_3 = paddle.full(dtype='int32', shape=[1], fill_value=3) + conv2d_transpose_0 = paddle.transpose(x=Placeholder, perm=[0, 3, 1, 2]) + resfcn256_Conv_Conv2D = self.conv0(conv2d_transpose_0) + resfcn256_Conv_BatchNorm_FusedBatchNorm = self.bn0(resfcn256_Conv_Conv2D) + resfcn256_Conv_Relu = self.relu0(resfcn256_Conv_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_shortcut_Conv2D = self.conv1(resfcn256_Conv_Relu) + resfcn256_resBlock_Conv_Conv2D = self.conv2(resfcn256_Conv_Relu) + resfcn256_resBlock_Conv_BatchNorm_FusedBatchNorm = self.bn1(resfcn256_resBlock_Conv_Conv2D) + resfcn256_resBlock_Conv_Relu = self.relu1(resfcn256_resBlock_Conv_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_Conv_1_Conv2D = self.conv3(resfcn256_resBlock_Conv_Relu) + resfcn256_resBlock_Conv_1_BatchNorm_FusedBatchNorm = self.bn2(resfcn256_resBlock_Conv_1_Conv2D) + resfcn256_resBlock_Conv_1_Relu = self.relu2(resfcn256_resBlock_Conv_1_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_Conv_2_Conv2D = self.conv4(resfcn256_resBlock_Conv_1_Relu) + resfcn256_resBlock_add = paddle.add(x=resfcn256_resBlock_Conv_2_Conv2D, y=resfcn256_resBlock_shortcut_Conv2D) + resfcn256_resBlock_BatchNorm_FusedBatchNorm = self.bn3(resfcn256_resBlock_add) + resfcn256_resBlock_Relu = self.relu3(resfcn256_resBlock_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_1_Conv_Conv2D = self.conv5(resfcn256_resBlock_Relu) + resfcn256_resBlock_1_Conv_BatchNorm_FusedBatchNorm = self.bn4(resfcn256_resBlock_1_Conv_Conv2D) + resfcn256_resBlock_1_Conv_Relu = self.relu4(resfcn256_resBlock_1_Conv_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_1_Conv_1_Conv2D = self.conv6(resfcn256_resBlock_1_Conv_Relu) + resfcn256_resBlock_1_Conv_1_BatchNorm_FusedBatchNorm = self.bn5(resfcn256_resBlock_1_Conv_1_Conv2D) + resfcn256_resBlock_1_Conv_1_Relu = self.relu5(resfcn256_resBlock_1_Conv_1_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_1_Conv_2_Conv2D = self.conv7(resfcn256_resBlock_1_Conv_1_Relu) + resfcn256_resBlock_1_add = paddle.add(x=resfcn256_resBlock_1_Conv_2_Conv2D, y=resfcn256_resBlock_Relu) + resfcn256_resBlock_1_BatchNorm_FusedBatchNorm = self.bn6(resfcn256_resBlock_1_add) + resfcn256_resBlock_1_Relu = self.relu6(resfcn256_resBlock_1_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_2_shortcut_Conv2D = self.conv8(resfcn256_resBlock_1_Relu) + resfcn256_resBlock_2_Conv_Conv2D = self.conv9(resfcn256_resBlock_1_Relu) + resfcn256_resBlock_2_Conv_BatchNorm_FusedBatchNorm = self.bn7(resfcn256_resBlock_2_Conv_Conv2D) + resfcn256_resBlock_2_Conv_Relu = self.relu7(resfcn256_resBlock_2_Conv_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_2_Conv_1_Conv2D = self.conv10(resfcn256_resBlock_2_Conv_Relu) + resfcn256_resBlock_2_Conv_1_BatchNorm_FusedBatchNorm = self.bn8(resfcn256_resBlock_2_Conv_1_Conv2D) + 
resfcn256_resBlock_2_Conv_1_Relu = self.relu8(resfcn256_resBlock_2_Conv_1_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_2_Conv_2_Conv2D = self.conv11(resfcn256_resBlock_2_Conv_1_Relu) + resfcn256_resBlock_2_add = paddle.add( + x=resfcn256_resBlock_2_Conv_2_Conv2D, y=resfcn256_resBlock_2_shortcut_Conv2D) + resfcn256_resBlock_2_BatchNorm_FusedBatchNorm = self.bn9(resfcn256_resBlock_2_add) + resfcn256_resBlock_2_Relu = self.relu9(resfcn256_resBlock_2_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_3_Conv_Conv2D = self.conv12(resfcn256_resBlock_2_Relu) + resfcn256_resBlock_3_Conv_BatchNorm_FusedBatchNorm = self.bn10(resfcn256_resBlock_3_Conv_Conv2D) + resfcn256_resBlock_3_Conv_Relu = self.relu10(resfcn256_resBlock_3_Conv_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_3_Conv_1_Conv2D = self.conv13(resfcn256_resBlock_3_Conv_Relu) + resfcn256_resBlock_3_Conv_1_BatchNorm_FusedBatchNorm = self.bn11(resfcn256_resBlock_3_Conv_1_Conv2D) + resfcn256_resBlock_3_Conv_1_Relu = self.relu11(resfcn256_resBlock_3_Conv_1_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_3_Conv_2_Conv2D = self.conv14(resfcn256_resBlock_3_Conv_1_Relu) + resfcn256_resBlock_3_add = paddle.add(x=resfcn256_resBlock_3_Conv_2_Conv2D, y=resfcn256_resBlock_2_Relu) + resfcn256_resBlock_3_BatchNorm_FusedBatchNorm = self.bn12(resfcn256_resBlock_3_add) + resfcn256_resBlock_3_Relu = self.relu12(resfcn256_resBlock_3_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_4_shortcut_Conv2D = self.conv15(resfcn256_resBlock_3_Relu) + resfcn256_resBlock_4_Conv_Conv2D = self.conv16(resfcn256_resBlock_3_Relu) + resfcn256_resBlock_4_Conv_BatchNorm_FusedBatchNorm = self.bn13(resfcn256_resBlock_4_Conv_Conv2D) + resfcn256_resBlock_4_Conv_Relu = self.relu13(resfcn256_resBlock_4_Conv_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_4_Conv_1_Conv2D = self.conv17(resfcn256_resBlock_4_Conv_Relu) + resfcn256_resBlock_4_Conv_1_BatchNorm_FusedBatchNorm = self.bn14(resfcn256_resBlock_4_Conv_1_Conv2D) + resfcn256_resBlock_4_Conv_1_Relu = self.relu14(resfcn256_resBlock_4_Conv_1_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_4_Conv_2_Conv2D = self.conv18(resfcn256_resBlock_4_Conv_1_Relu) + resfcn256_resBlock_4_add = paddle.add( + x=resfcn256_resBlock_4_Conv_2_Conv2D, y=resfcn256_resBlock_4_shortcut_Conv2D) + resfcn256_resBlock_4_BatchNorm_FusedBatchNorm = self.bn15(resfcn256_resBlock_4_add) + resfcn256_resBlock_4_Relu = self.relu15(resfcn256_resBlock_4_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_5_Conv_Conv2D = self.conv19(resfcn256_resBlock_4_Relu) + resfcn256_resBlock_5_Conv_BatchNorm_FusedBatchNorm = self.bn16(resfcn256_resBlock_5_Conv_Conv2D) + resfcn256_resBlock_5_Conv_Relu = self.relu16(resfcn256_resBlock_5_Conv_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_5_Conv_1_Conv2D = self.conv20(resfcn256_resBlock_5_Conv_Relu) + resfcn256_resBlock_5_Conv_1_BatchNorm_FusedBatchNorm = self.bn17(resfcn256_resBlock_5_Conv_1_Conv2D) + resfcn256_resBlock_5_Conv_1_Relu = self.relu17(resfcn256_resBlock_5_Conv_1_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_5_Conv_2_Conv2D = self.conv21(resfcn256_resBlock_5_Conv_1_Relu) + resfcn256_resBlock_5_add = paddle.add(x=resfcn256_resBlock_5_Conv_2_Conv2D, y=resfcn256_resBlock_4_Relu) + resfcn256_resBlock_5_BatchNorm_FusedBatchNorm = self.bn18(resfcn256_resBlock_5_add) + resfcn256_resBlock_5_Relu = self.relu18(resfcn256_resBlock_5_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_6_shortcut_Conv2D = self.conv22(resfcn256_resBlock_5_Relu) + resfcn256_resBlock_6_Conv_Conv2D = self.conv23(resfcn256_resBlock_5_Relu) + 
resfcn256_resBlock_6_Conv_BatchNorm_FusedBatchNorm = self.bn19(resfcn256_resBlock_6_Conv_Conv2D) + resfcn256_resBlock_6_Conv_Relu = self.relu19(resfcn256_resBlock_6_Conv_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_6_Conv_1_Conv2D = self.conv24(resfcn256_resBlock_6_Conv_Relu) + resfcn256_resBlock_6_Conv_1_BatchNorm_FusedBatchNorm = self.bn20(resfcn256_resBlock_6_Conv_1_Conv2D) + resfcn256_resBlock_6_Conv_1_Relu = self.relu20(resfcn256_resBlock_6_Conv_1_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_6_Conv_2_Conv2D = self.conv25(resfcn256_resBlock_6_Conv_1_Relu) + resfcn256_resBlock_6_add = paddle.add( + x=resfcn256_resBlock_6_Conv_2_Conv2D, y=resfcn256_resBlock_6_shortcut_Conv2D) + resfcn256_resBlock_6_BatchNorm_FusedBatchNorm = self.bn21(resfcn256_resBlock_6_add) + resfcn256_resBlock_6_Relu = self.relu21(resfcn256_resBlock_6_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_7_Conv_Conv2D = self.conv26(resfcn256_resBlock_6_Relu) + resfcn256_resBlock_7_Conv_BatchNorm_FusedBatchNorm = self.bn22(resfcn256_resBlock_7_Conv_Conv2D) + resfcn256_resBlock_7_Conv_Relu = self.relu22(resfcn256_resBlock_7_Conv_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_7_Conv_1_Conv2D = self.conv27(resfcn256_resBlock_7_Conv_Relu) + resfcn256_resBlock_7_Conv_1_BatchNorm_FusedBatchNorm = self.bn23(resfcn256_resBlock_7_Conv_1_Conv2D) + resfcn256_resBlock_7_Conv_1_Relu = self.relu23(resfcn256_resBlock_7_Conv_1_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_7_Conv_2_Conv2D = self.conv28(resfcn256_resBlock_7_Conv_1_Relu) + resfcn256_resBlock_7_add = paddle.add(x=resfcn256_resBlock_7_Conv_2_Conv2D, y=resfcn256_resBlock_6_Relu) + resfcn256_resBlock_7_BatchNorm_FusedBatchNorm = self.bn24(resfcn256_resBlock_7_add) + resfcn256_resBlock_7_Relu = self.relu24(resfcn256_resBlock_7_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_8_shortcut_Conv2D = self.conv29(resfcn256_resBlock_7_Relu) + resfcn256_resBlock_8_Conv_Conv2D = self.conv30(resfcn256_resBlock_7_Relu) + resfcn256_resBlock_8_Conv_BatchNorm_FusedBatchNorm = self.bn25(resfcn256_resBlock_8_Conv_Conv2D) + resfcn256_resBlock_8_Conv_Relu = self.relu25(resfcn256_resBlock_8_Conv_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_8_Conv_1_Conv2D = self.conv31(resfcn256_resBlock_8_Conv_Relu) + resfcn256_resBlock_8_Conv_1_BatchNorm_FusedBatchNorm = self.bn26(resfcn256_resBlock_8_Conv_1_Conv2D) + resfcn256_resBlock_8_Conv_1_Relu = self.relu26(resfcn256_resBlock_8_Conv_1_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_8_Conv_2_Conv2D = self.conv32(resfcn256_resBlock_8_Conv_1_Relu) + resfcn256_resBlock_8_add = paddle.add( + x=resfcn256_resBlock_8_Conv_2_Conv2D, y=resfcn256_resBlock_8_shortcut_Conv2D) + resfcn256_resBlock_8_BatchNorm_FusedBatchNorm = self.bn27(resfcn256_resBlock_8_add) + resfcn256_resBlock_8_Relu = self.relu27(resfcn256_resBlock_8_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_9_Conv_Conv2D = self.conv33(resfcn256_resBlock_8_Relu) + resfcn256_resBlock_9_Conv_BatchNorm_FusedBatchNorm = self.bn28(resfcn256_resBlock_9_Conv_Conv2D) + resfcn256_resBlock_9_Conv_Relu = self.relu28(resfcn256_resBlock_9_Conv_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_9_Conv_1_Conv2D = self.conv34(resfcn256_resBlock_9_Conv_Relu) + resfcn256_resBlock_9_Conv_1_BatchNorm_FusedBatchNorm = self.bn29(resfcn256_resBlock_9_Conv_1_Conv2D) + resfcn256_resBlock_9_Conv_1_Relu = self.relu29(resfcn256_resBlock_9_Conv_1_BatchNorm_FusedBatchNorm) + resfcn256_resBlock_9_Conv_2_Conv2D = self.conv35(resfcn256_resBlock_9_Conv_1_Relu) + resfcn256_resBlock_9_add = paddle.add(x=resfcn256_resBlock_9_Conv_2_Conv2D, 
y=resfcn256_resBlock_8_Relu) + resfcn256_resBlock_9_BatchNorm_FusedBatchNorm = self.bn30(resfcn256_resBlock_9_add) + resfcn256_resBlock_9_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_resBlock_9_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_resBlock_9_Relu = self.relu30(resfcn256_resBlock_9_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_Shape = paddle.shape(input=resfcn256_resBlock_9_Relu) + resfcn256_Conv2d_transpose_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_strided_slice_1, y=resfcn256_Conv2d_transpose_mul_y) + resfcn256_Conv2d_transpose_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_strided_slice_2, y=resfcn256_Conv2d_transpose_mul_1_y) + resfcn256_Conv2d_transpose_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_strided_slice, resfcn256_Conv2d_transpose_mul, resfcn256_Conv2d_transpose_mul_1, + resfcn256_Conv2d_transpose_stack_3 + ]) + resfcn256_Conv2d_transpose_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_stack, shape=[-1]) + conv2dbackpropinput_transpose_0 = paddle.transpose(x=resfcn256_resBlock_9_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_conv2d_transpose_conv36_weight = self.resfcn256_Conv2d_transpose_conv2d_transpose_conv36_weight + resfcn256_Conv2d_transpose_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_0, + weight=resfcn256_Conv2d_transpose_conv2d_transpose_conv36_weight, + stride=[1, 1], + dilation=[1, 1], + padding='SAME', + output_size=[8, 8]) + resfcn256_Conv2d_transpose_BatchNorm_FusedBatchNorm = self.bn31(resfcn256_Conv2d_transpose_conv2d_transpose) + resfcn256_Conv2d_transpose_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_Relu = self.relu31(resfcn256_Conv2d_transpose_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_1_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_Relu) + resfcn256_Conv2d_transpose_1_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_1_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_1_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_1_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_1_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_1_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_1_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_1_strided_slice_1, y=resfcn256_Conv2d_transpose_1_mul_y) + resfcn256_Conv2d_transpose_1_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_1_strided_slice_2, y=resfcn256_Conv2d_transpose_1_mul_1_y) + resfcn256_Conv2d_transpose_1_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_1_strided_slice, resfcn256_Conv2d_transpose_1_mul, + resfcn256_Conv2d_transpose_1_mul_1, resfcn256_Conv2d_transpose_1_stack_3 + ]) + resfcn256_Conv2d_transpose_1_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_1_stack, shape=[-1]) + conv2dbackpropinput_transpose_1 = paddle.transpose(x=resfcn256_Conv2d_transpose_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_1_conv2d_transpose_conv37_weight = 
self.resfcn256_Conv2d_transpose_1_conv2d_transpose_conv37_weight + resfcn256_Conv2d_transpose_1_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_1, + weight=resfcn256_Conv2d_transpose_1_conv2d_transpose_conv37_weight, + stride=[2, 2], + dilation=[1, 1], + padding='SAME', + output_size=[16, 16]) + resfcn256_Conv2d_transpose_1_BatchNorm_FusedBatchNorm = self.bn32(resfcn256_Conv2d_transpose_1_conv2d_transpose) + resfcn256_Conv2d_transpose_1_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_1_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_1_Relu = self.relu32(resfcn256_Conv2d_transpose_1_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_2_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_1_Relu) + resfcn256_Conv2d_transpose_2_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_2_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_2_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_2_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_2_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_2_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_2_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_2_strided_slice_1, y=resfcn256_Conv2d_transpose_2_mul_y) + resfcn256_Conv2d_transpose_2_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_2_strided_slice_2, y=resfcn256_Conv2d_transpose_2_mul_1_y) + resfcn256_Conv2d_transpose_2_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_2_strided_slice, resfcn256_Conv2d_transpose_2_mul, + resfcn256_Conv2d_transpose_2_mul_1, resfcn256_Conv2d_transpose_2_stack_3 + ]) + resfcn256_Conv2d_transpose_2_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_2_stack, shape=[-1]) + conv2dbackpropinput_transpose_2 = paddle.transpose(x=resfcn256_Conv2d_transpose_1_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_2_conv2d_transpose_conv38_weight = self.resfcn256_Conv2d_transpose_2_conv2d_transpose_conv38_weight + resfcn256_Conv2d_transpose_2_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_2, + weight=resfcn256_Conv2d_transpose_2_conv2d_transpose_conv38_weight, + stride=[1, 1], + dilation=[1, 1], + padding='SAME', + output_size=[16, 16]) + resfcn256_Conv2d_transpose_2_BatchNorm_FusedBatchNorm = self.bn33(resfcn256_Conv2d_transpose_2_conv2d_transpose) + resfcn256_Conv2d_transpose_2_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_2_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_2_Relu = self.relu33(resfcn256_Conv2d_transpose_2_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_3_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_2_Relu) + resfcn256_Conv2d_transpose_3_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_3_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_3_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_3_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_3_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_3_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_3_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_3_strided_slice_1, y=resfcn256_Conv2d_transpose_3_mul_y) + resfcn256_Conv2d_transpose_3_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_3_strided_slice_2, y=resfcn256_Conv2d_transpose_3_mul_1_y) + resfcn256_Conv2d_transpose_3_stack = 
paddle.stack(x=[ + resfcn256_Conv2d_transpose_3_strided_slice, resfcn256_Conv2d_transpose_3_mul, + resfcn256_Conv2d_transpose_3_mul_1, resfcn256_Conv2d_transpose_3_stack_3 + ]) + resfcn256_Conv2d_transpose_3_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_3_stack, shape=[-1]) + conv2dbackpropinput_transpose_3 = paddle.transpose(x=resfcn256_Conv2d_transpose_2_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_3_conv2d_transpose_conv39_weight = self.resfcn256_Conv2d_transpose_3_conv2d_transpose_conv39_weight + resfcn256_Conv2d_transpose_3_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_3, + weight=resfcn256_Conv2d_transpose_3_conv2d_transpose_conv39_weight, + stride=[1, 1], + dilation=[1, 1], + padding='SAME', + output_size=[16, 16]) + resfcn256_Conv2d_transpose_3_BatchNorm_FusedBatchNorm = self.bn34(resfcn256_Conv2d_transpose_3_conv2d_transpose) + resfcn256_Conv2d_transpose_3_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_3_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_3_Relu = self.relu34(resfcn256_Conv2d_transpose_3_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_4_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_3_Relu) + resfcn256_Conv2d_transpose_4_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_4_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_4_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_4_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_4_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_4_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_4_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_4_strided_slice_1, y=resfcn256_Conv2d_transpose_4_mul_y) + resfcn256_Conv2d_transpose_4_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_4_strided_slice_2, y=resfcn256_Conv2d_transpose_4_mul_1_y) + resfcn256_Conv2d_transpose_4_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_4_strided_slice, resfcn256_Conv2d_transpose_4_mul, + resfcn256_Conv2d_transpose_4_mul_1, resfcn256_Conv2d_transpose_4_stack_3 + ]) + resfcn256_Conv2d_transpose_4_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_4_stack, shape=[-1]) + conv2dbackpropinput_transpose_4 = paddle.transpose(x=resfcn256_Conv2d_transpose_3_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_4_conv2d_transpose_conv40_weight = self.resfcn256_Conv2d_transpose_4_conv2d_transpose_conv40_weight + resfcn256_Conv2d_transpose_4_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_4, + weight=resfcn256_Conv2d_transpose_4_conv2d_transpose_conv40_weight, + stride=[2, 2], + dilation=[1, 1], + padding='SAME', + output_size=[32, 32]) + resfcn256_Conv2d_transpose_4_BatchNorm_FusedBatchNorm = self.bn35(resfcn256_Conv2d_transpose_4_conv2d_transpose) + resfcn256_Conv2d_transpose_4_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_4_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_4_Relu = self.relu35(resfcn256_Conv2d_transpose_4_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_5_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_4_Relu) + resfcn256_Conv2d_transpose_5_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_5_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_5_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_5_Shape, axes=[0], starts=[1], ends=[2]) + 
resfcn256_Conv2d_transpose_5_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_5_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_5_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_5_strided_slice_1, y=resfcn256_Conv2d_transpose_5_mul_y) + resfcn256_Conv2d_transpose_5_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_5_strided_slice_2, y=resfcn256_Conv2d_transpose_5_mul_1_y) + resfcn256_Conv2d_transpose_5_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_5_strided_slice, resfcn256_Conv2d_transpose_5_mul, + resfcn256_Conv2d_transpose_5_mul_1, resfcn256_Conv2d_transpose_5_stack_3 + ]) + resfcn256_Conv2d_transpose_5_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_5_stack, shape=[-1]) + conv2dbackpropinput_transpose_5 = paddle.transpose(x=resfcn256_Conv2d_transpose_4_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_5_conv2d_transpose_conv41_weight = self.resfcn256_Conv2d_transpose_5_conv2d_transpose_conv41_weight + resfcn256_Conv2d_transpose_5_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_5, + weight=resfcn256_Conv2d_transpose_5_conv2d_transpose_conv41_weight, + stride=[1, 1], + dilation=[1, 1], + padding='SAME', + output_size=[32, 32]) + resfcn256_Conv2d_transpose_5_BatchNorm_FusedBatchNorm = self.bn36(resfcn256_Conv2d_transpose_5_conv2d_transpose) + resfcn256_Conv2d_transpose_5_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_5_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_5_Relu = self.relu36(resfcn256_Conv2d_transpose_5_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_6_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_5_Relu) + resfcn256_Conv2d_transpose_6_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_6_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_6_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_6_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_6_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_6_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_6_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_6_strided_slice_1, y=resfcn256_Conv2d_transpose_6_mul_y) + resfcn256_Conv2d_transpose_6_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_6_strided_slice_2, y=resfcn256_Conv2d_transpose_6_mul_1_y) + resfcn256_Conv2d_transpose_6_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_6_strided_slice, resfcn256_Conv2d_transpose_6_mul, + resfcn256_Conv2d_transpose_6_mul_1, resfcn256_Conv2d_transpose_6_stack_3 + ]) + resfcn256_Conv2d_transpose_6_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_6_stack, shape=[-1]) + conv2dbackpropinput_transpose_6 = paddle.transpose(x=resfcn256_Conv2d_transpose_5_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_6_conv2d_transpose_conv42_weight = self.resfcn256_Conv2d_transpose_6_conv2d_transpose_conv42_weight + resfcn256_Conv2d_transpose_6_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_6, + weight=resfcn256_Conv2d_transpose_6_conv2d_transpose_conv42_weight, + stride=[1, 1], + dilation=[1, 1], + padding='SAME', + output_size=[32, 32]) + resfcn256_Conv2d_transpose_6_BatchNorm_FusedBatchNorm = self.bn37(resfcn256_Conv2d_transpose_6_conv2d_transpose) + resfcn256_Conv2d_transpose_6_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_6_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + 
resfcn256_Conv2d_transpose_6_Relu = self.relu37(resfcn256_Conv2d_transpose_6_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_7_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_6_Relu) + resfcn256_Conv2d_transpose_7_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_7_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_7_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_7_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_7_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_7_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_7_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_7_strided_slice_1, y=resfcn256_Conv2d_transpose_7_mul_y) + resfcn256_Conv2d_transpose_7_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_7_strided_slice_2, y=resfcn256_Conv2d_transpose_7_mul_1_y) + resfcn256_Conv2d_transpose_7_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_7_strided_slice, resfcn256_Conv2d_transpose_7_mul, + resfcn256_Conv2d_transpose_7_mul_1, resfcn256_Conv2d_transpose_7_stack_3 + ]) + resfcn256_Conv2d_transpose_7_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_7_stack, shape=[-1]) + conv2dbackpropinput_transpose_7 = paddle.transpose(x=resfcn256_Conv2d_transpose_6_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_7_conv2d_transpose_conv43_weight = self.resfcn256_Conv2d_transpose_7_conv2d_transpose_conv43_weight + resfcn256_Conv2d_transpose_7_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_7, + weight=resfcn256_Conv2d_transpose_7_conv2d_transpose_conv43_weight, + stride=[2, 2], + dilation=[1, 1], + padding='SAME', + output_size=[64, 64]) + resfcn256_Conv2d_transpose_7_BatchNorm_FusedBatchNorm = self.bn38(resfcn256_Conv2d_transpose_7_conv2d_transpose) + resfcn256_Conv2d_transpose_7_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_7_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_7_Relu = self.relu38(resfcn256_Conv2d_transpose_7_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_8_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_7_Relu) + resfcn256_Conv2d_transpose_8_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_8_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_8_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_8_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_8_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_8_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_8_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_8_strided_slice_1, y=resfcn256_Conv2d_transpose_8_mul_y) + resfcn256_Conv2d_transpose_8_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_8_strided_slice_2, y=resfcn256_Conv2d_transpose_8_mul_1_y) + resfcn256_Conv2d_transpose_8_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_8_strided_slice, resfcn256_Conv2d_transpose_8_mul, + resfcn256_Conv2d_transpose_8_mul_1, resfcn256_Conv2d_transpose_8_stack_3 + ]) + resfcn256_Conv2d_transpose_8_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_8_stack, shape=[-1]) + conv2dbackpropinput_transpose_8 = paddle.transpose(x=resfcn256_Conv2d_transpose_7_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_8_conv2d_transpose_conv44_weight = self.resfcn256_Conv2d_transpose_8_conv2d_transpose_conv44_weight + resfcn256_Conv2d_transpose_8_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + 
x=conv2dbackpropinput_transpose_8, + weight=resfcn256_Conv2d_transpose_8_conv2d_transpose_conv44_weight, + stride=[1, 1], + dilation=[1, 1], + padding='SAME', + output_size=[64, 64]) + resfcn256_Conv2d_transpose_8_BatchNorm_FusedBatchNorm = self.bn39(resfcn256_Conv2d_transpose_8_conv2d_transpose) + resfcn256_Conv2d_transpose_8_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_8_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_8_Relu = self.relu39(resfcn256_Conv2d_transpose_8_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_9_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_8_Relu) + resfcn256_Conv2d_transpose_9_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_9_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_9_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_9_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_9_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_9_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_9_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_9_strided_slice_1, y=resfcn256_Conv2d_transpose_9_mul_y) + resfcn256_Conv2d_transpose_9_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_9_strided_slice_2, y=resfcn256_Conv2d_transpose_9_mul_1_y) + resfcn256_Conv2d_transpose_9_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_9_strided_slice, resfcn256_Conv2d_transpose_9_mul, + resfcn256_Conv2d_transpose_9_mul_1, resfcn256_Conv2d_transpose_9_stack_3 + ]) + resfcn256_Conv2d_transpose_9_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_9_stack, shape=[-1]) + conv2dbackpropinput_transpose_9 = paddle.transpose(x=resfcn256_Conv2d_transpose_8_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_9_conv2d_transpose_conv45_weight = self.resfcn256_Conv2d_transpose_9_conv2d_transpose_conv45_weight + resfcn256_Conv2d_transpose_9_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_9, + weight=resfcn256_Conv2d_transpose_9_conv2d_transpose_conv45_weight, + stride=[1, 1], + dilation=[1, 1], + padding='SAME', + output_size=[64, 64]) + resfcn256_Conv2d_transpose_9_BatchNorm_FusedBatchNorm = self.bn40(resfcn256_Conv2d_transpose_9_conv2d_transpose) + resfcn256_Conv2d_transpose_9_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_9_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_9_Relu = self.relu40(resfcn256_Conv2d_transpose_9_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_10_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_9_Relu) + resfcn256_Conv2d_transpose_10_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_10_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_10_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_10_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_10_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_10_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_10_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_10_strided_slice_1, y=resfcn256_Conv2d_transpose_10_mul_y) + resfcn256_Conv2d_transpose_10_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_10_strided_slice_2, y=resfcn256_Conv2d_transpose_10_mul_1_y) + resfcn256_Conv2d_transpose_10_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_10_strided_slice, resfcn256_Conv2d_transpose_10_mul, + resfcn256_Conv2d_transpose_10_mul_1, 
resfcn256_Conv2d_transpose_10_stack_3 + ]) + resfcn256_Conv2d_transpose_10_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_10_stack, shape=[-1]) + conv2dbackpropinput_transpose_10 = paddle.transpose(x=resfcn256_Conv2d_transpose_9_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_10_conv2d_transpose_conv46_weight = self.resfcn256_Conv2d_transpose_10_conv2d_transpose_conv46_weight + resfcn256_Conv2d_transpose_10_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_10, + weight=resfcn256_Conv2d_transpose_10_conv2d_transpose_conv46_weight, + stride=[2, 2], + dilation=[1, 1], + padding='SAME', + output_size=[128, 128]) + resfcn256_Conv2d_transpose_10_BatchNorm_FusedBatchNorm = self.bn41( + resfcn256_Conv2d_transpose_10_conv2d_transpose) + resfcn256_Conv2d_transpose_10_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_10_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_10_Relu = self.relu41(resfcn256_Conv2d_transpose_10_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_11_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_10_Relu) + resfcn256_Conv2d_transpose_11_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_11_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_11_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_11_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_11_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_11_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_11_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_11_strided_slice_1, y=resfcn256_Conv2d_transpose_11_mul_y) + resfcn256_Conv2d_transpose_11_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_11_strided_slice_2, y=resfcn256_Conv2d_transpose_11_mul_1_y) + resfcn256_Conv2d_transpose_11_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_11_strided_slice, resfcn256_Conv2d_transpose_11_mul, + resfcn256_Conv2d_transpose_11_mul_1, resfcn256_Conv2d_transpose_11_stack_3 + ]) + resfcn256_Conv2d_transpose_11_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_11_stack, shape=[-1]) + conv2dbackpropinput_transpose_11 = paddle.transpose(x=resfcn256_Conv2d_transpose_10_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_11_conv2d_transpose_conv47_weight = self.resfcn256_Conv2d_transpose_11_conv2d_transpose_conv47_weight + resfcn256_Conv2d_transpose_11_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_11, + weight=resfcn256_Conv2d_transpose_11_conv2d_transpose_conv47_weight, + stride=[1, 1], + dilation=[1, 1], + padding='SAME', + output_size=[128, 128]) + resfcn256_Conv2d_transpose_11_BatchNorm_FusedBatchNorm = self.bn42( + resfcn256_Conv2d_transpose_11_conv2d_transpose) + resfcn256_Conv2d_transpose_11_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_11_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_11_Relu = self.relu42(resfcn256_Conv2d_transpose_11_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_12_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_11_Relu) + resfcn256_Conv2d_transpose_12_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_12_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_12_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_12_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_12_strided_slice_2 = paddle.slice( + 
input=resfcn256_Conv2d_transpose_12_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_12_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_12_strided_slice_1, y=resfcn256_Conv2d_transpose_12_mul_y) + resfcn256_Conv2d_transpose_12_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_12_strided_slice_2, y=resfcn256_Conv2d_transpose_12_mul_1_y) + resfcn256_Conv2d_transpose_12_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_12_strided_slice, resfcn256_Conv2d_transpose_12_mul, + resfcn256_Conv2d_transpose_12_mul_1, resfcn256_Conv2d_transpose_12_stack_3 + ]) + resfcn256_Conv2d_transpose_12_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_12_stack, shape=[-1]) + conv2dbackpropinput_transpose_12 = paddle.transpose(x=resfcn256_Conv2d_transpose_11_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_12_conv2d_transpose_conv48_weight = self.resfcn256_Conv2d_transpose_12_conv2d_transpose_conv48_weight + resfcn256_Conv2d_transpose_12_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_12, + weight=resfcn256_Conv2d_transpose_12_conv2d_transpose_conv48_weight, + stride=[2, 2], + dilation=[1, 1], + padding='SAME', + output_size=[256, 256]) + resfcn256_Conv2d_transpose_12_BatchNorm_FusedBatchNorm = self.bn43( + resfcn256_Conv2d_transpose_12_conv2d_transpose) + resfcn256_Conv2d_transpose_12_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_12_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_12_Relu = self.relu43(resfcn256_Conv2d_transpose_12_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_13_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_12_Relu) + resfcn256_Conv2d_transpose_13_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_13_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_13_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_13_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_13_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_13_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_13_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_13_strided_slice_1, y=resfcn256_Conv2d_transpose_13_mul_y) + resfcn256_Conv2d_transpose_13_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_13_strided_slice_2, y=resfcn256_Conv2d_transpose_13_mul_1_y) + resfcn256_Conv2d_transpose_13_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_13_strided_slice, resfcn256_Conv2d_transpose_13_mul, + resfcn256_Conv2d_transpose_13_mul_1, resfcn256_Conv2d_transpose_13_stack_3 + ]) + resfcn256_Conv2d_transpose_13_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_13_stack, shape=[-1]) + conv2dbackpropinput_transpose_13 = paddle.transpose(x=resfcn256_Conv2d_transpose_12_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_13_conv2d_transpose_conv49_weight = self.resfcn256_Conv2d_transpose_13_conv2d_transpose_conv49_weight + resfcn256_Conv2d_transpose_13_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_13, + weight=resfcn256_Conv2d_transpose_13_conv2d_transpose_conv49_weight, + stride=[1, 1], + dilation=[1, 1], + padding='SAME', + output_size=[256, 256]) + resfcn256_Conv2d_transpose_13_BatchNorm_FusedBatchNorm = self.bn44( + resfcn256_Conv2d_transpose_13_conv2d_transpose) + resfcn256_Conv2d_transpose_13_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_13_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + 
resfcn256_Conv2d_transpose_13_Relu = self.relu44(resfcn256_Conv2d_transpose_13_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_14_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_13_Relu) + resfcn256_Conv2d_transpose_14_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_14_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_14_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_14_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_14_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_14_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_14_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_14_strided_slice_1, y=resfcn256_Conv2d_transpose_14_mul_y) + resfcn256_Conv2d_transpose_14_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_14_strided_slice_2, y=resfcn256_Conv2d_transpose_14_mul_1_y) + resfcn256_Conv2d_transpose_14_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_14_strided_slice, resfcn256_Conv2d_transpose_14_mul, + resfcn256_Conv2d_transpose_14_mul_1, resfcn256_Conv2d_transpose_14_stack_3 + ]) + resfcn256_Conv2d_transpose_14_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_14_stack, shape=[-1]) + conv2dbackpropinput_transpose_14 = paddle.transpose(x=resfcn256_Conv2d_transpose_13_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_14_conv2d_transpose_conv50_weight = self.resfcn256_Conv2d_transpose_14_conv2d_transpose_conv50_weight + resfcn256_Conv2d_transpose_14_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_14, + weight=resfcn256_Conv2d_transpose_14_conv2d_transpose_conv50_weight, + stride=[1, 1], + dilation=[1, 1], + padding='SAME', + output_size=[256, 256]) + resfcn256_Conv2d_transpose_14_BatchNorm_FusedBatchNorm = self.bn45( + resfcn256_Conv2d_transpose_14_conv2d_transpose) + resfcn256_Conv2d_transpose_14_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_14_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_14_Relu = self.relu45(resfcn256_Conv2d_transpose_14_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_15_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_14_Relu) + resfcn256_Conv2d_transpose_15_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_15_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_15_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_15_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_15_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_15_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_15_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_15_strided_slice_1, y=resfcn256_Conv2d_transpose_15_mul_y) + resfcn256_Conv2d_transpose_15_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_15_strided_slice_2, y=resfcn256_Conv2d_transpose_15_mul_1_y) + resfcn256_Conv2d_transpose_15_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_15_strided_slice, resfcn256_Conv2d_transpose_15_mul, + resfcn256_Conv2d_transpose_15_mul_1, resfcn256_Conv2d_transpose_15_stack_3 + ]) + resfcn256_Conv2d_transpose_15_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_15_stack, shape=[-1]) + conv2dbackpropinput_transpose_15 = paddle.transpose(x=resfcn256_Conv2d_transpose_14_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_15_conv2d_transpose_conv51_weight = self.resfcn256_Conv2d_transpose_15_conv2d_transpose_conv51_weight + 
resfcn256_Conv2d_transpose_15_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_15, + weight=resfcn256_Conv2d_transpose_15_conv2d_transpose_conv51_weight, + stride=[1, 1], + dilation=[1, 1], + padding='SAME', + output_size=[256, 256]) + resfcn256_Conv2d_transpose_15_BatchNorm_FusedBatchNorm = self.bn46( + resfcn256_Conv2d_transpose_15_conv2d_transpose) + resfcn256_Conv2d_transpose_15_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_15_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_15_Relu = self.relu46(resfcn256_Conv2d_transpose_15_BatchNorm_FusedBatchNorm) + resfcn256_Conv2d_transpose_16_Shape = paddle.shape(input=resfcn256_Conv2d_transpose_15_Relu) + resfcn256_Conv2d_transpose_16_strided_slice = paddle.slice( + input=resfcn256_Conv2d_transpose_16_Shape, axes=[0], starts=[0], ends=[1]) + resfcn256_Conv2d_transpose_16_strided_slice_1 = paddle.slice( + input=resfcn256_Conv2d_transpose_16_Shape, axes=[0], starts=[1], ends=[2]) + resfcn256_Conv2d_transpose_16_strided_slice_2 = paddle.slice( + input=resfcn256_Conv2d_transpose_16_Shape, axes=[0], starts=[2], ends=[3]) + resfcn256_Conv2d_transpose_16_mul = paddle.multiply( + x=resfcn256_Conv2d_transpose_16_strided_slice_1, y=resfcn256_Conv2d_transpose_16_mul_y) + resfcn256_Conv2d_transpose_16_mul_1 = paddle.multiply( + x=resfcn256_Conv2d_transpose_16_strided_slice_2, y=resfcn256_Conv2d_transpose_16_mul_1_y) + resfcn256_Conv2d_transpose_16_stack = paddle.stack(x=[ + resfcn256_Conv2d_transpose_16_strided_slice, resfcn256_Conv2d_transpose_16_mul, + resfcn256_Conv2d_transpose_16_mul_1, resfcn256_Conv2d_transpose_16_stack_3 + ]) + resfcn256_Conv2d_transpose_16_stack = paddle.reshape(x=resfcn256_Conv2d_transpose_16_stack, shape=[-1]) + conv2dbackpropinput_transpose_16 = paddle.transpose(x=resfcn256_Conv2d_transpose_15_Relu, perm=[0, 3, 1, 2]) + resfcn256_Conv2d_transpose_16_conv2d_transpose_conv52_weight = self.resfcn256_Conv2d_transpose_16_conv2d_transpose_conv52_weight + resfcn256_Conv2d_transpose_16_conv2d_transpose = paddle.nn.functional.conv2d_transpose( + x=conv2dbackpropinput_transpose_16, + weight=resfcn256_Conv2d_transpose_16_conv2d_transpose_conv52_weight, + stride=[1, 1], + dilation=[1, 1], + padding='SAME', + output_size=[256, 256]) + resfcn256_Conv2d_transpose_16_BatchNorm_FusedBatchNorm = self.bn47( + resfcn256_Conv2d_transpose_16_conv2d_transpose) + resfcn256_Conv2d_transpose_16_BatchNorm_FusedBatchNorm = paddle.transpose( + x=resfcn256_Conv2d_transpose_16_BatchNorm_FusedBatchNorm, perm=[0, 2, 3, 1]) + resfcn256_Conv2d_transpose_16_Sigmoid = self.sigmoid0(resfcn256_Conv2d_transpose_16_BatchNorm_FusedBatchNorm) + return resfcn256_Conv2d_transpose_16_Sigmoid + + +def main(Placeholder): + # There are 1 inputs. + # Placeholder: shape-[-1, 256, 256, 3], type-float32. 
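+    # Note: the converted graph keeps the original TensorFlow NHWC layout, so `Placeholder`
+    # must be a float32 tensor of shape [batch, 256, 256, 3]. The .pdparams path below is
+    # hard-coded to the conversion environment and needs to be adjusted when running elsewhere.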
+ + paddle.disable_static() + params = paddle.load('/work/ToTransferInHub/PRNet-Paddle/pd_model/model.pdparams') + model = TFModel() + model.set_dict(params, use_structured_name=False) + model.eval() + out = model(Placeholder) + return out + + +if __name__ == '__main__': + tensor = paddle.randn([1, 256, 256, 3]) + print(main(tensor).shape) diff --git a/modules/image/image_processing/prnet/predictor.py b/modules/image/image_processing/prnet/predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..f44479201810e45b2a33de960b1ab90c2674b6c2 --- /dev/null +++ b/modules/image/image_processing/prnet/predictor.py @@ -0,0 +1,42 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import paddle + +from .pd_model.x2paddle_code import TFModel + + +class PosPrediction(): + def __init__(self, params, resolution_inp=256, resolution_op=256): + # -- hyper settings + self.resolution_inp = resolution_inp + self.resolution_op = resolution_op + self.MaxPos = resolution_inp * 1.1 + + # network type + self.network = TFModel() + self.network.set_dict(params, use_structured_name=False) + self.network.eval() + + def predict(self, image): + paddle.disable_static() + image_tensor = paddle.to_tensor(image[np.newaxis, :, :, :], dtype='float32') + pos = self.network(image_tensor) + pos = pos.numpy() + pos = np.squeeze(pos) + return pos * self.MaxPos + + def predict_batch(self, images): + pos = self.sess.run(self.x_op, feed_dict={self.x: images}) + return pos * self.MaxPos diff --git a/modules/image/image_processing/prnet/requirements.txt b/modules/image/image_processing/prnet/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5bb7941037ccb6157ac0494fcecc8bb65725f91f --- /dev/null +++ b/modules/image/image_processing/prnet/requirements.txt @@ -0,0 +1,2 @@ +dlib +scikit-image diff --git a/modules/image/image_processing/prnet/util.py b/modules/image/image_processing/prnet/util.py new file mode 100644 index 0000000000000000000000000000000000000000..11b9ee3be3cb437a794cb357da38bb7bbb1a2d6d --- /dev/null +++ b/modules/image/image_processing/prnet/util.py @@ -0,0 +1,24 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_GRAYSCALE) + return data diff --git a/modules/image/image_processing/prnet/utils/__init__.py b/modules/image/image_processing/prnet/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/image/image_processing/prnet/utils/cv_plot.py b/modules/image/image_processing/prnet/utils/cv_plot.py new file mode 100644 index 0000000000000000000000000000000000000000..a40efaa50ca043b5c62e7e33bf6f48edf2a53d1e --- /dev/null +++ b/modules/image/image_processing/prnet/utils/cv_plot.py @@ -0,0 +1,86 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cv2 +import numpy as np + +end_list = np.array([17, 22, 27, 42, 48, 31, 36, 68], dtype=np.int32) - 1 + + +def plot_kpt(image, kpt): + ''' Draw 68 key points + Args: + image: the input image + kpt: (68, 3). + ''' + image = image.copy() + kpt = np.round(kpt).astype(np.int32) + for i in range(kpt.shape[0]): + st = kpt[i, :2] + image = cv2.circle(image, (st[0], st[1]), 1, (0, 0, 255), 2) + if i in end_list: + continue + ed = kpt[i + 1, :2] + image = cv2.line(image, (st[0], st[1]), (ed[0], ed[1]), (255, 255, 255), 1) + return image + + +def plot_vertices(image, vertices): + image = image.copy() + vertices = np.round(vertices).astype(np.int32) + for i in range(0, vertices.shape[0], 2): + st = vertices[i, :2] + image = cv2.circle(image, (st[0], st[1]), 1, (255, 0, 0), -1) + return image + + +def plot_pose_box(image, P, kpt, color=(0, 255, 0), line_width=2): + ''' Draw a 3D box as annotation of pose. Ref:https://github.com/yinguobing/head-pose-estimation/blob/master/pose_estimator.py + Args: + image: the input image + P: (3, 4). Affine Camera Matrix. + kpt: (68, 3). 
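+        color: BGR color tuple used to draw the box.
+        line_width: line thickness passed to cv2.
+    Returns:
+        image: a copy of the input image with the projected 3D pose box drawn on it.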
+ ''' + image = image.copy() + + point_3d = [] + rear_size = 90 + rear_depth = 0 + point_3d.append((-rear_size, -rear_size, rear_depth)) + point_3d.append((-rear_size, rear_size, rear_depth)) + point_3d.append((rear_size, rear_size, rear_depth)) + point_3d.append((rear_size, -rear_size, rear_depth)) + point_3d.append((-rear_size, -rear_size, rear_depth)) + + front_size = 105 + front_depth = 110 + point_3d.append((-front_size, -front_size, front_depth)) + point_3d.append((-front_size, front_size, front_depth)) + point_3d.append((front_size, front_size, front_depth)) + point_3d.append((front_size, -front_size, front_depth)) + point_3d.append((-front_size, -front_size, front_depth)) + point_3d = np.array(point_3d, dtype=np.float).reshape(-1, 3) + + # Map to 2d image points + point_3d_homo = np.hstack((point_3d, np.ones([point_3d.shape[0], 1]))) #n x 4 + point_2d = point_3d_homo.dot(P.T)[:, :2] + point_2d[:, :2] = point_2d[:, :2] - np.mean(point_2d[:4, :2], 0) + np.mean(kpt[:27, :2], 0) + point_2d = np.int32(point_2d.reshape(-1, 2)) + + # Draw all the lines + cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA) + cv2.line(image, tuple(point_2d[1]), tuple(point_2d[6]), color, line_width, cv2.LINE_AA) + cv2.line(image, tuple(point_2d[2]), tuple(point_2d[7]), color, line_width, cv2.LINE_AA) + cv2.line(image, tuple(point_2d[3]), tuple(point_2d[8]), color, line_width, cv2.LINE_AA) + + return image diff --git a/modules/image/image_processing/prnet/utils/estimate_pose.py b/modules/image/image_processing/prnet/utils/estimate_pose.py new file mode 100644 index 0000000000000000000000000000000000000000..ec9986df03a63c1b90ac027d7d53abeae22aa74b --- /dev/null +++ b/modules/image/image_processing/prnet/utils/estimate_pose.py @@ -0,0 +1,113 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from math import asin +from math import atan2 +from math import cos +from math import sin + +import numpy as np + + +def isRotationMatrix(R): + ''' checks if a matrix is a valid rotation matrix(whether orthogonal or not) + ''' + Rt = np.transpose(R) + shouldBeIdentity = np.dot(Rt, R) + I = np.identity(3, dtype=R.dtype) + n = np.linalg.norm(I - shouldBeIdentity) + return n < 1e-6 + + +def matrix2angle(R): + ''' compute three Euler angles from a Rotation Matrix. Ref: http://www.gregslabaugh.net/publications/euler.pdf + Args: + R: (3,3). rotation matrix + Returns: + x: yaw + y: pitch + z: roll + ''' + # assert(isRotationMatrix(R)) + + if R[2, 0] != 1 or R[2, 0] != -1: + x = asin(R[2, 0]) + y = atan2(R[2, 1] / cos(x), R[2, 2] / cos(x)) + z = atan2(R[1, 0] / cos(x), R[0, 0] / cos(x)) + + else: # Gimbal lock + z = 0 #can be anything + if R[2, 0] == -1: + x = np.pi / 2 + y = z + atan2(R[0, 1], R[0, 2]) + else: + x = -np.pi / 2 + y = -z + atan2(-R[0, 1], -R[0, 2]) + + return x, y, z + + +def P2sRt(P): + ''' decompositing camera matrix P. + Args: + P: (3, 4). Affine Camera Matrix. + Returns: + s: scale factor. + R: (3, 3). rotation matrix. + t2d: (2,). 
2d translation. + ''' + t2d = P[:2, 3] + R1 = P[0:1, :3] + R2 = P[1:2, :3] + s = (np.linalg.norm(R1) + np.linalg.norm(R2)) / 2.0 + r1 = R1 / np.linalg.norm(R1) + r2 = R2 / np.linalg.norm(R2) + r3 = np.cross(r1, r2) + + R = np.concatenate((r1, r2, r3), 0) + return s, R, t2d + + +def compute_similarity_transform(points_static, points_to_transform): + #http://nghiaho.com/?page_id=671 + p0 = np.copy(points_static).T + p1 = np.copy(points_to_transform).T + + t0 = -np.mean(p0, axis=1).reshape(3, 1) + t1 = -np.mean(p1, axis=1).reshape(3, 1) + t_final = t1 - t0 + + p0c = p0 + t0 + p1c = p1 + t1 + + covariance_matrix = p0c.dot(p1c.T) + U, S, V = np.linalg.svd(covariance_matrix) + R = U.dot(V) + if np.linalg.det(R) < 0: + R[:, 2] *= -1 + + rms_d0 = np.sqrt(np.mean(np.linalg.norm(p0c, axis=0)**2)) + rms_d1 = np.sqrt(np.mean(np.linalg.norm(p1c, axis=0)**2)) + + s = (rms_d0 / rms_d1) + P = np.c_[s * np.eye(3).dot(R), t_final] + return P + + +def estimate_pose(vertices): + canonical_vertices = np.load('Data/uv-data/canonical_vertices.npy') + P = compute_similarity_transform(vertices, canonical_vertices) + _, R, _ = P2sRt(P) # decompose affine matrix to s, R, t + pose = matrix2angle(R) + + return P, pose diff --git a/modules/image/image_processing/prnet/utils/render.py b/modules/image/image_processing/prnet/utils/render.py new file mode 100644 index 0000000000000000000000000000000000000000..ed7c11a8cdbec6b83f830cb1d08e3c7a23448dbb --- /dev/null +++ b/modules/image/image_processing/prnet/utils/render.py @@ -0,0 +1,355 @@ +''' +Author: YadiraF +Mail: fengyao@sjtu.edu.cn +''' +import numpy as np + + +def isPointInTri(point, tri_points): + ''' Judge whether the point is in the triangle + Method: + http://blackpawn.com/texts/pointinpoly/ + Args: + point: [u, v] or [x, y] + tri_points: three vertices(2d points) of a triangle. 2 coords x 3 vertices + Returns: + bool: true for in triangle + ''' + tp = tri_points + + # vectors + v0 = tp[:, 2] - tp[:, 0] + v1 = tp[:, 1] - tp[:, 0] + v2 = point - tp[:, 0] + + # dot products + dot00 = np.dot(v0.T, v0) + dot01 = np.dot(v0.T, v1) + dot02 = np.dot(v0.T, v2) + dot11 = np.dot(v1.T, v1) + dot12 = np.dot(v1.T, v2) + + # barycentric coordinates + if dot00 * dot11 - dot01 * dot01 == 0: + inverDeno = 0 + else: + inverDeno = 1 / (dot00 * dot11 - dot01 * dot01) + + u = (dot11 * dot02 - dot01 * dot12) * inverDeno + v = (dot00 * dot12 - dot01 * dot02) * inverDeno + + # check if point in triangle + return (u >= 0) & (v >= 0) & (u + v < 1) + + +def get_point_weight(point, tri_points): + ''' Get the weights of the position + Methods: https://gamedev.stackexchange.com/questions/23743/whats-the-most-efficient-way-to-find-barycentric-coordinates + -m1.compute the area of the triangles formed by embedding the point P inside the triangle + -m2.Christer Ericson's book "Real-Time Collision Detection". faster, so I used this. + Args: + point: [u, v] or [x, y] + tri_points: three vertices(2d points) of a triangle. 
2 coords x 3 vertices + Returns: + w0: weight of v0 + w1: weight of v1 + w2: weight of v3 + ''' + tp = tri_points + # vectors + v0 = tp[:, 2] - tp[:, 0] + v1 = tp[:, 1] - tp[:, 0] + v2 = point - tp[:, 0] + + # dot products + dot00 = np.dot(v0.T, v0) + dot01 = np.dot(v0.T, v1) + dot02 = np.dot(v0.T, v2) + dot11 = np.dot(v1.T, v1) + dot12 = np.dot(v1.T, v2) + + # barycentric coordinates + if dot00 * dot11 - dot01 * dot01 == 0: + inverDeno = 0 + else: + inverDeno = 1 / (dot00 * dot11 - dot01 * dot01) + + u = (dot11 * dot02 - dot01 * dot12) * inverDeno + v = (dot00 * dot12 - dot01 * dot02) * inverDeno + + w0 = 1 - u - v + w1 = v + w2 = u + + return w0, w1, w2 + + +def render_texture(vertices, colors, triangles, h, w, c=3): + ''' render mesh by z buffer + Args: + vertices: 3 x nver + colors: 3 x nver + triangles: 3 x ntri + h: height + w: width + ''' + # initial + image = np.zeros((h, w, c)) + + depth_buffer = np.zeros([h, w]) - 999999. + # triangle depth: approximate the depth to the average value of z in each vertex(v0, v1, v2), since the vertices are closed to each other + tri_depth = (vertices[2, triangles[0, :]] + vertices[2, triangles[1, :]] + vertices[2, triangles[2, :]]) / 3. + tri_tex = (colors[:, triangles[0, :]] + colors[:, triangles[1, :]] + colors[:, triangles[2, :]]) / 3. + + for i in range(triangles.shape[1]): + tri = triangles[:, i] # 3 vertex indices + + # the inner bounding box + umin = max(int(np.ceil(np.min(vertices[0, tri]))), 0) + umax = min(int(np.floor(np.max(vertices[0, tri]))), w - 1) + + vmin = max(int(np.ceil(np.min(vertices[1, tri]))), 0) + vmax = min(int(np.floor(np.max(vertices[1, tri]))), h - 1) + + if umax < umin or vmax < vmin: + continue + + for u in range(umin, umax + 1): + for v in range(vmin, vmax + 1): + if tri_depth[i] > depth_buffer[v, u] and isPointInTri([u, v], vertices[:2, tri]): + depth_buffer[v, u] = tri_depth[i] + image[v, u, :] = tri_tex[:, i] + return image + + +def map_texture(src_image, + src_vertices, + dst_vertices, + dst_triangle_buffer, + triangles, + h, + w, + c=3, + mapping_type='bilinear'): + ''' + Args: + triangles: 3 x ntri + + # src + src_image: height x width x nchannels + src_vertices: 3 x nver + + # dst + dst_vertices: 3 x nver + dst_triangle_buffer: height x width. the triangle index of each pixel in dst image + + Returns: + dst_image: height x width x nchannels + + ''' + [sh, sw, sc] = src_image.shape + dst_image = np.zeros((h, w, c)) + for y in range(h): + for x in range(w): + tri_ind = dst_triangle_buffer[y, x] + if tri_ind < 0: # no tri in dst image + continue + #if src_triangles_vis[tri_ind]: # the corresponding triangle in src image is invisible + # continue + + # then. For this triangle index, map corresponding pixels(in triangles) in src image to dst image + # Two Methods: + # M1. Calculate the corresponding affine matrix from src triangle to dst triangle. Then find the corresponding src position of this dst pixel. + # -- ToDo + # M2. Calculate the relative position of three vertices in dst triangle, then find the corresponding src position relative to three src vertices. 
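+            # The code below follows M2: compute the barycentric weights of this dst pixel
+            # inside its dst triangle, then apply the same weights to the corresponding src
+            # vertices to locate the source texel to sample.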
+ tri = triangles[:, tri_ind] + # dst weight, here directly use the center to approximate because the tri is small + # if tri_ind < 366: + # print tri_ind + w0, w1, w2 = get_point_weight([x, y], dst_vertices[:2, tri]) + # else: + # w0 = w1 = w2 = 1./3 + # src + src_texel = w0 * src_vertices[:2, tri[0]] + w1 * src_vertices[:2, tri[1]] + w2 * src_vertices[:2, tri[2]] # + # + if src_texel[0] < 0 or src_texel[0] > sw - 1 or src_texel[1] < 0 or src_texel[1] > sh - 1: + dst_image[y, x, :] = 0 + continue + # As the coordinates of the transformed pixel in the image will most likely not lie on a texel, we have to choose how to + # calculate the pixel colors depending on the next texels + # there are three different texture interpolation methods: area, bilinear and nearest neighbour + # print y, x, src_texel + # nearest neighbour + if mapping_type == 'nearest': + dst_image[y, x, :] = src_image[int(round(src_texel[1])), int(round(src_texel[0])), :] + # bilinear + elif mapping_type == 'bilinear': + # next 4 pixels + ul = src_image[int(np.floor(src_texel[1])), int(np.floor(src_texel[0])), :] + ur = src_image[int(np.floor(src_texel[1])), int(np.ceil(src_texel[0])), :] + dl = src_image[int(np.ceil(src_texel[1])), int(np.floor(src_texel[0])), :] + dr = src_image[int(np.ceil(src_texel[1])), int(np.ceil(src_texel[0])), :] + + yd = src_texel[1] - np.floor(src_texel[1]) + xd = src_texel[0] - np.floor(src_texel[0]) + dst_image[y, x, :] = ul * (1 - xd) * (1 - yd) + ur * xd * (1 - yd) + dl * (1 - xd) * yd + dr * xd * yd + + return dst_image + + +def get_depth_buffer(vertices, triangles, h, w): + ''' + Args: + vertices: 3 x nver + triangles: 3 x ntri + h: height + w: width + Returns: + depth_buffer: height x width + ToDo: + whether to add x, y by 0.5? the center of the pixel? + m3. like somewhere is wrong + # Each triangle has 3 vertices & Each vertex has 3 coordinates x, y, z. + # Here, the bigger the z, the fronter the point. + ''' + # initial + depth_buffer = np.zeros([h, w + ]) - 999999. #+ np.min(vertices[2,:]) - 999999. # set the initial z to the farest position + + ## calculate the depth(z) of each triangle + #-m1. z = the center of shpere(through 3 vertices) + #center3d = (vertices[:, triangles[0,:]] + vertices[:,triangles[1,:]] + vertices[:, triangles[2,:]])/3. + #tri_depth = np.sum(center3d**2, axis = 0) + #-m2. z = the center of z(v0, v1, v2) + tri_depth = (vertices[2, triangles[0, :]] + vertices[2, triangles[1, :]] + vertices[2, triangles[2, :]]) / 3. + + for i in range(triangles.shape[1]): + tri = triangles[:, i] # 3 vertex indices + + # the inner bounding box + umin = max(int(np.ceil(np.min(vertices[0, tri]))), 0) + umax = min(int(np.floor(np.max(vertices[0, tri]))), w - 1) + + vmin = max(int(np.ceil(np.min(vertices[1, tri]))), 0) + vmax = min(int(np.floor(np.max(vertices[1, tri]))), h - 1) + + if umax < umin or vmax < vmin: + continue + + for u in range(umin, umax + 1): + for v in range(vmin, vmax + 1): + #-m3. calculate the accurate depth(z) of each pixel by barycentric weights + #w0, w1, w2 = weightsOfpoint([u,v], vertices[:2, tri]) + #tri_depth = w0*vertices[2,tri[0]] + w1*vertices[2,tri[1]] + w2*vertices[2,tri[2]] + if tri_depth[i] > depth_buffer[v, u]: # and is_pointIntri([u,v], vertices[:2, tri]): + depth_buffer[v, u] = tri_depth[i] + + return depth_buffer + + +def get_triangle_buffer(vertices, triangles, h, w): + ''' + Args: + vertices: 3 x nver + triangles: 3 x ntri + h: height + w: width + Returns: + depth_buffer: height x width + ToDo: + whether to add x, y by 0.5? 
the center of the pixel? + m3. like somewhere is wrong + # Each triangle has 3 vertices & Each vertex has 3 coordinates x, y, z. + # Here, the bigger the z, the fronter the point. + ''' + # initial + depth_buffer = np.zeros([h, w + ]) - 999999. #+ np.min(vertices[2,:]) - 999999. # set the initial z to the farest position + triangle_buffer = np.zeros_like(depth_buffer, dtype=np.int32) - 1 # if -1, the pixel has no triangle correspondance + + ## calculate the depth(z) of each triangle + #-m1. z = the center of shpere(through 3 vertices) + #center3d = (vertices[:, triangles[0,:]] + vertices[:,triangles[1,:]] + vertices[:, triangles[2,:]])/3. + #tri_depth = np.sum(center3d**2, axis = 0) + #-m2. z = the center of z(v0, v1, v2) + tri_depth = (vertices[2, triangles[0, :]] + vertices[2, triangles[1, :]] + vertices[2, triangles[2, :]]) / 3. + + for i in range(triangles.shape[1]): + tri = triangles[:, i] # 3 vertex indices + + # the inner bounding box + umin = max(int(np.ceil(np.min(vertices[0, tri]))), 0) + umax = min(int(np.floor(np.max(vertices[0, tri]))), w - 1) + + vmin = max(int(np.ceil(np.min(vertices[1, tri]))), 0) + vmax = min(int(np.floor(np.max(vertices[1, tri]))), h - 1) + + if umax < umin or vmax < vmin: + continue + + for u in range(umin, umax + 1): + for v in range(vmin, vmax + 1): + #-m3. calculate the accurate depth(z) of each pixel by barycentric weights + #w0, w1, w2 = weightsOfpoint([u,v], vertices[:2, tri]) + #tri_depth = w0*vertices[2,tri[0]] + w1*vertices[2,tri[1]] + w2*vertices[2,tri[2]] + if tri_depth[i] > depth_buffer[v, u] and isPointInTri([u, v], vertices[:2, tri]): + depth_buffer[v, u] = tri_depth[i] + triangle_buffer[v, u] = i + + return triangle_buffer + + +def vis_of_vertices(vertices, triangles, h, w, depth_buffer=None): + ''' + Args: + vertices: 3 x nver + triangles: 3 x ntri + depth_buffer: height x width + Returns: + vertices_vis: nver. the visibility of each vertex + ''' + if depth_buffer == None: + depth_buffer = get_depth_buffer(vertices, triangles, h, w) + + vertices_vis = np.zeros(vertices.shape[1], dtype=bool) + + depth_tmp = np.zeros_like(depth_buffer) - 99999 + for i in range(vertices.shape[1]): + vertex = vertices[:, i] + + if np.floor(vertex[0]) < 0 or np.ceil(vertex[0]) > w - 1 or np.floor(vertex[1]) < 0 or np.ceil( + vertex[1]) > h - 1: + continue + + # bilinear interp + # ul = depth_buffer[int(np.floor(vertex[1])), int(np.floor(vertex[0]))] + # ur = depth_buffer[int(np.floor(vertex[1])), int(np.ceil(vertex[0]))] + # dl = depth_buffer[int(np.ceil(vertex[1])), int(np.floor(vertex[0]))] + # dr = depth_buffer[int(np.ceil(vertex[1])), int(np.ceil(vertex[0]))] + + # yd = vertex[1] - np.floor(vertex[1]) + # xd = vertex[0] - np.floor(vertex[0]) + + # vertex_depth = ul*(1-xd)*(1-yd) + ur*xd*(1-yd) + dl*(1-xd)*yd + dr*xd*yd + + # nearest + px = int(np.round(vertex[0])) + py = int(np.round(vertex[1])) + + # if (vertex[2] > depth_buffer[ul[0], ul[1]]) & (vertex[2] > depth_buffer[ur[0], ur[1]]) & (vertex[2] > depth_buffer[dl[0], dl[1]]) & (vertex[2] > depth_buffer[dr[0], dr[1]]): + if vertex[2] < depth_tmp[py, px]: + continue + + # if vertex[2] > depth_buffer[py, px]: + # vertices_vis[i] = True + # depth_tmp[py, px] = vertex[2] + # elif np.abs(vertex[2] - depth_buffer[py, px]) < 1: + # vertices_vis[i] = True + + threshold = 2 # need to be optimized. 
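+        # A vertex counts as visible when its depth is within `threshold` of the rasterized
+        # depth buffer at its nearest pixel; depth_tmp keeps the frontmost vertex already
+        # accepted at that pixel, so vertices lying behind it were skipped above.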
+ if np.abs(vertex[2] - depth_buffer[py, px]) < threshold: + # if np.abs(vertex[2] - vertex_depth) < threshold: + vertices_vis[i] = True + depth_tmp[py, px] = vertex[2] + + return vertices_vis diff --git a/modules/image/image_processing/prnet/utils/render_app.py b/modules/image/image_processing/prnet/utils/render_app.py new file mode 100644 index 0000000000000000000000000000000000000000..50a15f449733580f3942f7f1923818e98059486a --- /dev/null +++ b/modules/image/image_processing/prnet/utils/render_app.py @@ -0,0 +1,57 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +from scipy import ndimage +from utils.render import render_texture +from utils.render import vis_of_vertices + + +def get_visibility(vertices, triangles, h, w): + triangles = triangles.T + vertices_vis = vis_of_vertices(vertices.T, triangles, h, w) + vertices_vis = vertices_vis.astype(bool) + for k in range(2): + tri_vis = vertices_vis[triangles[0, :]] | vertices_vis[triangles[1, :]] | vertices_vis[triangles[2, :]] + ind = triangles[:, tri_vis] + vertices_vis[ind] = True + # for k in range(2): + # tri_vis = vertices_vis[triangles[0,:]] & vertices_vis[triangles[1,:]] & vertices_vis[triangles[2,:]] + # ind = triangles[:, tri_vis] + # vertices_vis[ind] = True + vertices_vis = vertices_vis.astype(np.float32) #1 for visible and 0 for non-visible + return vertices_vis + + +def get_uv_mask(vertices_vis, triangles, uv_coords, h, w, resolution): + triangles = triangles.T + vertices_vis = vertices_vis.astype(np.float32) + uv_mask = render_texture(uv_coords.T, vertices_vis[np.newaxis, :], triangles, resolution, resolution, 1) + uv_mask = np.squeeze(uv_mask > 0) + uv_mask = ndimage.binary_closing(uv_mask) + uv_mask = ndimage.binary_erosion(uv_mask, structure=np.ones((4, 4))) + uv_mask = ndimage.binary_closing(uv_mask) + uv_mask = ndimage.binary_erosion(uv_mask, structure=np.ones((4, 4))) + uv_mask = ndimage.binary_erosion(uv_mask, structure=np.ones((4, 4))) + uv_mask = ndimage.binary_erosion(uv_mask, structure=np.ones((4, 4))) + uv_mask = uv_mask.astype(np.float32) + + return np.squeeze(uv_mask) + + +def get_depth_image(vertices, triangles, h, w, isShow=False): + z = vertices[:, 2:] + if isShow: + z = z / max(z) + depth_image = render_texture(vertices.T, z.T, triangles.T, h, w, 1) + return np.squeeze(depth_image) diff --git a/modules/image/image_processing/prnet/utils/rotate_vertices.py b/modules/image/image_processing/prnet/utils/rotate_vertices.py new file mode 100644 index 0000000000000000000000000000000000000000..b96c8c3cc3590ef2b6dff4dcd2eb9e065f4cde4d --- /dev/null +++ b/modules/image/image_processing/prnet/utils/rotate_vertices.py @@ -0,0 +1,25 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np + + +# import scipy.io as +def frontalize(vertices): + canonical_vertices = np.load('Data/uv-data/canonical_vertices.npy') + + vertices_homo = np.hstack((vertices, np.ones([vertices.shape[0], 1]))) #n x 4 + P = np.linalg.lstsq(vertices_homo, canonical_vertices)[0].T # Affine matrix. 3 x 4 + front_vertices = vertices_homo.dot(P.T) + + return front_vertices diff --git a/modules/image/image_processing/prnet/utils/write.py b/modules/image/image_processing/prnet/utils/write.py new file mode 100644 index 0000000000000000000000000000000000000000..67274f0843a118946572f2d6672b2f0ad1b631e9 --- /dev/null +++ b/modules/image/image_processing/prnet/utils/write.py @@ -0,0 +1,168 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os + +import numpy as np +from skimage.io import imsave + + +def write_asc(path, vertices): + ''' + Args: + vertices: shape = (nver, 3) + ''' + if path.split('.')[-1] == 'asc': + np.savetxt(path, vertices) + else: + np.savetxt(path + '.asc', vertices) + + +def write_obj_with_colors(obj_name, vertices, triangles, colors): + ''' Save 3D face model with texture represented by colors. + Args: + obj_name: str + vertices: shape = (nver, 3) + colors: shape = (nver, 3) + triangles: shape = (ntri, 3) + ''' + triangles = triangles.copy() + triangles += 1 # meshlab start with 1 + + if obj_name.split('.')[-1] != 'obj': + obj_name = obj_name + '.obj' + + # write obj + with open(obj_name, 'w') as f: + + # write vertices & colors + for i in range(vertices.shape[0]): + # s = 'v {} {} {} \n'.format(vertices[0,i], vertices[1,i], vertices[2,i]) + s = 'v {} {} {} {} {} {}\n'.format(vertices[i, 0], vertices[i, 1], vertices[i, 2], colors[i, 0], + colors[i, 1], colors[i, 2]) + f.write(s) + + # write f: ver ind/ uv ind + [k, ntri] = triangles.shape + for i in range(triangles.shape[0]): + # s = 'f {} {} {}\n'.format(triangles[i, 0], triangles[i, 1], triangles[i, 2]) + s = 'f {} {} {}\n'.format(triangles[i, 2], triangles[i, 1], triangles[i, 0]) + f.write(s) + + +def write_obj_with_texture(obj_name, vertices, triangles, texture, uv_coords): + ''' Save 3D face model with texture represented by texture map. 
+ Ref: https://github.com/patrikhuber/eos/blob/bd00155ebae4b1a13b08bf5a991694d682abbada/include/eos/core/Mesh.hpp + Args: + obj_name: str + vertices: shape = (nver, 3) + triangles: shape = (ntri, 3) + texture: shape = (256,256,3) + uv_coords: shape = (nver, 3) max value<=1 + ''' + if obj_name.split('.')[-1] != 'obj': + obj_name = obj_name + '.obj' + mtl_name = obj_name.replace('.obj', '.mtl') + texture_name = obj_name.replace('.obj', '_texture.png') + + triangles = triangles.copy() + triangles += 1 # mesh lab start with 1 + + # write obj + with open(obj_name, 'w') as f: + # first line: write mtlib(material library) + s = "mtllib {}\n".format(os.path.abspath(mtl_name)) + f.write(s) + + # write vertices + for i in range(vertices.shape[0]): + s = 'v {} {} {}\n'.format(vertices[i, 0], vertices[i, 1], vertices[i, 2]) + f.write(s) + + # write uv coords + for i in range(uv_coords.shape[0]): + s = 'vt {} {}\n'.format(uv_coords[i, 0], 1 - uv_coords[i, 1]) + f.write(s) + + f.write("usemtl FaceTexture\n") + + # write f: ver ind/ uv ind + for i in range(triangles.shape[0]): + # s = 'f {}/{} {}/{} {}/{}\n'.format(triangles[i,0], triangles[i,0], triangles[i,1], triangles[i,1], triangles[i,2], triangles[i,2]) + s = 'f {}/{} {}/{} {}/{}\n'.format(triangles[i, 2], triangles[i, 2], triangles[i, 1], triangles[i, 1], + triangles[i, 0], triangles[i, 0]) + f.write(s) + + # write mtl + with open(mtl_name, 'w') as f: + f.write("newmtl FaceTexture\n") + s = 'map_Kd {}\n'.format(os.path.abspath(texture_name)) # map to image + f.write(s) + + # write texture as png + imsave(texture_name, texture) + + +def write_obj_with_colors_texture(obj_name, vertices, colors, triangles, texture, uv_coords): + ''' Save 3D face model with texture. + Ref: https://github.com/patrikhuber/eos/blob/bd00155ebae4b1a13b08bf5a991694d682abbada/include/eos/core/Mesh.hpp + Args: + obj_name: str + vertices: shape = (nver, 3) + colors: shape = (nver, 3) + triangles: shape = (ntri, 3) + texture: shape = (256,256,3) + uv_coords: shape = (nver, 3) max value<=1 + ''' + if obj_name.split('.')[-1] != 'obj': + obj_name = obj_name + '.obj' + mtl_name = obj_name.replace('.obj', '.mtl') + texture_name = obj_name.replace('.obj', '_texture.png') + + triangles = triangles.copy() + triangles += 1 # mesh lab start with 1 + + # write obj + with open(obj_name, 'w') as f: + # first line: write mtlib(material library) + s = "mtllib {}\n".format(os.path.abspath(mtl_name)) + f.write(s) + + # write vertices + for i in range(vertices.shape[0]): + s = 'v {} {} {} {} {} {}\n'.format(vertices[i, 0], vertices[i, 1], vertices[i, 2], colors[i, 0], + colors[i, 1], colors[i, 2]) + f.write(s) + + # write uv coords + for i in range(uv_coords.shape[0]): + s = 'vt {} {}\n'.format(uv_coords[i, 0], 1 - uv_coords[i, 1]) + f.write(s) + + f.write("usemtl FaceTexture\n") + + # write f: ver ind/ uv ind + for i in range(triangles.shape[0]): + # s = 'f {}/{} {}/{} {}/{}\n'.format(triangles[i,0], triangles[i,0], triangles[i,1], triangles[i,1], triangles[i,2], triangles[i,2]) + s = 'f {}/{} {}/{} {}/{}\n'.format(triangles[i, 2], triangles[i, 2], triangles[i, 1], triangles[i, 1], + triangles[i, 0], triangles[i, 0]) + f.write(s) + + # write mtl + with open(mtl_name, 'w') as f: + f.write("newmtl FaceTexture\n") + s = 'map_Kd {}\n'.format(os.path.abspath(texture_name)) # map to image + f.write(s) + + # write texture as png + imsave(texture_name, texture) diff --git a/modules/image/image_processing/seeinthedark/README.md b/modules/image/image_processing/seeinthedark/README.md new file mode 100644 
index 0000000000000000000000000000000000000000..11156bc6162f6102c3d6863d2e0a0f225d5ca0f2 --- /dev/null +++ b/modules/image/image_processing/seeinthedark/README.md @@ -0,0 +1,133 @@ +# seeinthedark + +|模型名称|seeinthedark| +| :--- | :---: | +|类别|图像 - 暗光增强| +|网络|ConvNet| +|数据集|SID dataset| +|是否支持Fine-tuning|否| +|模型大小|120MB| +|最新更新日期|2021-11-02| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ +
+ 输入图像 +
+ +
+ 输出图像 +
+

+ +- ### 模型介绍 + + - 通过大量暗光条件下短曝光和长曝光组成的图像对,以RAW图像为输入,RGB图像为参照进行训练,该模型实现端到端直接将暗光下的RAW图像处理得到可见的RGB图像。 + + - 更多详情参考:[Learning to See in the Dark](http://cchen156.github.io/paper/18CVPR_SID.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + - rawpy + +- ### 2、安装 + + - ```shell + $ hub install seeinthedark + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a raw(Sony, .ARW) file + $ hub run seeinthedark --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现暗光增强模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + denoiser = hub.Module(name="seeinthedark") + input_path = "/PATH/TO/IMAGE" + # Read from a raw file + denoiser.denoising(paths=[input_path], output_path='./denoising_result.png', use_gpu=True) + ``` + +- ### 3、API + + - ```python + def denoising(images=None, paths=None, output_dir='./denoising_result/', use_gpu=False, visualization=True) + ``` + - 暗光增强API,完成对暗光RAW图像的降噪并处理生成RGB图像。 + + - **参数** + - images (list\[numpy.ndarray\]): 输入的图像,单通道的马赛克图像;
+ - paths (list\[str\]): 暗光图像文件的路径,Sony的RAW格式;
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m seeinthedark + ``` + + - 这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import rawpy + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(rawpy.imread("/PATH/TO/IMAGE").raw_image_visible)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/seeinthedark/" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install seeinthedark==1.0.0 + ``` diff --git a/modules/image/image_processing/seeinthedark/module.py b/modules/image/image_processing/seeinthedark/module.py new file mode 100644 index 0000000000000000000000000000000000000000..8074d56da117708b7fb4cb72cef0ab96ed09a647 --- /dev/null +++ b/modules/image/image_processing/seeinthedark/module.py @@ -0,0 +1,194 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import argparse + +import paddle +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable, serving +import numpy as np +import rawpy +import cv2 + +from .util import base64_to_cv2 + + +def pack_raw(raw): + # pack Bayer image to 4 channels + im = raw + if not isinstance(raw, np.ndarray): + im = raw.raw_image_visible.astype(np.float32) + im = np.maximum(im - 512, 0) / (16383 - 512) # subtract the black level + + im = np.expand_dims(im, axis=2) + img_shape = im.shape + H = img_shape[0] + W = img_shape[1] + + out = np.concatenate((im[0:H:2, 0:W:2, :], im[0:H:2, 1:W:2, :], im[1:H:2, 1:W:2, :], im[1:H:2, 0:W:2, :]), axis=2) + return out + + +@moduleinfo( + name="seeinthedark", type="CV/denoising", author="paddlepaddle", author_email="", summary="", version="1.0.0") +class LearningToSeeInDark: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "pd_model/inference_model") + self.cpu_have_loaded = False + self.gpu_have_loaded = False + + def set_device(self, use_gpu=False): + if use_gpu == False: + if not self.cpu_have_loaded: + exe = paddle.static.Executor(paddle.CPUPlace()) + [prog, inputs, outputs] = paddle.static.load_inference_model( + path_prefix=self.pretrained_model, + executor=exe, + model_filename="model.pdmodel", + params_filename="model.pdiparams") + self.cpuexec, self.cpuprog, self.cpuinputs, self.cpuoutputs = exe, prog, inputs, outputs + self.cpu_have_loaded = True + + return self.cpuexec, self.cpuprog, self.cpuinputs, self.cpuoutputs + + else: + if not self.gpu_have_loaded: + exe = paddle.static.Executor(paddle.CUDAPlace(0)) + [prog, inputs, outputs] = paddle.static.load_inference_model( + path_prefix=self.pretrained_model, + executor=exe, + model_filename="model.pdmodel", + params_filename="model.pdiparams") + self.gpuexec, self.gpuprog, self.gpuinputs, self.gpuoutputs = exe, prog, inputs, outputs + self.gpu_have_loaded = True + + return self.gpuexec, self.gpuprog, self.gpuinputs, self.gpuoutputs + + def denoising(self, + images: list = None, + paths: list = None, + output_dir: str = './enlightening_result/', + use_gpu: bool = False, + visualization: bool = True): + ''' + Denoise a raw image in the low-light scene. + + images (list[numpy.ndarray]): data of images, shape of each is [H, W], must be sing-channel image captured by camera. + paths (list[str]): paths to images + output_dir: the dir to save the results + use_gpu: if True, use gpu to perform the computation, otherwise cpu. + visualization: if True, save results in output_dir. 
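+        Returns:
+            results (list[numpy.ndarray]): the enhanced images as uint8 RGB arrays, one per input
+            (the packed Bayer input is cropped to multiples of 512 and upsampled 2x by the network).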
+ ''' + results = [] + paddle.enable_static() + exe, prog, inputs, outputs = self.set_device(use_gpu) + + if images != None: + for raw in images: + input_full = np.expand_dims(pack_raw(raw), axis=0) * 300 + px = input_full.shape[1] // 512 + py = input_full.shape[2] // 512 + rx, ry = px * 512, py * 512 + input_full = input_full[:, :rx, :ry, :] + output = np.random.randn(rx * 2, ry * 2, 3) + input_full = np.minimum(input_full, 1.0) + for i in range(px): + for j in range(py): + input_patch = input_full[:, i * 512:i * 512 + 512, j * 512:j * 512 + 512, :] + result = exe.run(prog, feed={inputs[0]: input_patch}, fetch_list=outputs) + output[i * 512 * 2:i * 512 * 2 + 512 * 2, j * 512 * 2:j * 512 * 2 + 512 * 2, :] = result[0][0] + output = np.minimum(np.maximum(output, 0), 1) + output = output * 255 + output = np.clip(output, 0, 255) + output = output.astype('uint8') + results.append(output) + if paths != None: + for path in paths: + raw = rawpy.imread(path) + input_full = np.expand_dims(pack_raw(raw), axis=0) * 300 + px = input_full.shape[1] // 512 + py = input_full.shape[2] // 512 + rx, ry = px * 512, py * 512 + input_full = input_full[:, :rx, :ry, :] + output = np.random.randn(rx * 2, ry * 2, 3) + input_full = np.minimum(input_full, 1.0) + for i in range(px): + for j in range(py): + input_patch = input_full[:, i * 512:i * 512 + 512, j * 512:j * 512 + 512, :] + result = exe.run(prog, feed={inputs[0]: input_patch}, fetch_list=outputs) + output[i * 512 * 2:i * 512 * 2 + 512 * 2, j * 512 * 2:j * 512 * 2 + 512 * 2, :] = result[0][0] + output = np.minimum(np.maximum(output, 0), 1) + output = output * 255 + output = np.clip(output, 0, 255) + output = output.astype('uint8') + results.append(output) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + cv2.imwrite(os.path.join(output_dir, 'output_{}.png'.format(i)), out[:, :, ::-1]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + self.denoising( + paths=[self.args.input_path], + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.denoising(images=images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='denoising_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument( + '--input_path', type=str, help="path to input raw image, should be raw file captured by camera.") diff --git a/modules/image/image_processing/seeinthedark/requirements.txt b/modules/image/image_processing/seeinthedark/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..32c8259e1c5451cc0f2bec980fa8476ac1708771 --- /dev/null +++ b/modules/image/image_processing/seeinthedark/requirements.txt @@ -0,0 +1 @@ +rawpy diff --git a/modules/image/industrial_application/meter_readings/barometer_reader/requirements.txt b/modules/image/industrial_application/meter_readings/barometer_reader/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..2b801c41fabee3640316419344f036d9c963e36a --- /dev/null +++ b/modules/image/industrial_application/meter_readings/barometer_reader/requirements.txt @@ -0,0 +1 @@ +paddlex == 1.3.0 \ No newline at end of file diff --git a/modules/image/matting/dim_vgg16_matting/README.md b/modules/image/matting/dim_vgg16_matting/README.md new file mode 100644 index 0000000000000000000000000000000000000000..07f8e1ac0d4673c164e692d3854efc077494be44 --- /dev/null +++ b/modules/image/matting/dim_vgg16_matting/README.md @@ -0,0 +1,154 @@ +# dim_vgg16_matting + +|模型名称|dim_vgg16_matting| +| :--- | :---: | +|类别|图像-抠图| +|网络|dim_vgg16| +|数据集|百度自建数据集| +|是否支持Fine-tuning|否| +|模型大小|164MB| +|指标|SAD112.73| +|最新更新日期|2021-12-03| + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 样例结果示例(左为原图,右为效果图): +


+ +- ### 模型介绍 + + - Matting(精细化分割/影像去背/抠图)是指借由计算前景的颜色和透明度,将前景从影像中撷取出来的技术,可用于替换背景、影像合成、视觉特效,在电影工业中被广泛地使用。影像中的每个像素会有代表其前景透明度的值,称作阿法值(Alpha),一张影像中所有阿法值的集合称作阿法遮罩(Alpha Matte),将影像被遮罩所涵盖的部分取出即可完成前景的分离。dim_vgg16_matting是一种需要trimap作为输入的matting模型。 + + + + - 更多详情请参考:[dim_vgg16_matting](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3/contrib/Matting) + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 + + - paddleseg >= 2.3.0 + + +- ### 2、安装 + + - ```shell + $ hub install dim_vgg16_matting + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run dim_vgg16_matting --input_path "/PATH/TO/IMAGE" --trimap_path "/PATH/TO/TRIMAP" + ``` + + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="dim_vgg16_matting") + + result = model.predict(image_list=["/PATH/TO/IMAGE"], trimap_list=["PATH/TO/TRIMAP"]) + print(result) + ``` +- ### 3、API + + - ```python + def predict(self, + image_list, + trimap_list, + visualization, + save_path): + ``` + + - 人像matting预测API,用于将输入图片中的人像分割出来。 + + - 参数 + + - image_list (list(str | numpy.ndarray)):图片输入路径或者BGR格式numpy数据。 + - trimap_list(list(str | numpy.ndarray)):trimap输入路径或者单通道灰度图片。 + - visualization (bool): 是否进行可视化,默认为False。 + - save_path (str): 当visualization为True时,保存图片的路径,默认为"dim_vgg16_matting_output" 。 + + - 返回 + + - result (list(numpy.ndarray)):模型分割结果: + + +## 四、服务部署 + +- PaddleHub Serving可以部署人像matting在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m dim_vgg16_matting + ``` + + - 这样就完成了一个人像matting在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + import time + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))], 'trimaps':[cv2_to_base64(cv2.imread("/PATH/TO/TRIMAP"))]} + + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/dim_vgg16_matting" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + for image in r.json()["results"]['data']: + data = base64_to_cv2(image) + image_path =str(time.time()) + ".png" + cv2.imwrite(image_path, data) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/matting/dim_vgg16_matting/README_en.md b/modules/image/matting/dim_vgg16_matting/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..aaffb278a85f8076fd0ed5d536e2d5870bb478ca --- /dev/null +++ b/modules/image/matting/dim_vgg16_matting/README_en.md @@ -0,0 +1,156 @@ +# dim_vgg16_matting + +|Module Name|dim_vgg16_matting| +| :--- | :---: | +|Category|Matting| +|Network|dim_vgg16| +|Dataset|Baidu self-built dataset| +|Support Fine-tuning|No| +|Module Size|164MB| +|Data Indicators|-| +|Latest update date|2021-12-03| + 
+
+## I. Basic Information
+
+- ### Application Effect Display
+
+  - Sample results:
+


+ +- ### Module Introduction + + - Mating is the technique of extracting foreground from an image by calculating its color and transparency. It is widely used in the film industry to replace background, image composition, and visual effects. Each pixel in the image will have a value that represents its foreground transparency, called Alpha. The set of all Alpha values in an image is called Alpha Matte. The part of the image covered by the mask can be extracted to complete foreground separation. + + + + - For more information, please refer to: [dim_vgg16_matting](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3/contrib/Matting) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 + + - paddleseg >= 2.3.0 + + +- ### 2、Installation + + - ```shell + $ hub install dim_vgg16_matting + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run dim_vgg16_matting --input_path "/PATH/TO/IMAGE" --trimap_path "/PATH/TO/TRIMAP" + ``` + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="dim_vgg16_matting") + + result = model.predict(image_list=["/PATH/TO/IMAGE"], trimap_list=["PATH/TO/TRIMAP"]) + print(result) + ``` +- ### 3、API + + - ```python + def predict(self, + image_list, + trimap_list, + visualization, + save_path): + ``` + + - Prediction API for matting. + + - **Parameter** + + - image_list (list(str | numpy.ndarray)): Image path or image data, ndarray.shape is in the format \[H, W, C\],BGR. + - trimap_list(list(str | numpy.ndarray)): Trimap path or trimap data, ndarray.shape is in the format \[H, W],Gray style. + - visualization (bool): Whether to save the recognition results as picture files, default is False. + - save_path (str): Save path of images, "dim_vgg16_matting_output" by default. + + - **Return** + + - result (list(numpy.ndarray)):The list of model results. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of matting. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m dim_vgg16_matting + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
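+
+  - For example, a minimal GPU startup sketch (the card index `0` is only an example; pick the GPU you actually want to expose) exports the variable and then starts the service:
+
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m dim_vgg16_matting
+    ```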
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + + ```python + import requests + import json + import cv2 + import base64 + import time + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))], 'trimaps':[cv2_to_base64(cv2.imread("/PATH/TO/TRIMAP"))]} + + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/dim_vgg16_matting" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + for image in r.json()["results"]['data']: + data = base64_to_cv2(image) + image_path =str(time.time()) + ".png" + cv2.imwrite(image_path, data) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/matting/dim_vgg16_matting/module.py b/modules/image/matting/dim_vgg16_matting/module.py new file mode 100644 index 0000000000000000000000000000000000000000..2ae3c0d36fbdf6a827bb1093a80c1def67de17cd --- /dev/null +++ b/modules/image/matting/dim_vgg16_matting/module.py @@ -0,0 +1,288 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time +import argparse +from typing import Callable, Union, List, Tuple + +import numpy as np +import cv2 +import scipy +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.module import moduleinfo, runnable, serving +from paddleseg.models import layers + +from dim_vgg16_matting.vgg import VGG16 +import dim_vgg16_matting.processor as P + + +@moduleinfo( + name="dim_vgg16_matting", + type="CV/matting", + author="paddlepaddle", + summary="dim_vgg16_matting is a matting model", + version="1.0.0" +) +class DIMVGG16(nn.Layer): + """ + The DIM implementation based on PaddlePaddle. + + The original article refers to + Ning Xu, et, al. "Deep Image Matting" + (https://arxiv.org/pdf/1908.07919.pdf). + + Args: + stage (int, optional): The stage of model. Defautl: 3. + decoder_input_channels(int, optional): The channel of decoder input. Default: 512. + pretrained(str, optional): The path of pretrianed model. Defautl: None. 
+ + """ + def __init__(self, + stage: int = 3, + decoder_input_channels: int = 512, + pretrained: str = None): + super(DIMVGG16, self).__init__() + + self.backbone = VGG16() + self.pretrained = pretrained + self.stage = stage + + decoder_output_channels = [64, 128, 256, 512] + self.decoder = Decoder( + input_channels=decoder_input_channels, + output_channels=decoder_output_channels) + if self.stage == 2: + for param in self.backbone.parameters(): + param.stop_gradient = True + for param in self.decoder.parameters(): + param.stop_gradient = True + if self.stage >= 2: + self.refine = Refine() + + self.transforms = P.Compose([P.LoadImages(), P.LimitLong(max_long=3840),P.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'dim-vgg16.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def preprocess(self, img: Union[str, np.ndarray] , transforms: Callable, trimap: Union[str, np.ndarray] = None) -> dict: + data = {} + data['img'] = img + if trimap is not None: + data['trimap'] = trimap + data['gt_fields'] = ['trimap'] + data['trans_info'] = [] + data = self.transforms(data) + data['img'] = paddle.to_tensor(data['img']) + data['img'] = data['img'].unsqueeze(0) + if trimap is not None: + data['trimap'] = paddle.to_tensor(data['trimap']) + data['trimap'] = data['trimap'].unsqueeze((0, 1)) + + return data + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + input_shape = paddle.shape(inputs['img'])[-2:] + x = paddle.concat([inputs['img'], inputs['trimap'] / 255], axis=1) + fea_list = self.backbone(x) + + # decoder stage + up_shape = [] + for i in range(5): + up_shape.append(paddle.shape(fea_list[i])[-2:]) + alpha_raw = self.decoder(fea_list, up_shape) + alpha_raw = F.interpolate( + alpha_raw, input_shape, mode='bilinear', align_corners=False) + logit_dict = {'alpha_raw': alpha_raw} + if self.stage < 2: + return logit_dict + + if self.stage >= 2: + # refine stage + refine_input = paddle.concat([inputs['img'], alpha_raw], axis=1) + alpha_refine = self.refine(refine_input) + + # finally alpha + alpha_pred = alpha_refine + alpha_raw + alpha_pred = F.interpolate( + alpha_pred, input_shape, mode='bilinear', align_corners=False) + if not self.training: + alpha_pred = paddle.clip(alpha_pred, min=0, max=1) + logit_dict['alpha_pred'] = alpha_pred + + return alpha_pred + + def predict(self, image_list: list, trimap_list: list, visualization: bool =False, save_path: str = "dim_vgg16_matting_output") -> list: + self.eval() + result= [] + with paddle.no_grad(): + for i, im_path in enumerate(image_list): + trimap = trimap_list[i] if trimap_list is not None else None + data = self.preprocess(img=im_path, transforms=self.transforms, trimap=trimap) + alpha_pred = self.forward(data) + alpha_pred = P.reverse_transform(alpha_pred, data['trans_info']) + alpha_pred = (alpha_pred.numpy()).squeeze() + alpha_pred = (alpha_pred * 255).astype('uint8') + alpha_pred = P.save_alpha_pred(alpha_pred, trimap) + result.append(alpha_pred) + if visualization: + if not os.path.exists(save_path): + os.makedirs(save_path) + img_name = str(time.time()) + '.png' + image_save_path = os.path.join(save_path, img_name) + cv2.imwrite(image_save_path, alpha_pred) + + return result + + @serving + def serving_method(self, images: list, trimaps:list, **kwargs) -> dict: + """ + Run as a service. 
+ """ + images_decode = [P.base64_to_cv2(image) for image in images] + + if trimaps is not None: + trimap_decoder = [cv2.cvtColor(P.base64_to_cv2(trimap), cv2.COLOR_BGR2GRAY) for trimap in trimaps] + else: + trimap_decoder = None + + outputs = self.predict(image_list=images_decode, trimap_list= trimap_decoder, **kwargs) + + serving_data = [P.cv2_to_base64(outputs[i]) for i in range(len(outputs))] + results = {'data': serving_data} + + return results + + @runnable + def run_cmd(self, argvs: list) -> list: + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + if args.trimap_path is not None: + trimap_list = [args.trimap_path] + else: + trimap_list = None + + results = self.predict(image_list=[args.input_path], trimap_list=trimap_list, save_path=args.output_dir, visualization=args.visualization) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + + self.arg_config_group.add_argument( + '--output_dir', type=str, default="dim_vgg16_matting_output", help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=bool, default=True, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--trimap_path', type=str, help="path to trimap.") + + +class Up(nn.Layer): + def __init__(self, input_channels: int, output_channels: int): + super().__init__() + self.conv = layers.ConvBNReLU( + input_channels, + output_channels, + kernel_size=5, + padding=2, + bias_attr=False) + + def forward(self, x: paddle.Tensor, skip: paddle.Tensor, output_shape: list) -> paddle.Tensor: + x = F.interpolate( + x, size=output_shape, mode='bilinear', align_corners=False) + x = x + skip + x = self.conv(x) + x = F.relu(x) + + return x + + +class Decoder(nn.Layer): + def __init__(self, input_channels: int, output_channels: list = [64, 128, 256, 512]): + super().__init__() + self.deconv6 = nn.Conv2D( + input_channels, input_channels, kernel_size=1, bias_attr=False) + self.deconv5 = Up(input_channels, output_channels[-1]) + self.deconv4 = Up(output_channels[-1], output_channels[-2]) + self.deconv3 = Up(output_channels[-2], output_channels[-3]) + self.deconv2 = Up(output_channels[-3], output_channels[-4]) + self.deconv1 = Up(output_channels[-4], 64) + + self.alpha_conv = nn.Conv2D( + 64, 1, kernel_size=5, padding=2, bias_attr=False) + + def forward(self, fea_list: list, shape_list: list) -> paddle.Tensor: + x = fea_list[-1] + x = self.deconv6(x) + x = self.deconv5(x, fea_list[4], shape_list[4]) + x = self.deconv4(x, fea_list[3], shape_list[3]) + x = self.deconv3(x, fea_list[2], shape_list[2]) + x = self.deconv2(x, fea_list[1], shape_list[1]) + x = self.deconv1(x, fea_list[0], shape_list[0]) + alpha = self.alpha_conv(x) + alpha = F.sigmoid(alpha) + + return alpha + + +class Refine(nn.Layer): + def __init__(self): + super().__init__() + self.conv1 = layers.ConvBNReLU( + 4, 64, kernel_size=3, padding=1, bias_attr=False) + self.conv2 = layers.ConvBNReLU( + 64, 64, kernel_size=3, padding=1, bias_attr=False) + self.conv3 = layers.ConvBNReLU( + 64, 64, kernel_size=3, padding=1, bias_attr=False) + self.alpha_pred = layers.ConvBNReLU( + 64, 1, kernel_size=3, padding=1, bias_attr=False) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv1(x) + x = self.conv2(x) + x = self.conv3(x) + alpha = self.alpha_pred(x) + + return alpha diff --git a/modules/image/matting/dim_vgg16_matting/processor.py b/modules/image/matting/dim_vgg16_matting/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..87e499c2960bb0e76ba6e498a2f00ca508ee19a6 --- /dev/null +++ b/modules/image/matting/dim_vgg16_matting/processor.py @@ -0,0 +1,220 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import random +import base64 +from typing import Callable, Union, List, Tuple + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +from paddleseg.transforms import functional +from PIL import Image + + +class Compose: + """ + Do transformation on input data with corresponding pre-processing and augmentation operations. + The shape of input data to all operations is [height, width, channels]. + """ + + def __init__(self, transforms: Callable, to_rgb: bool = True): + if not isinstance(transforms, list): + raise TypeError('The transforms must be a list!') + self.transforms = transforms + self.to_rgb = to_rgb + + def __call__(self, data: dict) -> dict: + + if 'trans_info' not in data: + data['trans_info'] = [] + for op in self.transforms: + data = op(data) + if data is None: + return None + + data['img'] = np.transpose(data['img'], (2, 0, 1)) + for key in data.get('gt_fields', []): + if len(data[key].shape) == 2: + continue + data[key] = np.transpose(data[key], (2, 0, 1)) + + return data + + +class LoadImages: + """ + Read images from image path. + + Args: + to_rgb (bool, optional): If converting image to RGB color space. Default: True. + """ + def __init__(self, to_rgb: bool = True): + self.to_rgb = to_rgb + + def __call__(self, data: dict) -> dict: + + if isinstance(data['img'], str): + data['img'] = cv2.imread(data['img']) + + for key in data.get('gt_fields', []): + if isinstance(data[key], str): + data[key] = cv2.imread(data[key], cv2.IMREAD_UNCHANGED) + # if alpha and trimap has 3 channels, extract one. + if key in ['alpha', 'trimap']: + if len(data[key].shape) > 2: + data[key] = data[key][:, :, 0] + + if self.to_rgb: + data['img'] = cv2.cvtColor(data['img'], cv2.COLOR_BGR2RGB) + for key in data.get('gt_fields', []): + if len(data[key].shape) == 2: + continue + data[key] = cv2.cvtColor(data[key], cv2.COLOR_BGR2RGB) + + return data + + +class LimitLong: + """ + Limit the long edge of image. + + If the long edge is larger than max_long, resize the long edge + to max_long, while scale the short edge proportionally. + + If the long edge is smaller than min_long, resize the long edge + to min_long, while scale the short edge proportionally. + + Args: + max_long (int, optional): If the long edge of image is larger than max_long, + it will be resize to max_long. Default: None. + min_long (int, optional): If the long edge of image is smaller than min_long, + it will be resize to min_long. Default: None. + """ + + def __init__(self, max_long=None, min_long=None): + if max_long is not None: + if not isinstance(max_long, int): + raise TypeError( + "Type of `max_long` is invalid. It should be int, but it is {}" + .format(type(max_long))) + if min_long is not None: + if not isinstance(min_long, int): + raise TypeError( + "Type of `min_long` is invalid. 
It should be int, but it is {}" + .format(type(min_long))) + if (max_long is not None) and (min_long is not None): + if min_long > max_long: + raise ValueError( + '`max_long should not smaller than min_long, but they are {} and {}' + .format(max_long, min_long)) + self.max_long = max_long + self.min_long = min_long + + def __call__(self, data): + h, w = data['img'].shape[:2] + long_edge = max(h, w) + target = long_edge + if (self.max_long is not None) and (long_edge > self.max_long): + target = self.max_long + elif (self.min_long is not None) and (long_edge < self.min_long): + target = self.min_long + + if target != long_edge: + data['trans_info'].append(('resize', data['img'].shape[0:2])) + data['img'] = functional.resize_long(data['img'], target) + for key in data.get('gt_fields', []): + data[key] = functional.resize_long(data[key], target) + + return data + + +class Normalize: + """ + Normalize an image. + + Args: + mean (list, optional): The mean value of a data set. Default: [0.5, 0.5, 0.5]. + std (list, optional): The standard deviation of a data set. Default: [0.5, 0.5, 0.5]. + + Raises: + ValueError: When mean/std is not list or any value in std is 0. + """ + + def __init__(self, mean: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5), std: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5)): + self.mean = mean + self.std = std + if not (isinstance(self.mean, (list, tuple)) + and isinstance(self.std, (list, tuple))): + raise ValueError( + "{}: input type is invalid. It should be list or tuple".format( + self)) + from functools import reduce + if reduce(lambda x, y: x * y, self.std) == 0: + raise ValueError('{}: std is invalid!'.format(self)) + + def __call__(self, data: dict) -> dict: + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + data['img'] = functional.normalize(data['img'], mean, std) + if 'fg' in data.get('gt_fields', []): + data['fg'] = functional.normalize(data['fg'], mean, std) + if 'bg' in data.get('gt_fields', []): + data['bg'] = functional.normalize(data['bg'], mean, std) + + return data + + +def reverse_transform(alpha: paddle.Tensor, trans_info: List[str]): + """recover pred to origin shape""" + for item in trans_info[::-1]: + if item[0] == 'resize': + h, w = item[1][0], item[1][1] + alpha = F.interpolate(alpha, [h, w], mode='bilinear') + elif item[0] == 'padding': + h, w = item[1][0], item[1][1] + alpha = alpha[:, :, 0:h, 0:w] + else: + raise Exception("Unexpected info '{}' in im_info".format(item[0])) + return alpha + +def save_alpha_pred(alpha: np.ndarray, trimap: np.ndarray = None): + """ + The value of alpha is range [0, 1], shape should be [h,w] + """ + if isinstance(trimap, str): + trimap = cv2.imread(trimap, 0) + alpha[trimap == 0] = 0 + alpha[trimap == 255] = 255 + alpha = (alpha).astype('uint8') + return alpha + + +def cv2_to_base64(image: np.ndarray): + """ + Convert data from BGR to base64 format. + """ + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str: str): + """ + Convert data from base64 to BGR format. 
+ """ + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data \ No newline at end of file diff --git a/modules/image/matting/dim_vgg16_matting/requirements.py b/modules/image/matting/dim_vgg16_matting/requirements.py new file mode 100644 index 0000000000000000000000000000000000000000..7df0ef23928361724c3fadb8d87d6a3be869e58b --- /dev/null +++ b/modules/image/matting/dim_vgg16_matting/requirements.py @@ -0,0 +1 @@ +paddleseg >= 2.3.0 diff --git a/modules/image/matting/dim_vgg16_matting/vgg.py b/modules/image/matting/dim_vgg16_matting/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..11cc9ccc51867996d2726522f0e2f1b156895cd7 --- /dev/null +++ b/modules/image/matting/dim_vgg16_matting/vgg.py @@ -0,0 +1,142 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, Tuple + +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D + +from paddleseg.utils import utils + + +class ConvBlock(nn.Layer): + def __init__(self, input_channels: int, output_channels: int, groups: int, name: str = None): + super(ConvBlock, self).__init__() + + self.groups = groups + self._conv_1 = Conv2D( + in_channels=input_channels, + out_channels=output_channels, + kernel_size=3, + stride=1, + padding=1, + weight_attr=ParamAttr(name=name + "1_weights"), + bias_attr=False) + if groups == 2 or groups == 3 or groups == 4: + self._conv_2 = Conv2D( + in_channels=output_channels, + out_channels=output_channels, + kernel_size=3, + stride=1, + padding=1, + weight_attr=ParamAttr(name=name + "2_weights"), + bias_attr=False) + if groups == 3 or groups == 4: + self._conv_3 = Conv2D( + in_channels=output_channels, + out_channels=output_channels, + kernel_size=3, + stride=1, + padding=1, + weight_attr=ParamAttr(name=name + "3_weights"), + bias_attr=False) + if groups == 4: + self._conv_4 = Conv2D( + in_channels=output_channels, + out_channels=output_channels, + kernel_size=3, + stride=1, + padding=1, + weight_attr=ParamAttr(name=name + "4_weights"), + bias_attr=False) + + self._pool = MaxPool2D( + kernel_size=2, stride=2, padding=0, return_mask=True) + + def forward(self, inputs: paddle.Tensor) -> List[paddle.Tensor]: + x = self._conv_1(inputs) + x = F.relu(x) + if self.groups == 2 or self.groups == 3 or self.groups == 4: + x = self._conv_2(x) + x = F.relu(x) + if self.groups == 3 or self.groups == 4: + x = self._conv_3(x) + x = F.relu(x) + if self.groups == 4: + x = self._conv_4(x) + x = F.relu(x) + skip = x + x, max_indices = self._pool(x) + return x, max_indices, skip + + +class VGGNet(nn.Layer): + def __init__(self, input_channels: int = 4, layers: int = 11, pretrained: str = None): + super(VGGNet, self).__init__() + self.pretrained = pretrained + + self.layers = layers + 
self.vgg_configure = { + 11: [1, 1, 2, 2, 2], + 13: [2, 2, 2, 2, 2], + 16: [2, 2, 3, 3, 3], + 19: [2, 2, 4, 4, 4] + } + assert self.layers in self.vgg_configure.keys(), \ + "supported layers are {} but input layer is {}".format( + self.vgg_configure.keys(), layers) + self.groups = self.vgg_configure[self.layers] + + # matting的第一层卷积输入为4通道,初始化是直接初始化为0 + self._conv_block_1 = ConvBlock( + input_channels, 64, self.groups[0], name="conv1_") + self._conv_block_2 = ConvBlock(64, 128, self.groups[1], name="conv2_") + self._conv_block_3 = ConvBlock(128, 256, self.groups[2], name="conv3_") + self._conv_block_4 = ConvBlock(256, 512, self.groups[3], name="conv4_") + self._conv_block_5 = ConvBlock(512, 512, self.groups[4], name="conv5_") + + # 这一层的初始化需要利用vgg fc6的参数转换后进行初始化,可以暂时不考虑初始化 + self._conv_6 = Conv2D( + 512, 512, kernel_size=3, padding=1, bias_attr=False) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + fea_list = [] + ids_list = [] + x, ids, skip = self._conv_block_1(inputs) + fea_list.append(skip) + ids_list.append(ids) + x, ids, skip = self._conv_block_2(x) + fea_list.append(skip) + ids_list.append(ids) + x, ids, skip = self._conv_block_3(x) + fea_list.append(skip) + ids_list.append(ids) + x, ids, skip = self._conv_block_4(x) + fea_list.append(skip) + ids_list.append(ids) + x, ids, skip = self._conv_block_5(x) + fea_list.append(skip) + ids_list.append(ids) + x = F.relu(self._conv_6(x)) + fea_list.append(x) + return fea_list + + +def VGG16(**args): + model = VGGNet(layers=16, **args) + return model \ No newline at end of file diff --git a/modules/image/matting/gfm_resnet34_matting/README.md b/modules/image/matting/gfm_resnet34_matting/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7787fddc230c59995b48f4f1bc8065517d70069b --- /dev/null +++ b/modules/image/matting/gfm_resnet34_matting/README.md @@ -0,0 +1,153 @@ +# gfm_resnet34_matting + +|模型名称|gfm_resnet34_matting| +| :--- | :---: | +|类别|图像-抠图| +|网络|gfm_resnet34| +|数据集|AM-2k| +|是否支持Fine-tuning|否| +|模型大小|562MB| +|指标|SAD10.89| +|最新更新日期|2021-12-03| + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 样例结果示例(左为原图,右为效果图): +


+ +- ### 模型介绍 + + - Matting(精细化分割/影像去背/抠图)是指借由计算前景的颜色和透明度,将前景从影像中撷取出来的技术,可用于替换背景、影像合成、视觉特效,在电影工业中被广泛地使用。影像中的每个像素会有代表其前景透明度的值,称作阿法值(Alpha),一张影像中所有阿法值的集合称作阿法遮罩(Alpha Matte),将影像被遮罩所涵盖的部分取出即可完成前景的分离。gfm_resnet34_matting可生成抠图结果。 + + + + - 更多详情请参考:[gfm_resnet34_matting](https://github.com/JizhiziLi/GFM) + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 + + - paddleseg >= 2.3.0 + + +- ### 2、安装 + + - ```shell + $ hub install gfm_resnet34_matting + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run gfm_resnet34_matting --input_path "/PATH/TO/IMAGE" + ``` + + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="gfm_resnet34_matting") + result = model.predict(["/PATH/TO/IMAGE"]) + print(result) + ``` +- ### 3、API + + - ```python + def predict(self, + image_list, + visualization, + save_path): + ``` + + - 动物matting预测API,用于将输入图片中的动物分割出来。 + + - 参数 + + - image_list (list(str | numpy.ndarray)):图片输入路径或者BGR格式numpy数据。 + - visualization (bool): 是否进行可视化,默认为False。 + - save_path (str): 当visualization为True时,保存图片的路径,默认为"gfm_resnet34_matting_output"。 + + - 返回 + + - result (list(numpy.ndarray)):模型分割结果: + + +## 四、服务部署 + +- PaddleHub Serving可以部署动物matting在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m gfm_resnet34_matting + ``` + + - 这样就完成了一个动物matting在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + import time + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/gfm_resnet34_matting" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + for image in r.json()["results"]['data']: + data = base64_to_cv2(image) + image_path =str(time.time()) + ".png" + cv2.imwrite(image_path, data) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + diff --git a/modules/image/matting/gfm_resnet34_matting/README_en.md b/modules/image/matting/gfm_resnet34_matting/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..c16a3657b47489845ac44fcadaf99baec55b676e --- /dev/null +++ b/modules/image/matting/gfm_resnet34_matting/README_en.md @@ -0,0 +1,154 @@ +# gfm_resnet34_matting + +|Module Name|gfm_resnet34_matting| +| :--- | :---: | +|Category|Image Matting| +|Network|gfm_resnet34| +|Dataset|AM-2k| +|Support Fine-tuning|No| +|Module Size|562MB| +|Data Indicators|SAD10.89| +|Latest update date|2021-12-03| + + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +


+ +- ### Module Introduction + + - Mating is the technique of extracting foreground from an image by calculating its color and transparency. It is widely used in the film industry to replace background, image composition, and visual effects. Each pixel in the image will have a value that represents its foreground transparency, called Alpha. The set of all Alpha values in an image is called Alpha Matte. The part of the image covered by the mask can be extracted to complete foreground separation. + + + + - For more information, please refer to: [gfm_resnet34_matting](https://github.com/JizhiziLi/GFM) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 + + - paddleseg >= 2.3.0 + + +- ### 2、Installation + + - ```shell + $ hub install gfm_resnet34_matting + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run gfm_resnet34_matting --input_path "/PATH/TO/IMAGE" + ``` + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="gfm_resnet34_matting") + result = model.predict(["/PATH/TO/IMAGE"]) + print(result) + + ``` +- ### 3、API + + - ```python + def predict(self, + image_list, + visualization, + save_path): + ``` + + - Prediction API for matting. + + - **Parameter** + + - image_list (list(str | numpy.ndarray)): Image path or image data, ndarray.shape is in the format \[H, W, C\],BGR. + - visualization (bool): Whether to save the recognition results as picture files, default is False. + - save_path (str): Save path of images, "modnet_mobilenetv2_matting_output" by default. + + - **Return** + + - result (list(numpy.ndarray)):The list of model results. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of matting. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m gfm_resnet34_matting + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
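+
+  - As a sketch (assuming a CUDA-enabled PaddlePaddle build; adjust the card index to your machine), the variable can be set right before the startup command:
+
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m gfm_resnet34_matting
+    ```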
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + + ```python + import requests + import json + import cv2 + import base64 + import time + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/gfm_resnet34_matting" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + for image in r.json()["results"]['data']: + data = base64_to_cv2(image) + image_path =str(time.time()) + ".png" + cv2.imwrite(image_path, data) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/matting/gfm_resnet34_matting/gfm.py b/modules/image/matting/gfm_resnet34_matting/gfm.py new file mode 100644 index 0000000000000000000000000000000000000000..4b7306c2282467ec80bbf8f1c7540afb25a1b72f --- /dev/null +++ b/modules/image/matting/gfm_resnet34_matting/gfm.py @@ -0,0 +1,447 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Callable, Union, List, Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from gfm_resnet34_matting.resnet import resnet34 + + +def conv3x3(in_planes: int, out_planes: int, stride: int = 1) -> Callable: + """3x3 convolution with padding""" + return nn.Conv2D(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias_attr=False) + + +def conv_up_psp(in_channels: int, out_channels: int, up_sample: float) -> Callable: + return nn.Sequential(nn.Conv2D(in_channels, out_channels, 3, padding=1), + nn.BatchNorm2D(out_channels), + nn.ReLU(), + nn.Upsample(scale_factor=up_sample, mode='bilinear',align_corners = False)) + + +def build_bb(in_channels: int, mid_channels: int, out_channels: int) -> Callable: + return nn.Sequential(nn.Conv2D(in_channels, mid_channels, 3, dilation=2, + padding=2), nn.BatchNorm2D(mid_channels), nn. + ReLU(), nn.Conv2D(mid_channels, out_channels, 3, + dilation=2, padding=2), nn.BatchNorm2D(out_channels), nn.ReLU(), nn.Conv2D(out_channels, + out_channels, 3, dilation=2, padding=2), nn.BatchNorm2D( + out_channels), nn.ReLU()) + + +def build_decoder(in_channels: int, mid_channels_1: int, mid_channels_2: int, out_channels: int, + last_bnrelu: bool, upsample_flag: bool) -> Callable: + layers = [] + layers += [nn.Conv2D(in_channels, mid_channels_1, 3, padding=1), nn. + BatchNorm2D(mid_channels_1), nn.ReLU(), nn.Conv2D(mid_channels_1, mid_channels_2, 3, padding=1), nn. 
+ BatchNorm2D(mid_channels_2), nn.ReLU(), nn.Conv2D(mid_channels_2, out_channels, 3, padding=1)] + if last_bnrelu: + layers += [nn.BatchNorm2D(out_channels), nn.ReLU()] + + if upsample_flag: + layers += [nn.Upsample(scale_factor=2, mode='bilinear')] + + sequential = nn.Sequential(*layers) + return sequential + + +class BasicBlock(nn.Layer): + expansion = 1 + def __init__(self, inplanes: int, planes: int, stride: int = 1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2D(planes) + self.relu = nn.ReLU() + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2D(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x: paddle.Tensor) -> Callable: + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + out = self.conv2(out) + out = self.bn2(out) + if self.downsample is not None: + residual = self.downsample(x) + out += residual + out = self.relu(out) + return out + + +class PSPModule(nn.Layer): + + def __init__(self, features: paddle.Tensor, out_features: int = 1024, sizes: List[int] = (1, 2, 3, 6)): + super().__init__() + #self.stages = [] + self.stages = nn.LayerList([self._make_stage(features, size) for + size in sizes]) + self.bottleneck = nn.Conv2D(features * (len(sizes) + 1), + out_features, kernel_size=1) + self.relu = nn.ReLU() + + def _make_stage(self, features: paddle.Tensor, size: int) -> Callable: + prior = nn.AdaptiveAvgPool2D(output_size=(size, size)) + conv = nn.Conv2D(features, features, kernel_size=1, bias_attr=False) + return nn.Sequential(prior, conv) + + def forward(self, feats: paddle.Tensor) -> paddle.Tensor: + h, w = feats.shape[2], feats.shape[3] + priors = [F.upsample(stage(feats), size=(h, w), mode='bilinear',align_corners = True) for stage in self.stages] + [feats] + bottle = self.bottleneck(paddle.concat(priors, 1)) + return self.relu(bottle) + + +class SELayer(nn.Layer): + + def __init__(self, channel: int, reduction: int = 4): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2D(1) + self.fc = nn.Sequential(nn.Linear(channel, channel // reduction, + bias_attr=False), nn.ReLU(), nn. + Linear(channel // reduction, channel, bias_attr=False), nn. + Sigmoid()) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + return x * y.expand_as(x) + + +class GFM(nn.Layer): + """ + The GFM implementation based on PaddlePaddle. + + The original article refers to: + Bridging Composite and Real: Towards End-to-end Deep Image Matting [IJCV-2021] + Main network file (GFM). 
+ + Copyright (c) 2021, Jizhizi Li (jili8515@uni.sydney.edu.au) + Licensed under the MIT License (see LICENSE for details) + Github repo: https://github.com/JizhiziLi/GFM + Paper link (Arxiv): https://arxiv.org/abs/2010.16188 + + """ + + def __init__(self): + super().__init__() + self.backbone = 'r34_2b' + self.rosta = 'TT' + if self.rosta == 'TT': + self.gd_channel = 3 + else: + self.gd_channel = 2 + if self.backbone == 'r34_2b': + self.resnet = resnet34() + self.encoder0 = nn.Sequential(nn.Conv2D(3, 64, 3, padding=1), + nn.BatchNorm2D(64), nn.ReLU()) + self.encoder1 = self.resnet.layer1 + self.encoder2 = self.resnet.layer2 + self.encoder3 = self.resnet.layer3 + self.encoder4 = self.resnet.layer4 + self.encoder5 = nn.Sequential(nn.MaxPool2D(2, 2, ceil_mode=True + ), BasicBlock(512, 512), BasicBlock(512, 512), BasicBlock( + 512, 512)) + self.encoder6 = nn.Sequential(nn.MaxPool2D(2, 2, ceil_mode=True + ), BasicBlock(512, 512), BasicBlock(512, 512), BasicBlock( + 512, 512)) + self.psp_module = PSPModule(512, 512, (1, 3, 5)) + self.psp6 = conv_up_psp(512, 512, 2) + self.psp5 = conv_up_psp(512, 512, 4) + self.psp4 = conv_up_psp(512, 256, 8) + self.psp3 = conv_up_psp(512, 128, 16) + self.psp2 = conv_up_psp(512, 64, 32) + self.psp1 = conv_up_psp(512, 64, 32) + self.decoder6_g = build_decoder(1024, 512, 512, 512, True, True) + self.decoder5_g = build_decoder(1024, 512, 512, 512, True, True) + self.decoder4_g = build_decoder(1024, 512, 512, 256, True, True) + self.decoder3_g = build_decoder(512, 256, 256, 128, True, True) + self.decoder2_g = build_decoder(256, 128, 128, 64, True, True) + self.decoder1_g = build_decoder(128, 64, 64, 64, True, False) + self.bridge_block = build_bb(512, 512, 512) + self.decoder6_f = build_decoder(1024, 512, 512, 512, True, True) + self.decoder5_f = build_decoder(1024, 512, 512, 512, True, True) + self.decoder4_f = build_decoder(1024, 512, 512, 256, True, True) + self.decoder3_f = build_decoder(512, 256, 256, 128, True, True) + self.decoder2_f = build_decoder(256, 128, 128, 64, True, True) + self.decoder1_f = build_decoder(128, 64, 64, 64, True, False) + if self.rosta == 'RIM': + self.decoder0_g_tt = nn.Sequential(nn.Conv2D(64, 3, 3, + padding=1)) + self.decoder0_g_ft = nn.Sequential(nn.Conv2D(64, 2, 3, + padding=1)) + self.decoder0_g_bt = nn.Sequential(nn.Conv2D(64, 2, 3, + padding=1)) + self.decoder0_f_tt = nn.Sequential(nn.Conv2D(64, 1, 3, + padding=1)) + self.decoder0_f_ft = nn.Sequential(nn.Conv2D(64, 1, 3, + padding=1)) + self.decoder0_f_bt = nn.Sequential(nn.Conv2D(64, 1, 3, + padding=1)) + else: + self.decoder0_g = nn.Sequential(nn.Conv2D(64, self. + gd_channel, 3, padding=1)) + self.decoder0_f = nn.Sequential(nn.Conv2D(64, 1, 3, padding=1)) + if self.backbone == 'r34': + self.encoder0 = nn.Sequential(self.resnet.conv1, self.resnet. + bn1, self.resnet.relu) + + self.encoder1 = nn.Sequential(self.resnet.maxpool, self.resnet. 
+ layer1) + self.encoder2 = self.resnet.layer2 + self.encoder3 = self.resnet.layer3 + self.encoder4 = self.resnet.layer4 + self.psp_module = PSPModule(512, 512, (1, 3, 5)) + self.psp4 = conv_up_psp(512, 256, 2) + self.psp3 = conv_up_psp(512, 128, 4) + self.psp2 = conv_up_psp(512, 64, 8) + self.psp1 = conv_up_psp(512, 64, 16) + self.decoder4_g = build_decoder(1024, 512, 512, 256, True, True) + self.decoder3_g = build_decoder(512, 256, 256, 128, True, True) + self.decoder2_g = build_decoder(256, 128, 128, 64, True, True) + self.decoder1_g = build_decoder(128, 64, 64, 64, True, True) + self.bridge_block = build_bb(512, 512, 512) + self.decoder4_f = build_decoder(1024, 512, 512, 256, True, True) + self.decoder3_f = build_decoder(512, 256, 256, 128, True, True) + self.decoder2_f = build_decoder(256, 128, 128, 64, True, True) + self.decoder1_f = build_decoder(128, 64, 64, 64, True, True) + if self.rosta == 'RIM': + self.decoder0_g_tt = build_decoder(128, 64, 64, 3, False, True) + self.decoder0_g_ft = build_decoder(128, 64, 64, 2, False, True) + self.decoder0_g_bt = build_decoder(128, 64, 64, 2, False, True) + self.decoder0_f_tt = build_decoder(128, 64, 64, 1, False, True) + self.decoder0_f_ft = build_decoder(128, 64, 64, 1, False, True) + self.decoder0_f_bt = build_decoder(128, 64, 64, 1, False, True) + else: + self.decoder0_g = build_decoder(128, 64, 64, self. + gd_channel, False, True) + self.decoder0_f = build_decoder(128, 64, 64, 1, False, True) + elif self.backbone == 'r101': + self.encoder0 = nn.Sequential(self.resnet.conv1, self.resnet. + bn1, self.resnet.relu) + self.encoder1 = nn.Sequential(self.resnet.maxpool, self.resnet. + layer1) + self.encoder2 = self.resnet.layer2 + self.encoder3 = self.resnet.layer3 + self.encoder4 = self.resnet.layer4 + self.psp_module = PSPModule(2048, 2048, (1, 3, 5)) + self.bridge_block = build_bb(2048, 2048, 2048) + self.psp4 = conv_up_psp(2048, 1024, 2) + self.psp3 = conv_up_psp(2048, 512, 4) + self.psp2 = conv_up_psp(2048, 256, 8) + self.psp1 = conv_up_psp(2048, 64, 16) + self.decoder4_g = build_decoder(4096, 2048, 1024, 1024, True, True) + self.decoder3_g = build_decoder(2048, 1024, 512, 512, True, True) + self.decoder2_g = build_decoder(1024, 512, 256, 256, True, True) + self.decoder1_g = build_decoder(512, 256, 128, 64, True, True) + self.decoder4_f = build_decoder(4096, 2048, 1024, 1024, True, True) + self.decoder3_f = build_decoder(2048, 1024, 512, 512, True, True) + self.decoder2_f = build_decoder(1024, 512, 256, 256, True, True) + self.decoder1_f = build_decoder(512, 256, 128, 64, True, True) + if self.rosta == 'RIM': + self.decoder0_g_tt = build_decoder(128, 64, 64, 3, False, True) + self.decoder0_g_ft = build_decoder(128, 64, 64, 2, False, True) + self.decoder0_g_bt = build_decoder(128, 64, 64, 2, False, True) + self.decoder0_f_tt = build_decoder(128, 64, 64, 1, False, True) + self.decoder0_f_ft = build_decoder(128, 64, 64, 1, False, True) + self.decoder0_f_bt = build_decoder(128, 64, 64, 1, False, True) + else: + self.decoder0_g = build_decoder(128, 64, 64, self. + gd_channel, False, True) + self.decoder0_f = build_decoder(128, 64, 64, 1, False, True) + elif self.backbone == 'd121': + self.encoder0 = nn.Sequential(self.densenet.features.conv0, + self.densenet.features.norm0, self.densenet.features.relu0) + self.encoder1 = nn.Sequential(self.densenet.features. + denseblock1, self.densenet.features.transition1) + self.encoder2 = nn.Sequential(self.densenet.features. 
+ denseblock2, self.densenet.features.transition2) + self.encoder3 = nn.Sequential(self.densenet.features. + denseblock3, self.densenet.features.transition3) + self.encoder4 = nn.Sequential(self.densenet.features. + denseblock4, nn.Conv2D(1024, 512, 3, padding=1), nn. + BatchNorm2D(512), nn.ReLU(), + nn.MaxPool2D(2, 2, ceil_mode=True)) + self.psp_module = PSPModule(512, 512, (1, 3, 5)) + self.psp4 = conv_up_psp(512, 256, 2) + self.psp3 = conv_up_psp(512, 128, 4) + self.psp2 = conv_up_psp(512, 64, 8) + self.psp1 = conv_up_psp(512, 64, 16) + self.decoder4_g = build_decoder(1024, 512, 512, 256, True, True) + self.decoder3_g = build_decoder(512, 256, 256, 128, True, True) + self.decoder2_g = build_decoder(256, 128, 128, 64, True, True) + self.decoder1_g = build_decoder(128, 64, 64, 64, True, True) + self.bridge_block = build_bb(512, 512, 512) + self.decoder4_f = build_decoder(1024, 512, 512, 256, True, True) + self.decoder3_f = build_decoder(768, 256, 256, 128, True, True) + self.decoder2_f = build_decoder(384, 128, 128, 64, True, True) + self.decoder1_f = build_decoder(192, 64, 64, 64, True, True) + if self.rosta == 'RIM': + self.decoder0_g_tt = build_decoder(128, 64, 64, 3, False, True) + self.decoder0_g_ft = build_decoder(128, 64, 64, 2, False, True) + self.decoder0_g_bt = build_decoder(128, 64, 64, 2, False, True) + self.decoder0_f_tt = build_decoder(128, 64, 64, 1, False, True) + self.decoder0_f_ft = build_decoder(128, 64, 64, 1, False, True) + self.decoder0_f_bt = build_decoder(128, 64, 64, 1, False, True) + else: + self.decoder0_g = build_decoder(128, 64, 64, self. + gd_channel, False, True) + self.decoder0_f = build_decoder(128, 64, 64, 1, False, True) + if self.rosta == 'RIM': + self.rim = nn.Sequential(nn.Conv2D(3, 16, 1), SELayer(16), nn. + Conv2D(16, 1, 1)) + + def forward(self, input: paddle.Tensor) -> List[paddle.Tensor]: + glance_sigmoid = paddle.zeros(input.shape) + glance_sigmoid.stop_gradient = True + focus_sigmoid = paddle.zeros(input.shape) + focus_sigmoid.stop_gradient = True + fusion_sigmoid = paddle.zeros(input.shape) + fusion_sigmoid.stop_gradient = True + e0 = self.encoder0(input) + e1 = self.encoder1(e0) + e2 = self.encoder2(e1) + e3 = self.encoder3(e2) + e4 = self.encoder4(e3) + if self.backbone == 'r34_2b': + e5 = self.encoder5(e4) + e6 = self.encoder6(e5) + psp = self.psp_module(e6) + d6_g = self.decoder6_g(paddle.concat((psp, e6), 1)) + d5_g = self.decoder5_g(paddle.concat((self.psp6(psp), + d6_g), 1)) + d4_g = self.decoder4_g(paddle.concat((self.psp5(psp), + d5_g), 1)) + else: + psp = self.psp_module(e4) + d4_g = self.decoder4_g(paddle.concat((psp, e4), 1)) + d3_g = self.decoder3_g(paddle.concat((self.psp4(psp), d4_g), 1)) + d2_g = self.decoder2_g(paddle.concat((self.psp3(psp), d3_g), 1)) + d1_g = self.decoder1_g(paddle.concat((self.psp2(psp), d2_g), 1)) + if self.backbone == 'r34_2b': + if self.rosta == 'RIM': + d0_g_tt = self.decoder0_g_tt(d1_g) + d0_g_ft = self.decoder0_g_ft(d1_g) + d0_g_bt = self.decoder0_g_bt(d1_g) + else: + d0_g = self.decoder0_g(d1_g) + elif self.rosta == 'RIM': + d0_g_tt = self.decoder0_g_tt(paddle.concat((self.psp1(psp + ), d1_g), 1)) + d0_g_ft = self.decoder0_g_ft(paddle.concat((self.psp1(psp + ), d1_g), 1)) + d0_g_bt = self.decoder0_g_bt(paddle.concat((self.psp1(psp + ), d1_g), 1)) + else: + d0_g = self.decoder0_g(paddle.concat((self.psp1(psp), + d1_g), 1)) + if self.rosta == 'RIM': + glance_sigmoid_tt = F.sigmoid(d0_g_tt) + glance_sigmoid_ft = F.sigmoid(d0_g_ft) + glance_sigmoid_bt = F.sigmoid(d0_g_bt) + else: + glance_sigmoid = 
F.sigmoid(d0_g) + if self.backbone == 'r34_2b': + bb = self.bridge_block(e6) + d6_f = self.decoder6_f(paddle.concat((bb, e6), 1)) + d5_f = self.decoder5_f(paddle.concat((d6_f, e5), 1)) + d4_f = self.decoder4_f(paddle.concat((d5_f, e4), 1)) + else: + bb = self.bridge_block(e4) + d4_f = self.decoder4_f(paddle.concat((bb, e4), 1)) + d3_f = self.decoder3_f(paddle.concat((d4_f, e3), 1)) + d2_f = self.decoder2_f(paddle.concat((d3_f, e2), 1)) + d1_f = self.decoder1_f(paddle.concat((d2_f, e1), 1)) + if self.backbone == 'r34_2b': + if self.rosta == 'RIM': + d0_f_tt = self.decoder0_f_tt(d1_f) + d0_f_ft = self.decoder0_f_ft(d1_f) + d0_f_bt = self.decoder0_f_bt(d1_f) + else: + d0_f = self.decoder0_f(d1_f) + elif self.rosta == 'RIM': + d0_f_tt = self.decoder0_f_tt(paddle.concat((d1_f, e0), 1)) + d0_f_ft = self.decoder0_f_ft(paddle.concat((d1_f, e0), 1)) + d0_f_bt = self.decoder0_f_bt(paddle.concat((d1_f, e0), 1)) + else: + d0_f = self.decoder0_f(paddle.concat((d1_f, e0), 1)) + if self.rosta == 'RIM': + focus_sigmoid_tt = F.sigmoid(d0_f_tt) + focus_sigmoid_ft = F.sigmoid(d0_f_ft) + focus_sigmoid_bt = F.sigmoid(d0_f_bt) + else: + focus_sigmoid = F.sigmoid(d0_f) + if self.rosta == 'RIM': + fusion_sigmoid_tt = collaborative_matting('TT', + glance_sigmoid_tt, focus_sigmoid_tt) + fusion_sigmoid_ft = collaborative_matting('FT', + glance_sigmoid_ft, focus_sigmoid_ft) + fusion_sigmoid_bt = collaborative_matting('BT', + glance_sigmoid_bt, focus_sigmoid_bt) + fusion_sigmoid = paddle.concat((fusion_sigmoid_tt, + fusion_sigmoid_ft, fusion_sigmoid_bt), 1) + fusion_sigmoid = self.rim(fusion_sigmoid) + return [[glance_sigmoid_tt, focus_sigmoid_tt, fusion_sigmoid_tt + ], [glance_sigmoid_ft, focus_sigmoid_ft, fusion_sigmoid_ft], + [glance_sigmoid_bt, focus_sigmoid_bt, fusion_sigmoid_bt], + fusion_sigmoid] + else: + fusion_sigmoid = collaborative_matting(self.rosta, + glance_sigmoid, focus_sigmoid) + return glance_sigmoid, focus_sigmoid, fusion_sigmoid + + +def collaborative_matting(rosta, glance_sigmoid, focus_sigmoid): + if rosta == 'TT': + values = paddle.max(glance_sigmoid, axis=1) + index = paddle.argmax(glance_sigmoid, axis=1) + index = index[:, None, :, :].float() + bg_mask = index.clone() + bg_mask[bg_mask == 2] = 1 + bg_mask = 1 - bg_mask + trimap_mask = index.clone() + trimap_mask[trimap_mask == 2] = 0 + fg_mask = index.clone() + fg_mask[fg_mask == 1] = 0 + fg_mask[fg_mask == 2] = 1 + focus_sigmoid = focus_sigmoid.cpu() + trimap_mask = trimap_mask.cpu() + fg_mask = fg_mask.cpu() + fusion_sigmoid = focus_sigmoid * trimap_mask + fg_mask + elif rosta == 'BT': + values = paddle.max(glance_sigmoid, axis=1) + index = paddle.argmax(glance_sigmoid, axis=1) + index = index[:, None, :, :].float() + fusion_sigmoid = index - focus_sigmoid + fusion_sigmoid[fusion_sigmoid < 0] = 0 + else: + values = paddle.max(glance_sigmoid, axis=1) + index = paddle.argmax(glance_sigmoid, axis=1) + index = index[:, None, :, :].float() + fusion_sigmoid = index + focus_sigmoid + fusion_sigmoid[fusion_sigmoid > 1] = 1 + return fusion_sigmoid + + +if __name__ == "__main__": + model = GFM() + x = paddle.ones([1,3, 256,256]) + result = model(x) + print(x) \ No newline at end of file diff --git a/modules/image/matting/gfm_resnet34_matting/module.py b/modules/image/matting/gfm_resnet34_matting/module.py new file mode 100644 index 0000000000000000000000000000000000000000..f78082fc46da8dadc569ab1db0b78011e4b80bc7 --- /dev/null +++ b/modules/image/matting/gfm_resnet34_matting/module.py @@ -0,0 +1,176 @@ +# Copyright (c) 2021 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import os +import time +import argparse +from typing import Callable, Union, List, Tuple + +from PIL import Image +import numpy as np +import cv2 +import scipy +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.transforms as T +from paddlehub.module.module import moduleinfo, runnable, serving +from skimage.transform import resize + +from gfm_resnet34_matting.gfm import GFM +import gfm_resnet34_matting.processor as P + + +@moduleinfo( + name="gfm_resnet34_matting", + type="CV/matting", + author="paddlepaddle", + author_email="", + summary="gfm_resnet34_matting is an animal matting model.", + version="1.0.0") +class GFMResNet34(nn.Layer): + """ + The GFM implementation based on PaddlePaddle. + + The original article refers to: + Bridging Composite and Real: Towards End-to-end Deep Image Matting [IJCV-2021] + Main network file (GFM). + + Github repo: https://github.com/JizhiziLi/GFM + Paper link (Arxiv): https://arxiv.org/abs/2010.16188 + """ + + def __init__(self, pretrained: str=None): + super(GFMResNet34, self).__init__() + + self.model = GFM() + self.resize_by_short = P.ResizeByShort(1080) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.model.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.model.set_dict(model_dict) + print("load pretrained parameters success") + + def preprocess(self, img: Union[str, np.ndarray], h: int, w: int) -> paddle.Tensor: + if min(h, w) > 1080: + img = self.resize_by_short(img) + tensor_img = self.scale_image(img, h, w) + return tensor_img + + def scale_image(self, img: np.ndarray, h: int, w: int, ratio: float = 1/3): + new_h = min(1600, h - (h % 32)) + new_w = min(1600, w - (w % 32)) + resize_h = int(h*ratio) + resize_w = int(w*ratio) + new_h = min(1600, resize_h - (resize_h % 32)) + new_w = min(1600, resize_w - (resize_w % 32)) + + scale_img = resize(img,(new_h,new_w)) * 255 + tensor_img = paddle.to_tensor(scale_img.astype(np.float32)[np.newaxis, :, :, :]) + tensor_img = tensor_img.transpose([0,3,1,2]) + return tensor_img + + + def inference_img_scale(self, input: paddle.Tensor) -> List[paddle.Tensor]: + pred_global, pred_local, pred_fusion = self.model(input) + pred_global = P.gen_trimap_from_segmap_e2e(pred_global) + pred_local = pred_local.numpy()[0,0,:,:] + pred_fusion = pred_fusion.numpy()[0,0,:,:] + return pred_global, pred_local, pred_fusion + + + def predict(self, image_list: list, visualization: bool =True, save_path: str = "gfm_resnet34_matting_output"): + self.model.eval() + result = [] + with paddle.no_grad(): + for i, img in enumerate(image_list): + if isinstance(img, str): + img = np.array(Image.open(img))[:,:,:3] + else: + img = img[:,:,::-1] + h, w, _ = img.shape + tensor_img = self.preprocess(img, h, w) + pred_glance_1, 
pred_focus_1, pred_fusion_1 = self.inference_img_scale(tensor_img) + pred_glance_1 = resize(pred_glance_1,(h,w)) * 255.0 + tensor_img = self.scale_image(img, h, w, 1/2) + pred_glance_2, pred_focus_2, pred_fusion_2 = self.inference_img_scale(tensor_img) + pred_focus_2 = resize(pred_focus_2,(h,w)) + pred_fusion = P.get_masked_local_from_global_test(pred_glance_1, pred_focus_2) + pred_fusion = (pred_fusion * 255).astype(np.uint8) + if visualization: + if not os.path.exists(save_path): + os.makedirs(save_path) + img_name = str(time.time()) + '.png' + image_save_path = os.path.join(save_path, img_name) + cv2.imwrite(image_save_path, pred_fusion) + result.append(pred_fusion) + return result + + @serving + def serving_method(self, images: str, **kwargs): + """ + Run as a service. + """ + images_decode = [P.base64_to_cv2(image) for image in images] + outputs = self.predict(image_list=images_decode, **kwargs) + serving_data = [P.cv2_to_base64(outputs[i]) for i in range(len(outputs))] + results = {'data': serving_data} + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + + results = self.predict(image_list=[args.input_path], save_path=args.output_dir, visualization=args.visualization) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + + self.arg_config_group.add_argument( + '--output_dir', type=str, default="gfm_resnet34_matting_output", help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=bool, default=True, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + diff --git a/modules/image/matting/gfm_resnet34_matting/processor.py b/modules/image/matting/gfm_resnet34_matting/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..52969d0229111d4cc60ccc02d0d6e39a09231e95 --- /dev/null +++ b/modules/image/matting/gfm_resnet34_matting/processor.py @@ -0,0 +1,84 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 + +import cv2 +import numpy as np +from paddleseg.transforms import functional + + +class ResizeByLong: + """ + Resize the long side of an image to given size, and then scale the other side proportionally. 
+ + Args: + long_size (int): The target size of long side. + """ + + def __init__(self, long_size): + self.long_size = long_size + + def __call__(self, data): + data = functional.resize_long(data, self.long_size) + return data + + +class ResizeByShort: + """ + Resize the short side of an image to given size, and then scale the other side proportionally. + + Args: + short_size (int): The target size of short side. + """ + + def __init__(self, short_size): + self.short_size = short_size + + def __call__(self, data): + + data = functional.resize_short(data, self.short_size) + + return data + +def gen_trimap_from_segmap_e2e(segmap): + trimap = np.argmax(segmap, axis=1)[0] + trimap = trimap.astype(np.int64) + trimap[trimap==1]=128 + trimap[trimap==2]=255 + return trimap.astype(np.uint8) + +def get_masked_local_from_global_test(global_result, local_result): + weighted_global = np.ones(global_result.shape) + weighted_global[global_result==255] = 0 + weighted_global[global_result==0] = 0 + fusion_result = global_result*(1.-weighted_global)/255+local_result*weighted_global + return fusion_result + +def cv2_to_base64(image: np.ndarray): + """ + Convert data from BGR to base64 format. + """ + data = cv2.imencode('.png', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str: str): + """ + Convert data from base64 to BGR format. + """ + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data \ No newline at end of file diff --git a/modules/image/matting/gfm_resnet34_matting/resnet.py b/modules/image/matting/gfm_resnet34_matting/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..5d2ec70cb6ccd419cdc7725cf35eb267df25dca9 --- /dev/null +++ b/modules/image/matting/gfm_resnet34_matting/resnet.py @@ -0,0 +1,201 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +from typing import Type, Any, Callable, Union, List, Optional + + +def conv3x3(in_planes: int, out_planes: int, stride: int=1, groups: int=1, + dilation: int=1) ->paddle.nn.Conv2D: + """3x3 convolution with padding""" + return nn.Conv2D(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, dilation=dilation, bias_attr=False) + + +def conv1x1(in_planes: int, out_planes: int, stride: int=1) ->paddle.nn.Conv2D: + """1x1 convolution""" + return nn.Conv2D(in_planes, out_planes, kernel_size=1, stride=stride, + bias_attr=False) + + +class BasicBlock(nn.Layer): + expansion: int = 1 + + def __init__(self, inplanes: int, planes: int, stride: int=1, + downsample: Optional[nn.Layer]=None, groups: int=1, base_width: + int=64, dilation: int=1, norm_layer: Optional[Callable[..., paddle. 
+ nn.Layer]]=None) ->None: + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2D + if groups != 1 or base_width != 64: + raise ValueError( + 'BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError( + 'Dilation > 1 not supported in BasicBlock') + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = paddle.nn.ReLU() + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + identity = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + out = self.conv2(out) + out = self.bn2(out) + if self.downsample is not None: + identity = self.downsample(x) + out += identity + out = self.relu(out) + return out + + +class Bottleneck(nn.Layer): + expansion: int = 4 + + def __init__(self, inplanes: int, planes: int, stride: int=1, + downsample: Optional[nn.Layer]=None, groups: int=1, base_width: + int=64, dilation: int=1, norm_layer: Optional[Callable[..., paddle. + nn.Layer]]=None) ->None: + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2D + width = int(planes * (base_width / 64.0)) * groups + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = paddle.nn.ReLU() + self.downsample = downsample + self.stride = stride + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + identity = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + out = self.conv3(out) + out = self.bn3(out) + if self.downsample is not None: + identity = self.downsample(x) + out += identity + out = self.relu(out) + return out + + +class ResNet(nn.Layer): + + def __init__(self, block: Type[Union[BasicBlock, Bottleneck]], layers: + List[int], num_classes: int=1000, zero_init_residual: bool=False, + groups: int=1, width_per_group: int=64, + replace_stride_with_dilation: Optional[List[bool]]=None, norm_layer: + Optional[Callable[..., paddle.nn.Layer]]=None) ->None: + super(ResNet, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2D + self._norm_layer = norm_layer + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError( + 'replace_stride_with_dilation should be None or a 3-element tuple, got {}' + .format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2D(3, self.inplanes, kernel_size=7, stride=2, + padding=3, bias_attr=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = paddle.nn.ReLU() + self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, + dilate=replace_stride_with_dilation[2]) + self.avgpool = nn.AdaptiveAvgPool2D((1, 1)) + self.fc = 
nn.Linear(512 * block.expansion, num_classes) + + def _make_layer(self, block: Type[Union[BasicBlock, Bottleneck]], + planes: int, blocks: int, stride: int=1, dilate: bool=False + ) ->paddle.nn.Sequential: + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential(conv1x1(self.inplanes, planes * + block.expansion, stride), norm_layer(planes * block.expansion)) + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, self + .groups, self.base_width, previous_dilation, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, groups=self.groups, + base_width=self.base_width, dilation=self.dilation, + norm_layer=norm_layer)) + return nn.Sequential(*layers) + + def _forward_impl(self, x: paddle.Tensor) ->paddle.Tensor: + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x= paddle.flatten(x,1) + x = self.fc(x) + return x + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + return self._forward_impl(x) + + +def _resnet(arch: str, block: Type[Union[BasicBlock, Bottleneck]], layers: + List[int], pretrained: bool, progress: bool, **kwargs: Any) ->ResNet: + model = ResNet(block, layers, **kwargs) + return model + + +def resnet34(pretrained: bool=False, progress: bool=True, **kwargs: Any + ) ->ResNet: + """ResNet-34 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, + progress, **kwargs) diff --git a/modules/image/matting/modnet_hrnet18_matting/README.md b/modules/image/matting/modnet_hrnet18_matting/README.md new file mode 100644 index 0000000000000000000000000000000000000000..704635055d6b00a81806987bbd9cd487f09e50b0 --- /dev/null +++ b/modules/image/matting/modnet_hrnet18_matting/README.md @@ -0,0 +1,155 @@ +# modnet_hrnet18_matting + +|模型名称|modnet_hrnet18_matting| +| :--- | :---: | +|类别|图像-抠图| +|网络|modnet_hrnet18| +|数据集|百度自建数据集| +|是否支持Fine-tuning|否| +|模型大小|60MB| +|指标|SAD77.96| +|最新更新日期|2021-12-03| + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 样例结果示例(左为原图,右为效果图): +

+ +- ### 模型介绍 + + - Matting(精细化分割/影像去背/抠图)是指借由计算前景的颜色和透明度,将前景从影像中撷取出来的技术,可用于替换背景、影像合成、视觉特效,在电影工业中被广泛地使用。影像中的每个像素会有代表其前景透明度的值,称作阿法值(Alpha),一张影像中所有阿法值的集合称作阿法遮罩(Alpha Matte),将影像被遮罩所涵盖的部分取出即可完成前景的分离。modnet_hrnet18_matting可生成抠图结果。 + + + + - 更多详情请参考:[modnet_hrnet18_matting](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3/contrib/Matting) + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 + + - paddleseg >= 2.3.0 + + +- ### 2、安装 + + - ```shell + $ hub install modnet_hrnet18_matting + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run modnet_hrnet18_matting --input_path "/PATH/TO/IMAGE" + ``` + + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="modnet_hrnet18_matting") + + result = model.predict(["/PATH/TO/IMAGE"]) + print(result) + ``` +- ### 3、API + + - ```python + def predict(self, + image_list, + trimap_list, + visualization, + save_path): + ``` + + - 人像matting预测API,用于将输入图片中的人像分割出来。 + + - 参数 + + - image_list (list(str | numpy.ndarray)):图片输入路径或者BGR格式numpy数据。 + - trimap_list(list(str | numpy.ndarray)):trimap输入路径或者单通道灰度图格式图片。 + - visualization (bool): 是否进行可视化,默认为False。 + - save_path (str): 当visualization为True时,保存图片的路径,默认为"modnet_hrnet18_matting_output"。 + + - 返回 + + - result (list(numpy.ndarray)):模型分割结果: + + +## 四、服务部署 + +- PaddleHub Serving可以部署人像matting在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m modnet_hrnet18_matting + ``` + + - 这样就完成了一个人像matting在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + import time + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/modnet_hrnet18_matting" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + for image in r.json()["results"]['data']: + data = base64_to_cv2(image) + image_path =str(time.time()) + ".png" + cv2.imwrite(image_path, data) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/matting/modnet_hrnet18_matting/README_en.md b/modules/image/matting/modnet_hrnet18_matting/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..17524b51b31174b66a01fd13fdb0165d97f46223 --- /dev/null +++ b/modules/image/matting/modnet_hrnet18_matting/README_en.md @@ -0,0 +1,156 @@ +# modnet_hrnet18_matting + +|Module Name|modnet_hrnet18_matting| +| :--- | :---: | +|Category|Image Segmentation| +|Network|modnet_mobilenetv2| +|Dataset|Baidu self-built dataset| +|Support Fine-tuning|No| +|Module Size|60MB| +|Data Indicators|SAD77.96| +|Latest update date|2021-12-03| + + +## I. 
Basic Information + +- ### Application Effect Display + + - Sample results: +

+ +- ### Module Introduction + + - Mating is the technique of extracting foreground from an image by calculating its color and transparency. It is widely used in the film industry to replace background, image composition, and visual effects. Each pixel in the image will have a value that represents its foreground transparency, called Alpha. The set of all Alpha values in an image is called Alpha Matte. The part of the image covered by the mask can be extracted to complete foreground separation. + + + + - For more information, please refer to: [modnet_hrnet18_matting](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3/contrib/Matting) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 + + - paddleseg >= 2.3.0 + + +- ### 2、Installation + + - ```shell + $ hub install modnet_hrnet18_matting + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run modnet_hrnet18_matting --input_path "/PATH/TO/IMAGE" + ``` + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="modnet_hrnet18_matting") + + result = model.predict(["/PATH/TO/IMAGE"]) + print(result) + ``` +- ### 3、API + + - ```python + def predict(self, + image_list, + trimap_list, + visualization, + save_path): + ``` + + - Prediction API for matting. + + - **Parameter** + + - image_list (list(str | numpy.ndarray)): Image path or image data, ndarray.shape is in the format \[H, W, C\],BGR. + - trimap_list(list(str | numpy.ndarray)): Trimap path or trimap data, ndarray.shape is in the format \[H, W],gray. Default is None + - visualization (bool): Whether to save the recognition results as picture files, default is False. + - save_path (str): Save path of images, "modnet_hrnet18_matting_output" by default. + + - **Return** + + - result (list(numpy.ndarray)):The list of model results. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of matting. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m modnet_hrnet18_matting + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
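  - For example (a minimal sketch of the NOTE above; the GPU id `0` is only an illustrative assumption, use whichever card is available on your machine), the variable can be exported in the same shell before starting the service:

    - ```shell
      $ export CUDA_VISIBLE_DEVICES=0
      $ hub serving start -m modnet_hrnet18_matting
      ```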
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + + ```python + import requests + import json + import cv2 + import base64 + import time + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/modnet_hrnet18_matting" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + for image in r.json()["results"]['data']: + data = base64_to_cv2(image) + image_path =str(time.time()) + ".png" + cv2.imwrite(image_path, data) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/matting/modnet_hrnet18_matting/hrnet.py b/modules/image/matting/modnet_hrnet18_matting/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..22cbd377bfd2c5c789f42c273de603d89fd8a24a --- /dev/null +++ b/modules/image/matting/modnet_hrnet18_matting/hrnet.py @@ -0,0 +1,652 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddleseg.cvlibs import manager, param_init +from paddleseg.models import layers +from paddleseg.utils import utils + +__all__ = ["HRNet_W18"] + + +class HRNet(nn.Layer): + """ + The HRNet implementation based on PaddlePaddle. + + The original article refers to + Jingdong Wang, et, al. "HRNet:Deep High-Resolution Representation Learning for Visual Recognition" + (https://arxiv.org/pdf/1908.07919.pdf). + + Args: + pretrained (str, optional): The path of pretrained model. + stage1_num_modules (int, optional): Number of modules for stage1. Default 1. + stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4). + stage1_num_channels (list, optional): Number of channels per branch for stage1. Default (64). + stage2_num_modules (int, optional): Number of modules for stage2. Default 1. + stage2_num_blocks (list, optional): Number of blocks per module for stage2. Default (4, 4). + stage2_num_channels (list, optional): Number of channels per branch for stage2. Default (18, 36). + stage3_num_modules (int, optional): Number of modules for stage3. Default 4. + stage3_num_blocks (list, optional): Number of blocks per module for stage3. Default (4, 4, 4). + stage3_num_channels (list, optional): Number of channels per branch for stage3. Default [18, 36, 72). + stage4_num_modules (int, optional): Number of modules for stage4. Default 3. + stage4_num_blocks (list, optional): Number of blocks per module for stage4. Default (4, 4, 4, 4). 
+ stage4_num_channels (list, optional): Number of channels per branch for stage4. Default (18, 36, 72. 144). + has_se (bool, optional): Whether to use Squeeze-and-Excitation module. Default False. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + """ + + def __init__(self, + input_channels: int=3, + pretrained: int = None, + stage1_num_modules: int = 1, + stage1_num_blocks: list = (4, ), + stage1_num_channels: list = (64, ), + stage2_num_modules: int = 1, + stage2_num_blocks: list = (4, 4), + stage2_num_channels: list = (18, 36), + stage3_num_modules: int = 4, + stage3_num_blocks: list = (4, 4, 4), + stage3_num_channels: list = (18, 36, 72), + stage4_num_modules: int = 3, + stage4_num_blocks: list = (4, 4, 4, 4), + stage4_num_channels: list = (18, 36, 72, 144), + has_se: bool = False, + align_corners: bool = False, + padding_same: bool = True): + super(HRNet, self).__init__() + self.pretrained = pretrained + self.stage1_num_modules = stage1_num_modules + self.stage1_num_blocks = stage1_num_blocks + self.stage1_num_channels = stage1_num_channels + self.stage2_num_modules = stage2_num_modules + self.stage2_num_blocks = stage2_num_blocks + self.stage2_num_channels = stage2_num_channels + self.stage3_num_modules = stage3_num_modules + self.stage3_num_blocks = stage3_num_blocks + self.stage3_num_channels = stage3_num_channels + self.stage4_num_modules = stage4_num_modules + self.stage4_num_blocks = stage4_num_blocks + self.stage4_num_channels = stage4_num_channels + self.has_se = has_se + self.align_corners = align_corners + + self.feat_channels = [i for i in stage4_num_channels] + self.feat_channels = [64] + self.feat_channels + + self.conv_layer1_1 = layers.ConvBNReLU( + in_channels=input_channels, + out_channels=64, + kernel_size=3, + stride=2, + padding=1 if not padding_same else 'same', + bias_attr=False) + + self.conv_layer1_2 = layers.ConvBNReLU( + in_channels=64, + out_channels=64, + kernel_size=3, + stride=2, + padding=1 if not padding_same else 'same', + bias_attr=False) + + self.la1 = Layer1( + num_channels=64, + num_blocks=self.stage1_num_blocks[0], + num_filters=self.stage1_num_channels[0], + has_se=has_se, + name="layer2", + padding_same=padding_same) + + self.tr1 = TransitionLayer( + in_channels=[self.stage1_num_channels[0] * 4], + out_channels=self.stage2_num_channels, + name="tr1", + padding_same=padding_same) + + self.st2 = Stage( + num_channels=self.stage2_num_channels, + num_modules=self.stage2_num_modules, + num_blocks=self.stage2_num_blocks, + num_filters=self.stage2_num_channels, + has_se=self.has_se, + name="st2", + align_corners=align_corners, + padding_same=padding_same) + + self.tr2 = TransitionLayer( + in_channels=self.stage2_num_channels, + out_channels=self.stage3_num_channels, + name="tr2", + padding_same=padding_same) + self.st3 = Stage( + num_channels=self.stage3_num_channels, + num_modules=self.stage3_num_modules, + num_blocks=self.stage3_num_blocks, + num_filters=self.stage3_num_channels, + has_se=self.has_se, + name="st3", + align_corners=align_corners, + padding_same=padding_same) + + self.tr3 = TransitionLayer( + in_channels=self.stage3_num_channels, + out_channels=self.stage4_num_channels, + name="tr3", + padding_same=padding_same) + self.st4 = Stage( + num_channels=self.stage4_num_channels, + num_modules=self.stage4_num_modules, + num_blocks=self.stage4_num_blocks, + num_filters=self.stage4_num_channels, + 
has_se=self.has_se, + name="st4", + align_corners=align_corners, + padding_same=padding_same) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + feat_list = [] + conv1 = self.conv_layer1_1(x) + feat_list.append(conv1) + conv2 = self.conv_layer1_2(conv1) + + la1 = self.la1(conv2) + + tr1 = self.tr1([la1]) + st2 = self.st2(tr1) + + tr2 = self.tr2(st2) + st3 = self.st3(tr2) + + tr3 = self.tr3(st3) + st4 = self.st4(tr3) + + feat_list = feat_list + st4 + + return feat_list + + +class Layer1(nn.Layer): + def __init__(self, + num_channels: int, + num_filters: int, + num_blocks: int, + has_se: bool = False, + name: str = None, + padding_same: bool = True): + super(Layer1, self).__init__() + + self.bottleneck_block_list = [] + + for i in range(num_blocks): + bottleneck_block = self.add_sublayer( + "bb_{}_{}".format(name, i + 1), + BottleneckBlock( + num_channels=num_channels if i == 0 else num_filters * 4, + num_filters=num_filters, + has_se=has_se, + stride=1, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1), + padding_same=padding_same)) + self.bottleneck_block_list.append(bottleneck_block) + + def forward(self, x: paddle.Tensor): + conv = x + for block_func in self.bottleneck_block_list: + conv = block_func(conv) + return conv + + +class TransitionLayer(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + name: str = None, + padding_same: bool = True): + super(TransitionLayer, self).__init__() + + num_in = len(in_channels) + num_out = len(out_channels) + self.conv_bn_func_list = [] + for i in range(num_out): + residual = None + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + layers.ConvBNReLU( + in_channels=in_channels[i], + out_channels=out_channels[i], + kernel_size=3, + padding=1 if not padding_same else 'same', + bias_attr=False)) + else: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + layers.ConvBNReLU( + in_channels=in_channels[-1], + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding=1 if not padding_same else 'same', + bias_attr=False)) + self.conv_bn_func_list.append(residual) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + for idx, conv_bn_func in enumerate(self.conv_bn_func_list): + if conv_bn_func is None: + outs.append(x[idx]) + else: + if idx < len(x): + outs.append(conv_bn_func(x[idx])) + else: + outs.append(conv_bn_func(x[-1])) + return outs + + +class Branches(nn.Layer): + def __init__(self, + num_blocks: int, + in_channels: int, + out_channels: int, + has_se: bool = False, + name: str = None, + padding_same: bool = True): + super(Branches, self).__init__() + + self.basic_block_list = [] + + for i in range(len(out_channels)): + self.basic_block_list.append([]) + for j in range(num_blocks[i]): + in_ch = in_channels[i] if j == 0 else out_channels[i] + basic_block_func = self.add_sublayer( + "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1), + BasicBlock( + num_channels=in_ch, + num_filters=out_channels[i], + has_se=has_se, + name=name + '_branch_layer_' + str(i + 1) + '_' + + str(j + 1), + padding_same=padding_same)) + self.basic_block_list[i].append(basic_block_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + for idx, input in enumerate(x): + conv = input + for basic_block_func in self.basic_block_list[idx]: + conv = basic_block_func(conv) + outs.append(conv) + return outs + + +class BottleneckBlock(nn.Layer): + def __init__(self, + 
num_channels: int, + num_filters: int, + has_se: bool, + stride: int = 1, + downsample: bool = False, + name:str = None, + padding_same: bool = True): + super(BottleneckBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = layers.ConvBNReLU( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=1, + bias_attr=False) + + self.conv2 = layers.ConvBNReLU( + in_channels=num_filters, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding=1 if not padding_same else 'same', + bias_attr=False) + + self.conv3 = layers.ConvBN( + in_channels=num_filters, + out_channels=num_filters * 4, + kernel_size=1, + bias_attr=False) + + if self.downsample: + self.conv_down = layers.ConvBN( + in_channels=num_channels, + out_channels=num_filters * 4, + kernel_size=1, + bias_attr=False) + + if self.has_se: + self.se = SELayer( + num_channels=num_filters * 4, + num_filters=num_filters * 4, + reduction_ratio=16, + name=name + '_fc') + + self.add = layers.Add() + self.relu = layers.Activation("relu") + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv3 = self.se(conv3) + + y = self.add(conv3, residual) + y = self.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + num_channels: int, + num_filters: int, + stride: int = 1, + has_se: bool = False, + downsample: bool = False, + name: str = None, + padding_same: bool = True): + super(BasicBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = layers.ConvBNReLU( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding=1 if not padding_same else 'same', + bias_attr=False) + self.conv2 = layers.ConvBN( + in_channels=num_filters, + out_channels=num_filters, + kernel_size=3, + padding=1 if not padding_same else 'same', + bias_attr=False) + + if self.downsample: + self.conv_down = layers.ConvBNReLU( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=1, + bias_attr=False) + + if self.has_se: + self.se = SELayer( + num_channels=num_filters, + num_filters=num_filters, + reduction_ratio=16, + name=name + '_fc') + + self.add = layers.Add() + self.relu = layers.Activation("relu") + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv2 = self.se(conv2) + + y = self.add(conv2, residual) + y = self.relu(y) + return y + + +class SELayer(nn.Layer): + def __init__(self, num_channels: int, num_filters: int, reduction_ratio: int, name: str = None): + super(SELayer, self).__init__() + + self.pool2d_gap = nn.AdaptiveAvgPool2D(1) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = nn.Linear( + num_channels, + med_ch, + weight_attr=paddle.ParamAttr( + initializer=nn.initializer.Uniform(-stdv, stdv))) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = nn.Linear( + med_ch, + num_filters, + weight_attr=paddle.ParamAttr( + initializer=nn.initializer.Uniform(-stdv, stdv))) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + pool = self.pool2d_gap(x) + pool = paddle.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + squeeze = 
F.relu(squeeze) + excitation = self.excitation(squeeze) + excitation = F.sigmoid(excitation) + excitation = paddle.reshape( + excitation, shape=[-1, self._num_channels, 1, 1]) + out = x * excitation + return out + + +class Stage(nn.Layer): + def __init__(self, + num_channels: int, + num_modules: int, + num_blocks: int, + num_filters: int, + has_se: bool = False, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False, + padding_same: bool = True): + super(Stage, self).__init__() + + self._num_modules = num_modules + + self.stage_func_list = [] + for i in range(num_modules): + if i == num_modules - 1 and not multi_scale_output: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + multi_scale_output=False, + name=name + '_' + str(i + 1), + align_corners=align_corners, + padding_same=padding_same)) + else: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + name=name + '_' + str(i + 1), + align_corners=align_corners, + padding_same=padding_same)) + + self.stage_func_list.append(stage_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = x + for idx in range(self._num_modules): + out = self.stage_func_list[idx](out) + return out + + +class HighResolutionModule(nn.Layer): + def __init__(self, + num_channels: int, + num_blocks: int, + num_filters: int, + has_se: bool = False, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False, + padding_same: bool = True): + super(HighResolutionModule, self).__init__() + + self.branches_func = Branches( + num_blocks=num_blocks, + in_channels=num_channels, + out_channels=num_filters, + has_se=has_se, + name=name, + padding_same=padding_same) + + self.fuse_func = FuseLayers( + in_channels=num_filters, + out_channels=num_filters, + multi_scale_output=multi_scale_output, + name=name, + align_corners=align_corners, + padding_same=padding_same) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = self.branches_func(x) + out = self.fuse_func(out) + return out + + +class FuseLayers(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False, + padding_same: bool = True): + super(FuseLayers, self).__init__() + + self._actual_ch = len(in_channels) if multi_scale_output else 1 + self._in_channels = in_channels + self.align_corners = align_corners + + self.residual_func_list = [] + for i in range(self._actual_ch): + for j in range(len(in_channels)): + if j > i: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}".format(name, i + 1, j + 1), + layers.ConvBN( + in_channels=in_channels[j], + out_channels=out_channels[i], + kernel_size=1, + bias_attr=False)) + self.residual_func_list.append(residual_func) + elif j < i: + pre_num_filters = in_channels[j] + for k in range(i - j): + if k == i - j - 1: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format( + name, i + 1, j + 1, k + 1), + layers.ConvBN( + in_channels=pre_num_filters, + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding=1 if not padding_same else 'same', + bias_attr=False)) + pre_num_filters = out_channels[i] + else: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format( + name, 
i + 1, j + 1, k + 1), + layers.ConvBNReLU( + in_channels=pre_num_filters, + out_channels=out_channels[j], + kernel_size=3, + stride=2, + padding=1 if not padding_same else 'same', + bias_attr=False)) + pre_num_filters = out_channels[j] + self.residual_func_list.append(residual_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + residual_func_idx = 0 + for i in range(self._actual_ch): + residual = x[i] + residual_shape = paddle.shape(residual)[-2:] + for j in range(len(self._in_channels)): + if j > i: + y = self.residual_func_list[residual_func_idx](x[j]) + residual_func_idx += 1 + + y = F.interpolate( + y, + residual_shape, + mode='bilinear', + align_corners=self.align_corners) + residual = residual + y + elif j < i: + y = x[j] + for k in range(i - j): + y = self.residual_func_list[residual_func_idx](y) + residual_func_idx += 1 + + residual = residual + y + + residual = F.relu(residual) + outs.append(residual) + + return outs + + +def HRNet_W18(**kwargs): + model = HRNet( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[18, 36], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[18, 36, 72, 144], + **kwargs) + return model \ No newline at end of file diff --git a/modules/image/matting/modnet_hrnet18_matting/module.py b/modules/image/matting/modnet_hrnet18_matting/module.py new file mode 100644 index 0000000000000000000000000000000000000000..dd1edbbf7931a92f2ffc03aaf51a35df8b5f2f58 --- /dev/null +++ b/modules/image/matting/modnet_hrnet18_matting/module.py @@ -0,0 +1,513 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time +import argparse +from typing import Callable, Union, List, Tuple + +import numpy as np +import cv2 +import scipy +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.module import moduleinfo, runnable, serving + +from modnet_hrnet18_matting.hrnet import HRNet_W18 +import modnet_hrnet18_matting.processor as P + + +@moduleinfo( + name="modnet_hrnet18_matting", + type="CV/matting", + author="paddlepaddle", + summary="modnet_hrnet18_matting is a matting model", + version="1.0.0" +) +class MODNetHRNet18(nn.Layer): + """ + The MODNet implementation based on PaddlePaddle. + + The original article refers to + Zhanghan Ke, et, al. "Is a Green Screen Really Necessary for Real-Time Portrait Matting?" + (https://arxiv.org/pdf/2011.11961.pdf). + + Args: + hr_channels(int, optional): The channels of high resolutions branch. Defautl: None. + pretrained(str, optional): The path of pretrianed model. Defautl: None. 
+ """ + + def __init__(self, hr_channels:int = 32, pretrained=None): + super(MODNetHRNet18, self).__init__() + + self.backbone = HRNet_W18() + self.pretrained = pretrained + + self.head = MODNetHead( + hr_channels=hr_channels, backbone_channels=self.backbone.feat_channels) + self.blurer = GaussianBlurLayer(1, 3) + self.transforms = P.Compose([P.LoadImages(), P.ResizeByShort(), P.ResizeToIntMult(), P.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'modnet-hrnet_w18.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def preprocess(self, img: Union[str, np.ndarray] , transforms: Callable, trimap: Union[str, np.ndarray] = None): + data = {} + data['img'] = img + if trimap is not None: + data['trimap'] = trimap + data['gt_fields'] = ['trimap'] + data['trans_info'] = [] + data = self.transforms(data) + data['img'] = paddle.to_tensor(data['img']) + data['img'] = data['img'].unsqueeze(0) + if trimap is not None: + data['trimap'] = paddle.to_tensor(data['trimap']) + data['trimap'] = data['trimap'].unsqueeze((0, 1)) + + return data + + def forward(self, inputs: dict) -> paddle.Tensor: + x = inputs['img'] + feat_list = self.backbone(x) + y = self.head(inputs=inputs, feat_list=feat_list) + return y + + def predict(self, image_list: list, trimap_list: list = None, visualization: bool =False, save_path: str = "modnet_hrnet18_matting_output") -> list: + self.eval() + result= [] + with paddle.no_grad(): + for i, im_path in enumerate(image_list): + trimap = trimap_list[i] if trimap_list is not None else None + data = self.preprocess(img=im_path, transforms=self.transforms, trimap=trimap) + alpha_pred = self.forward(data) + alpha_pred = P.reverse_transform(alpha_pred, data['trans_info']) + alpha_pred = (alpha_pred.numpy()).squeeze() + alpha_pred = (alpha_pred * 255).astype('uint8') + alpha_pred = P.save_alpha_pred(alpha_pred, trimap) + result.append(alpha_pred) + if visualization: + if not os.path.exists(save_path): + os.makedirs(save_path) + img_name = str(time.time()) + '.png' + image_save_path = os.path.join(save_path, img_name) + cv2.imwrite(image_save_path, alpha_pred) + + return result + + @serving + def serving_method(self, images: list, trimaps:list = None, **kwargs) -> dict: + """ + Run as a service. + """ + images_decode = [P.base64_to_cv2(image) for image in images] + if trimaps is not None: + trimap_decoder = [cv2.cvtColor(P.base64_to_cv2(trimap), cv2.COLOR_BGR2GRAY) for trimap in trimaps] + else: + trimap_decoder = None + + outputs = self.predict(image_list=images_decode, trimap_list= trimap_decoder, **kwargs) + serving_data = [P.cv2_to_base64(outputs[i]) for i in range(len(outputs))] + results = {'data': serving_data} + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + if args.trimap_path is not None: + trimap_list = [args.trimap_path] + else: + trimap_list = None + + results = self.predict(image_list=[args.input_path], trimap_list=trimap_list, save_path=args.output_dir, visualization=args.visualization) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + + self.arg_config_group.add_argument( + '--output_dir', type=str, default="modnet_hrnet18_matting_output", help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=bool, default=True, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--trimap_path', type=str, default=None, help="path to image.") + + + +class MODNetHead(nn.Layer): + """ + Segmentation head. + """ + def __init__(self, hr_channels: int, backbone_channels: int): + super().__init__() + + self.lr_branch = LRBranch(backbone_channels) + self.hr_branch = HRBranch(hr_channels, backbone_channels) + self.f_branch = FusionBranch(hr_channels, backbone_channels) + + def forward(self, inputs: paddle.Tensor, feat_list: list): + pred_semantic, lr8x, [enc2x, enc4x] = self.lr_branch(feat_list) + pred_detail, hr2x = self.hr_branch(inputs['img'], enc2x, enc4x, lr8x) + pred_matte = self.f_branch(inputs['img'], lr8x, hr2x) + + if self.training: + logit_dict = { + 'semantic': pred_semantic, + 'detail': pred_detail, + 'matte': pred_matte + } + return logit_dict + else: + return pred_matte + + + +class FusionBranch(nn.Layer): + def __init__(self, hr_channels: int, enc_channels: int): + super().__init__() + self.conv_lr4x = Conv2dIBNormRelu( + enc_channels[2], hr_channels, 5, stride=1, padding=2) + + self.conv_f2x = Conv2dIBNormRelu( + 2 * hr_channels, hr_channels, 3, stride=1, padding=1) + self.conv_f = nn.Sequential( + Conv2dIBNormRelu( + hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1), + Conv2dIBNormRelu( + int(hr_channels / 2), + 1, + 1, + stride=1, + padding=0, + with_ibn=False, + with_relu=False)) + + def forward(self, img: paddle.Tensor, lr8x: paddle.Tensor, hr2x: paddle.Tensor): + lr4x = F.interpolate( + lr8x, scale_factor=2, mode='bilinear', align_corners=False) + lr4x = self.conv_lr4x(lr4x) + lr2x = F.interpolate( + lr4x, scale_factor=2, mode='bilinear', align_corners=False) + + f2x = self.conv_f2x(paddle.concat((lr2x, hr2x), axis=1)) + f = F.interpolate( + f2x, scale_factor=2, mode='bilinear', align_corners=False) + f = self.conv_f(paddle.concat((f, img), axis=1)) + pred_matte = F.sigmoid(f) + + return pred_matte + + +class HRBranch(nn.Layer): + """ + High Resolution Branch of MODNet + """ + + def __init__(self, hr_channels: int, enc_channels:int): + super().__init__() + + self.tohr_enc2x = Conv2dIBNormRelu( + enc_channels[0], hr_channels, 1, stride=1, padding=0) + self.conv_enc2x = Conv2dIBNormRelu( + hr_channels + 3, hr_channels, 3, stride=2, padding=1) + + self.tohr_enc4x = Conv2dIBNormRelu( + enc_channels[1], hr_channels, 1, stride=1, padding=0) + self.conv_enc4x = Conv2dIBNormRelu( + 2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1) + + self.conv_hr4x = 
nn.Sequential( + Conv2dIBNormRelu( + 2 * hr_channels + enc_channels[2] + 3, + 2 * hr_channels, + 3, + stride=1, + padding=1), + Conv2dIBNormRelu( + 2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu( + 2 * hr_channels, hr_channels, 3, stride=1, padding=1)) + + self.conv_hr2x = nn.Sequential( + Conv2dIBNormRelu( + 2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu( + 2 * hr_channels, hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1)) + + self.conv_hr = nn.Sequential( + Conv2dIBNormRelu( + hr_channels + 3, hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu( + hr_channels, + 1, + 1, + stride=1, + padding=0, + with_ibn=False, + with_relu=False)) + + def forward(self, img: paddle.Tensor, enc2x: paddle.Tensor, enc4x: paddle.Tensor, lr8x: paddle.Tensor): + img2x = F.interpolate( + img, scale_factor=1 / 2, mode='bilinear', align_corners=False) + img4x = F.interpolate( + img, scale_factor=1 / 4, mode='bilinear', align_corners=False) + + enc2x = self.tohr_enc2x(enc2x) + hr4x = self.conv_enc2x(paddle.concat((img2x, enc2x), axis=1)) + + enc4x = self.tohr_enc4x(enc4x) + hr4x = self.conv_enc4x(paddle.concat((hr4x, enc4x), axis=1)) + + lr4x = F.interpolate( + lr8x, scale_factor=2, mode='bilinear', align_corners=False) + hr4x = self.conv_hr4x(paddle.concat((hr4x, lr4x, img4x), axis=1)) + + hr2x = F.interpolate( + hr4x, scale_factor=2, mode='bilinear', align_corners=False) + hr2x = self.conv_hr2x(paddle.concat((hr2x, enc2x), axis=1)) + + pred_detail = None + if self.training: + hr = F.interpolate( + hr2x, scale_factor=2, mode='bilinear', align_corners=False) + hr = self.conv_hr(paddle.concat((hr, img), axis=1)) + pred_detail = F.sigmoid(hr) + + return pred_detail, hr2x + + +class LRBranch(nn.Layer): + """ + Low Resolution Branch of MODNet + """ + def __init__(self, backbone_channels: int): + super().__init__() + self.se_block = SEBlock(backbone_channels[4], reduction=4) + self.conv_lr16x = Conv2dIBNormRelu( + backbone_channels[4], backbone_channels[3], 5, stride=1, padding=2) + self.conv_lr8x = Conv2dIBNormRelu( + backbone_channels[3], backbone_channels[2], 5, stride=1, padding=2) + self.conv_lr = Conv2dIBNormRelu( + backbone_channels[2], + 1, + 3, + stride=2, + padding=1, + with_ibn=False, + with_relu=False) + + def forward(self, feat_list: list): + enc2x, enc4x, enc32x = feat_list[0], feat_list[1], feat_list[4] + + enc32x = self.se_block(enc32x) + lr16x = F.interpolate( + enc32x, scale_factor=2, mode='bilinear', align_corners=False) + lr16x = self.conv_lr16x(lr16x) + lr8x = F.interpolate( + lr16x, scale_factor=2, mode='bilinear', align_corners=False) + lr8x = self.conv_lr8x(lr8x) + + pred_semantic = None + if self.training: + lr = self.conv_lr(lr8x) + pred_semantic = F.sigmoid(lr) + + return pred_semantic, lr8x, [enc2x, enc4x] + + +class IBNorm(nn.Layer): + """ + Combine Instance Norm and Batch Norm into One Layer + """ + + def __init__(self, in_channels: int): + super().__init__() + self.bnorm_channels = in_channels // 2 + self.inorm_channels = in_channels - self.bnorm_channels + + self.bnorm = nn.BatchNorm2D(self.bnorm_channels) + self.inorm = nn.InstanceNorm2D(self.inorm_channels) + + def forward(self, x): + bn_x = self.bnorm(x[:, :self.bnorm_channels, :, :]) + in_x = self.inorm(x[:, self.bnorm_channels:, :, :]) + + return paddle.concat((bn_x, in_x), 1) + + +class Conv2dIBNormRelu(nn.Layer): + """ + Convolution + 
IBNorm + Relu + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + dilation:int = 1, + groups: int = 1, + bias_attr: paddle.ParamAttr = None, + with_ibn: bool = True, + with_relu: bool = True): + + super().__init__() + + layers = [ + nn.Conv2D( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias_attr=bias_attr) + ] + + if with_ibn: + layers.append(IBNorm(out_channels)) + + if with_relu: + layers.append(nn.ReLU()) + + self.layers = nn.Sequential(*layers) + + def forward(self, x: paddle.Tensor): + return self.layers(x) + + +class SEBlock(nn.Layer): + """ + SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf + """ + + def __init__(self, num_channels: int, reduction:int = 1): + super().__init__() + self.pool = nn.AdaptiveAvgPool2D(1) + self.conv = nn.Sequential( + nn.Conv2D( + num_channels, + int(num_channels // reduction), + 1, + bias_attr=False), nn.ReLU(), + nn.Conv2D( + int(num_channels // reduction), + num_channels, + 1, + bias_attr=False), nn.Sigmoid()) + + def forward(self, x: paddle.Tensor): + w = self.pool(x) + w = self.conv(w) + return w * x + + +class GaussianBlurLayer(nn.Layer): + """ Add Gaussian Blur to a 4D tensors + This layer takes a 4D tensor of {N, C, H, W} as input. + The Gaussian blur will be performed in given channel number (C) splitly. + """ + + def __init__(self, channels: int, kernel_size: int): + """ + Args: + channels (int): Channel for input tensor + kernel_size (int): Size of the kernel used in blurring + """ + + super(GaussianBlurLayer, self).__init__() + self.channels = channels + self.kernel_size = kernel_size + assert self.kernel_size % 2 != 0 + + self.op = nn.Sequential( + nn.Pad2D(int(self.kernel_size / 2), mode='reflect'), + nn.Conv2D( + channels, + channels, + self.kernel_size, + stride=1, + padding=0, + bias_attr=False, + groups=channels)) + + self._init_kernel() + self.op[1].weight.stop_gradient = True + + def forward(self, x: paddle.Tensor): + """ + Args: + x (paddle.Tensor): input 4D tensor + Returns: + paddle.Tensor: Blurred version of the input + """ + + if not len(list(x.shape)) == 4: + print('\'GaussianBlurLayer\' requires a 4D tensor as input\n') + exit() + elif not x.shape[1] == self.channels: + print('In \'GaussianBlurLayer\', the required channel ({0}) is' + 'not the same as input ({1})\n'.format( + self.channels, x.shape[1])) + exit() + + return self.op(x) + + def _init_kernel(self): + sigma = 0.3 * ((self.kernel_size - 1) * 0.5 - 1) + 0.8 + + n = np.zeros((self.kernel_size, self.kernel_size)) + i = int(self.kernel_size / 2) + n[i, i] = 1 + kernel = scipy.ndimage.gaussian_filter(n, sigma) + kernel = kernel.astype('float32') + kernel = kernel[np.newaxis, np.newaxis, :, :] + paddle.assign(kernel, self.op[1].weight) \ No newline at end of file diff --git a/modules/image/matting/modnet_hrnet18_matting/processor.py b/modules/image/matting/modnet_hrnet18_matting/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..361c955390589469625aa985f6b75d5c95ed2e33 --- /dev/null +++ b/modules/image/matting/modnet_hrnet18_matting/processor.py @@ -0,0 +1,208 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +import base64 +from typing import Callable, Union, List, Tuple + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +from paddleseg.transforms import functional +from PIL import Image + + +class Compose: + """ + Do transformation on input data with corresponding pre-processing and augmentation operations. + The shape of input data to all operations is [height, width, channels]. + """ + + def __init__(self, transforms: Callable, to_rgb: bool = True): + if not isinstance(transforms, list): + raise TypeError('The transforms must be a list!') + self.transforms = transforms + self.to_rgb = to_rgb + + def __call__(self, data: dict) -> dict: + + if 'trans_info' not in data: + data['trans_info'] = [] + for op in self.transforms: + data = op(data) + if data is None: + return None + + data['img'] = np.transpose(data['img'], (2, 0, 1)) + for key in data.get('gt_fields', []): + if len(data[key].shape) == 2: + continue + data[key] = np.transpose(data[key], (2, 0, 1)) + + return data + + +class LoadImages: + """ + Read images from image path. + + Args: + to_rgb (bool, optional): If converting image to RGB color space. Default: True. + """ + def __init__(self, to_rgb: bool = True): + self.to_rgb = to_rgb + + def __call__(self, data: dict) -> dict: + + if isinstance(data['img'], str): + data['img'] = cv2.imread(data['img']) + + for key in data.get('gt_fields', []): + if isinstance(data[key], str): + data[key] = cv2.imread(data[key], cv2.IMREAD_UNCHANGED) + # if alpha and trimap has 3 channels, extract one. + if key in ['alpha', 'trimap']: + if len(data[key].shape) > 2: + data[key] = data[key][:, :, 0] + + if self.to_rgb: + data['img'] = cv2.cvtColor(data['img'], cv2.COLOR_BGR2RGB) + for key in data.get('gt_fields', []): + if len(data[key].shape) == 2: + continue + data[key] = cv2.cvtColor(data[key], cv2.COLOR_BGR2RGB) + + return data + + +class ResizeByShort: + """ + Resize the short side of an image to given size, and then scale the other side proportionally. + + Args: + short_size (int): The target size of short side. + """ + + def __init__(self, short_size: int =512): + self.short_size = short_size + + def __call__(self, data: dict) -> dict: + + data['trans_info'].append(('resize', data['img'].shape[0:2])) + data['img'] = functional.resize_short(data['img'], self.short_size) + for key in data.get('gt_fields', []): + data[key] = functional.resize_short(data[key], self.short_size) + return data + + +class ResizeToIntMult: + """ + Resize to some int muitple, d.g. 32. + """ + + def __init__(self, mult_int: int = 32): + self.mult_int = mult_int + + def __call__(self, data: dict) -> dict: + data['trans_info'].append(('resize', data['img'].shape[0:2])) + + h, w = data['img'].shape[0:2] + rw = w - w % 32 + rh = h - h % 32 + data['img'] = functional.resize(data['img'], (rw, rh)) + for key in data.get('gt_fields', []): + data[key] = functional.resize(data[key], (rw, rh)) + + return data + + +class Normalize: + """ + Normalize an image. + + Args: + mean (list, optional): The mean value of a data set. Default: [0.5, 0.5, 0.5]. 
+ std (list, optional): The standard deviation of a data set. Default: [0.5, 0.5, 0.5]. + + Raises: + ValueError: When mean/std is not list or any value in std is 0. + """ + + def __init__(self, mean: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5), std: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5)): + self.mean = mean + self.std = std + if not (isinstance(self.mean, (list, tuple)) + and isinstance(self.std, (list, tuple))): + raise ValueError( + "{}: input type is invalid. It should be list or tuple".format( + self)) + from functools import reduce + if reduce(lambda x, y: x * y, self.std) == 0: + raise ValueError('{}: std is invalid!'.format(self)) + + def __call__(self, data: dict) -> dict: + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + data['img'] = functional.normalize(data['img'], mean, std) + if 'fg' in data.get('gt_fields', []): + data['fg'] = functional.normalize(data['fg'], mean, std) + if 'bg' in data.get('gt_fields', []): + data['bg'] = functional.normalize(data['bg'], mean, std) + + return data + + +def reverse_transform(alpha: paddle.Tensor, trans_info: List[str]): + """recover pred to origin shape""" + for item in trans_info[::-1]: + if item[0] == 'resize': + h, w = item[1][0], item[1][1] + alpha = F.interpolate(alpha, [h, w], mode='bilinear') + elif item[0] == 'padding': + h, w = item[1][0], item[1][1] + alpha = alpha[:, :, 0:h, 0:w] + else: + raise Exception("Unexpected info '{}' in im_info".format(item[0])) + return alpha + +def save_alpha_pred(alpha: np.ndarray, trimap: Union[np.ndarray, str] = None): + """ + The value of alpha is range [0, 1], shape should be [h,w] + """ + if isinstance(trimap, str): + trimap = cv2.imread(trimap, 0) + + alpha[trimap == 0] = 0 + alpha[trimap == 255] = 255 + alpha = (alpha).astype('uint8') + return alpha + + +def cv2_to_base64(image: np.ndarray): + """ + Convert data from BGR to base64 format. + """ + data = cv2.imencode('.png', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str: str): + """ + Convert data from base64 to BGR format. + """ + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data \ No newline at end of file diff --git a/modules/image/matting/modnet_mobilenetv2_matting/README.md b/modules/image/matting/modnet_mobilenetv2_matting/README.md new file mode 100644 index 0000000000000000000000000000000000000000..51b8691624e36da0648a1c5fc4f5c670b81a4cde --- /dev/null +++ b/modules/image/matting/modnet_mobilenetv2_matting/README.md @@ -0,0 +1,155 @@ +# modnet_mobilenetv2_matting + +|模型名称|modnet_mobilenetv2_matting| +| :--- | :---: | +|类别|图像-抠图| +|网络|modnet_mobilenetv2| +|数据集|百度自建数据集| +|是否支持Fine-tuning|否| +|模型大小|38MB| +|指标|SAD112.73| +|最新更新日期|2021-12-03| + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 样例结果示例(左为原图,右为效果图): +

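+  - 抠图结果为单通道的 alpha 遮罩,可按公式 result = alpha * 前景 + (1 - alpha) * 新背景 与任意背景合成。下面给出一个换背景的示意代码(其中 `person.jpg`、`bg.jpg` 为假设的示例文件名,仅供参考):
+
+    ```python
+    import cv2
+    import numpy as np
+    import paddlehub as hub
+
+    model = hub.Module(name="modnet_mobilenetv2_matting")
+    # predict 返回的列表中,每个元素为形状 [H, W]、取值 0~255 的 uint8 alpha 遮罩
+    alpha = model.predict(["person.jpg"])[0].astype(np.float32) / 255.0
+    alpha = alpha[:, :, np.newaxis]
+
+    img = cv2.imread("person.jpg").astype(np.float32)
+    bg = cv2.imread("bg.jpg").astype(np.float32)
+    bg = cv2.resize(bg, (img.shape[1], img.shape[0]))
+
+    # 按 alpha 对原图前景与新背景做线性混合
+    composition = alpha * img + (1 - alpha) * bg
+    cv2.imwrite("composition.png", composition.astype(np.uint8))
+    ```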

+ +- ### 模型介绍 + + - Matting(精细化分割/影像去背/抠图)是指借由计算前景的颜色和透明度,将前景从影像中撷取出来的技术,可用于替换背景、影像合成、视觉特效,在电影工业中被广泛地使用。影像中的每个像素会有代表其前景透明度的值,称作阿法值(Alpha),一张影像中所有阿法值的集合称作阿法遮罩(Alpha Matte),将影像被遮罩所涵盖的部分取出即可完成前景的分离。modnet_mobilenetv2_matting可生成抠图结果。 + + + + - 更多详情请参考:[modnet_mobilenetv2_matting](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3/contrib/Matting) + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 + + - paddleseg >= 2.3.0 + + +- ### 2、安装 + + - ```shell + $ hub install modnet_mobilenetv2_matting + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run modnet_mobilenetv2_matting --input_path "/PATH/TO/IMAGE" + ``` + + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="modnet_mobilenetv2_matting") + + result = model.predict(["/PATH/TO/IMAGE"]) + print(result) + ``` +- ### 3、API + + - ```python + def predict(self, + image_list, + trimap_list, + visualization, + save_path): + ``` + + - 人像matting预测API,用于将输入图片中的人像分割出来。 + + - 参数 + + - image_list (list(str | numpy.ndarray)):图片输入路径或者BGR格式numpy数据。 + - trimap_list(list(str | numpy.ndarray)):trimap输入路径或者灰度图单通道格式图片。默认为None。 + - visualization (bool): 是否进行可视化,默认为False。 + - save_path (str): 当visualization为True时,保存图片的路径,默认为"modnet_mobilenetv2_matting_output"。 + + - 返回 + + - result (list(numpy.ndarray)):模型分割结果: + + +## 四、服务部署 + +- PaddleHub Serving可以部署人像matting在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m modnet_mobilenetv2_matting + ``` + + - 这样就完成了一个人像matting在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + import time + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/modnet_mobilenetv2_matting" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + for image in r.json()["results"]['data']: + data = base64_to_cv2(image) + image_path =str(time.time()) + ".png" + cv2.imwrite(image_path, data) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/matting/modnet_mobilenetv2_matting/README_en.md b/modules/image/matting/modnet_mobilenetv2_matting/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..a85aa07e9200e7d80756c0c67958a7f42215cf85 --- /dev/null +++ b/modules/image/matting/modnet_mobilenetv2_matting/README_en.md @@ -0,0 +1,156 @@ +# modnet_mobilenetv2_matting + +|Module Name|modnet_mobilenetv2_matting| +| :--- | :---: | +|Category|Image Matting| +|Network|modnet_mobilenetv2| +|Dataset|Baidu self-built dataset| +|Support Fine-tuning|No| +|Module Size|38MB| +|Data Indicators|SAD112.73| +|Latest update date|2021-12-03| + 
+ +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +


+ +- ### Module Introduction + + - Mating is the technique of extracting foreground from an image by calculating its color and transparency. It is widely used in the film industry to replace background, image composition, and visual effects. Each pixel in the image will have a value that represents its foreground transparency, called Alpha. The set of all Alpha values in an image is called Alpha Matte. The part of the image covered by the mask can be extracted to complete foreground separation. + + + + - For more information, please refer to: [modnet_mobilenetv2_matting](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3/contrib/Matting) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 + + - paddleseg >= 2.3.0 + + +- ### 2、Installation + + - ```shell + $ hub install modnet_mobilenetv2_matting + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run modnet_mobilenetv2_matting --input_path "/PATH/TO/IMAGE" + ``` + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="modnet_mobilenetv2_matting") + + result = model.predict(image_list=["/PATH/TO/IMAGE"]) + print(result) + ``` +- ### 3、API + + - ```python + def predict(self, + image_list, + trimap_list, + visualization, + save_path): + ``` + + - Prediction API for matting. + + - **Parameter** + + - image_list (list(str | numpy.ndarray)): Image path or image data, ndarray.shape is in the format \[H, W, C\],BGR. + - trimap_list(list(str | numpy.ndarray)): Trimap path or trimap data, ndarray.shape is in the format \[H, W],gray. Default is None. + - visualization (bool): Whether to save the recognition results as picture files, default is False. + - save_path (str): Save path of images, "modnet_mobilenetv2_matting_output" by default. + + - **Return** + + - result (list(numpy.ndarray)):The list of model results. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of matting. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m modnet_mobilenetv2_matting + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
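+  - For example, assuming GPU 0 is available, a typical launch with the default port could look like this (the port value below is only an illustration):
+
+    ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m modnet_mobilenetv2_matting -p 8866
+    ```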
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + + ```python + import requests + import json + import cv2 + import base64 + import time + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/modnet_mobilenetv2_matting" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + for image in r.json()["results"]['data']: + data = base64_to_cv2(image) + image_path =str(time.time()) + ".png" + cv2.imwrite(image_path, data) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/matting/modnet_mobilenetv2_matting/mobilenetv2.py b/modules/image/matting/modnet_mobilenetv2_matting/mobilenetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..8895104a34073143ae17c1021519650dad022aeb --- /dev/null +++ b/modules/image/matting/modnet_mobilenetv2_matting/mobilenetv2.py @@ -0,0 +1,224 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import math + +import numpy as np +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, BatchNorm, Linear, Dropout +from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D + +from paddleseg import utils +from paddleseg.cvlibs import manager + + +__all__ = ["MobileNetV2"] + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + def __init__(self, + num_channels: int, + filter_size: int, + num_filters: int, + stride: int, + padding: int, + num_groups: int=1, + name: str = None, + use_cudnn: bool = True): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + weight_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + + self._batch_norm = BatchNorm( + num_filters, + param_attr=ParamAttr(name=name + "_bn_scale"), + bias_attr=ParamAttr(name=name + "_bn_offset"), + moving_mean_name=name + "_bn_mean", + moving_variance_name=name + "_bn_variance") + + def forward(self, inputs: paddle.Tensor, if_act: bool = True) -> paddle.Tensor: + y = self._conv(inputs) + y = self._batch_norm(y) + if if_act: + y = F.relu6(y) + return y + + +class InvertedResidualUnit(nn.Layer): + """Inverted residual block""" + def __init__(self, num_channels: int, num_in_filter: int, num_filters: int, stride: int, + filter_size: int, padding: int, expansion_factor: int, name: str): + super(InvertedResidualUnit, self).__init__() + num_expfilter = int(round(num_in_filter * expansion_factor)) + self._expand_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + name=name + "_expand") + + self._bottleneck_conv = ConvBNLayer( + num_channels=num_expfilter, + num_filters=num_expfilter, + filter_size=filter_size, + stride=stride, + padding=padding, + num_groups=num_expfilter, + use_cudnn=False, + name=name + "_dwise") + + self._linear_conv = ConvBNLayer( + num_channels=num_expfilter, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + name=name + "_linear") + + def forward(self, inputs: paddle.Tensor, ifshortcut: bool) -> paddle.Tensor: + y = self._expand_conv(inputs, if_act=True) + y = self._bottleneck_conv(y, if_act=True) + y = self._linear_conv(y, if_act=False) + if ifshortcut: + y = paddle.add(inputs, y) + return y + + +class InvresiBlocks(nn.Layer): + def __init__(self, in_c: int, t: int, c: int, n: int, s: int, name: str): + super(InvresiBlocks, self).__init__() + + self._first_block = InvertedResidualUnit( + num_channels=in_c, + num_in_filter=in_c, + num_filters=c, + stride=s, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + "_1") + + self._block_list = [] + for i in range(1, n): + block = self.add_sublayer( + name + "_" + str(i + 1), + sublayer=InvertedResidualUnit( + num_channels=c, + num_in_filter=c, + num_filters=c, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + "_" + str(i + 1))) + self._block_list.append(block) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self._first_block(inputs, ifshortcut=False) + for block in self._block_list: + y = block(y, ifshortcut=True) + return y + + +class MobileNet(nn.Layer): + """Networj of MobileNet""" + def __init__(self, + input_channels: int = 3, + scale: float = 1.0, + pretrained: str = None, + prefix_name: str = ""): + super(MobileNet, 
self).__init__() + self.scale = scale + + bottleneck_params_list = [ + (1, 16, 1, 1), + (6, 24, 2, 2), + (6, 32, 3, 2), + (6, 64, 4, 2), + (6, 96, 3, 1), + (6, 160, 3, 2), + (6, 320, 1, 1), + ] + + self.conv1 = ConvBNLayer( + num_channels=input_channels, + num_filters=int(32 * scale), + filter_size=3, + stride=2, + padding=1, + name=prefix_name + "conv1_1") + + self.block_list = [] + i = 1 + in_c = int(32 * scale) + for layer_setting in bottleneck_params_list: + t, c, n, s = layer_setting + i += 1 + block = self.add_sublayer( + prefix_name + "conv" + str(i), + sublayer=InvresiBlocks( + in_c=in_c, + t=t, + c=int(c * scale), + n=n, + s=s, + name=prefix_name + "conv" + str(i))) + self.block_list.append(block) + in_c = int(c * scale) + + self.out_c = int(1280 * scale) if scale > 1.0 else 1280 + self.conv9 = ConvBNLayer( + num_channels=in_c, + num_filters=self.out_c, + filter_size=1, + stride=1, + padding=0, + name=prefix_name + "conv9") + + self.feat_channels = [int(i * scale) for i in [16, 24, 32, 96, 1280]] + self.pretrained = pretrained + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + feat_list = [] + y = self.conv1(inputs, if_act=True) + + block_index = 0 + for block in self.block_list: + y = block(y) + if block_index in [0, 1, 2, 4]: + feat_list.append(y) + block_index += 1 + y = self.conv9(y, if_act=True) + feat_list.append(y) + return feat_list + + +def MobileNetV2(**kwargs): + model = MobileNet(scale=1.0, **kwargs) + return model diff --git a/modules/image/matting/modnet_mobilenetv2_matting/module.py b/modules/image/matting/modnet_mobilenetv2_matting/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e6a0e6cbeb4c7c60f069e2642c4593fc6a4cde93 --- /dev/null +++ b/modules/image/matting/modnet_mobilenetv2_matting/module.py @@ -0,0 +1,514 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time +import argparse +from typing import Callable, Union, List, Tuple + +import numpy as np +import cv2 +import scipy +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.module import moduleinfo, runnable, serving + +from modnet_mobilenetv2_matting.mobilenetv2 import MobileNetV2 +import modnet_mobilenetv2_matting.processor as P + + +@moduleinfo( + name="modnet_mobilenetv2_matting", + type="CV", + author="paddlepaddle", + summary="modnet_mobilenetv2_matting is a matting model", + version="1.0.0" +) +class MODNetMobilenetV2(nn.Layer): + """ + The MODNet implementation based on PaddlePaddle. + + The original article refers to + Zhanghan Ke, et, al. "Is a Green Screen Really Necessary for Real-Time Portrait Matting?" + (https://arxiv.org/pdf/2011.11961.pdf). + + Args: + hr_channels(int, optional): The channels of high resolutions branch. Defautl: None. + pretrained(str, optional): The path of pretrianed model. Defautl: None. 
+ + """ + + def __init__(self, hr_channels:int = 32, pretrained=None): + super(MODNetMobilenetV2, self).__init__() + + self.backbone = MobileNetV2() + self.pretrained = pretrained + + self.head = MODNetHead( + hr_channels=hr_channels, backbone_channels=self.backbone.feat_channels) + self.blurer = GaussianBlurLayer(1, 3) + self.transforms = P.Compose([P.LoadImages(), P.ResizeByShort(), P.ResizeToIntMult(), P.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'modnet-mobilenetv2.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def preprocess(self, img: Union[str, np.ndarray] , transforms: Callable, trimap: Union[str, np.ndarray] = None): + data = {} + data['img'] = img + if trimap is not None: + data['trimap'] = trimap + data['gt_fields'] = ['trimap'] + data['trans_info'] = [] + data = self.transforms(data) + data['img'] = paddle.to_tensor(data['img']) + data['img'] = data['img'].unsqueeze(0) + if trimap is not None: + data['trimap'] = paddle.to_tensor(data['trimap']) + data['trimap'] = data['trimap'].unsqueeze((0, 1)) + + return data + + def forward(self, inputs: dict): + x = inputs['img'] + feat_list = self.backbone(x) + y = self.head(inputs=inputs, feat_list=feat_list) + return y + + def predict(self, image_list: list, trimap_list: list = None, visualization: bool =False, save_path: str = "modnet_mobilenetv2_matting_output"): + self.eval() + result = [] + with paddle.no_grad(): + for i, im_path in enumerate(image_list): + trimap = trimap_list[i] if trimap_list is not None else None + data = self.preprocess(img=im_path, transforms=self.transforms, trimap=trimap) + alpha_pred = self.forward(data) + alpha_pred = P.reverse_transform(alpha_pred, data['trans_info']) + alpha_pred = (alpha_pred.numpy()).squeeze() + alpha_pred = (alpha_pred * 255).astype('uint8') + alpha_pred = P.save_alpha_pred(alpha_pred, trimap) + result.append(alpha_pred) + if visualization: + if not os.path.exists(save_path): + os.makedirs(save_path) + img_name = str(time.time()) + '.png' + image_save_path = os.path.join(save_path, img_name) + cv2.imwrite(image_save_path, alpha_pred) + + return result + + @serving + def serving_method(self, images: list, trimaps:list = None, **kwargs): + """ + Run as a service. + """ + images_decode = [P.base64_to_cv2(image) for image in images] + if trimaps is not None: + trimap_decoder = [cv2.cvtColor(P.base64_to_cv2(trimap), cv2.COLOR_BGR2GRAY) for trimap in trimaps] + else: + trimap_decoder = None + + outputs = self.predict(image_list=images_decode, trimap_list= trimap_decoder, **kwargs) + serving_data = [P.cv2_to_base64(outputs[i]) for i in range(len(outputs))] + results = {'data': serving_data} + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + if args.trimap_path is not None: + trimap_list = [args.trimap_path] + else: + trimap_list = None + + results = self.predict(image_list=[args.input_path], trimap_list=trimap_list, save_path=args.output_dir, visualization=args.visualization) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + + self.arg_config_group.add_argument( + '--output_dir', type=str, default="modnet_mobilenetv2_matting_output", help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=bool, default=True, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--trimap_path', type=str, default=None, help="path to image.") + + + +class MODNetHead(nn.Layer): + """ + Segmentation head. + """ + def __init__(self, hr_channels: int, backbone_channels: int): + super().__init__() + + self.lr_branch = LRBranch(backbone_channels) + self.hr_branch = HRBranch(hr_channels, backbone_channels) + self.f_branch = FusionBranch(hr_channels, backbone_channels) + + def forward(self, inputs: paddle.Tensor, feat_list: list): + pred_semantic, lr8x, [enc2x, enc4x] = self.lr_branch(feat_list) + pred_detail, hr2x = self.hr_branch(inputs['img'], enc2x, enc4x, lr8x) + pred_matte = self.f_branch(inputs['img'], lr8x, hr2x) + + if self.training: + logit_dict = { + 'semantic': pred_semantic, + 'detail': pred_detail, + 'matte': pred_matte + } + return logit_dict + else: + return pred_matte + + + +class FusionBranch(nn.Layer): + def __init__(self, hr_channels: int, enc_channels: int): + super().__init__() + self.conv_lr4x = Conv2dIBNormRelu( + enc_channels[2], hr_channels, 5, stride=1, padding=2) + + self.conv_f2x = Conv2dIBNormRelu( + 2 * hr_channels, hr_channels, 3, stride=1, padding=1) + self.conv_f = nn.Sequential( + Conv2dIBNormRelu( + hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1), + Conv2dIBNormRelu( + int(hr_channels / 2), + 1, + 1, + stride=1, + padding=0, + with_ibn=False, + with_relu=False)) + + def forward(self, img: paddle.Tensor, lr8x: paddle.Tensor, hr2x: paddle.Tensor): + lr4x = F.interpolate( + lr8x, scale_factor=2, mode='bilinear', align_corners=False) + lr4x = self.conv_lr4x(lr4x) + lr2x = F.interpolate( + lr4x, scale_factor=2, mode='bilinear', align_corners=False) + + f2x = self.conv_f2x(paddle.concat((lr2x, hr2x), axis=1)) + f = F.interpolate( + f2x, scale_factor=2, mode='bilinear', align_corners=False) + f = self.conv_f(paddle.concat((f, img), axis=1)) + pred_matte = F.sigmoid(f) + + return pred_matte + + +class HRBranch(nn.Layer): + """ + High Resolution Branch of MODNet + """ + + def __init__(self, hr_channels: int, enc_channels:int): + super().__init__() + + self.tohr_enc2x = Conv2dIBNormRelu( + enc_channels[0], hr_channels, 1, stride=1, padding=0) + self.conv_enc2x = Conv2dIBNormRelu( + hr_channels + 3, hr_channels, 3, stride=2, padding=1) + + self.tohr_enc4x = Conv2dIBNormRelu( + enc_channels[1], hr_channels, 1, stride=1, padding=0) + self.conv_enc4x = Conv2dIBNormRelu( + 2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1) + + self.conv_hr4x = 
nn.Sequential( + Conv2dIBNormRelu( + 2 * hr_channels + enc_channels[2] + 3, + 2 * hr_channels, + 3, + stride=1, + padding=1), + Conv2dIBNormRelu( + 2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu( + 2 * hr_channels, hr_channels, 3, stride=1, padding=1)) + + self.conv_hr2x = nn.Sequential( + Conv2dIBNormRelu( + 2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu( + 2 * hr_channels, hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1)) + + self.conv_hr = nn.Sequential( + Conv2dIBNormRelu( + hr_channels + 3, hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu( + hr_channels, + 1, + 1, + stride=1, + padding=0, + with_ibn=False, + with_relu=False)) + + def forward(self, img: paddle.Tensor, enc2x: paddle.Tensor, enc4x: paddle.Tensor, lr8x: paddle.Tensor): + img2x = F.interpolate( + img, scale_factor=1 / 2, mode='bilinear', align_corners=False) + img4x = F.interpolate( + img, scale_factor=1 / 4, mode='bilinear', align_corners=False) + + enc2x = self.tohr_enc2x(enc2x) + hr4x = self.conv_enc2x(paddle.concat((img2x, enc2x), axis=1)) + + enc4x = self.tohr_enc4x(enc4x) + hr4x = self.conv_enc4x(paddle.concat((hr4x, enc4x), axis=1)) + + lr4x = F.interpolate( + lr8x, scale_factor=2, mode='bilinear', align_corners=False) + hr4x = self.conv_hr4x(paddle.concat((hr4x, lr4x, img4x), axis=1)) + + hr2x = F.interpolate( + hr4x, scale_factor=2, mode='bilinear', align_corners=False) + hr2x = self.conv_hr2x(paddle.concat((hr2x, enc2x), axis=1)) + + pred_detail = None + if self.training: + hr = F.interpolate( + hr2x, scale_factor=2, mode='bilinear', align_corners=False) + hr = self.conv_hr(paddle.concat((hr, img), axis=1)) + pred_detail = F.sigmoid(hr) + + return pred_detail, hr2x + + +class LRBranch(nn.Layer): + """ + Low Resolution Branch of MODNet + """ + def __init__(self, backbone_channels: int): + super().__init__() + self.se_block = SEBlock(backbone_channels[4], reduction=4) + self.conv_lr16x = Conv2dIBNormRelu( + backbone_channels[4], backbone_channels[3], 5, stride=1, padding=2) + self.conv_lr8x = Conv2dIBNormRelu( + backbone_channels[3], backbone_channels[2], 5, stride=1, padding=2) + self.conv_lr = Conv2dIBNormRelu( + backbone_channels[2], + 1, + 3, + stride=2, + padding=1, + with_ibn=False, + with_relu=False) + + def forward(self, feat_list: list): + enc2x, enc4x, enc32x = feat_list[0], feat_list[1], feat_list[4] + + enc32x = self.se_block(enc32x) + lr16x = F.interpolate( + enc32x, scale_factor=2, mode='bilinear', align_corners=False) + lr16x = self.conv_lr16x(lr16x) + lr8x = F.interpolate( + lr16x, scale_factor=2, mode='bilinear', align_corners=False) + lr8x = self.conv_lr8x(lr8x) + + pred_semantic = None + if self.training: + lr = self.conv_lr(lr8x) + pred_semantic = F.sigmoid(lr) + + return pred_semantic, lr8x, [enc2x, enc4x] + + +class IBNorm(nn.Layer): + """ + Combine Instance Norm and Batch Norm into One Layer + """ + + def __init__(self, in_channels: int): + super().__init__() + self.bnorm_channels = in_channels // 2 + self.inorm_channels = in_channels - self.bnorm_channels + + self.bnorm = nn.BatchNorm2D(self.bnorm_channels) + self.inorm = nn.InstanceNorm2D(self.inorm_channels) + + def forward(self, x): + bn_x = self.bnorm(x[:, :self.bnorm_channels, :, :]) + in_x = self.inorm(x[:, self.bnorm_channels:, :, :]) + + return paddle.concat((bn_x, in_x), 1) + + +class Conv2dIBNormRelu(nn.Layer): + """ + Convolution + 
IBNorm + Relu + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + dilation:int = 1, + groups: int = 1, + bias_attr: paddle.ParamAttr = None, + with_ibn: bool = True, + with_relu: bool = True): + + super().__init__() + + layers = [ + nn.Conv2D( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias_attr=bias_attr) + ] + + if with_ibn: + layers.append(IBNorm(out_channels)) + + if with_relu: + layers.append(nn.ReLU()) + + self.layers = nn.Sequential(*layers) + + def forward(self, x: paddle.Tensor): + return self.layers(x) + + +class SEBlock(nn.Layer): + """ + SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf + """ + + def __init__(self, num_channels: int, reduction:int = 1): + super().__init__() + self.pool = nn.AdaptiveAvgPool2D(1) + self.conv = nn.Sequential( + nn.Conv2D( + num_channels, + int(num_channels // reduction), + 1, + bias_attr=False), nn.ReLU(), + nn.Conv2D( + int(num_channels // reduction), + num_channels, + 1, + bias_attr=False), nn.Sigmoid()) + + def forward(self, x: paddle.Tensor): + w = self.pool(x) + w = self.conv(w) + return w * x + + +class GaussianBlurLayer(nn.Layer): + """ Add Gaussian Blur to a 4D tensors + This layer takes a 4D tensor of {N, C, H, W} as input. + The Gaussian blur will be performed in given channel number (C) splitly. + """ + + def __init__(self, channels: int, kernel_size: int): + """ + Args: + channels (int): Channel for input tensor + kernel_size (int): Size of the kernel used in blurring + """ + + super(GaussianBlurLayer, self).__init__() + self.channels = channels + self.kernel_size = kernel_size + assert self.kernel_size % 2 != 0 + + self.op = nn.Sequential( + nn.Pad2D(int(self.kernel_size / 2), mode='reflect'), + nn.Conv2D( + channels, + channels, + self.kernel_size, + stride=1, + padding=0, + bias_attr=False, + groups=channels)) + + self._init_kernel() + self.op[1].weight.stop_gradient = True + + def forward(self, x: paddle.Tensor): + """ + Args: + x (paddle.Tensor): input 4D tensor + Returns: + paddle.Tensor: Blurred version of the input + """ + + if not len(list(x.shape)) == 4: + print('\'GaussianBlurLayer\' requires a 4D tensor as input\n') + exit() + elif not x.shape[1] == self.channels: + print('In \'GaussianBlurLayer\', the required channel ({0}) is' + 'not the same as input ({1})\n'.format( + self.channels, x.shape[1])) + exit() + + return self.op(x) + + def _init_kernel(self): + sigma = 0.3 * ((self.kernel_size - 1) * 0.5 - 1) + 0.8 + + n = np.zeros((self.kernel_size, self.kernel_size)) + i = int(self.kernel_size / 2) + n[i, i] = 1 + kernel = scipy.ndimage.gaussian_filter(n, sigma) + kernel = kernel.astype('float32') + kernel = kernel[np.newaxis, np.newaxis, :, :] + paddle.assign(kernel, self.op[1].weight) \ No newline at end of file diff --git a/modules/image/matting/modnet_mobilenetv2_matting/processor.py b/modules/image/matting/modnet_mobilenetv2_matting/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..3ae79593f0d3dab19520c3c666ae4a06b81960dd --- /dev/null +++ b/modules/image/matting/modnet_mobilenetv2_matting/processor.py @@ -0,0 +1,207 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +import base64 +from typing import Callable, Union, List, Tuple + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +from paddleseg.transforms import functional +from PIL import Image + + +class Compose: + """ + Do transformation on input data with corresponding pre-processing and augmentation operations. + The shape of input data to all operations is [height, width, channels]. + """ + + def __init__(self, transforms: Callable, to_rgb: bool = True): + if not isinstance(transforms, list): + raise TypeError('The transforms must be a list!') + self.transforms = transforms + self.to_rgb = to_rgb + + def __call__(self, data: dict) -> dict: + + if 'trans_info' not in data: + data['trans_info'] = [] + for op in self.transforms: + data = op(data) + if data is None: + return None + + data['img'] = np.transpose(data['img'], (2, 0, 1)) + for key in data.get('gt_fields', []): + if len(data[key].shape) == 2: + continue + data[key] = np.transpose(data[key], (2, 0, 1)) + + return data + + +class LoadImages: + """ + Read images from image path. + + Args: + to_rgb (bool, optional): If converting image to RGB color space. Default: True. + """ + def __init__(self, to_rgb: bool = True): + self.to_rgb = to_rgb + + def __call__(self, data: dict) -> dict: + + if isinstance(data['img'], str): + data['img'] = cv2.imread(data['img']) + + for key in data.get('gt_fields', []): + if isinstance(data[key], str): + data[key] = cv2.imread(data[key], cv2.IMREAD_UNCHANGED) + # if alpha and trimap has 3 channels, extract one. + if key in ['alpha', 'trimap']: + if len(data[key].shape) > 2: + data[key] = data[key][:, :, 0] + + if self.to_rgb: + data['img'] = cv2.cvtColor(data['img'], cv2.COLOR_BGR2RGB) + for key in data.get('gt_fields', []): + if len(data[key].shape) == 2: + continue + data[key] = cv2.cvtColor(data[key], cv2.COLOR_BGR2RGB) + + return data + + +class ResizeByShort: + """ + Resize the short side of an image to given size, and then scale the other side proportionally. + + Args: + short_size (int): The target size of short side. + """ + + def __init__(self, short_size: int =512): + self.short_size = short_size + + def __call__(self, data: dict) -> dict: + + data['trans_info'].append(('resize', data['img'].shape[0:2])) + data['img'] = functional.resize_short(data['img'], self.short_size) + for key in data.get('gt_fields', []): + data[key] = functional.resize_short(data[key], self.short_size) + return data + + +class ResizeToIntMult: + """ + Resize to some int muitple, d.g. 32. + """ + + def __init__(self, mult_int: int = 32): + self.mult_int = mult_int + + def __call__(self, data: dict) -> dict: + data['trans_info'].append(('resize', data['img'].shape[0:2])) + + h, w = data['img'].shape[0:2] + rw = w - w % 32 + rh = h - h % 32 + data['img'] = functional.resize(data['img'], (rw, rh)) + for key in data.get('gt_fields', []): + data[key] = functional.resize(data[key], (rw, rh)) + + return data + + +class Normalize: + """ + Normalize an image. + + Args: + mean (list, optional): The mean value of a data set. Default: [0.5, 0.5, 0.5]. 
+ std (list, optional): The standard deviation of a data set. Default: [0.5, 0.5, 0.5]. + + Raises: + ValueError: When mean/std is not list or any value in std is 0. + """ + + def __init__(self, mean: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5), std: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5)): + self.mean = mean + self.std = std + if not (isinstance(self.mean, (list, tuple)) + and isinstance(self.std, (list, tuple))): + raise ValueError( + "{}: input type is invalid. It should be list or tuple".format( + self)) + from functools import reduce + if reduce(lambda x, y: x * y, self.std) == 0: + raise ValueError('{}: std is invalid!'.format(self)) + + def __call__(self, data: dict) -> dict: + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + data['img'] = functional.normalize(data['img'], mean, std) + if 'fg' in data.get('gt_fields', []): + data['fg'] = functional.normalize(data['fg'], mean, std) + if 'bg' in data.get('gt_fields', []): + data['bg'] = functional.normalize(data['bg'], mean, std) + + return data + + +def reverse_transform(alpha: paddle.Tensor, trans_info: List[str]): + """recover pred to origin shape""" + for item in trans_info[::-1]: + if item[0] == 'resize': + h, w = item[1][0], item[1][1] + alpha = F.interpolate(alpha, [h, w], mode='bilinear') + elif item[0] == 'padding': + h, w = item[1][0], item[1][1] + alpha = alpha[:, :, 0:h, 0:w] + else: + raise Exception("Unexpected info '{}' in im_info".format(item[0])) + return alpha + +def save_alpha_pred(alpha: np.ndarray, trimap: np.ndarray = None): + """ + The value of alpha is range [0, 1], shape should be [h,w] + """ + if isinstance(trimap, str): + trimap = cv2.imread(trimap, 0) + alpha[trimap == 0] = 0 + alpha[trimap == 255] = 255 + alpha = (alpha).astype('uint8') + return alpha + + +def cv2_to_base64(image: np.ndarray): + """ + Convert data from BGR to base64 format. + """ + data = cv2.imencode('.png', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str: str): + """ + Convert data from base64 to BGR format. + """ + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data \ No newline at end of file diff --git a/modules/image/matting/modnet_mobilenetv2_matting/requirements.py b/modules/image/matting/modnet_mobilenetv2_matting/requirements.py new file mode 100644 index 0000000000000000000000000000000000000000..7df0ef23928361724c3fadb8d87d6a3be869e58b --- /dev/null +++ b/modules/image/matting/modnet_mobilenetv2_matting/requirements.py @@ -0,0 +1 @@ +paddleseg >= 2.3.0 diff --git a/modules/image/matting/modnet_resnet50vd_matting/README.md b/modules/image/matting/modnet_resnet50vd_matting/README.md new file mode 100644 index 0000000000000000000000000000000000000000..03ad69e6732d545861063c85a38e872ff6e60c5d --- /dev/null +++ b/modules/image/matting/modnet_resnet50vd_matting/README.md @@ -0,0 +1,157 @@ +# modnet_resnet50vd_matting + +|模型名称|modnet_resnet50vd_matting| +| :--- | :---: | +|类别|图像-抠图| +|网络|modnet_resnet50vd| +|数据集|百度自建数据集| +|是否支持Fine-tuning|否| +|模型大小|535MB| +|指标|SAD112.73| +|最新更新日期|2021-12-03| + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 样例结果示例(左为原图,右为效果图): +

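+  - 若已有 trimap,后文 API 中的 `trimap_list` 参数可在预测时一并传入,用于约束前景、背景与过渡区域。示意代码如下(`person.jpg`、`trimap.png` 为假设的示例文件名,仅供参考):
+
+    ```python
+    import paddlehub as hub
+
+    model = hub.Module(name="modnet_resnet50vd_matting")
+    # trimap 为单通道灰度图:0 表示确定背景,255 表示确定前景,其余取值表示过渡区域
+    result = model.predict(image_list=["person.jpg"], trimap_list=["trimap.png"])
+    print(result[0].shape)
+    ```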

+ +- ### 模型介绍 + + - Matting(精细化分割/影像去背/抠图)是指借由计算前景的颜色和透明度,将前景从影像中撷取出来的技术,可用于替换背景、影像合成、视觉特效,在电影工业中被广泛地使用。影像中的每个像素会有代表其前景透明度的值,称作阿法值(Alpha),一张影像中所有阿法值的集合称作阿法遮罩(Alpha Matte),将影像被遮罩所涵盖的部分取出即可完成前景的分离。modnet_resnet50vd_matting可生成抠图结果。 + + + + - 更多详情请参考:[modnet_resnet50vd_matting](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3/contrib/Matting) + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 + + - paddleseg >= 2.3.0 + + +- ### 2、安装 + + - ```shell + $ hub install modnet_resnet50vd_matting + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run modnet_resnet50vd_matting --input_path "/PATH/TO/IMAGE" + ``` + + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="modnet_resnet50vd_matting") + + result = model.predict(["/PATH/TO/IMAGE"]) + print(result) + ``` + +- ### 3、API + + - ```python + def predict(self, + image_list, + trimap_list, + visualization, + save_path): + ``` + + - 人像matting预测API,用于将输入图片中的人像分割出来。 + + - 参数 + + - image_list (list(str | numpy.ndarray)):图片输入路径或者BGR格式numpy数据。 + - trimap_list(list(str | numpy.ndarray)):trimap输入路径或者灰度图单通道格式图片。 + - visualization (bool): 是否进行可视化,默认为False。 + - save_path (str): 当visualization为True时,保存图片的路径,默认为"modnet_resnet50vd_matting_output"。 + + - 返回 + + - result (list(numpy.ndarray)):模型分割结果: + + +## 四、服务部署 + +- PaddleHub Serving可以部署人像matting在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m modnet_resnet50vd_matting + ``` + + - 这样就完成了一个人像matting在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + import time + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/modnet_resnet50vd_matting" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + for image in r.json()["results"]['data']: + data = base64_to_cv2(image) + image_path =str(time.time()) + ".png" + cv2.imwrite(image_path, data) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/matting/modnet_resnet50vd_matting/README_en.md b/modules/image/matting/modnet_resnet50vd_matting/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..2a6d4e463d2196d3874a8b87892312cb0dc49b31 --- /dev/null +++ b/modules/image/matting/modnet_resnet50vd_matting/README_en.md @@ -0,0 +1,156 @@ +# modnet_resnet50vd_matting + +|Module Name|modnet_resnet50vd_matting| +| :--- | :---: | +|Category|Image Matting| +|Network|modnet_resnet50vd| +|Dataset|Baidu self-built dataset| +|Support Fine-tuning|No| +|Module Size|535MB| +|Data Indicators|SAD104.14| +|Latest update date|2021-12-03| + + +## I. 
Basic Information + +- ### Application Effect Display + + - Sample results: +


+ +- ### Module Introduction + + - Mating is the technique of extracting foreground from an image by calculating its color and transparency. It is widely used in the film industry to replace background, image composition, and visual effects. Each pixel in the image will have a value that represents its foreground transparency, called Alpha. The set of all Alpha values in an image is called Alpha Matte. The part of the image covered by the mask can be extracted to complete foreground separation. + + + + - For more information, please refer to: [modnet_resnet50vd_matting](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3/contrib/Matting) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 + + - paddleseg >= 2.3.0 + + +- ### 2、Installation + + - ```shell + $ hub install modnet_resnet50vd_matting + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run modnet_resnet50vd_matting --input_path "/PATH/TO/IMAGE" + ``` + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="modnet_resnet50vd_matting") + + result = model.predict(["/PATH/TO/IMAGE"]) + print(result) + ``` +- ### 3、API + + - ```python + def predict(self, + image_list, + trimap_list, + visualization, + save_path): + ``` + + - Prediction API for matting. + + - **Parameter** + + - image_list (list(str | numpy.ndarray)): Image path or image data, ndarray.shape is in the format \[H, W, C\], BGR. + - trimap_list(list(str | numpy.ndarray)): Trimap path or trimap data, ndarray.shape is in the format \[H, W\], Gray. Default is None. + - visualization (bool): Whether to save the recognition results as picture files, default is False. + - save_path (str): Save path of images, "modnet_resnet50vd_matting_output" by default. + + - **Return** + + - result (list(numpy.ndarray)):The list of model results. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of matting. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m modnet_resnet50vd_matting + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
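+  - For instance, assuming GPU 0 is available, the service could be launched as follows (the port value is shown for illustration only):
+
+    ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m modnet_resnet50vd_matting -p 8866
+    ```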
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + + ```python + import requests + import json + import cv2 + import base64 + import time + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/modnet_resnet50vd_matting" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + for image in r.json()["results"]['data']: + data = base64_to_cv2(image) + image_path =str(time.time()) + ".png" + cv2.imwrite(image_path, data) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/matting/modnet_resnet50vd_matting/module.py b/modules/image/matting/modnet_resnet50vd_matting/module.py new file mode 100644 index 0000000000000000000000000000000000000000..b57c170a9e281c258fbce8102a52293d93ed0a9e --- /dev/null +++ b/modules/image/matting/modnet_resnet50vd_matting/module.py @@ -0,0 +1,497 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time +import argparse +from typing import Callable, Union, List, Tuple + +import numpy as np +import cv2 +import scipy +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.module import moduleinfo, runnable, serving + +from modnet_resnet50vd_matting.resnet import ResNet50_vd +import modnet_resnet50vd_matting.processor as P + + +@moduleinfo( + name="modnet_resnet50vd_matting", + type="CV/matting", + author="paddlepaddle", + summary="modnet_resnet50vd_matting is a matting model", + version="1.0.0" +) +class MODNetResNet50Vd(nn.Layer): + """ + The MODNet implementation based on PaddlePaddle. + + The original article refers to + Zhanghan Ke, et, al. "Is a Green Screen Really Necessary for Real-Time Portrait Matting?" + (https://arxiv.org/pdf/2011.11961.pdf). + + Args: + hr_channels(int, optional): The channels of high resolutions branch. Defautl: None. + pretrained(str, optional): The path of pretrianed model. Defautl: None. 
+ """ + + def __init__(self, hr_channels:int = 32, pretrained=None): + super(MODNetResNet50Vd, self).__init__() + + self.backbone = ResNet50_vd() + self.pretrained = pretrained + + self.head = MODNetHead( + hr_channels=hr_channels, backbone_channels=self.backbone.feat_channels) + self.blurer = GaussianBlurLayer(1, 3) + self.transforms = P.Compose([P.LoadImages(), P.ResizeByShort(), P.ResizeToIntMult(), P.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'modnet-resnet50_vd.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def preprocess(self, img: Union[str, np.ndarray] , transforms: Callable, trimap: Union[str, np.ndarray] = None): + data = {} + data['img'] = img + if trimap is not None: + data['trimap'] = trimap + data['gt_fields'] = ['trimap'] + data['trans_info'] = [] + data = self.transforms(data) + data['img'] = paddle.to_tensor(data['img']) + data['img'] = data['img'].unsqueeze(0) + if trimap is not None: + data['trimap'] = paddle.to_tensor(data['trimap']) + data['trimap'] = data['trimap'].unsqueeze((0, 1)) + + return data + + def forward(self, inputs: dict): + x = inputs['img'] + feat_list = self.backbone(x) + y = self.head(inputs=inputs, feat_list=feat_list) + return y + + def predict(self, image_list: list, trimap_list: list = None, visualization: bool =False, save_path: str = "modnet_resnet50vd_matting_output"): + self.eval() + result= [] + with paddle.no_grad(): + for i, im_path in enumerate(image_list): + trimap = trimap_list[i] if trimap_list is not None else None + data = self.preprocess(img=im_path, transforms=self.transforms, trimap=trimap) + alpha_pred = self.forward(data) + alpha_pred = P.reverse_transform(alpha_pred, data['trans_info']) + alpha_pred = (alpha_pred.numpy()).squeeze() + alpha_pred = (alpha_pred * 255).astype('uint8') + alpha_pred = P.save_alpha_pred(alpha_pred, trimap) + result.append(alpha_pred) + if visualization: + if not os.path.exists(save_path): + os.makedirs(save_path) + img_name = str(time.time()) + '.png' + image_save_path = os.path.join(save_path, img_name) + cv2.imwrite(image_save_path, alpha_pred) + + return result + + @serving + def serving_method(self, images: list, trimaps:list = None, **kwargs): + """ + Run as a service. + """ + images_decode = [P.base64_to_cv2(image) for image in images] + if trimaps is not None: + trimap_decoder = [cv2.cvtColor(P.base64_to_cv2(trimap), cv2.COLOR_BGR2GRAY) for trimap in trimaps] + else: + trimap_decoder = None + + outputs = self.predict(image_list=images_decode, trimap_list= trimap_decoder, **kwargs) + serving_data = [P.cv2_to_base64(outputs[i]) for i in range(len(outputs))] + results = {'data': serving_data} + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + if args.trimap_path is not None: + trimap_list = [args.trimap_path] + else: + trimap_list = None + + results = self.predict(image_list=[args.input_path], trimap_list=trimap_list, save_path=args.output_dir, visualization=args.visualization) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + + self.arg_config_group.add_argument( + '--output_dir', type=str, default="modnet_resnet50vd_matting_output", help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=bool, default=True, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--trimap_path', type=str, default=None, help="path to trimap.") + + + +class MODNetHead(nn.Layer): + """ + Segmentation head. + """ + def __init__(self, hr_channels: int, backbone_channels: int): + super().__init__() + + self.lr_branch = LRBranch(backbone_channels) + self.hr_branch = HRBranch(hr_channels, backbone_channels) + self.f_branch = FusionBranch(hr_channels, backbone_channels) + + def forward(self, inputs: paddle.Tensor, feat_list: list) -> paddle.Tensor: + pred_semantic, lr8x, [enc2x, enc4x] = self.lr_branch(feat_list) + pred_detail, hr2x = self.hr_branch(inputs['img'], enc2x, enc4x, lr8x) + pred_matte = self.f_branch(inputs['img'], lr8x, hr2x) + return pred_matte + + + +class FusionBranch(nn.Layer): + def __init__(self, hr_channels: int, enc_channels: int): + super().__init__() + self.conv_lr4x = Conv2dIBNormRelu( + enc_channels[2], hr_channels, 5, stride=1, padding=2) + + self.conv_f2x = Conv2dIBNormRelu( + 2 * hr_channels, hr_channels, 3, stride=1, padding=1) + self.conv_f = nn.Sequential( + Conv2dIBNormRelu( + hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1), + Conv2dIBNormRelu( + int(hr_channels / 2), + 1, + 1, + stride=1, + padding=0, + with_ibn=False, + with_relu=False)) + + def forward(self, img: paddle.Tensor, lr8x: paddle.Tensor, hr2x: paddle.Tensor) -> paddle.Tensor: + lr4x = F.interpolate( + lr8x, scale_factor=2, mode='bilinear', align_corners=False) + lr4x = self.conv_lr4x(lr4x) + lr2x = F.interpolate( + lr4x, scale_factor=2, mode='bilinear', align_corners=False) + + f2x = self.conv_f2x(paddle.concat((lr2x, hr2x), axis=1)) + f = F.interpolate( + f2x, scale_factor=2, mode='bilinear', align_corners=False) + f = self.conv_f(paddle.concat((f, img), axis=1)) + pred_matte = F.sigmoid(f) + + return pred_matte + + +class HRBranch(nn.Layer): + """ + High Resolution Branch of MODNet + """ + + def __init__(self, hr_channels: int, enc_channels:int): + super().__init__() + + self.tohr_enc2x = Conv2dIBNormRelu( + enc_channels[0], hr_channels, 1, stride=1, padding=0) + self.conv_enc2x = Conv2dIBNormRelu( + hr_channels + 3, hr_channels, 3, stride=2, padding=1) + + self.tohr_enc4x = Conv2dIBNormRelu( + enc_channels[1], hr_channels, 1, stride=1, padding=0) + self.conv_enc4x = Conv2dIBNormRelu( + 2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1) + + self.conv_hr4x = nn.Sequential( + Conv2dIBNormRelu( + 2 * hr_channels + enc_channels[2] + 3, + 2 * hr_channels, + 3, + stride=1, + 
padding=1), + Conv2dIBNormRelu( + 2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu( + 2 * hr_channels, hr_channels, 3, stride=1, padding=1)) + + self.conv_hr2x = nn.Sequential( + Conv2dIBNormRelu( + 2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu( + 2 * hr_channels, hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1)) + + self.conv_hr = nn.Sequential( + Conv2dIBNormRelu( + hr_channels + 3, hr_channels, 3, stride=1, padding=1), + Conv2dIBNormRelu( + hr_channels, + 1, + 1, + stride=1, + padding=0, + with_ibn=False, + with_relu=False)) + + def forward(self, img: paddle.Tensor, enc2x: paddle.Tensor, enc4x: paddle.Tensor, lr8x: paddle.Tensor) -> paddle.Tensor: + img2x = F.interpolate( + img, scale_factor=1 / 2, mode='bilinear', align_corners=False) + img4x = F.interpolate( + img, scale_factor=1 / 4, mode='bilinear', align_corners=False) + + enc2x = self.tohr_enc2x(enc2x) + hr4x = self.conv_enc2x(paddle.concat((img2x, enc2x), axis=1)) + + enc4x = self.tohr_enc4x(enc4x) + hr4x = self.conv_enc4x(paddle.concat((hr4x, enc4x), axis=1)) + + lr4x = F.interpolate( + lr8x, scale_factor=2, mode='bilinear', align_corners=False) + hr4x = self.conv_hr4x(paddle.concat((hr4x, lr4x, img4x), axis=1)) + + hr2x = F.interpolate( + hr4x, scale_factor=2, mode='bilinear', align_corners=False) + hr2x = self.conv_hr2x(paddle.concat((hr2x, enc2x), axis=1)) + pred_detail = None + return pred_detail, hr2x + + +class LRBranch(nn.Layer): + """ + Low Resolution Branch of MODNet + """ + def __init__(self, backbone_channels: int): + super().__init__() + self.se_block = SEBlock(backbone_channels[4], reduction=4) + self.conv_lr16x = Conv2dIBNormRelu( + backbone_channels[4], backbone_channels[3], 5, stride=1, padding=2) + self.conv_lr8x = Conv2dIBNormRelu( + backbone_channels[3], backbone_channels[2], 5, stride=1, padding=2) + self.conv_lr = Conv2dIBNormRelu( + backbone_channels[2], + 1, + 3, + stride=2, + padding=1, + with_ibn=False, + with_relu=False) + + def forward(self, feat_list: list) -> List[paddle.Tensor]: + enc2x, enc4x, enc32x = feat_list[0], feat_list[1], feat_list[4] + + enc32x = self.se_block(enc32x) + lr16x = F.interpolate( + enc32x, scale_factor=2, mode='bilinear', align_corners=False) + lr16x = self.conv_lr16x(lr16x) + lr8x = F.interpolate( + lr16x, scale_factor=2, mode='bilinear', align_corners=False) + lr8x = self.conv_lr8x(lr8x) + + pred_semantic = None + if self.training: + lr = self.conv_lr(lr8x) + pred_semantic = F.sigmoid(lr) + + return pred_semantic, lr8x, [enc2x, enc4x] + + +class IBNorm(nn.Layer): + """ + Combine Instance Norm and Batch Norm into One Layer + """ + + def __init__(self, in_channels: int): + super().__init__() + self.bnorm_channels = in_channels // 2 + self.inorm_channels = in_channels - self.bnorm_channels + + self.bnorm = nn.BatchNorm2D(self.bnorm_channels) + self.inorm = nn.InstanceNorm2D(self.inorm_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + bn_x = self.bnorm(x[:, :self.bnorm_channels, :, :]) + in_x = self.inorm(x[:, self.bnorm_channels:, :, :]) + + return paddle.concat((bn_x, in_x), 1) + + +class Conv2dIBNormRelu(nn.Layer): + """ + Convolution + IBNorm + Relu + """ + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + dilation:int = 1, + groups: int = 1, + bias_attr: paddle.ParamAttr = None, + with_ibn: bool = 
True, + with_relu: bool = True): + + super().__init__() + + layers = [ + nn.Conv2D( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias_attr=bias_attr) + ] + + if with_ibn: + layers.append(IBNorm(out_channels)) + + if with_relu: + layers.append(nn.ReLU()) + + self.layers = nn.Sequential(*layers) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + return self.layers(x) + + +class SEBlock(nn.Layer): + """ + SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf + """ + + def __init__(self, num_channels: int, reduction:int = 1): + super().__init__() + self.pool = nn.AdaptiveAvgPool2D(1) + self.conv = nn.Sequential( + nn.Conv2D( + num_channels, + int(num_channels // reduction), + 1, + bias_attr=False), nn.ReLU(), + nn.Conv2D( + int(num_channels // reduction), + num_channels, + 1, + bias_attr=False), nn.Sigmoid()) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + w = self.pool(x) + w = self.conv(w) + return w * x + + +class GaussianBlurLayer(nn.Layer): + """ Add Gaussian Blur to a 4D tensors + This layer takes a 4D tensor of {N, C, H, W} as input. + The Gaussian blur will be performed in given channel number (C) splitly. + """ + + def __init__(self, channels: int, kernel_size: int): + """ + Args: + channels (int): Channel for input tensor + kernel_size (int): Size of the kernel used in blurring + """ + + super(GaussianBlurLayer, self).__init__() + self.channels = channels + self.kernel_size = kernel_size + assert self.kernel_size % 2 != 0 + + self.op = nn.Sequential( + nn.Pad2D(int(self.kernel_size / 2), mode='reflect'), + nn.Conv2D( + channels, + channels, + self.kernel_size, + stride=1, + padding=0, + bias_attr=False, + groups=channels)) + + self._init_kernel() + self.op[1].weight.stop_gradient = True + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + """ + Args: + x (paddle.Tensor): input 4D tensor + Returns: + paddle.Tensor: Blurred version of the input + """ + + if not len(list(x.shape)) == 4: + print('\'GaussianBlurLayer\' requires a 4D tensor as input\n') + exit() + elif not x.shape[1] == self.channels: + print('In \'GaussianBlurLayer\', the required channel ({0}) is' + 'not the same as input ({1})\n'.format( + self.channels, x.shape[1])) + exit() + + return self.op(x) + + def _init_kernel(self): + sigma = 0.3 * ((self.kernel_size - 1) * 0.5 - 1) + 0.8 + + n = np.zeros((self.kernel_size, self.kernel_size)) + i = int(self.kernel_size / 2) + n[i, i] = 1 + kernel = scipy.ndimage.gaussian_filter(n, sigma) + kernel = kernel.astype('float32') + kernel = kernel[np.newaxis, np.newaxis, :, :] + paddle.assign(kernel, self.op[1].weight) \ No newline at end of file diff --git a/modules/image/matting/modnet_resnet50vd_matting/processor.py b/modules/image/matting/modnet_resnet50vd_matting/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..3ae79593f0d3dab19520c3c666ae4a06b81960dd --- /dev/null +++ b/modules/image/matting/modnet_resnet50vd_matting/processor.py @@ -0,0 +1,207 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +import base64 +from typing import Callable, Union, List, Tuple + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +from paddleseg.transforms import functional +from PIL import Image + + +class Compose: + """ + Do transformation on input data with corresponding pre-processing and augmentation operations. + The shape of input data to all operations is [height, width, channels]. + """ + + def __init__(self, transforms: Callable, to_rgb: bool = True): + if not isinstance(transforms, list): + raise TypeError('The transforms must be a list!') + self.transforms = transforms + self.to_rgb = to_rgb + + def __call__(self, data: dict) -> dict: + + if 'trans_info' not in data: + data['trans_info'] = [] + for op in self.transforms: + data = op(data) + if data is None: + return None + + data['img'] = np.transpose(data['img'], (2, 0, 1)) + for key in data.get('gt_fields', []): + if len(data[key].shape) == 2: + continue + data[key] = np.transpose(data[key], (2, 0, 1)) + + return data + + +class LoadImages: + """ + Read images from image path. + + Args: + to_rgb (bool, optional): If converting image to RGB color space. Default: True. + """ + def __init__(self, to_rgb: bool = True): + self.to_rgb = to_rgb + + def __call__(self, data: dict) -> dict: + + if isinstance(data['img'], str): + data['img'] = cv2.imread(data['img']) + + for key in data.get('gt_fields', []): + if isinstance(data[key], str): + data[key] = cv2.imread(data[key], cv2.IMREAD_UNCHANGED) + # if alpha and trimap has 3 channels, extract one. + if key in ['alpha', 'trimap']: + if len(data[key].shape) > 2: + data[key] = data[key][:, :, 0] + + if self.to_rgb: + data['img'] = cv2.cvtColor(data['img'], cv2.COLOR_BGR2RGB) + for key in data.get('gt_fields', []): + if len(data[key].shape) == 2: + continue + data[key] = cv2.cvtColor(data[key], cv2.COLOR_BGR2RGB) + + return data + + +class ResizeByShort: + """ + Resize the short side of an image to given size, and then scale the other side proportionally. + + Args: + short_size (int): The target size of short side. + """ + + def __init__(self, short_size: int =512): + self.short_size = short_size + + def __call__(self, data: dict) -> dict: + + data['trans_info'].append(('resize', data['img'].shape[0:2])) + data['img'] = functional.resize_short(data['img'], self.short_size) + for key in data.get('gt_fields', []): + data[key] = functional.resize_short(data[key], self.short_size) + return data + + +class ResizeToIntMult: + """ + Resize to some int muitple, d.g. 32. + """ + + def __init__(self, mult_int: int = 32): + self.mult_int = mult_int + + def __call__(self, data: dict) -> dict: + data['trans_info'].append(('resize', data['img'].shape[0:2])) + + h, w = data['img'].shape[0:2] + rw = w - w % 32 + rh = h - h % 32 + data['img'] = functional.resize(data['img'], (rw, rh)) + for key in data.get('gt_fields', []): + data[key] = functional.resize(data[key], (rw, rh)) + + return data + + +class Normalize: + """ + Normalize an image. + + Args: + mean (list, optional): The mean value of a data set. Default: [0.5, 0.5, 0.5]. 
+ std (list, optional): The standard deviation of a data set. Default: [0.5, 0.5, 0.5]. + + Raises: + ValueError: When mean/std is not list or any value in std is 0. + """ + + def __init__(self, mean: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5), std: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5)): + self.mean = mean + self.std = std + if not (isinstance(self.mean, (list, tuple)) + and isinstance(self.std, (list, tuple))): + raise ValueError( + "{}: input type is invalid. It should be list or tuple".format( + self)) + from functools import reduce + if reduce(lambda x, y: x * y, self.std) == 0: + raise ValueError('{}: std is invalid!'.format(self)) + + def __call__(self, data: dict) -> dict: + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + data['img'] = functional.normalize(data['img'], mean, std) + if 'fg' in data.get('gt_fields', []): + data['fg'] = functional.normalize(data['fg'], mean, std) + if 'bg' in data.get('gt_fields', []): + data['bg'] = functional.normalize(data['bg'], mean, std) + + return data + + +def reverse_transform(alpha: paddle.Tensor, trans_info: List[str]): + """recover pred to origin shape""" + for item in trans_info[::-1]: + if item[0] == 'resize': + h, w = item[1][0], item[1][1] + alpha = F.interpolate(alpha, [h, w], mode='bilinear') + elif item[0] == 'padding': + h, w = item[1][0], item[1][1] + alpha = alpha[:, :, 0:h, 0:w] + else: + raise Exception("Unexpected info '{}' in im_info".format(item[0])) + return alpha + +def save_alpha_pred(alpha: np.ndarray, trimap: np.ndarray = None): + """ + The value of alpha is range [0, 1], shape should be [h,w] + """ + if isinstance(trimap, str): + trimap = cv2.imread(trimap, 0) + alpha[trimap == 0] = 0 + alpha[trimap == 255] = 255 + alpha = (alpha).astype('uint8') + return alpha + + +def cv2_to_base64(image: np.ndarray): + """ + Convert data from BGR to base64 format. + """ + data = cv2.imencode('.png', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str: str): + """ + Convert data from base64 to BGR format. + """ + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data \ No newline at end of file diff --git a/modules/image/matting/modnet_resnet50vd_matting/resnet.py b/modules/image/matting/modnet_resnet50vd_matting/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..19abe41c8e47ca297941eb44e7ffc49e63b996da --- /dev/null +++ b/modules/image/matting/modnet_resnet50vd_matting/resnet.py @@ -0,0 +1,332 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
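+
+# Usage sketch (illustrative only, based on the defaults below): ResNet50_vd
+# serves as the MODNet encoder and returns five feature maps per forward pass:
+#
+#     backbone = ResNet50_vd()
+#     feats = backbone(paddle.rand([1, 3, 512, 512]))
+#     # len(feats) == 5; feats[i].shape[1] == backbone.feat_channels[i],
+#     # i.e. channel counts [64, 256, 512, 1024, 2048], which MODNetHead consumes.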
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddleseg.models import layers +from paddleseg.utils import utils + +__all__ = ["ResNet50_vd"] + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + ): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = nn.AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = layers.SyncBatchNorm(out_channels) + self._act_op = layers.Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu') + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation) + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True) + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + + #################################################################### + # If given dilation rate > 1, using corresponding padding. + # The performance drops down without the follow padding. 
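+        # (ConvBNLayer is constructed with padding=0 whenever dilation != 1, so the
+        # dilated 3x3 conv below relies on this manual pad to keep the spatial size.)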
+ if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + ##################################################################### + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class BasicBlock(nn.Layer): + """Basic residual block""" + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu') + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + act=None) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first else True) + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.add(x=short, y=conv1) + y = F.relu(y) + + return y + + +class ResNet_vd(nn.Layer): + """ + The ResNet_vd implementation based on PaddlePaddle. + + The original article refers to Jingdong + Tong He, et, al. "Bag of Tricks for Image Classification with Convolutional Neural Networks" + (https://arxiv.org/pdf/1812.01187.pdf). + + """ + + def __init__(self, + input_channels: int = 3, + layers: int = 50, + output_stride: int = 32, + multi_grid: tuple = (1, 1, 1), + pretrained: str = None): + super(ResNet_vd, self).__init__() + + self.conv1_logit = None # for gscnn shape stream + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024 + ] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + # for channels of four returned stages + self.feat_channels = [c * 4 for c in num_filters + ] if layers >= 50 else num_filters + self.feat_channels = [64] + self.feat_channels + + dilation_dict = None + if output_stride == 8: + dilation_dict = {2: 2, 3: 4} + elif output_stride == 16: + dilation_dict = {3: 2} + + self.conv1_1 = ConvBNLayer( + in_channels=input_channels, + out_channels=32, + kernel_size=3, + stride=2, + act='relu') + self.conv1_2 = ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu') + self.conv1_3 = ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu') + self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) + + # self.block_list = [] + self.stage_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + 
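+                    # (conv_name above is informational only; each residual block is
+                    # registered below as sublayer 'bb_<stage>_<block>' and collected
+                    # into self.stage_list, which forward() walks to emit one feature
+                    # map per stage.)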
############################################################################### + # Add dilation rate for some segmentation tasks, if dilation_dict is not None. + dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + + # Actually block here is 'stage', and i is 'block' in 'stage' + # At the stage 4, expand the the dilation_rate if given multi_grid + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + ############################################################################### + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + dilation=dilation_rate)) + + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + else: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block], + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + if_first=block == i == 0)) + block_list.append(basic_block) + shortcut = True + self.stage_list.append(block_list) + + self.pretrained = pretrained + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + feat_list = [] + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + feat_list.append(y) + + y = self.pool2d_max(y) + + # A feature list saves the output feature map of each stage. + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + + return feat_list + + +def ResNet50_vd(**args): + model = ResNet_vd(layers=50, **args) + return model diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md index 933beb1c6ff2866a3f5d5579f177fb1a597ca157..abf2bbeb033a6bd8b5a575775f4d668a4b454d3f 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md @@ -50,7 +50,7 @@ $ hub run faster_rcnn_resnet50_coco2017 --input_path "/PATH/TO/IMAGE" ``` - 通过命令行方式实现目标检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README_en.md b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..62ddf3ad324d7c31a4fecba3ae1783f2147f863c --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README_en.md @@ -0,0 +1,173 @@ +# faster_rcnn_resnet50_coco2017 + +|Module Name|faster_rcnn_resnet50_coco2017| +| :--- | :---: | +|Category|object detection| +|Network|faster_rcnn| +|Dataset|COCO2017| +|Fine-tuning supported or not|No| +|Module Size|131MB| +|Latest update date|2021-03-15| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +


+ + +- ### Module Introduction + + - Faster_RCNN is a two-stage detector, it consists of feature extraction, proposal, classification and refinement processes. This module is trained on COCO2017 dataset, and can be used for object detection. + + + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install faster_rcnn_resnet50_coco2017 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run faster_rcnn_resnet50_coco2017 --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + object_detector = hub.Module(name="faster_rcnn_resnet50_coco2017") + result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True) + ``` + + - Detection API, detect positions of all objects in image + + - **Parameters** + + - paths (list[str]): image path; + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - score\_thresh (float): confidence threshold;
+ - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - label (str): label + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str, optional): output path for saving results + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of object detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m faster_rcnn_resnet50_coco2017 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/faster_rcnn_resnet50_coco2017" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.1.0 + + First release + +* 1.1.1 + + Fix the problem of reading numpy + - ```shell + $ hub install faster_rcnn_resnet50_coco2017==1.1.1 + ``` diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md index 92d5e7bc8e762030f08624eca3a6116f1f79fd1f..9d003b8009e0b69a891bf17b1c7be1a2c3fbb04c 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md @@ -50,7 +50,7 @@ ``` - 通过命令行方式实现目标检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..d90beb6499a3c47d69fee654f0fac04524837af2 --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md @@ -0,0 +1,171 @@ +# faster_rcnn_resnet50_fpn_coco2017 + +|Module Name|faster_rcnn_resnet50_fpn_coco2017| +| :--- | :---: | +|Category|object 
detection| +|Network|faster_rcnn| +|Dataset|COCO2017| +|Fine-tuning supported or not|No| +|Module Size|161MB| +|Latest update date|2021-03-15| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +


+ +- ### Module Introduction + + - Faster_RCNN is a two-stage detector, it consists of feature extraction, proposal, classification and refinement processes. This module is trained on COCO2017 dataset, and can be used for object detection. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install faster_rcnn_resnet50_fpn_coco2017 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run faster_rcnn_resnet50_fpn_coco2017 --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + object_detector = hub.Module(name="faster_rcnn_resnet50_fpn_coco2017") + result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True) + ``` + + - Detection API, detect positions of all objects in image + + - **Parameters** + + - paths (list[str]): image path; + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - score\_thresh (float): confidence threshold;
+ - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - label (str): label + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str, optional): output path for saving results + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of object detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m faster_rcnn_resnet50_fpn_coco2017 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/faster_rcnn_resnet50_fpn_coco2017" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Fix the problem of reading numpy + - ```shell + $ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.1 + ``` diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/README_en.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..b129280bfc226dce9fad32035fe6e5ff2b5a460c --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_venus/README_en.md @@ -0,0 +1,105 @@ +# faster_rcnn_resnet50_fpn_venus + +|Module Name|faster_rcnn_resnet50_fpn_venus| +| :--- | :---: | +|Category|object detection| +|Network|faster_rcnn| +|Dataset|Baidu Detection Dataset| +|Fine-tuning supported or not|Yes| +|Module Size|317MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Module Introduction + + - Faster_RCNN is a two-stage detector, it consists of feature extraction, proposal, classification and refinement processes. This module is trained on Baidu Detection Dataset, which contains 170w pictures and 1000w+ boxes, and improve the accuracy on 8 test datasets with average 2.06%. 
Besides, this module supports to fine-tune model, and can achieve faster convergence and better performance. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install faster_rcnn_resnet50_fpn_venus + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、API + + - ```python + def context(num_classes=81, + trainable=True, + pretrained=True, + phase='train') + ``` + + - Extract features, and do transfer learning + + - **Parameters** + - num\_classes (int): number of classes;
+ - trainable (bool): whether the parameters are trainable or not;
+ - pretrained (bool): whether load pretrained model or not + - get\_prediction (bool): optional, 'train' or 'predict','train' is used for training,'predict' used for prediction. + + - **Return** + - inputs (dict): inputs, a dict: + if phase is 'train', keys are: + - image (Variable): image variable + - im\_size (Variable): image size + - im\_info (Variable): image information + - gt\_class (Variable): box class + - gt\_box (Variable): box coordination + - is\_crowd (Variable): if multiple objects in box + if phase 为 'predict',keys are: + - image (Variable): image variable + - im\_size (Variable): image size + - im\_info (Variable): image information + - outputs (dict): model output + if phase is 'train', keys are: + - head_features (Variable): features extracted + - rpn\_cls\_loss (Variable): classfication loss in box + - rpn\_reg\_loss (Variable): regression loss in box + - generate\_proposal\_labels (Variable): proposal labels + if phase 为 'predict',keys are: + - head_features (Variable): features extracted + - rois (Variable): roi + - bbox\_out (Variable): prediction results + - program for transfer learning + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + + + +## IV.Release Note + +* 1.0.0 + + First release + - ```shell + $ hub install faster_rcnn_resnet50_fpn_venus==1.0.0 + ``` diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md index 774b7073664a8a7f6e224fb6cae820ae7fedfa2f..7c3749cec26d7f3a1832ac1c20c39b44b9b9c927 100644 --- a/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md @@ -49,7 +49,7 @@ $ hub run ssd_mobilenet_v1_pascal --input_path "/PATH/TO/IMAGE" ``` - 通过命令行方式实现目标检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/README_en.md b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..8956a7ed48fa0dd4051ca4f4a01ef9b723c7cb54 --- /dev/null +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README_en.md @@ -0,0 +1,172 @@ +# ssd_mobilenet_v1_pascal + +|Module Name|ssd_mobilenet_v1_pascal| +| :--- | :---: | +|Category|object detection| +|Network|SSD| +|Dataset|PASCAL VOC| +|Fine-tuning supported or not|No| +|Module Size|24MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +


+ +- ### Module Introduction + + - Single Shot MultiBox Detector (SSD) is a one-stage detector. Different from two-stage detector, SSD frames object detection as a re- gression problem to spatially separated bounding boxes and associated class probabilities. This module is based on MobileNet-v1, trained on Pascal dataset, and can be used for object detection. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install ssd_mobilenet_v1_pascal + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run ssd_mobilenet_v1_pascal --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + object_detector = hub.Module(name="ssd_mobilenet_v1_pascal") + result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True, + ) + ``` + + - Detection API, detect positions of all objects in image + + - **Parameters** + + - paths (list[str]): image path; + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - score\_thresh (float): 识别置信度的阈值;
+ - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + + - **Return** + + - res (list\[dict\]): results + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - label (str): label + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str, optional): output path for saving results + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of object detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m ssd_mobilenet_v1_pascal + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ssd_mobilenet_v1_pascal" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.1.2 + + Fix the problem of reading numpy + + - ```shell + $ hub install ssd_mobilenet_v1_pascal==1.1.2 + ``` diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md b/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md index d6a97f0e4583b983b76b45c6f85d04142c6f563d..1f46189822cd926af996cc6d1e309224e9db6ad0 100644 --- a/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md +++ b/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md @@ -49,7 +49,7 @@ $ hub run ssd_vgg16_512_coco2017 --input_path "/PATH/TO/IMAGE" ``` - 通过命令行方式实现目标检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/README_en.md b/modules/image/object_detection/ssd_vgg16_512_coco2017/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..0d862abcf4c108719f202b72638b71cea8cc8727 --- /dev/null +++ b/modules/image/object_detection/ssd_vgg16_512_coco2017/README_en.md @@ -0,0 +1,171 @@ +# ssd_vgg16_512_coco2017 + +|Module Name|ssd_vgg16_512_coco2017| +| :--- | :---: | +|Category|object detection| +|Network|SSD| +|Dataset|COCO2017| 
+|Fine-tuning supported or not|No| +|Module Size|139MB| +|Latest update date|2021-03-15| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +


+ +- ### Module Introduction + + - Single Shot MultiBox Detector (SSD) is a one-stage detector. Different from two-stage detector, SSD frames object detection as a re- gression problem to spatially separated bounding boxes and associated class probabilities. This module is based on VGG16, trained on COCO2017 dataset, and can be used for object detection. + + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install ssd_vgg16_512_coco2017 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run ssd_vgg16_512_coco2017 --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + object_detector = hub.Module(name="ssd_vgg16_512_coco2017") + result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True) + ``` + + - Detection API, detect positions of all objects in image + + - **Parameters** + + - paths (list[str]): image path; + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - score\_thresh (float): confidence threshold;
+ - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - label (str): label + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str, optional): output path for saving results + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of object detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m ssd_vgg16_512_coco2017 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ssd_vgg16_512_coco2017" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.2 + + Fix the problem of reading numpy + + - ```shell + $ hub install ssd_vgg16_512_coco2017==1.0.2 + ``` diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/README.md b/modules/image/object_detection/yolov3_darknet53_coco2017/README.md index 8d31ccce57f55829f9c71d192ea6e17db46833d7..15482251798e7ea2969ddb3d9cacd8b801cad146 100644 --- a/modules/image/object_detection/yolov3_darknet53_coco2017/README.md +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/README.md @@ -49,7 +49,7 @@ $ hub run yolov3_darknet53_coco2017 --input_path "/PATH/TO/IMAGE" ``` - 通过命令行方式实现目标检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/README_en.md b/modules/image/object_detection/yolov3_darknet53_coco2017/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..b6757ff38c2ed434649c23b068fbcac68fb8d1a1 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/README_en.md @@ -0,0 +1,169 @@ +# yolov3_darknet53_coco2017 + +|Module Name|yolov3_darknet53_coco2017| +| :--- | :---: | +|Category|object detection| +|Network|YOLOv3| 
+|Dataset|COCO2017| +|Fine-tuning supported or not|No| +|Module Size|239MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +


+ +- ### Module Introduction + + - YOLOv3 is a one-stage detector proposed by Joseph Redmon and Ali Farhadi, which can reach comparable accuracy but twice as fast as traditional methods. This module is based on YOLOv3, trained on COCO2017, and can be used for object detection. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install yolov3_darknet53_coco2017 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run yolov3_darknet53_coco2017 --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + object_detector = hub.Module(name="yolov3_darknet53_coco2017") + result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True) + ``` + + - Detection API, detect positions of all objects in image + + - **Parameters** + + - paths (list[str]): image path; + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - score\_thresh (float): confidence threshold;
+ - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - label (str): label + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str, optional): output path for saving results + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of object detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m yolov3_darknet53_coco2017 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/yolov3_darknet53_coco2017" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.1.1 + Fix the problem of reading numpy + + - ```shell + $ hub install yolov3_darknet53_coco2017==1.1.1 + ``` diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md b/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md index eebcdeaf7546ba3edd01339f9bb4662153e1d125..0abc368b657332888d6f2ec108709baf7c830e2d 100644 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md @@ -49,7 +49,7 @@ $ hub run yolov3_darknet53_pedestrian --input_path "/PATH/TO/IMAGE" ``` - 通过命令行方式实现行人检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/README_en.md b/modules/image/object_detection/yolov3_darknet53_pedestrian/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..f7be6546a84ce542dfd5377d3502a0eda1eb238f --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/README_en.md @@ -0,0 +1,171 @@ +# yolov3_darknet53_pedestrian + +|Module Name|yolov3_darknet53_pedestrian| +| :--- | :---: | +|Category|object 
detection| +|Network|YOLOv3| +|Dataset|Baidu Pedestrian Dataset| +|Fine-tuning supported or not|No| +|Module Size|238MB| +|Latest update date|2021-03-15| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +


+ +- ### Module Introduction + + - YOLOv3 is a one-stage detector proposed by Joseph Redmon and Ali Farhadi, which can reach comparable accuracy but twice as fast as traditional methods. This module is based on YOLOv3, trained on Baidu Pedestrian Dataset, and can be used for pedestrian detection. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install yolov3_darknet53_pedestrian + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run yolov3_darknet53_pedestrian --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + pedestrian_detector = hub.Module(name="yolov3_darknet53_pedestrian") + result = pedestrian_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = pedestrian_detector.object_detection(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='yolov3_pedestrian_detect_output', + score_thresh=0.2, + visualization=True) + ``` + + - Detection API, detect positions of all pedestrian in image + + - **Parameters** + + - paths (list[str]): image path; + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - score\_thresh (float): confidence threshold;
+ - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + + - **Return** + + - res (list\[dict\]): results + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - label (str): label + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str, optional): output path for saving results + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of object detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m yolov3_darknet53_pedestrian + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/yolov3_darknet53_pedestrian" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.2 + + Fix the problem of reading numpy + + - ```shell + $ hub install yolov3_darknet53_pedestrian==1.0.2 + ``` diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/README.md b/modules/image/object_detection/yolov3_darknet53_vehicles/README.md index c1b791604c22947b0dacf78746f87de2ceb7a244..72fee4b31c9718d4c318bfcf910a43fa2e1a3959 100644 --- a/modules/image/object_detection/yolov3_darknet53_vehicles/README.md +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/README.md @@ -49,7 +49,7 @@ $ hub run yolov3_darknet53_vehicles --input_path "/PATH/TO/IMAGE" ``` - 通过命令行方式实现车辆检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/README_en.md b/modules/image/object_detection/yolov3_darknet53_vehicles/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..b0a5aa992d25737da2e35d1033d17b49e32886a9 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/README_en.md @@ -0,0 +1,171 @@ +# yolov3_darknet53_vehicles + +|Module Name|yolov3_darknet53_vehicles| +| :--- | :---: | +|Category|object detection| 
+|Network|YOLOv3| +|Dataset|Baidu Vehicle Dataset| +|Fine-tuning supported or not|No| +|Module Size|238MB| +|Latest update date|2021-03-15| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+

+ +- ### Module Introduction + + - YOLOv3 is a one-stage detector proposed by Joseph Redmon and Ali Farhadi, which can reach comparable accuracy but twice as fast as traditional methods. This module is based on YOLOv3, trained on Baidu Vehicle Dataset, and can be used for vehicle detection. + + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install yolov3_darknet53_vehicles + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run yolov3_darknet53_vehicles --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + vehicles_detector = hub.Module(name="yolov3_darknet53_vehicles") + result = vehicles_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = vehicles_detector.object_detection((paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='yolov3_vehicles_detect_output', + score_thresh=0.2, + visualization=True) + ``` + + - Detection API, detect positions of all vehicles in image + + - **Parameters** + + - paths (list[str]): image path; + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - score\_thresh (float): confidence threshold;
+ - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - label (str): label + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str, optional): output path for saving results + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of object detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m yolov3_darknet53_vehicles + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/yolov3_darknet53_vehicles" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.2 + + Fix the problem of reading numpy + + - ```shell + $ hub install yolov3_darknet53_vehicles==1.0.2 + ``` diff --git a/modules/image/object_detection/yolov3_darknet53_venus/README_en.md b/modules/image/object_detection/yolov3_darknet53_venus/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..76b9fda008a89392914c26147d232a816be9a46d --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_venus/README_en.md @@ -0,0 +1,120 @@ +# yolov3_darknet53_venus + +|Module Name|yolov3_darknet53_venus| +| :--- | :---: | +|Category|object detection| +|Network|YOLOv3| +|Dataset|Baidu Detection Dataset| +|Fine-tuning supported or not|Yes| +|Module Size|501MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Module Introduction + + - YOLOv3 is a one-stage detector proposed by Joseph Redmon and Ali Farhadi, which can reach comparable accuracy but twice as fast as traditional methods. This module is based on YOLOv3, trained on Baidu Vehicle Dataset which consists of 170w pictures and 1000w+ boxes, improve the accuracy on 8 test datasets for average 5.36%, and can be used for vehicle detection. 
+ + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install yolov3_darknet53_venus + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、API + + - ```python + def context(trainable=True, + pretrained=True, + get_prediction=False) + ``` + + - Extract features, and do transfer learning + + - **Parameters** + + - trainable(bool): whether parameters trainable or not + - pretrained (bool): whether load pretrained model or not + - get\_prediction (bool): whether perform prediction + + - **Return** + - inputs (dict): inputs, a dict, include two keys: "image" and "im\_size" + - image (Variable): image variable + - im\_size (Variable): image size + - outputs (dict): model output + - program for transfer learning + + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + score_thresh=0.5, + visualization=True, + output_dir='detection_result') + ``` + + - Detection API, detect positions of all objects in image + + - **Parameters** + + - paths (list[str]): image path; + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - score\_thresh (float): confidence threshold;
+ - visualization (bool): Whether to save the results as picture files; + - output_dir (str): save path of images; + + - **Return** + + - res (list\[dict\]): classication results, each element in the list is dict, key is the label name, and value is the corresponding probability + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - label (str): label + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str, optional): output path for saving results + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + + + +## IV.Release Note + +* 1.0.0 + + First release + - ```shell + $ hub install yolov3_darknet53_venus==1.0.0 + ``` diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md index bd30a746b0121f1d5d5533f457387c046cb203d0..456de66bac3b6b2c59466eecb53ce86a09fa783b 100644 --- a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md @@ -50,7 +50,7 @@ $ hub run yolov3_mobilenet_v1_coco2017 --input_path "/PATH/TO/IMAGE" ``` - 通过命令行方式实现目标检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README_en.md b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..f80472bfa12c931152e617a965d8023b079c02da --- /dev/null +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README_en.md @@ -0,0 +1,171 @@ +# yolov3_mobilenet_v1_coco2017 + +|Module Name|yolov3_mobilenet_v1_coco2017| +| :--- | :---: | +|Category|object detection| +|Network|YOLOv3| +|Dataset|COCO2017| +|Fine-tuning supported or not|No| +|Module Size|96MB| +|Latest update date|2021-03-15| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+

+ + +- ### Module Introduction + + - YOLOv3 is a one-stage detector proposed by Joseph Redmon and Ali Farhadi, which can reach comparable accuracy but twice as fast as traditional methods. This module is based on YOLOv3, trained on COCO2017, and can be used for object detection. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install yolov3_mobilenet_v1_coco2017 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run yolov3_mobilenet_v1_coco2017 --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + object_detector = hub.Module(name="yolov3_mobilenet_v1_coco2017") + result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True) + ``` + + - Detection API, detect positions of all objects in image + + - **Parameters** + + - paths (list[str]): image path; + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - score\_thresh (float): confidence threshold;
+ - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - label (str): label + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str, optional): output path for saving results + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of object detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m yolov3_mobilenet_v1_coco2017 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/yolov3_mobilenet_v1_coco2017" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.2 + + Fix the problem of reading numpy + + - ```shell + $ hub install yolov3_mobilenet_v1_coco2017==1.0.2 + ``` diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/README.md b/modules/image/object_detection/yolov3_resnet34_coco2017/README.md index 1a1bfceaf9ea5e087427d91e6aa531166e71d288..bb245f340434b3b057e11ef18c68ce1996002c41 100644 --- a/modules/image/object_detection/yolov3_resnet34_coco2017/README.md +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/README.md @@ -49,7 +49,7 @@ $ hub run yolov3_resnet34_coco2017 --input_path "/PATH/TO/IMAGE" ``` - 通过命令行方式实现目标检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/README_en.md b/modules/image/object_detection/yolov3_resnet34_coco2017/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..c10a2466fdf0bf3cc7884757fdad96d54cf398f0 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/README_en.md @@ -0,0 +1,170 @@ +# yolov3_resnet34_coco2017 + +|Module Name|yolov3_resnet34_coco2017| +| :--- | :---: | +|Category|object detection| 
+|Network|YOLOv3| +|Dataset|COCO2017| +|Fine-tuning supported or not|No| +|Module Size|164MB| +|Latest update date|2021-03-15| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+

+ +- ### Module Introduction + + - YOLOv3 is a one-stage detector proposed by Joseph Redmon and Ali Farhadi, which can reach comparable accuracy but twice as fast as traditional methods. This module is based on YOLOv3, trained on COCO2017, and can be used for object detection. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install yolov3_resnet34_coco2017 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run yolov3_resnet34_coco2017 --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + object_detector = hub.Module(name="yolov3_resnet34_coco2017") + result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True) + ``` + + - Detection API, detect positions of all objects in image + + - **Parameters** + + - paths (list[str]): image path; + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - score\_thresh (float): confidence threshold;
+ - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - label (str): label + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str, optional): output path for saving results + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of object detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m yolov3_resnet34_coco2017 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/yolov3_resnet34_coco2017" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.2 + + Fix the problem of reading numpy + + - ```shell + $ hub install yolov3_resnet34_coco2017==1.0.2 + ``` diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md index fa51bc34e4915bff2ccad018ebab6df85f5b16e0..0ad42e87a4b93e1764331057702670e39e4dc7ad 100644 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md @@ -49,7 +49,7 @@ $ hub run yolov3_resnet50_vd_coco2017 --input_path "/PATH/TO/IMAGE" ``` - 通过命令行方式实现目标检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README_en.md b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..7bb7b10aee81292114a8c01b9bd776de3a2d44f0 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README_en.md @@ -0,0 +1,170 @@ +# yolov3_resnet50_vd_coco2017 + +|Module Name|yolov3_resnet50_vd_coco2017| +| :--- | :---: | +|Category|object 
detection| +|Network|YOLOv3| +|Dataset|COCO2017| +|Fine-tuning supported or not|No| +|Module Size|178MB| +|Latest update date|2021-03-15| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+ +
+

+ +- ### Module Introduction + + - YOLOv3 is a one-stage detector proposed by Joseph Redmon and Ali Farhadi, which can reach comparable accuracy but twice as fast as traditional methods. This module is based on YOLOv3, trained on COCO2017, and can be used for object detection. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install yolov3_resnet50_vd_coco2017 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run yolov3_resnet50_vd_coco2017 --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + object_detector = hub.Module(name="yolov3_resnet50_vd_coco2017") + result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True) + ``` + + - Detection API, detect positions of all objects in image + + - **Parameters** + + - paths (list[str]): image path; + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - score\_thresh (float): confidence threshold;
+ - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - label (str): label + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str, optional): output path for saving results + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: output dir for saving model + - model\_filename: filename for saving model + - params\_filename: filename for saving parameters + - combined: whether save parameters into one file + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of object detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m yolov3_resnet50_vd_coco2017 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/yolov3_resnet50_vd_coco2017" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.2 + + Fix the problem of reading numpy + + - ```shell + $ hub install yolov3_resnet50_vd_coco2017==1.0.2 + ``` diff --git a/modules/image/semantic_segmentation/ExtremeC3_Portrait_Segmentation/README.md b/modules/image/semantic_segmentation/ExtremeC3_Portrait_Segmentation/README.md index 17d2979a19b9df5963b341e42347921e40c94c40..b413d0315d1f34e4f076ae2814c9d9ab62730544 100644 --- a/modules/image/semantic_segmentation/ExtremeC3_Portrait_Segmentation/README.md +++ b/modules/image/semantic_segmentation/ExtremeC3_Portrait_Segmentation/README.md @@ -44,7 +44,7 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 ```python import cv2 @@ -60,27 +60,27 @@ visualization=False) ``` - - ### 2、API - - ```python - def Segmentation( - images=None, - paths=None, - batch_size=1, - output_dir='output', - visualization=False): - ``` - - 人像分割 API - - - **参数** - * images (list[np.ndarray]) : 输入图像数据列表(BGR) - * paths (list[str]) : 输入图像路径列表 - * batch_size (int) : 数据批大小 - * output_dir (str) : 可视化图像输出目录 - * visualization (bool) : 是否可视化 - - - **返回** - * results (list[dict{"mask":np.ndarray,"result":np.ndarray}]): 输出图像数据列表 +- ### 2、API + +```python +def Segmentation( + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False): 
+``` +- 人像分割 API + +- **参数** + * images (list[np.ndarray]) : 输入图像数据列表(BGR) + * paths (list[str]) : 输入图像路径列表 + * batch_size (int) : 数据批大小 + * output_dir (str) : 可视化图像输出目录 + * visualization (bool) : 是否可视化 + +- **返回** + * results (list[dict{"mask":np.ndarray,"result":np.ndarray}]): 输出图像数据列表 ## 四、更新历史 diff --git a/modules/image/semantic_segmentation/ExtremeC3_Portrait_Segmentation/README_en.md b/modules/image/semantic_segmentation/ExtremeC3_Portrait_Segmentation/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..15ac80e05b615611a75591b5e1e6d42a66521564 --- /dev/null +++ b/modules/image/semantic_segmentation/ExtremeC3_Portrait_Segmentation/README_en.md @@ -0,0 +1,89 @@ +# ExtremeC3_Portrait_Segmentation + +|Module Name|ExtremeC3_Portrait_Segmentation| +| :--- | :---: | +|Category|image segmentation| +|Network |ExtremeC3| +|Dataset|EG1800, Baidu fashion dataset| +|Fine-tuning supported or not|No| +|Module Size|0.038MB| +|Data indicators|-| +|Latest update date|2021-02-26| + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +

+
+

+ + +- ### Module Introduction + * ExtremeC3_Portrait_Segmentation is a light weigth module based on ExtremeC3 to achieve portrait segmentation. + + * For more information, please refer to: [ExtremeC3_Portrait_Segmentation](https://github.com/clovaai/ext_portrait_segmentation). + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install ExtremeC3_Portrait_Segmentation + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + ```python + import cv2 + import paddlehub as hub + + model = hub.Module(name='ExtremeC3_Portrait_Segmentation') + + result = model.Segmentation( + images=[cv2.imread('/PATH/TO/IMAGE')], + paths=None, + batch_size=1, + output_dir='output', + visualization=False) + ``` + +- ### 2、API + + ```python + def Segmentation( + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False): + ``` + - Prediction API, used for portrait segmentation. + + - **Parameter** + * images (list[np.ndarray]) : image data, ndarray.shape is in the format [H, W, C], BGR; + * paths (list[str]) :image path + * batch_size (int) : batch size + * output_dir (str) : save path of images, 'output' by default. + * visualization (bool) : whether to save the segmentation results as picture files. + - **Return** + * results (list[dict{"mask":np.ndarray,"result":np.ndarray}]): list of recognition results. + +## IV. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README.md b/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README.md index 7d71dcfb3686ad0a2ba01a067e92514e6c07b378..a892e978fbc958e3bef8fa1bc90dd3d8cc728778 100644 --- a/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README.md +++ b/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README.md @@ -43,7 +43,7 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 - ```python import paddlehub as hub @@ -91,4 +91,4 @@ - ```shell $ hub install FCN_HRNet_W18_Face_Seg==1.0.0 - ``` + ``` diff --git a/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README_en.md b/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..1470c6ef5665e4fcfd12d290646e980356b659f9 --- /dev/null +++ b/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README_en.md @@ -0,0 +1,93 @@ +# FCN_HRNet_W18_Face_Seg + +|Module Name|FCN_HRNet_W18_Face_Seg| +| :--- | :---: | +|Category|image segmentation| +|Network|FCN_HRNet_W18| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size|56MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+
+

+ + +- ### Module Introduction + + - This module is based on FCN_HRNet_W18 model, and can be used to segment face region. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install FCN_HRNet_W18_Face_Seg + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="FCN_HRNet_W18_Face_Seg") + result = model.Segmentation(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.Segmentation(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def Segmentation(images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False): + ``` + + - Face segmentation API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - paths (list[str]): image path; + - batch_size (int): the size of batch; + - output_dir (str): save path of images; + - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + - res (list\[numpy.ndarray\]): result list,ndarray.shape is \[H, W, C\] + + + + +## IV.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install FCN_HRNet_W18_Face_Seg==1.0.0 + ``` diff --git a/modules/image/semantic_segmentation/Pneumonia_CT_LKM_PP/README_en.md b/modules/image/semantic_segmentation/Pneumonia_CT_LKM_PP/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..397441dfd52a074a9e9ca9775a0b54f98d02027e --- /dev/null +++ b/modules/image/semantic_segmentation/Pneumonia_CT_LKM_PP/README_en.md @@ -0,0 +1,91 @@ +# Pneumonia_CT_LKM_PP + +|Module Name|Pneumonia_CT_LKM_PP| +| :--- | :---: | +|Category|Image segmentation| +|Network |-| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size|35M| +|Data indicators|-| +|Latest update date|2021-02-26| + + +## I. Basic Information + + +- ### Module Introduction + + - Pneumonia CT analysis model (Pneumonia-CT-LKM-PP) can efficiently complete the detection of lesions and outline the patient's CT images. Through post-processing codes, the number, volume, and lesions of lung lesions can be analyzed. This model has been fully trained by high-resolution and low-resolution CT image data, which can adapt to the examination data collected by different levels of CT imaging equipment. + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install Pneumonia_CT_LKM_PP==1.0.0 + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III. 
Module API Prediction + +- ### 1、Prediction Code Example + + ```python + import paddlehub as hub + + pneumonia = hub.Module(name="Pneumonia_CT_LKM_PP") + + input_only_lesion_np_path = "/PATH/TO/ONLY_LESION_NP" + input_both_lesion_np_path = "/PATH/TO/LESION_NP" + input_both_lung_np_path = "/PATH/TO/LUNG_NP" + + # set input dict + input_dict = {"image_np_path": [ + [input_only_lesion_np_path], + [input_both_lesion_np_path, input_both_lung_np_path], + ]} + + # execute predict and print the result + results = pneumonia.segmentation(data=input_dict) + for result in results: + print(result) + + ``` + + +- ### 2、API + + - ```python + def segmentation(data) + ``` + + - Prediction API, used for CT analysis of pneumonia. + + - **Parameter** + + * data (dict): key is "image_np_path", value is the list of results which contains lesion and lung segmentation masks. + + + - **Return** + + * result (list\[dict\]): the list of recognition results, where each element is dict and each field is: + * input_lesion_np_path: input path of lesion. + * output_lesion_np: segmentation result path of lesion. + * input_lung_np_path: input path of lung. + * output_lung_np:segmentation result path of lung. + + +## IV. Release Note + +* 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/Pneumonia_CT_LKM_PP_lung/README_en.md b/modules/image/semantic_segmentation/Pneumonia_CT_LKM_PP_lung/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..067ab57f3f189b02df2f578d5e58d77acb9e9620 --- /dev/null +++ b/modules/image/semantic_segmentation/Pneumonia_CT_LKM_PP_lung/README_en.md @@ -0,0 +1,91 @@ +# Pneumonia_CT_LKM_PP_lung + +|Module Name|Pneumonia_CT_LKM_PP_lung| +| :--- | :---: | +|Category|Image segmentation| +|Network |-| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size|35M| +|Data indicators|-| +|Latest update date|2021-02-26| + + +## I. Basic Information + + +- ### Module Introduction + + - Pneumonia CT analysis model (Pneumonia-CT-LKM-PP) can efficiently complete the detection of lesions and outline the patient's CT images. Through post-processing codes, the number, volume, and lesions of lung lesions can be analyzed. This model has been fully trained by high-resolution and low-resolution CT image data, which can adapt to the examination data collected by different levels of CT imaging equipment. (This module is a submodule of Pneumonia_CT_LKM_PP.) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install Pneumonia_CT_LKM_PP_lung==1.0.0 + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## III. 
Module API Prediction + +- ### 1、Prediction Code Example + + ```python + import paddlehub as hub + + pneumonia = hub.Module(name="Pneumonia_CT_LKM_PP_lung") + + input_only_lesion_np_path = "/PATH/TO/ONLY_LESION_NP" + input_both_lesion_np_path = "/PATH/TO/LESION_NP" + input_both_lung_np_path = "/PATH/TO/LUNG_NP" + + # set input dict + input_dict = {"image_np_path": [ + [input_only_lesion_np_path], + [input_both_lesion_np_path, input_both_lung_np_path], + ]} + + # execute predict and print the result + results = pneumonia.segmentation(data=input_dict) + for result in results: + print(result) + + ``` + + +- ### 2、API + + - ```python + def segmentation(data) + ``` + + - Prediction API, used for CT analysis of pneumonia. + + - **Parameter** + + * data (dict): Key is "image_np_path", value is the list of results which contains lesion and lung segmentation masks. + + + - **Return** + + * result (list\[dict\]): The list of recognition results, where each element is dict and each field is: + * input_lesion_np_path: Input path of lesion. + * output_lesion_np: Segmentation result path of lesion. + * input_lung_np_path: Input path of lung. + * output_lung_np: Segmentation result path of lung. + + +## IV. Release Note + +* 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/U2Net/README.md b/modules/image/semantic_segmentation/U2Net/README.md index bedd1cc65feebb68d754814eecbbbb03d35397bf..535b8fc426f37cdc87fe0168ab4025cd44a150b7 100644 --- a/modules/image/semantic_segmentation/U2Net/README.md +++ b/modules/image/semantic_segmentation/U2Net/README.md @@ -43,7 +43,7 @@ | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 ```python import cv2 diff --git a/modules/image/semantic_segmentation/U2Net/README_en.md b/modules/image/semantic_segmentation/U2Net/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..4cea82d051779812790eb09f8571fc8d7a5b8d01 --- /dev/null +++ b/modules/image/semantic_segmentation/U2Net/README_en.md @@ -0,0 +1,96 @@ +# U2Net + +|Module Name |U2Net| +| :--- | :---: | +|Category |Image segmentation| +|Network |U^2Net| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size |254MB| +|Data indicators|-| +|Latest update date|2021-02-26| + + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: + +

+ +

+ + +- ### Module Introduction + + - Network architecture: +

+
+

+ + - For more information, please refer to: [U2Net](https://github.com/xuebinqin/U-2-Net) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + - paddlehub >= 2.0.0 + +- ### 2、Installation + - ```shell + $ hub install U2Net + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + ```python + import cv2 + import paddlehub as hub + + model = hub.Module(name='U2Net') + + result = model.Segmentation( + images=[cv2.imread('/PATH/TO/IMAGE')], + paths=None, + batch_size=1, + input_size=320, + output_dir='output', + visualization=True) + ``` + - ### 2、API + + ```python + def Segmentation( + images=None, + paths=None, + batch_size=1, + input_size=320, + output_dir='output', + visualization=False): + ``` + - Prediction API, obtaining segmentation result. + + - **Parameter** + * images (list[np.ndarray]) : Image data, ndarray.shape is in the format [H, W, C], BGR. + * paths (list[str]) : Image path. + * batch_size (int) : Batch size. + * input_size (int) : Input image size, default is 320. + * output_dir (str) : Save path of images, 'output' by default. + * visualization (bool) : Whether to save the results as picture files. + + - **Return** + * results (list[np.ndarray]): The list of segmentation results. + +## IV. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/U2Netp/README.md b/modules/image/semantic_segmentation/U2Netp/README.md index b476a9f35007e4a74398d95998c4998f6d2c2c13..267409a5d414a59eee45b04e6d5ef63d430607a1 100644 --- a/modules/image/semantic_segmentation/U2Netp/README.md +++ b/modules/image/semantic_segmentation/U2Netp/README.md @@ -47,7 +47,7 @@ | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 ```python import cv2 diff --git a/modules/image/semantic_segmentation/U2Netp/README_en.md b/modules/image/semantic_segmentation/U2Netp/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..ffb0bac24f0d46294b94b7e65fad784c73a43854 --- /dev/null +++ b/modules/image/semantic_segmentation/U2Netp/README_en.md @@ -0,0 +1,96 @@ +# U2Netp + +|Module Name |U2Netp| +| :--- | :---: | +|Category |Image segmentation| +|Network |U^2Net| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size |6.7MB| +|Data indicators|-| +|Latest update date|2021-02-26| + + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: + +

+ +

+ + +- ### Module Introduction + + - Network architecture: +

+
+

+ + - For more information, please refer to: [U2Net](https://github.com/xuebinqin/U-2-Net) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + - paddlehub >= 2.0.0 + +- ### 2、Installation + - ```shell + $ hub install U2Netp + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + ```python + import cv2 + import paddlehub as hub + + model = hub.Module(name='U2Netp') + + result = model.Segmentation( + images=[cv2.imread('/PATH/TO/IMAGE')], + paths=None, + batch_size=1, + input_size=320, + output_dir='output', + visualization=True) + ``` + - ### 2、API + + ```python + def Segmentation( + images=None, + paths=None, + batch_size=1, + input_size=320, + output_dir='output', + visualization=False): + ``` + - Prediction API, obtaining segmentation result. + + - **Parameter** + * images (list[np.ndarray]) : Image data, ndarray.shape is in the format [H, W, C], BGR. + * paths (list[str]) : Image path. + * batch_size (int) : Batch size. + * input_size (int) : Input image size, default is 320. + * output_dir (str) : Save path of images, 'output' by default. + * visualization (bool) : Whether to save the results as picture files. + + - **Return** + * results (list[np.ndarray]): The list of segmentation results. + +## IV. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/ace2p/README.md b/modules/image/semantic_segmentation/ace2p/README.md index 710c2424a45298d86b1486afbf751eb874ae4764..12b23cf4f1beed338058a89e64a0ac1d854e3892 100644 --- a/modules/image/semantic_segmentation/ace2p/README.md +++ b/modules/image/semantic_segmentation/ace2p/README.md @@ -57,10 +57,10 @@ - ### 1、命令行预测 ```shell - $ hub install ace2p==1.1.0 + $ hub run ace2p --input_path "/PATH/TO/IMAGE" ``` - - ### 2、代码示例 + - ### 2、预测代码示例 ```python import paddlehub as hub @@ -70,49 +70,49 @@ result = human_parser.segmentation(images=[cv2.imread('/PATH/TO/IMAGE')]) ``` - - ### 3、API + - ### 3、API - ```python - def segmentation(images=None, - paths=None, - batch_size=1, - use_gpu=False, - output_dir='ace2p_output', - visualization=False): - ``` + ```python + def segmentation(images=None, + paths=None, + batch_size=1, + use_gpu=False, + output_dir='ace2p_output', + visualization=False): + ``` - - 预测API,用于图像分割得到人体解析。 + - 预测API,用于图像分割得到人体解析。 - - **参数** + - **参数** - * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; - * paths (list\[str\]): 图片的路径; - * batch\_size (int): batch 的大小; - * use\_gpu (bool): 是否使用 GPU; - * output\_dir (str): 保存处理结果的文件目录; - * visualization (bool): 是否将识别结果保存为图片文件。 + * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * paths (list\[str\]): 图片的路径; + * batch\_size (int): batch 的大小; + * use\_gpu (bool): 是否使用 GPU; + * output\_dir (str): 保存处理结果的文件目录; + * visualization (bool): 是否将识别结果保存为图片文件。 - - **返回** + - **返回** - * res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有'path', 'data',相应的取值为: - * path (str): 原输入图片的路径; - * data (numpy.ndarray): 图像分割得到的结果,shape 为`H * W`,元素的取值为0-19,表示每个像素的分类结果,映射顺序与下面的调色板相同。 + * res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有'path', 'data',相应的取值为: + * path (str): 原输入图片的路径; + * data (numpy.ndarray): 图像分割得到的结果,shape 为`H * W`,元素的取值为0-19,表示每个像素的分类结果,映射顺序与下面的调色板相同。 - 
```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) - ``` + ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` - - 将模型保存到指定路径。 + - 将模型保存到指定路径。 - - **参数** + - **参数** - * dirname: 存在模型的目录名称 - * model\_filename: 模型文件名称,默认为\_\_model\_\_ - * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) - * combined: 是否将参数保存到统一的一个文件中。 + * dirname: 存在模型的目录名称 + * model\_filename: 模型文件名称,默认为\_\_model\_\_ + * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) + * combined: 是否将参数保存到统一的一个文件中。 ## 四、服务部署 diff --git a/modules/image/semantic_segmentation/ace2p/README_en.md b/modules/image/semantic_segmentation/ace2p/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..3fa0c273e3b3095ce8ba7b8abf97543e3be6ca48 --- /dev/null +++ b/modules/image/semantic_segmentation/ace2p/README_en.md @@ -0,0 +1,184 @@ +# ace2p + +|Module Name|ace2p| +| :--- | :---: | +|Category|Image segmentation| +|Network|ACE2P| +|Dataset|LIP| +|Fine-tuning supported or not|No| +|Module Size|259MB| +|Data indicators|-| +|Latest update date |2021-02-26| + + +## I. Basic Information + +- ### Application Effect Display + + - Network architecture: +

+
+

+ + - Color palette + +

+
+

+ + - Sample results: +

+ +

+ +- ### Module Introduction + + - Human Parsing is a fine-grained semantic segmentation task that aims to identify the components (for example, body parts and clothing) of a human image at the pixel level. The PaddleHub Module uses ResNet101 as the backbone network, and accepts input image sizes of 473x473x3. + + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install ace2p + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run ace2p --input_path "/PATH/TO/IMAGE" + ``` + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + human_parser = hub.Module(name="ace2p") + result = human_parser.segmentation(images=[cv2.imread('/PATH/TO/IMAGE')]) + ``` + +- ### 3、API + + - ```python + def segmentation(images=None, + paths=None, + batch_size=1, + use_gpu=False, + output_dir='ace2p_output', + visualization=False): + ``` + + - Prediction API, used for human parsing. + + - **Parameter** + + * images (list\[numpy.ndarray\]): Image data, ndarray.shape is in the format [H, W, C], BGR. + * paths (list\[str\]): Image path. + * batch\_size (int): Batch size. + * use\_gpu (bool): Use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + * output\_dir (str): Save path of output, default is 'ace2p_output'. + * visualization (bool): Whether to save the recognition results as picture files. + + - **Return** + + * res (list\[dict\]): The list of recognition results, where each element is dict and each field is: + * save\_path (str, optional): Save path of the result. + * data (numpy.ndarray): The result of portrait segmentation. + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + + - Save the model to the specified path. + + - **Parameters** + * dirname: Save path. + * model\_filename: mMdel file name,defalt is \_\_model\_\_ + * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) + * combined: Whether to save the parameters to a unified file. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of human parsing + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m ace2p + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
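+
+  - For instance, a minimal sketch of a GPU-backed startup (assuming GPU card 0 is available on the serving machine; adjust the device index as needed):
+
+    - ```shell
+      # select the GPU before starting the service, as described in the NOTE above
+      $ export CUDA_VISIBLE_DEVICES=0
+      $ hub serving start -m ace2p
+      ```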
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + + - ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ace2p" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(base64_to_cv2(r.json()["results"][0]['data'])) + ``` + + +## 五、更新历史 + +- 1.0.0 + + First release + +* 1.1.0 + + Adapt to paddlehub2.0 diff --git a/modules/image/semantic_segmentation/bisenet_lane_segmentation/README.md b/modules/image/semantic_segmentation/bisenet_lane_segmentation/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b9814fe7bb98ca34f13b0a94741a57d365ed035c --- /dev/null +++ b/modules/image/semantic_segmentation/bisenet_lane_segmentation/README.md @@ -0,0 +1,151 @@ +# bisenet_lane_segmentation + +|模型名称|bisenet_lane_segmentation| +| :--- | :---: | +|类别|图像-图像分割| +|网络|bisenet| +|数据集|TuSimple| +|是否支持Fine-tuning|否| +|模型大小|9.7MB| +|指标|ACC96.09%| +|最新更新日期|2021-12-03| + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 样例结果示例(左为原图,右为效果图): +


+ +- ### 模型介绍 + + - 车道线分割是自动驾驶算法的一个范畴,可以用来辅助进行车辆定位和进行决策,早期已有基于传统图像处理的车道线检测方法,但是随着技术的演进,车道线检测任务所应对的场景越来越多样化,目前更多的方式是寻求在语义上对车道线存在位置的检测。bisenet_lane_segmentation是一个轻量化车道线分割模型。 + + - 更多详情请参考:[bisenet_lane_segmentation](https://github.com/PaddlePaddle/PaddleSeg) + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 + + - paddleseg >= 2.3.0 + + - Python >= 3.7+ + + +- ### 2、安装 + + - ```shell + $ hub install bisenet_lane_segmentation + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run bisenet_lane_segmentation --input_path "/PATH/TO/IMAGE" + ``` + + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="bisenet_lane_segmentation") + result = model.predict(image_list=["/PATH/TO/IMAGE"]) + print(result) + ``` +- ### 3、API + + - ```python + def predict(self, + image_list, + visualization, + save_path): + ``` + + - 车道线分割预测API,用于将输入图片中的车道线分割出来。 + + - 参数 + + - image_list (list(str | numpy.ndarray)):图片输入路径或者BGR格式numpy数据。 + - visualization (bool): 是否进行可视化,默认为False。 + - save_path (str): 当visualization为True时,保存图片的路径,默认为"bisenet_lane_segmentation_output"。 + + - 返回 + + - result (list(numpy.ndarray)):模型分割结果: + + +## 四、服务部署 + +- PaddleHub Serving可以部署车道线分割在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m bisenet_lane_segmentation + ``` + + - 这样就完成了一个车道线分割在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/bisenet_lane_segmentation" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + #print(r.json()) + mask = base64_to_cv2(r.json()["results"]['data'][0]) + print(mask) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + diff --git a/modules/image/semantic_segmentation/bisenet_lane_segmentation/README_en.md b/modules/image/semantic_segmentation/bisenet_lane_segmentation/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..8e6364bc34e44465d6ece095184f7eb1d8cedcd4 --- /dev/null +++ b/modules/image/semantic_segmentation/bisenet_lane_segmentation/README_en.md @@ -0,0 +1,154 @@ +# bisenet_lane_segmentation + +|Module Name|bisenet_lane_segmentation| +| :--- | :---: | +|Category|Image Segmentation| +|Network|bisenet| +|Dataset|TuSimple| +|Support Fine-tuning|No| +|Module Size|9.7MB| +|Data Indicators|ACC96.09%| +|Latest update date|2021-12-03| + + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +


+ +- ### Module Introduction + + - Lane segmentation is a category of automatic driving algorithms, which can be used to assist vehicle positioning and decision-making. In the early days, there were lane detection methods based on traditional image processing, but with the evolution of technology, the scenes that lane detection tasks deal with More and more diversified, and more methods are currently seeking to detect the location of lane semantically. bisenet_lane_segmentation is a lightweight model for lane segmentation. + + + + - For more information, please refer to: [bisenet_lane_segmentation](https://github.com/PaddlePaddle/PaddleSeg) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 + + - paddleseg >= 2.3.0 + + - Python >= 3.7+ + + +- ### 2、Installation + + - ```shell + $ hub install bisenet_lane_segmentation + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run bisenet_lane_segmentation --input_path "/PATH/TO/IMAGE" + ``` + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="bisenet_lane_segmentation") + result = model.predict(image_list=["/PATH/TO/IMAGE"]) + print(result) + + ``` +- ### 3、API + + - ```python + def predict(self, + image_list, + visualization, + save_path): + ``` + + - Prediction API for lane segmentation. + + - **Parameter** + + - image_list (list(str | numpy.ndarray)): Image path or image data, ndarray.shape is in the format \[H, W, C\],BGR. + - visualization (bool): Whether to save the recognition results as picture files, default is False. + - save_path (str): Save path of images, "bisenet_lane_segmentation_output" by default. + + - **Return** + + - result (list(numpy.ndarray)):The list of model results. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of lane segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m bisenet_lane_segmentation + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
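+
+  - Before moving on to Step 2, it can be convenient to confirm that the service is actually accepting connections. This sketch uses only the Python standard library and assumes the service runs locally on the default port 8866 mentioned above.
+
+    - ```python
+      import socket
+      import time
+
+      # Poll the default serving port until it accepts a TCP connection (or give up after ~30s).
+      for _ in range(30):
+          try:
+              with socket.create_connection(("127.0.0.1", 8866), timeout=1):
+                  print("PaddleHub Serving is reachable on port 8866")
+                  break
+          except OSError:
+              time.sleep(1)
+      else:
+          print("No service is listening on port 8866")
+      ```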
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/bisenet_lane_segmentation" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + #print(r.json()) + mask = base64_to_cv2(r.json()["results"]['data'][0]) + print(mask) + ``` + +## V. Release Note + +- 1.0.0 + + First release \ No newline at end of file diff --git a/modules/image/semantic_segmentation/bisenet_lane_segmentation/lane_processor/get_lane_coords.py b/modules/image/semantic_segmentation/bisenet_lane_segmentation/lane_processor/get_lane_coords.py new file mode 100644 index 0000000000000000000000000000000000000000..868f0bcc37ed850c90c6bec0616ac4e0b929b30f --- /dev/null +++ b/modules/image/semantic_segmentation/bisenet_lane_segmentation/lane_processor/get_lane_coords.py @@ -0,0 +1,156 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
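+# Post-processing helper: LaneProcessor converts per-pixel lane segmentation heat maps into
+# TuSimple-style lane coordinate lists (x positions sampled at fixed y steps).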
+# this code is based on +# https://github.com/ZJULearning/resa/blob/main/datasets/tusimple.py + +import cv2 +import numpy as np + + +class LaneProcessor: + def __init__(self, + num_classes=2, + ori_shape=(720, 1280), + cut_height=0, + y_pixel_gap=10, + points_nums=56, + thresh=0.6, + smooth=True): + super(LaneProcessor, self).__init__() + self.num_classes = num_classes + self.ori_shape = ori_shape + self.cut_height = cut_height + self.y_pixel_gap = y_pixel_gap + self.points_nums = points_nums + self.thresh = thresh + self.smooth = smooth + + def get_lane_coords(self, seg_pred): + lane_coords_list = [] + for batch in range(len(seg_pred)): + seg = seg_pred[batch] + lane_coords = self.heatmap2coords(seg) + for i in range(len(lane_coords)): + lane_coords[i] = sorted( + lane_coords[i], key=lambda pair: pair[1]) + lane_coords_list.append(lane_coords) + return lane_coords_list + + def process_gap(self, coordinate): + if any(x > 0 for x in coordinate): + start = [i for i, x in enumerate(coordinate) if x > 0][0] + end = [ + i for i, x in reversed(list(enumerate(coordinate))) if x > 0 + ][0] + lane = coordinate[start:end + 1] + # The line segment is not continuous + if any(x < 0 for x in lane): + gap_start = [ + i for i, x in enumerate(lane[:-1]) + if x > 0 and lane[i + 1] < 0 + ] + gap_end = [ + i + 1 for i, x in enumerate(lane[:-1]) + if x < 0 and lane[i + 1] > 0 + ] + gap_id = [i for i, x in enumerate(lane) if x < 0] + if len(gap_start) == 0 or len(gap_end) == 0: + return coordinate + for id in gap_id: + for i in range(len(gap_start)): + if i >= len(gap_end): + return coordinate + if id > gap_start[i] and id < gap_end[i]: + gap_width = float(gap_end[i] - gap_start[i]) + # line interpolation + lane[id] = int((id - gap_start[i]) / gap_width * + lane[gap_end[i]] + + (gap_end[i] - id) / gap_width * + lane[gap_start[i]]) + if not all(x > 0 for x in lane): + print("Gaps still exist!") + coordinate[start:end + 1] = lane + return coordinate + + def get_coords(self, heat_map): + dst_height = self.ori_shape[0] - self.cut_height + coords = np.zeros(self.points_nums) + coords[:] = -2 + pointCount = 0 + for i in range(self.points_nums): + y_coord = dst_height - 10 - i * self.y_pixel_gap + y = int(y_coord / dst_height * heat_map.shape[0]) + if y < 0: + break + prob_line = heat_map[y, :] + x = np.argmax(prob_line) + prob = prob_line[x] + if prob > self.thresh: + coords[i] = int(x / heat_map.shape[1] * self.ori_shape[1]) + pointCount = pointCount + 1 + if pointCount < 2: + coords[:] = -2 + self.process_gap(coords) + return coords + + def fix_outliers(self, coords): + data = [x for i, x in enumerate(coords) if x > 0] + index = [i for i, x in enumerate(coords) if x > 0] + if len(data) == 0: + return coords + diff = [] + is_outlier = False + n = 1 + x_gap = abs((data[-1] - data[0]) / (1.0 * (len(data) - 1))) + for idx, dt in enumerate(data): + if is_outlier == False: + t = idx - 1 + n = 1 + if idx == 0: + diff.append(0) + else: + diff.append(abs(data[idx] - data[t])) + if abs(data[idx] - data[t]) > n * (x_gap * 1.5): + n = n + 1 + is_outlier = True + ind = index[idx] + coords[ind] = -1 + else: + is_outlier = False + + def heatmap2coords(self, seg_pred): + coordinates = [] + for i in range(self.num_classes - 1): + heat_map = seg_pred[i + 1] + if self.smooth: + heat_map = cv2.blur( + heat_map, (9, 9), borderType=cv2.BORDER_REPLICATE) + coords = self.get_coords(heat_map) + indexes = [i for i, x in enumerate(coords) if x > 0] + if not indexes: + continue + self.add_coords(coordinates, coords) + + if len(coordinates) 
== 0: + coords = np.zeros(self.points_nums) + self.add_coords(coordinates, coords) + return coordinates + + def add_coords(self, coordinates, coords): + sub_lanes = [] + for j in range(self.points_nums): + y_lane = self.ori_shape[0] - 10 - j * self.y_pixel_gap + x_lane = coords[j] if coords[j] > 0 else -2 + sub_lanes.append([x_lane, y_lane]) + coordinates.append(sub_lanes) diff --git a/modules/image/semantic_segmentation/bisenet_lane_segmentation/lane_processor/lane.py b/modules/image/semantic_segmentation/bisenet_lane_segmentation/lane_processor/lane.py new file mode 100644 index 0000000000000000000000000000000000000000..8a7a481570e993810079445a7f54a70bd2e41c57 --- /dev/null +++ b/modules/image/semantic_segmentation/bisenet_lane_segmentation/lane_processor/lane.py @@ -0,0 +1,141 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# this code is from https://github.com/TuSimple/tusimple-benchmark/blob/master/evaluate/lane.py + +import json as json +import numpy as np +from sklearn.linear_model import LinearRegression + + +class LaneEval(object): + lr = LinearRegression() + pixel_thresh = 20 + pt_thresh = 0.85 + + @staticmethod + def get_angle(xs, y_samples): + xs, ys = xs[xs >= 0], y_samples[xs >= 0] + if len(xs) > 1: + LaneEval.lr.fit(ys[:, None], xs) + k = LaneEval.lr.coef_[0] + theta = np.arctan(k) + else: + theta = 0 + return theta + + @staticmethod + def line_accuracy(pred, gt, thresh): + pred = np.array([p if p >= 0 else -100 for p in pred]) + gt = np.array([g if g >= 0 else -100 for g in gt]) + return np.sum(np.where(np.abs(pred - gt) < thresh, 1., 0.)) / len(gt) + + @staticmethod + def bench(pred, gt, y_samples, running_time): + if any(len(p) != len(y_samples) for p in pred): + raise Exception('Format of lanes error.') + if running_time > 200 or len(gt) + 2 < len(pred): + return 0., 0., 1. + angles = [ + LaneEval.get_angle(np.array(x_gts), np.array(y_samples)) + for x_gts in gt + ] + threshs = [LaneEval.pixel_thresh / np.cos(angle) for angle in angles] + line_accs = [] + fp, fn = 0., 0. + matched = 0. + for x_gts, thresh in zip(gt, threshs): + accs = [ + LaneEval.line_accuracy( + np.array(x_preds), np.array(x_gts), thresh) + for x_preds in pred + ] + max_acc = np.max(accs) if len(accs) > 0 else 0. + if max_acc < LaneEval.pt_thresh: + fn += 1 + else: + matched += 1 + line_accs.append(max_acc) + fp = len(pred) - matched + if len(gt) > 4 and fn > 0: + fn -= 1 + s = sum(line_accs) + if len(gt) > 4: + s -= min(line_accs) + return s / max(min(4.0, len(gt)), + 1.), fp / len(pred) if len(pred) > 0 else 0., fn / max( + min(len(gt), 4.), 1.) 
+ + @staticmethod + def bench_one_submit(pred_file, gt_file): + try: + json_pred = [ + json.loads(line) for line in open(pred_file).readlines() + ] + except BaseException as e: + raise Exception('Fail to load json file of the prediction.') + json_gt = [json.loads(line) for line in open(gt_file).readlines()] + if len(json_gt) != len(json_pred): + raise Exception( + 'We do not get the predictions of all the test tasks') + gts = {l['raw_file']: l for l in json_gt} + accuracy, fp, fn = 0., 0., 0. + for pred in json_pred: + if 'raw_file' not in pred or 'lanes' not in pred or 'run_time' not in pred: + raise Exception( + 'raw_file or lanes or run_time not in some predictions.') + raw_file = pred['raw_file'] + pred_lanes = pred['lanes'] + run_time = pred['run_time'] + if raw_file not in gts: + raise Exception( + 'Some raw_file from your predictions do not exist in the test tasks.' + ) + gt = gts[raw_file] + gt_lanes = gt['lanes'] + y_samples = gt['h_samples'] + try: + a, p, n = LaneEval.bench(pred_lanes, gt_lanes, y_samples, + run_time) + except BaseException as e: + raise Exception('Format of lanes error.') + accuracy += a + fp += p + fn += n + num = len(gts) + # the first return parameter is the default ranking parameter + return json.dumps([{ + 'name': 'Accuracy', + 'value': accuracy / num, + 'order': 'desc' + }, { + 'name': 'FP', + 'value': fp / num, + 'order': 'asc' + }, { + 'name': 'FN', + 'value': fn / num, + 'order': 'asc' + }]), accuracy / num, fp / num, fn / num + + +if __name__ == '__main__': + import sys + + try: + if len(sys.argv) != 3: + raise Exception('Invalid input arguments') + print(LaneEval.bench_one_submit(sys.argv[1], sys.argv[2])) + except Exception as e: + print(e.message) + sys.exit(e.message) diff --git a/modules/image/semantic_segmentation/bisenet_lane_segmentation/lane_processor/tusimple_processor.py b/modules/image/semantic_segmentation/bisenet_lane_segmentation/lane_processor/tusimple_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..6fa7fc55d2513e5bd2c4edeb78f761a8882466b2 --- /dev/null +++ b/modules/image/semantic_segmentation/bisenet_lane_segmentation/lane_processor/tusimple_processor.py @@ -0,0 +1,125 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
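+# TusimpleProcessor ties the pieces together: it turns model outputs into TuSimple-format json
+# lines via LaneProcessor, evaluates them with LaneEval, and can draw the detected lane points
+# onto the input images for visualization.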
+ +import os + +import cv2 +import json +import paddle.nn as nn + +from .lane import LaneEval +from .get_lane_coords import LaneProcessor + + +def mkdir(path): + sub_dir = os.path.dirname(path) + if not os.path.exists(sub_dir): + os.makedirs(sub_dir) + + +class TusimpleProcessor: + def __init__(self, + num_classes=2, + ori_shape=(720, 1280), + cut_height=0, + thresh=0.6, + test_gt_json=None, + save_dir='output/'): + super(TusimpleProcessor, self).__init__() + self.num_classes = num_classes + self.dump_to_json = [] + self.save_dir = save_dir + self.test_gt_json = test_gt_json + self.color_map = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), + (255, 0, 255), (0, 255, 125), (50, 100, 50), + (100, 50, 100)] + self.laneProcessor = LaneProcessor( + num_classes=self.num_classes, + ori_shape=ori_shape, + cut_height=cut_height, + y_pixel_gap=10, + points_nums=56, + thresh=thresh, + smooth=True) + + def dump_data_to_json(self, + output, + im_path, + run_time=0, + is_dump_json=True, + is_view=False): + seg_pred = output[0] + seg_pred = nn.functional.softmax(seg_pred, axis=1) + seg_pred = seg_pred.numpy() + lane_coords_list = self.laneProcessor.get_lane_coords(seg_pred) + + for batch in range(len(seg_pred)): + lane_coords = lane_coords_list[batch] + path_list = im_path[batch].split("/") + if is_dump_json: + json_pred = {} + json_pred['lanes'] = [] + json_pred['run_time'] = run_time * 1000 + json_pred['h_sample'] = [] + + json_pred['raw_file'] = os.path.join(*path_list[-4:]) + for l in lane_coords: + if len(l) == 0: + continue + json_pred['lanes'].append([]) + for (x, y) in l: + json_pred['lanes'][-1].append(int(x)) + for (x, y) in lane_coords[0]: + json_pred['h_sample'].append(y) + self.dump_to_json.append(json.dumps(json_pred)) + + if is_view: + img = cv2.imread(im_path[batch]) + if is_dump_json: + img_name = '_'.join([x for x in path_list[-4:]]) + sub_dir = 'visual_eval' + else: + img_name = os.path.basename(im_path[batch]) + sub_dir = 'visual_points' + saved_path = os.path.join(self.save_dir, sub_dir, img_name) + self.draw(img, lane_coords, saved_path) + + def predict(self, output, im_path): + self.dump_data_to_json( + output, [im_path], is_dump_json=False, is_view=True) + + def bench_one_submit(self): + output_file = os.path.join(self.save_dir, 'pred.json') + if output_file is not None: + mkdir(output_file) + with open(output_file, "w+") as f: + for line in self.dump_to_json: + print(line, end="\n", file=f) + + eval_rst, acc, fp, fn = LaneEval.bench_one_submit( + output_file, self.test_gt_json) + self.dump_to_json = [] + return acc, fp, fn, eval_rst + + def draw(self, img, coords, file_path=None): + for i, coord in enumerate(coords): + for x, y in coord: + if x <= 0 or y <= 0: + continue + cv2.circle(img, (int(x), int(y)), 4, + self.color_map[i % self.num_classes], 2) + + if file_path is not None: + mkdir(file_path) + cv2.imwrite(file_path, img) diff --git a/modules/image/semantic_segmentation/bisenet_lane_segmentation/module.py b/modules/image/semantic_segmentation/bisenet_lane_segmentation/module.py new file mode 100644 index 0000000000000000000000000000000000000000..29dcb93d36f994c831e5ee5a982bb06affc8193f --- /dev/null +++ b/modules/image/semantic_segmentation/bisenet_lane_segmentation/module.py @@ -0,0 +1,165 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time +import argparse +import os +from typing import Union, List, Tuple + +import cv2 +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo, runnable, serving +import paddleseg.transforms as T +from paddleseg.utils import logger, progbar, visualize +from paddlehub.module.cv_module import ImageSegmentationModule +import paddleseg.utils as utils +from paddleseg.models import layers +from paddleseg.models import BiSeNetV2 + +from bisenet_lane_segmentation.processor import Crop, reverse_transform, cv2_to_base64, base64_to_cv2 +from bisenet_lane_segmentation.lane_processor.tusimple_processor import TusimpleProcessor + +@moduleinfo( + name="bisenet_lane_segmentation", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="BiSeNetLane is a lane segmentation model.", + version="1.0.0") +class BiSeNetLane(nn.Layer): + """ + The BiSeNetLane use BiseNet V2 to process lane segmentation . + + Args: + num_classes (int): The unique number of target classes. + lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes: int = 7, + lambd: float = 0.25, + align_corners: bool = False, + pretrained: str = None): + super(BiSeNetLane, self).__init__() + + self.net = BiSeNetV2( + num_classes=num_classes, + lambd=lambd, + align_corners=align_corners, + pretrained=None) + + self.transforms = [Crop(up_h_off=160), T.Resize([640, 368]), T.Normalize()] + self.cut_height = 160 + self.postprocessor = TusimpleProcessor(num_classes=7, cut_height=160,) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + logit_list = self.net(x) + return logit_list + + def predict(self, image_list: list, visualization: bool = False, save_path: str = "bisenet_lane_segmentation_output") -> List[np.ndarray]: + self.eval() + result = [] + with paddle.no_grad(): + for i, im in enumerate(image_list): + if isinstance(im, str): + im = cv2.imread(im) + + ori_shape = im.shape[:2] + for op in self.transforms: + outputs = op(im) + im = outputs[0] + + im = np.transpose(im, (2, 0, 1)) + im = im[np.newaxis, ...] 
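+ # After the transpose and np.newaxis the image is a single NCHW sample of shape (1, C, H, W), ready to become a Paddle tensor.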
+ im = paddle.to_tensor(im) + logit = self.forward(im)[0] + pred = reverse_transform(logit, ori_shape, self.transforms, mode='bilinear') + pred = paddle.argmax(pred, axis=1, keepdim=True, dtype='int32') + pred = paddle.squeeze(pred[0]) + pred = pred.numpy().astype('uint8') + if visualization: + color_map = visualize.get_color_map_list(256) + pred_mask = visualize.get_pseudo_color_map(pred, color_map) + if not os.path.exists(save_path): + os.makedirs(save_path) + img_name = str(time.time()) + '.png' + image_save_path = os.path.join(save_path, img_name) + pred_mask.save(image_save_path) + result.append(pred) + return result + + @serving + def serving_method(self, images: str, **kwargs) -> dict: + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + outputs = self.predict(image_list=images_decode, **kwargs) + serving_data = [cv2_to_base64(outputs[i]) for i in range(len(outputs))] + results = {'data': serving_data} + + return results + + @runnable + def run_cmd(self, argvs: list) -> List[np.ndarray]: + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + + results = self.predict(image_list=[args.input_path], save_path=args.output_dir, visualization=args.visualization) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + + self.arg_config_group.add_argument( + '--output_dir', type=str, default="bisenet_lane_segmentation_output", help="The directory to save output images.") + self.arg_config_group.add_argument( + '--visualization', type=bool, default=True, help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + \ No newline at end of file diff --git a/modules/image/semantic_segmentation/bisenet_lane_segmentation/processor.py b/modules/image/semantic_segmentation/bisenet_lane_segmentation/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..dc1cf08804a03cef641f7620a5fa2262713cce54 --- /dev/null +++ b/modules/image/semantic_segmentation/bisenet_lane_segmentation/processor.py @@ -0,0 +1,185 @@ +import base64 +import collections.abc +from itertools import combinations +from typing import Union, List, Tuple, Callable + +import numpy as np +import cv2 +import paddle +import paddle.nn.functional as F + + +def get_reverse_list(ori_shape: list, transforms: Callable) -> list: + """ + get reverse list of transform. + + Args: + ori_shape (list): Origin shape of image. + transforms (list): List of transform. + + Returns: + list: List of tuple, there are two format: + ('resize', (h, w)) The image shape before resize, + ('padding', (h, w)) The image shape before padding. 
+ """ + reverse_list = [] + h, w = ori_shape[0], ori_shape[1] + for op in transforms: + if op.__class__.__name__ in ['Resize']: + reverse_list.append(('resize', (h, w))) + h, w = op.target_size[0], op.target_size[1] + if op.__class__.__name__ in ['Crop']: + reverse_list.append(('crop', (op.up_h_off, op.down_h_off), + (op.left_w_off, op.right_w_off))) + h = h - op.up_h_off + h = h - op.down_h_off + w = w - op.left_w_off + w = w - op.right_w_off + if op.__class__.__name__ in ['ResizeByLong']: + reverse_list.append(('resize', (h, w))) + long_edge = max(h, w) + short_edge = min(h, w) + short_edge = int(round(short_edge * op.long_size / long_edge)) + long_edge = op.long_size + if h > w: + h = long_edge + w = short_edge + else: + w = long_edge + h = short_edge + if op.__class__.__name__ in ['ResizeByShort']: + reverse_list.append(('resize', (h, w))) + long_edge = max(h, w) + short_edge = min(h, w) + long_edge = int(round(long_edge * op.short_size / short_edge)) + short_edge = op.short_size + if h > w: + h = long_edge + w = short_edge + else: + w = long_edge + h = short_edge + if op.__class__.__name__ in ['Padding']: + reverse_list.append(('padding', (h, w))) + w, h = op.target_size[0], op.target_size[1] + if op.__class__.__name__ in ['PaddingByAspectRatio']: + reverse_list.append(('padding', (h, w))) + ratio = w / h + if ratio == op.aspect_ratio: + pass + elif ratio > op.aspect_ratio: + h = int(w / op.aspect_ratio) + else: + w = int(h * op.aspect_ratio) + if op.__class__.__name__ in ['LimitLong']: + long_edge = max(h, w) + short_edge = min(h, w) + if ((op.max_long is not None) and (long_edge > op.max_long)): + reverse_list.append(('resize', (h, w))) + long_edge = op.max_long + short_edge = int(round(short_edge * op.max_long / long_edge)) + elif ((op.min_long is not None) and (long_edge < op.min_long)): + reverse_list.append(('resize', (h, w))) + long_edge = op.min_long + short_edge = int(round(short_edge * op.min_long / long_edge)) + if h > w: + h = long_edge + w = short_edge + else: + w = long_edge + h = short_edge + return reverse_list + + +def reverse_transform(pred: paddle.Tensor, ori_shape: list, transforms: Callable, mode: str = 'nearest') -> paddle.Tensor: + """recover pred to origin shape""" + reverse_list = get_reverse_list(ori_shape, transforms) + for item in reverse_list[::-1]: + if item[0] == 'resize': + h, w = item[1][0], item[1][1] + # if paddle.get_device() == 'cpu': + # pred = paddle.cast(pred, 'uint8') + # pred = F.interpolate(pred, (h, w), mode=mode) + # pred = paddle.cast(pred, 'int32') + # else: + pred = F.interpolate(pred, (h, w), mode=mode) + elif item[0] == 'crop': + up_h_off, down_h_off = item[1][0], item[1][1] + left_w_off, right_w_off = item[2][0], item[2][1] + pred = F.pad( + pred, [left_w_off, right_w_off, up_h_off, down_h_off], + value=0, + mode='constant', + data_format="NCHW") + elif item[0] == 'padding': + h, w = item[1][0], item[1][1] + pred = pred[:, :, 0:h, 0:w] + else: + raise Exception("Unexpected info '{}' in im_info".format(item[0])) + return pred + + +class Crop: + """ + crop an image from four forwards. + + Args: + up_h_off (int, optional): The cut height for image from up to down. Default: 0. + down_h_off (int, optional): The cut height for image from down to up . Default: 0. + left_w_off (int, optional): The cut height for image from left to right. Default: 0. + right_w_off (int, optional): The cut width for image from right to left. Default: 0. 
+ """ + + def __init__(self, up_h_off: int = 0, down_h_off: int = 0, left_w_off: int = 0, right_w_off: int = 0): + self.up_h_off = up_h_off + self.down_h_off = down_h_off + self.left_w_off = left_w_off + self.right_w_off = right_w_off + + def __call__(self, im: np.ndarray, label: np.ndarray = None) -> Tuple[np.ndarray]: + if self.up_h_off < 0 or self.down_h_off < 0 or self.left_w_off < 0 or self.right_w_off < 0: + raise Exception( + "up_h_off, down_h_off, left_w_off, right_w_off must equal or greater zero" + ) + + if self.up_h_off > 0 and self.up_h_off < im.shape[0]: + im = im[self.up_h_off:, :, :] + if label is not None: + label = label[self.up_h_off:, :] + + if self.down_h_off > 0 and self.down_h_off < im.shape[0]: + im = im[:-self.down_h_off, :, :] + if label is not None: + label = label[:-self.down_h_off, :] + + if self.left_w_off > 0 and self.left_w_off < im.shape[1]: + im = im[:, self.left_w_off:, :] + if label is not None: + label = label[:, self.left_w_off:] + + if self.right_w_off > 0 and self.right_w_off < im.shape[1]: + im = im[:, :-self.right_w_off, :] + if label is not None: + label = label[:, :-self.right_w_off] + + if label is None: + return (im, ) + else: + return (im, label) + +def cv2_to_base64(image: np.ndarray) -> str: + """ + Convert data from BGR to base64 format. + """ + data = cv2.imencode('.png', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + +def base64_to_cv2(b64str: str) -> np.ndarray: + """ + Convert data from base64 to BGR format. + """ + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..1afa20b09c0da5a9b051fcbfc59f8d43c52ce908 --- /dev/null +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md @@ -0,0 +1,175 @@ +# deeplabv3p_xception65_humanseg + +|Module Name |deeplabv3p_xception65_humanseg| +| :--- | :---: | +|Category|Image segmentation| +|Network|deeplabv3p| +|Dataset|Baidu self-built dataset| +|Fine-tuning supported or not|No| +|Module Size|162MB| +|Data indicators |-| +|Latest update date|2021-02-26| + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +


+ +- ### Module Introduction + + - The DeepLabv3+ model is trained on a Baidu self-built dataset and can be used for portrait segmentation. +


+ +- For more information, please refer to: [deeplabv3p](https://github.com/PaddlePaddle/PaddleSeg) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install deeplabv3p_xception65_humanseg + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + hub run deeplabv3p_xception65_humanseg --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + + + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + human_seg = hub.Module(name="deeplabv3p_xception65_humanseg") + result = human_seg.segmentation(images=[cv2.imread('/PATH/TO/IMAGE')]) + ``` + +- ### 3.API + + - ```python + def segmentation(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_output') + ``` + + - Prediction API, generating segmentation result. + + - **Parameter** + * images (list\[numpy.ndarray\]): Image data, ndarray.shape is in the format [H, W, C], BGR. + * paths (list\[str\]): Image path. + * batch\_size (int): Batch size. + * use\_gpu (bool): Use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + * visualization (bool): Whether to save the recognition results as picture files. + * output\_dir (str): Save path of images. + + - **Return** + + * res (list\[dict\]): The list of recognition results, where each element is dict and each field is: + * save\_path (str, optional): Save path of the result. + * data (numpy.ndarray): The result of portrait segmentation. + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + + - Save the model to the specified path. + + - **Parameters** + * dirname: Save path. + * model\_filename: Model file name,defalt is \_\_model\_\_ + * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) + * combined: Whether to save the parameters to a unified file. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of for human segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m deeplabv3p_xception65_humanseg + ``` + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
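+
+  - The code in Step 2 below encodes an image, posts it to the service and composes the returned mask with the original image. As a reference, a self-contained sketch of that round trip (it assumes the default port 8866 and defines the original image `org_im` used in the final composition; `/PATH/TO/IMAGE` is a placeholder) looks like this:
+
+    - ```python
+      import base64
+      import json
+
+      import cv2
+      import numpy as np
+      import requests
+
+
+      def cv2_to_base64(image):
+          data = cv2.imencode('.jpg', image)[1]
+          return base64.b64encode(data.tostring()).decode('utf8')
+
+
+      def base64_to_cv2(b64str):
+          data = base64.b64decode(b64str.encode('utf8'))
+          data = np.fromstring(data, np.uint8)
+          return cv2.imdecode(data, cv2.IMREAD_COLOR)
+
+
+      # Read the image once so it can be reused when building the RGBA result.
+      org_im = cv2.imread("/PATH/TO/IMAGE")
+
+      # Send the HTTP request to the deployed module.
+      data = {'images': [cv2_to_base64(org_im)]}
+      headers = {"Content-type": "application/json"}
+      url = "http://127.0.0.1:8866/predict/deeplabv3p_xception65_humanseg"
+      r = requests.post(url=url, headers=headers, data=json.dumps(data))
+
+      # Decode the returned mask, attach it as an alpha channel and save the composed image.
+      mask = cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY)
+      rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2)
+      cv2.imwrite("segment_human_server.png", rgba)
+      ```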
+ + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/deeplabv3p_xception65_humanseg" + r = requests.post(url=url, headers=headers, + mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) + rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) + cv2.imwrite("segment_human_server.png", rgba) + ``` +## V. Release Note + +- 1.0.0 + + First release + +* 1.1.0 + + Improve prediction performance + +* 1.1.1 + + Fix the bug of image value out of range + +* 1.1.2 + + Fix memory leakage problem of on cudnn 8.0.4 diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/README.md b/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8a3951ac11aed63c93fdb383f47537813ef5ea69 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/README.md @@ -0,0 +1,186 @@ +# ginet_resnet101vd_ade20k + +|模型名称|ginet_resnet101vd_ade20k| +| :--- | :---: | +|类别|图像-图像分割| +|网络|ginet_resnet101vd| +|数据集|ADE20K| +|是否支持Fine-tuning|是| +|模型大小|287MB| +|指标|-| +|最新更新日期|2021-12-14| + +## 一、模型基本信息 + + - 样例结果示例: + - Sample results: +


+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[ginet](https://arxiv.org/pdf/2009.06160) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install ginet_resnet101vd_ade20k + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet101vd_ade20k') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ginet_resnet101vd_ade20k模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='ginet_resnet101vd_ade20k', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet101vd_ade20k', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m ginet_resnet101vd_ade20k + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + 
data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ginet_resnet101vd_ade20k" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/README_en.md b/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..b7d0b3e0fd095c589edfbe29fbb2a19cc3524d2e --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/README_en.md @@ -0,0 +1,185 @@ +# ginet_resnet101vd_ade20k + +|Module Name|ginet_resnet101vd_ade20k| +| :--- | :---: | +|Category|Image Segmentation| +|Network|ginet_resnet101vd| +|Dataset|ADE20K| +|Fine-tuning supported or not|Yes| +|Module Size|287MB| +|Data indicators|-| +|Latest update date|2021-12-14| + +## I. Basic Information + +- ### Application Effect Display + - Sample results: +


+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [ginet](https://arxiv.org/pdf/2009.06160) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install ginet_resnet101vd_ade20k + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet101vd_ade20k') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the ginet_resnet101vd_ade20k model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='ginet_resnet101vd_ade20k', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet101vd_ade20k', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m ginet_resnet101vd_ade20k + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ginet_resnet101vd_ade20k" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/layers.py b/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..7e46219fd671ed9834795c9881292eed787b990d --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/layers.py @@ -0,0 +1,345 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
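+# Building blocks shared by this module: a SyncBatchNorm helper, ConvBNLayer / ConvBN / ConvBNReLU
+# wrappers, a residual BottleneckBlock, depthwise-separable convolutions, an Activation wrapper and
+# an ASPP module.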
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + 
out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + + Returns: + A callable object of Activation. + + Raises: + KeyError: When parameter `act` is not in the optional range. + + Examples: + + from paddleseg.models.common.activation import Activation + + relu = Activation("relu") + print(relu) + # + + sigmoid = Activation("sigmoid") + print(sigmoid) + # + + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. 
Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool= False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/module.py b/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/module.py new file mode 100644 index 0000000000000000000000000000000000000000..4a7aff27e9b964b069c0c2be44ab719d2298591d --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/module.py @@ -0,0 +1,309 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule +from paddleseg.utils import utils +from paddleseg.models import layers + +from ginet_resnet101vd_ade20k.resnet import ResNet101_vd + + +@moduleinfo( + name="ginet_resnet101vd_ade20k", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="GINetResnet101 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class GINetResNet101(nn.Layer): + """ + The GINetResNet101 implementation based on PaddlePaddle. + The original article refers to + Wu, Tianyi, Yu Lu, Yu Zhu, Chuang Zhang, Ming Wu, Zhanyu Ma, and Guodong Guo. "GINet: Graph interaction network for scene parsing." 
In European Conference on Computer Vision, pp. 34-51. Springer, Cham, 2020. + (https://arxiv.org/pdf/2009.06160). + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): Values in the tuple indicate the indices of output of backbone. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. + If true, auxiliary loss will be added after LearningToDownsample module. Default: False. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.. Default: False. + jpu (bool, optional)): whether to use jpu unit in the base forward. Default:True. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes: int = 150, + backbone_indices: Tuple[int]=(0, 1, 2, 3), + enable_auxiliary_loss: bool = True, + align_corners: bool = True, + jpu: bool = True, + pretrained: str = None): + super(GINetResNet101, self).__init__() + self.nclass = num_classes + self.aux = enable_auxiliary_loss + self.jpu = jpu + + self.backbone = ResNet101_vd() + self.backbone_indices = backbone_indices + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + self.jpu = layers.JPU([512, 1024, 2048], width=512) if jpu else None + self.head = GIHead(in_channels=2048, nclass=num_classes) + + if self.aux: + self.auxlayer = layers.AuxLayer( + 1024, 1024 // 4, num_classes, bias_attr=False) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def base_forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + c1, c2, c3, c4 = [feat_list[i] for i in self.backbone_indices] + + if self.jpu: + return self.jpu(c1, c2, c3, c4) + else: + return c1, c2, c3, c4 + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + _, _, h, w = x.shape + _, _, c3, c4 = self.base_forward(x) + + logit_list = [] + x, _ = self.head(c4) + logit_list.append(x) + + if self.aux: + auxout = self.auxlayer(c3) + + logit_list.append(auxout) + + return [ + F.interpolate( + logit, (h, w), + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + + +class GIHead(nn.Layer): + """The Graph Interaction Network head.""" + + def __init__(self, in_channels: int, nclass: int): + super().__init__() + self.nclass = nclass + inter_channels = in_channels // 4 + self.inp = paddle.zeros(shape=(nclass, 300), dtype='float32') + self.inp = paddle.create_parameter( + shape=self.inp.shape, + dtype=str(self.inp.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.inp)) + + self.fc1 = nn.Sequential( + nn.Linear(300, 128), nn.BatchNorm1D(128), nn.ReLU()) + self.fc2 = nn.Sequential( + nn.Linear(128, 256), nn.BatchNorm1D(256), nn.ReLU()) + self.conv5 = layers.ConvBNReLU( + in_channels, + inter_channels, + 3, + padding=1, + bias_attr=False, + stride=1) + + self.gloru = GlobalReasonUnit( + in_channels=inter_channels, + num_state=256, + num_node=84, + nclass=nclass) + self.conv6 = nn.Sequential( + nn.Dropout(0.1), nn.Conv2D(inter_channels, nclass, 1)) 
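+
+    # Shape walkthrough for the forward pass below, derived from the layer definitions
+    # above (this module builds GIHead with in_channels=2048, so inter_channels=512):
+    #   self.inp : (nclass, 300) learnable class-node embeddings
+    #   fc1/fc2  : (nclass, 300) -> (nclass, 128) -> (nclass, 256), expanded to (B, 256, nclass)
+    #   conv5    : (B, 2048, H, W) -> (B, 512, H, W) visual features
+    #   gloru    : graph interaction between the class nodes and the visual features
+    #   conv6    : (B, 512, H, W) -> (B, nclass, H, W) segmentation logits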
+ + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + B, C, H, W = x.shape + inp = self.inp.detach() + + inp = self.fc1(inp) + inp = self.fc2(inp).unsqueeze(axis=0).transpose((0, 2, 1))\ + .expand((B, 256, self.nclass)) + + out = self.conv5(x) + + out, se_out = self.gloru(out, inp) + out = self.conv6(out) + return out, se_out + + +class GlobalReasonUnit(nn.Layer): + """ + The original paper refers to: + Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks" (https://arxiv.org/abs/1811.12814) + """ + + def __init__(self, in_channels: int, num_state: int = 256, num_node: int = 84, nclass: int = 59): + super().__init__() + self.num_state = num_state + self.conv_theta = nn.Conv2D( + in_channels, num_node, kernel_size=1, stride=1, padding=0) + self.conv_phi = nn.Conv2D( + in_channels, num_state, kernel_size=1, stride=1, padding=0) + self.graph = GraphLayer(num_state, num_node, nclass) + self.extend_dim = nn.Conv2D( + num_state, in_channels, kernel_size=1, bias_attr=False) + + self.bn = layers.SyncBatchNorm(in_channels) + + def forward(self, x: paddle.Tensor, inp: paddle.Tensor) -> List[paddle.Tensor]: + B = self.conv_theta(x) + sizeB = B.shape + B = B.reshape((sizeB[0], sizeB[1], -1)) + + sizex = x.shape + x_reduce = self.conv_phi(x) + x_reduce = x_reduce.reshape((sizex[0], -1, sizex[2] * sizex[3]))\ + .transpose((0, 2, 1)) + + V = paddle.bmm(B, x_reduce).transpose((0, 2, 1)) + V = paddle.divide( + V, paddle.to_tensor([sizex[2] * sizex[3]], dtype='float32')) + + class_node, new_V = self.graph(inp, V) + D = B.reshape((sizeB[0], -1, sizeB[2] * sizeB[3])).transpose((0, 2, 1)) + Y = paddle.bmm(D, new_V.transpose((0, 2, 1))) + Y = Y.transpose((0, 2, 1)).reshape((sizex[0], self.num_state, \ + sizex[2], -1)) + Y = self.extend_dim(Y) + Y = self.bn(Y) + out = Y + x + + return out, class_node + + +class GraphLayer(nn.Layer): + def __init__(self, num_state: int, num_node: int, num_class: int): + super().__init__() + self.vis_gcn = GCN(num_state, num_node) + self.word_gcn = GCN(num_state, num_class) + self.transfer = GraphTransfer(num_state) + self.gamma_vis = paddle.zeros([num_node]) + self.gamma_word = paddle.zeros([num_class]) + self.gamma_vis = paddle.create_parameter( + shape=self.gamma_vis.shape, + dtype=str(self.gamma_vis.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.gamma_vis)) + self.gamma_word = paddle.create_parameter( + shape=self.gamma_word.shape, + dtype=str(self.gamma_word.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.gamma_word)) + + def forward(self, inp: paddle.Tensor, vis_node: paddle.Tensor) -> List[paddle.Tensor]: + inp = self.word_gcn(inp) + new_V = self.vis_gcn(vis_node) + class_node, vis_node = self.transfer(inp, new_V) + + class_node = self.gamma_word * inp + class_node + new_V = self.gamma_vis * vis_node + new_V + return class_node, new_V + + +class GCN(nn.Layer): + def __init__(self, num_state: int = 128, num_node: int = 64, bias=False): + super().__init__() + self.conv1 = nn.Conv1D( + num_node, + num_node, + kernel_size=1, + padding=0, + stride=1, + groups=1, + ) + self.relu = nn.ReLU() + self.conv2 = nn.Conv1D( + num_state, + num_state, + kernel_size=1, + padding=0, + stride=1, + groups=1, + bias_attr=bias) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + h = self.conv1(x.transpose((0, 2, 1))).transpose((0, 2, 1)) + h = h + x + h = self.relu(h) + h = self.conv2(h) + return h + + +class GraphTransfer(nn.Layer): + """Transfer vis graph to class node, transfer class node to vis feature""" + + def 
__init__(self, in_dim: int): + super().__init__() + self.channle_in = in_dim + self.query_conv = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1) + self.key_conv = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1) + self.value_conv_vis = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.value_conv_word = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.softmax_vis = nn.Softmax(axis=-1) + self.softmax_word = nn.Softmax(axis=-2) + + def forward(self, word: paddle.Tensor, vis_node: paddle.Tensor) -> List[paddle.Tensor]: + m_batchsize, C, Nc = word.shape + m_batchsize, C, Nn = vis_node.shape + + proj_query = self.query_conv(word).reshape((m_batchsize, -1, Nc))\ + .transpose((0, 2, 1)) + proj_key = self.key_conv(vis_node).reshape((m_batchsize, -1, Nn)) + + energy = paddle.bmm(proj_query, proj_key) + attention_vis = self.softmax_vis(energy).transpose((0, 2, 1)) + attention_word = self.softmax_word(energy) + + proj_value_vis = self.value_conv_vis(vis_node).reshape((m_batchsize, -1, + Nn)) + proj_value_word = self.value_conv_word(word).reshape((m_batchsize, -1, + Nc)) + + class_out = paddle.bmm(proj_value_vis, attention_vis) + node_out = paddle.bmm(proj_value_word, attention_word) + return class_out, node_out \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/resnet.py b/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..e3e031f0e239a2d8e965596579ed16a5501b324f --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_ade20k/resnet.py @@ -0,0 +1,136 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
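+
+# ResNet101-vd backbone used by GINet. The last two residual stages (res4 and res5)
+# use dilated convolutions (dilation 2 and 4, with multi-grid rates (1, 2, 4) in
+# res5), so the backbone outputs features at an output stride of 8.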
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import ginet_resnet101vd_ade20k.layers as L + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = L.ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + act=None, + name=name + "_branch2b") + + if not shortcut: + self.short = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + + return y + + +class ResNet101_vd(nn.Layer): + def __init__(self, + multi_grid: tuple = (1, 2, 4)): + super(ResNet101_vd, self).__init__() + depth = [3, 4, 23, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + self.feat_channels = [c * 4 for c in num_filters] + dilation_dict = {2: 2, 3: 4} + self.conv1_1 = L.ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = L.ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = L.ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + name="conv1_3") + self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) + self.stage_list = [] + + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + L.BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name, + dilation=dilation_rate)) + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + return feat_list \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/README.md b/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..faa1a537b2e96f2af75ac81a9d6e5247fbe84379 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/README.md @@ -0,0 +1,185 @@ +# ginet_resnet101vd_cityscapes + +|模型名称|ginet_resnet101vd_cityscapes| +| :--- | :---: | +|类别|图像-图像分割| +|网络|ginet_resnet101vd| 
+|数据集|Cityscapes| +|是否支持Fine-tuning|是| +|模型大小|286MB| +|数据指标|-| +|最新更新日期|2021-12-14| + +## 一、模型基本信息 + + - 样例结果示例: +
+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[ginet](https://arxiv.org/pdf/2009.06160) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install ginet_resnet101vd_cityscapes + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet101vd_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ginet_resnet101vd_cityscapes模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='ginet_resnet101vd_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet101vd_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m ginet_resnet101vd_cityscapes + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def 
cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ginet_resnet101vd_cityscapes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/README_en.md b/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..2e09ff0c9121c1531b8f4892a3ae8b492b87019b --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/README_en.md @@ -0,0 +1,185 @@ +# ginet_resnet101vd_cityscapes + +|Module Name|ginet_resnet101vd_cityscapes| +| :--- | :---: | +|Category|Image Segmentation| +|Network|ginet_resnet101vd| +|Dataset|Cityscapes| +|Fine-tuning supported or not|Yes| +|Module Size|286MB| +|Data indicators|-| +|Latest update date|2021-12-14| + +## I. Basic Information + +- ### Application Effect Display + - Sample results: +
+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [ginet](https://arxiv.org/pdf/2009.06160) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install ginet_resnet101vd_cityscapes + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet101vd_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the ginet_resnet101vd_cityscapes model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='ginet_resnet101vd_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet101vd_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m ginet_resnet101vd_cityscapes + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ginet_resnet101vd_cityscapes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/layers.py b/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..7e46219fd671ed9834795c9881292eed787b990d --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/layers.py @@ -0,0 +1,345 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
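+
+# Building blocks shared by the GINet modules: ConvBNLayer and BottleneckBlock for the
+# ResNet101-vd backbone, plus ConvBN, ConvBNReLU, SeparableConvBNReLU, Activation and
+# ASPPModule helpers. SyncBatchNorm falls back to nn.BatchNorm2D when running on CPU.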
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + 
out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + + Returns: + A callable object of Activation. + + Raises: + KeyError: When parameter `act` is not in the optional range. + + Examples: + + from paddleseg.models.common.activation import Activation + + relu = Activation("relu") + print(relu) + # + + sigmoid = Activation("sigmoid") + print(sigmoid) + # + + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. 
Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool= False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/module.py b/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e135d4ab484a4bd9c7c81e6905d527680fe69a04 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/module.py @@ -0,0 +1,308 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule +from paddleseg.utils import utils +from paddleseg.models import layers + +from ginet_resnet101vd_cityscapes.resnet import ResNet101_vd + + +@moduleinfo( + name="ginet_resnet101vd_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="GINetResnet101 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class GINetResNet101(nn.Layer): + """ + The GINetResNet101 implementation based on PaddlePaddle. + The original article refers to + Wu, Tianyi, Yu Lu, Yu Zhu, Chuang Zhang, Ming Wu, Zhanyu Ma, and Guodong Guo. 
"GINet: Graph interaction network for scene parsing." In European Conference on Computer Vision, pp. 34-51. Springer, Cham, 2020. + (https://arxiv.org/pdf/2009.06160). + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): Values in the tuple indicate the indices of output of backbone. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. + If true, auxiliary loss will be added after LearningToDownsample module. Default: False. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.. Default: False. + jpu (bool, optional)): whether to use jpu unit in the base forward. Default:True. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes: int = 19, + backbone_indices: Tuple[int]=(0, 1, 2, 3), + enable_auxiliary_loss: bool = True, + align_corners: bool = True, + jpu: bool = True, + pretrained: str = None): + super(GINetResNet101, self).__init__() + self.nclass = num_classes + self.aux = enable_auxiliary_loss + self.jpu = jpu + + self.backbone = ResNet101_vd() + self.backbone_indices = backbone_indices + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + self.jpu = layers.JPU([512, 1024, 2048], width=512) if jpu else None + self.head = GIHead(in_channels=2048, nclass=num_classes) + + if self.aux: + self.auxlayer = layers.AuxLayer( + 1024, 1024 // 4, num_classes, bias_attr=False) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def base_forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + c1, c2, c3, c4 = [feat_list[i] for i in self.backbone_indices] + + if self.jpu: + return self.jpu(c1, c2, c3, c4) + else: + return c1, c2, c3, c4 + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + _, _, h, w = x.shape + _, _, c3, c4 = self.base_forward(x) + + logit_list = [] + x, _ = self.head(c4) + logit_list.append(x) + + if self.aux: + auxout = self.auxlayer(c3) + + logit_list.append(auxout) + + return [ + F.interpolate( + logit, (h, w), + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + + +class GIHead(nn.Layer): + """The Graph Interaction Network head.""" + + def __init__(self, in_channels: int, nclass: int): + super().__init__() + self.nclass = nclass + inter_channels = in_channels // 4 + self.inp = paddle.zeros(shape=(nclass, 300), dtype='float32') + self.inp = paddle.create_parameter( + shape=self.inp.shape, + dtype=str(self.inp.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.inp)) + + self.fc1 = nn.Sequential( + nn.Linear(300, 128), nn.BatchNorm1D(128), nn.ReLU()) + self.fc2 = nn.Sequential( + nn.Linear(128, 256), nn.BatchNorm1D(256), nn.ReLU()) + self.conv5 = layers.ConvBNReLU( + in_channels, + inter_channels, + 3, + padding=1, + bias_attr=False, + stride=1) + + self.gloru = GlobalReasonUnit( + in_channels=inter_channels, + num_state=256, + num_node=84, + nclass=nclass) + self.conv6 = nn.Sequential( + 
nn.Dropout(0.1), nn.Conv2D(inter_channels, nclass, 1)) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + B, C, H, W = x.shape + inp = self.inp.detach() + + inp = self.fc1(inp) + inp = self.fc2(inp).unsqueeze(axis=0).transpose((0, 2, 1))\ + .expand((B, 256, self.nclass)) + + out = self.conv5(x) + + out, se_out = self.gloru(out, inp) + out = self.conv6(out) + return out, se_out + + +class GlobalReasonUnit(nn.Layer): + """ + The original paper refers to: + Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks" (https://arxiv.org/abs/1811.12814) + """ + + def __init__(self, in_channels: int, num_state: int = 256, num_node: int = 84, nclass: int = 59): + super().__init__() + self.num_state = num_state + self.conv_theta = nn.Conv2D( + in_channels, num_node, kernel_size=1, stride=1, padding=0) + self.conv_phi = nn.Conv2D( + in_channels, num_state, kernel_size=1, stride=1, padding=0) + self.graph = GraphLayer(num_state, num_node, nclass) + self.extend_dim = nn.Conv2D( + num_state, in_channels, kernel_size=1, bias_attr=False) + + self.bn = layers.SyncBatchNorm(in_channels) + + def forward(self, x: paddle.Tensor, inp: paddle.Tensor) -> paddle.Tensor: + B = self.conv_theta(x) + sizeB = B.shape + B = B.reshape((sizeB[0], sizeB[1], -1)) + + sizex = x.shape + x_reduce = self.conv_phi(x) + x_reduce = x_reduce.reshape((sizex[0], -1, sizex[2] * sizex[3]))\ + .transpose((0, 2, 1)) + + V = paddle.bmm(B, x_reduce).transpose((0, 2, 1)) + V = paddle.divide( + V, paddle.to_tensor([sizex[2] * sizex[3]], dtype='float32')) + + class_node, new_V = self.graph(inp, V) + D = B.reshape((sizeB[0], -1, sizeB[2] * sizeB[3])).transpose((0, 2, 1)) + Y = paddle.bmm(D, new_V.transpose((0, 2, 1))) + Y = Y.transpose((0, 2, 1)).reshape((sizex[0], self.num_state, \ + sizex[2], -1)) + Y = self.extend_dim(Y) + Y = self.bn(Y) + out = Y + x + + return out, class_node + + +class GraphLayer(nn.Layer): + def __init__(self, num_state: int, num_node: int, num_class: int): + super().__init__() + self.vis_gcn = GCN(num_state, num_node) + self.word_gcn = GCN(num_state, num_class) + self.transfer = GraphTransfer(num_state) + self.gamma_vis = paddle.zeros([num_node]) + self.gamma_word = paddle.zeros([num_class]) + self.gamma_vis = paddle.create_parameter( + shape=self.gamma_vis.shape, + dtype=str(self.gamma_vis.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.gamma_vis)) + self.gamma_word = paddle.create_parameter( + shape=self.gamma_word.shape, + dtype=str(self.gamma_word.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.gamma_word)) + + def forward(self, inp: paddle.Tensor, vis_node: paddle.Tensor) -> List[paddle.Tensor]: + inp = self.word_gcn(inp) + new_V = self.vis_gcn(vis_node) + class_node, vis_node = self.transfer(inp, new_V) + + class_node = self.gamma_word * inp + class_node + new_V = self.gamma_vis * vis_node + new_V + return class_node, new_V + + +class GCN(nn.Layer): + def __init__(self, num_state: int = 128, num_node: int = 64, bias=False): + super().__init__() + self.conv1 = nn.Conv1D( + num_node, + num_node, + kernel_size=1, + padding=0, + stride=1, + groups=1, + ) + self.relu = nn.ReLU() + self.conv2 = nn.Conv1D( + num_state, + num_state, + kernel_size=1, + padding=0, + stride=1, + groups=1, + bias_attr=bias) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + h = self.conv1(x.transpose((0, 2, 1))).transpose((0, 2, 1)) + h = h + x + h = self.relu(h) + h = self.conv2(h) + return h + + +class GraphTransfer(nn.Layer): + """Transfer vis graph to class node, 
transfer class node to vis feature""" + + def __init__(self, in_dim: int): + super().__init__() + self.channle_in = in_dim + self.query_conv = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1) + self.key_conv = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1) + self.value_conv_vis = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.value_conv_word = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.softmax_vis = nn.Softmax(axis=-1) + self.softmax_word = nn.Softmax(axis=-2) + + def forward(self, word: paddle.Tensor, vis_node: paddle.Tensor) -> List[paddle.Tensor]: + m_batchsize, C, Nc = word.shape + m_batchsize, C, Nn = vis_node.shape + + proj_query = self.query_conv(word).reshape((m_batchsize, -1, Nc))\ + .transpose((0, 2, 1)) + proj_key = self.key_conv(vis_node).reshape((m_batchsize, -1, Nn)) + + energy = paddle.bmm(proj_query, proj_key) + attention_vis = self.softmax_vis(energy).transpose((0, 2, 1)) + attention_word = self.softmax_word(energy) + proj_value_vis = self.value_conv_vis(vis_node).reshape((m_batchsize, -1, + Nn)) + proj_value_word = self.value_conv_word(word).reshape((m_batchsize, -1, + Nc)) + + class_out = paddle.bmm(proj_value_vis, attention_vis) + node_out = paddle.bmm(proj_value_word, attention_word) + return class_out, node_out \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/resnet.py b/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..6104fa44ac2286e3636960631768599e2467c336 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_cityscapes/resnet.py @@ -0,0 +1,136 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import ginet_resnet101vd_cityscapes.layers as L + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = L.ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + act=None, + name=name + "_branch2b") + + if not shortcut: + self.short = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + + return y + + +class ResNet101_vd(nn.Layer): + def __init__(self, + multi_grid: tuple = (1, 2, 4)): + super(ResNet101_vd, self).__init__() + depth = [3, 4, 23, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + self.feat_channels = [c * 4 for c in num_filters] + dilation_dict = {2: 2, 3: 4} + self.conv1_1 = L.ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = L.ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = L.ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + name="conv1_3") + self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) + self.stage_list = [] + + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + L.BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name, + dilation=dilation_rate)) + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + return feat_list \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_voc/README.md b/modules/image/semantic_segmentation/ginet_resnet101vd_voc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..41f95d112f885e3e5decb5854b35a71a99eba452 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_voc/README.md @@ -0,0 +1,185 @@ +# ginet_resnet101vd_voc + +|模型名称|ginet_resnet101vd_voc| +| :--- | :---: | +|类别|图像-图像分割| +|网络|ginet_resnet101vd| +|数据集|PascalVOC2012| 
+|是否支持Fine-tuning|是| +|模型大小|286MB| +|数据指标|-| +|最新更新日期|2021-12-14| + +## 一、模型基本信息 + + - 样例结果示例: +
+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[ginet](https://arxiv.org/pdf/2009.06160) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install ginet_resnet101vd_voc + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet101vd_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ginet_resnet101vd_voc模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='ginet_resnet101vd_voc', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet101vd_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m ginet_resnet101vd_voc + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = 
cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ginet_resnet101vd_voc" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_voc/README_en.md b/modules/image/semantic_segmentation/ginet_resnet101vd_voc/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..1bfc41ddd29da74e1df9da24cc23e0c65cf2a02f --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_voc/README_en.md @@ -0,0 +1,185 @@ +# ginet_resnet101vd_voc + +|Module Name|ginet_resnet101vd_voc| +| :--- | :---: | +|Category|Image Segmentation| +|Network|ginet_resnet101vd| +|Dataset|PascalVOC2012| +|Fine-tuning supported or not|Yes| +|Module Size|286MB| +|Data indicators|-| +|Latest update date|2021-12-14| + +## I. Basic Information + +- ### Application Effect Display + - Sample results: +
+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [ginet](https://arxiv.org/pdf/2009.06160) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install ginet_resnet101vd_voc + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet101vd_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the ginet_resnet101vd_voc model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='ginet_resnet101vd_voc', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='ttest_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet101vd_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m ginet_resnet101vd_voc + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ginet_resnet101vd_voc" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_voc/layers.py b/modules/image/semantic_segmentation/ginet_resnet101vd_voc/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..7e46219fd671ed9834795c9881292eed787b990d --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_voc/layers.py @@ -0,0 +1,345 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + 
out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + + Returns: + A callable object of Activation. + + Raises: + KeyError: When parameter `act` is not in the optional range. + + Examples: + + from paddleseg.models.common.activation import Activation + + relu = Activation("relu") + print(relu) + # + + sigmoid = Activation("sigmoid") + print(sigmoid) + # + + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. 
Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool= False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_voc/module.py b/modules/image/semantic_segmentation/ginet_resnet101vd_voc/module.py new file mode 100644 index 0000000000000000000000000000000000000000..19422e3e70d829be67d62256403812df93811e7e --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_voc/module.py @@ -0,0 +1,309 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule +from paddleseg.utils import utils +from paddleseg.models import layers + +from ginet_resnet101vd_voc.resnet import ResNet101_vd + + +@moduleinfo( + name="ginet_resnet101vd_voc", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="GINetResnet101 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class GINetResNet101(nn.Layer): + """ + The GINetResNet101 implementation based on PaddlePaddle. + The original article refers to + Wu, Tianyi, Yu Lu, Yu Zhu, Chuang Zhang, Ming Wu, Zhanyu Ma, and Guodong Guo. "GINet: Graph interaction network for scene parsing." 
In European Conference on Computer Vision, pp. 34-51. Springer, Cham, 2020. + (https://arxiv.org/pdf/2009.06160). + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): Values in the tuple indicate the indices of output of backbone. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. + If true, auxiliary loss will be added after LearningToDownsample module. Default: False. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.. Default: False. + jpu (bool, optional)): whether to use jpu unit in the base forward. Default:True. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes: int = 21, + backbone_indices: Tuple[int]=(0, 1, 2, 3), + enable_auxiliary_loss: bool = True, + align_corners: bool = True, + jpu: bool = True, + pretrained: str = None): + super(GINetResNet101, self).__init__() + self.nclass = num_classes + self.aux = enable_auxiliary_loss + self.jpu = jpu + + self.backbone = ResNet101_vd() + self.backbone_indices = backbone_indices + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + self.jpu = layers.JPU([512, 1024, 2048], width=512) if jpu else None + self.head = GIHead(in_channels=2048, nclass=num_classes) + + if self.aux: + self.auxlayer = layers.AuxLayer( + 1024, 1024 // 4, num_classes, bias_attr=False) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def base_forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + c1, c2, c3, c4 = [feat_list[i] for i in self.backbone_indices] + + if self.jpu: + return self.jpu(c1, c2, c3, c4) + else: + return c1, c2, c3, c4 + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + _, _, h, w = x.shape + _, _, c3, c4 = self.base_forward(x) + + logit_list = [] + x, _ = self.head(c4) + logit_list.append(x) + + if self.aux: + auxout = self.auxlayer(c3) + + logit_list.append(auxout) + + return [ + F.interpolate( + logit, (h, w), + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + + +class GIHead(nn.Layer): + """The Graph Interaction Network head.""" + + def __init__(self, in_channels: int, nclass: int): + super().__init__() + self.nclass = nclass + inter_channels = in_channels // 4 + self.inp = paddle.zeros(shape=(nclass, 300), dtype='float32') + self.inp = paddle.create_parameter( + shape=self.inp.shape, + dtype=str(self.inp.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.inp)) + + self.fc1 = nn.Sequential( + nn.Linear(300, 128), nn.BatchNorm1D(128), nn.ReLU()) + self.fc2 = nn.Sequential( + nn.Linear(128, 256), nn.BatchNorm1D(256), nn.ReLU()) + self.conv5 = layers.ConvBNReLU( + in_channels, + inter_channels, + 3, + padding=1, + bias_attr=False, + stride=1) + + self.gloru = GlobalReasonUnit( + in_channels=inter_channels, + num_state=256, + num_node=84, + nclass=nclass) + self.conv6 = nn.Sequential( + nn.Dropout(0.1), nn.Conv2D(inter_channels, nclass, 1)) 
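+
+    # forward() summary: the class-node embedding matrix `self.inp` (nclass x 300)
+    # is projected to 256-d by fc1/fc2 and broadcast over the batch; conv5 reduces
+    # the backbone feature from `in_channels` to `inter_channels`; GlobalReasonUnit
+    # performs the graph interaction between visual nodes and these class nodes;
+    # conv6 maps the result to per-class logits. Returns (logits, class_node_features).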
+ + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + B, C, H, W = x.shape + inp = self.inp.detach() + + inp = self.fc1(inp) + inp = self.fc2(inp).unsqueeze(axis=0).transpose((0, 2, 1))\ + .expand((B, 256, self.nclass)) + + out = self.conv5(x) + + out, se_out = self.gloru(out, inp) + out = self.conv6(out) + return out, se_out + + +class GlobalReasonUnit(nn.Layer): + """ + The original paper refers to: + Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks" (https://arxiv.org/abs/1811.12814) + """ + + def __init__(self, in_channels: int, num_state: int = 256, num_node: int = 84, nclass: int = 59): + super().__init__() + self.num_state = num_state + self.conv_theta = nn.Conv2D( + in_channels, num_node, kernel_size=1, stride=1, padding=0) + self.conv_phi = nn.Conv2D( + in_channels, num_state, kernel_size=1, stride=1, padding=0) + self.graph = GraphLayer(num_state, num_node, nclass) + self.extend_dim = nn.Conv2D( + num_state, in_channels, kernel_size=1, bias_attr=False) + + self.bn = layers.SyncBatchNorm(in_channels) + + def forward(self, x: paddle.Tensor, inp: paddle.Tensor) -> List[paddle.Tensor]: + B = self.conv_theta(x) + sizeB = B.shape + B = B.reshape((sizeB[0], sizeB[1], -1)) + + sizex = x.shape + x_reduce = self.conv_phi(x) + x_reduce = x_reduce.reshape((sizex[0], -1, sizex[2] * sizex[3]))\ + .transpose((0, 2, 1)) + + V = paddle.bmm(B, x_reduce).transpose((0, 2, 1)) + V = paddle.divide( + V, paddle.to_tensor([sizex[2] * sizex[3]], dtype='float32')) + + class_node, new_V = self.graph(inp, V) + D = B.reshape((sizeB[0], -1, sizeB[2] * sizeB[3])).transpose((0, 2, 1)) + Y = paddle.bmm(D, new_V.transpose((0, 2, 1))) + Y = Y.transpose((0, 2, 1)).reshape((sizex[0], self.num_state, \ + sizex[2], -1)) + Y = self.extend_dim(Y) + Y = self.bn(Y) + out = Y + x + + return out, class_node + + +class GraphLayer(nn.Layer): + def __init__(self, num_state: int, num_node: int, num_class: int): + super().__init__() + self.vis_gcn = GCN(num_state, num_node) + self.word_gcn = GCN(num_state, num_class) + self.transfer = GraphTransfer(num_state) + self.gamma_vis = paddle.zeros([num_node]) + self.gamma_word = paddle.zeros([num_class]) + self.gamma_vis = paddle.create_parameter( + shape=self.gamma_vis.shape, + dtype=str(self.gamma_vis.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.gamma_vis)) + self.gamma_word = paddle.create_parameter( + shape=self.gamma_word.shape, + dtype=str(self.gamma_word.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.gamma_word)) + + def forward(self, inp: paddle.Tensor, vis_node: paddle.Tensor) -> List[paddle.Tensor]: + inp = self.word_gcn(inp) + new_V = self.vis_gcn(vis_node) + class_node, vis_node = self.transfer(inp, new_V) + + class_node = self.gamma_word * inp + class_node + new_V = self.gamma_vis * vis_node + new_V + return class_node, new_V + + +class GCN(nn.Layer): + def __init__(self, num_state: int = 128, num_node: int = 64, bias=False): + super().__init__() + self.conv1 = nn.Conv1D( + num_node, + num_node, + kernel_size=1, + padding=0, + stride=1, + groups=1, + ) + self.relu = nn.ReLU() + self.conv2 = nn.Conv1D( + num_state, + num_state, + kernel_size=1, + padding=0, + stride=1, + groups=1, + bias_attr=bias) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + h = self.conv1(x.transpose((0, 2, 1))).transpose((0, 2, 1)) + h = h + x + h = self.relu(h) + h = self.conv2(h) + return h + + +class GraphTransfer(nn.Layer): + """Transfer vis graph to class node, transfer class node to vis feature""" + + def 
__init__(self, in_dim: int): + super().__init__() + self.channle_in = in_dim + self.query_conv = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1) + self.key_conv = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1) + self.value_conv_vis = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.value_conv_word = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.softmax_vis = nn.Softmax(axis=-1) + self.softmax_word = nn.Softmax(axis=-2) + + def forward(self, word: paddle.Tensor, vis_node: paddle.Tensor) -> List[paddle.Tensor]: + m_batchsize, C, Nc = word.shape + m_batchsize, C, Nn = vis_node.shape + + proj_query = self.query_conv(word).reshape((m_batchsize, -1, Nc))\ + .transpose((0, 2, 1)) + proj_key = self.key_conv(vis_node).reshape((m_batchsize, -1, Nn)) + + energy = paddle.bmm(proj_query, proj_key) + attention_vis = self.softmax_vis(energy).transpose((0, 2, 1)) + attention_word = self.softmax_word(energy) + + proj_value_vis = self.value_conv_vis(vis_node).reshape((m_batchsize, -1, + Nn)) + proj_value_word = self.value_conv_word(word).reshape((m_batchsize, -1, + Nc)) + + class_out = paddle.bmm(proj_value_vis, attention_vis) + node_out = paddle.bmm(proj_value_word, attention_word) + return class_out, node_out \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet101vd_voc/resnet.py b/modules/image/semantic_segmentation/ginet_resnet101vd_voc/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..4014d4f8932ba9e81cd5afb8ca81a73863197151 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet101vd_voc/resnet.py @@ -0,0 +1,136 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
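+
+# ResNet101-vd backbone for GINet. The usual 7x7 stem is replaced by three 3x3
+# convs (conv1_1..conv1_3), and the last two stages keep spatial resolution by
+# using dilated convolutions (rates 2 and 4, with multi_grid applied in the
+# final stage) instead of striding. forward() returns the output of every
+# residual block as a feature list.
+# A minimal usage sketch:
+#   backbone = ResNet101_vd()
+#   feats = backbone(paddle.rand([1, 3, 512, 512]))   # list of per-block feature maps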
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import ginet_resnet101vd_voc.layers as L + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = L.ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + act=None, + name=name + "_branch2b") + + if not shortcut: + self.short = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + + return y + + +class ResNet101_vd(nn.Layer): + def __init__(self, + multi_grid: tuple = (1, 2, 4)): + super(ResNet101_vd, self).__init__() + depth = [3, 4, 23, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + self.feat_channels = [c * 4 for c in num_filters] + dilation_dict = {2: 2, 3: 4} + self.conv1_1 = L.ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = L.ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = L.ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + name="conv1_3") + self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) + self.stage_list = [] + + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + L.BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name, + dilation=dilation_rate)) + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + return feat_list \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/README.md b/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/README.md new file mode 100644 index 0000000000000000000000000000000000000000..341563f32cf13647472b2c0e7a8fd38f4d83adaa --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/README.md @@ -0,0 +1,186 @@ +# ginet_resnet50vd_ade20k + +|模型名称|ginet_resnet50vd_ade20k| +| :--- | :---: | +|类别|图像-图像分割| +|网络|ginet_resnet50vd| +|数据集|ADE20K| +|是否支持Fine-tuning|是| 
+|模型大小|214MB| +|指标|-| +|最新更新日期|2021-12-14| + +## 一、模型基本信息 + + - 样例结果示例: +
+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[ginet](https://arxiv.org/pdf/2009.06160) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install ginet_resnet50vd_ade20k + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet50vd_ade20k') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ginet_resnet50vd_ade20k模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='ginet_resnet50vd_ade20k', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet50vd_ade20k', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m ginet_resnet50vd_ade20k + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = 
cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ginet_resnet50vd_ade20k" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/README_en.md b/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..d9c1a26daaecc5b22e622146d67b2664700fca74 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/README_en.md @@ -0,0 +1,185 @@ +# ginet_resnet50vd_ade20k + +|Module Name|ginet_resnet50vd_ade20k| +| :--- | :---: | +|Category|Image Segmentation| +|Network|ginet_resnet50vd| +|Dataset|ADE20K| +|Fine-tuning supported or not|Yes| +|Module Size|214MB| +|Data indicators|-| +|Latest update date|2021-12-14| + +## I. Basic Information + +- ### Application Effect Display + - Sample results: +
+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [ginet](https://arxiv.org/pdf/2009.06160) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install ginet_resnet50vd_ade20k + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet50vd_ade20k') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the ginet_resnet50vd_ade20k model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='ginet_resnet50vd_ade20k', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet50vd_ade20k', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m ginet_resnet50vd_ade20k + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ginet_resnet50vd_ade20k" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/layers.py b/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..7e46219fd671ed9834795c9881292eed787b990d --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/layers.py @@ -0,0 +1,345 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
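+
+# Same building blocks (SyncBatchNorm fallback, ConvBNLayer, ConvBN, ConvBNReLU,
+# BottleneckBlock, SeparableConvBNReLU, Activation, ASPPModule) as the layers.py
+# under ginet_resnet101vd_voc; the file contents are identical.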
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + 
out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + + Returns: + A callable object of Activation. + + Raises: + KeyError: When parameter `act` is not in the optional range. + + Examples: + + from paddleseg.models.common.activation import Activation + + relu = Activation("relu") + print(relu) + # + + sigmoid = Activation("sigmoid") + print(sigmoid) + # + + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. 
Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool= False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/module.py b/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/module.py new file mode 100644 index 0000000000000000000000000000000000000000..79ce4d0f070472b989c5a83b6f2542bd66f550fc --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/module.py @@ -0,0 +1,309 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule +from paddleseg.utils import utils +from paddleseg.models import layers + +from ginet_resnet50vd_ade20k.resnet import ResNet50_vd + + +@moduleinfo( + name="ginet_resnet50vd_ade20k", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="GINetResnet50 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class GINetResNet50(nn.Layer): + """ + The GINetResNet50 implementation based on PaddlePaddle. + The original article refers to + Wu, Tianyi, Yu Lu, Yu Zhu, Chuang Zhang, Ming Wu, Zhanyu Ma, and Guodong Guo. "GINet: Graph interaction network for scene parsing." 
In European Conference on Computer Vision, pp. 34-51. Springer, Cham, 2020. + (https://arxiv.org/pdf/2009.06160). + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): Values in the tuple indicate the indices of output of backbone. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. + If true, auxiliary loss will be added after LearningToDownsample module. Default: False. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.. Default: False. + jpu (bool, optional)): whether to use jpu unit in the base forward. Default:True. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes: int = 150, + backbone_indices: Tuple[int]=(0, 1, 2, 3), + enable_auxiliary_loss: bool = True, + align_corners: bool = True, + jpu: bool = True, + pretrained: str = None): + super(GINetResNet50, self).__init__() + self.nclass = num_classes + self.aux = enable_auxiliary_loss + self.jpu = jpu + + self.backbone = ResNet50_vd() + self.backbone_indices = backbone_indices + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + self.jpu = layers.JPU([512, 1024, 2048], width=512) if jpu else None + self.head = GIHead(in_channels=2048, nclass=num_classes) + + if self.aux: + self.auxlayer = layers.AuxLayer( + 1024, 1024 // 4, num_classes, bias_attr=False) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def base_forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + c1, c2, c3, c4 = [feat_list[i] for i in self.backbone_indices] + + if self.jpu: + return self.jpu(c1, c2, c3, c4) + else: + return c1, c2, c3, c4 + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + _, _, h, w = x.shape + _, _, c3, c4 = self.base_forward(x) + + logit_list = [] + x, _ = self.head(c4) + logit_list.append(x) + + if self.aux: + auxout = self.auxlayer(c3) + + logit_list.append(auxout) + + return [ + F.interpolate( + logit, (h, w), + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + + +class GIHead(nn.Layer): + """The Graph Interaction Network head.""" + + def __init__(self, in_channels: int, nclass: int): + super().__init__() + self.nclass = nclass + inter_channels = in_channels // 4 + self.inp = paddle.zeros(shape=(nclass, 300), dtype='float32') + self.inp = paddle.create_parameter( + shape=self.inp.shape, + dtype=str(self.inp.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.inp)) + + self.fc1 = nn.Sequential( + nn.Linear(300, 128), nn.BatchNorm1D(128), nn.ReLU()) + self.fc2 = nn.Sequential( + nn.Linear(128, 256), nn.BatchNorm1D(256), nn.ReLU()) + self.conv5 = layers.ConvBNReLU( + in_channels, + inter_channels, + 3, + padding=1, + bias_attr=False, + stride=1) + + self.gloru = GlobalReasonUnit( + in_channels=inter_channels, + num_state=256, + num_node=84, + nclass=nclass) + self.conv6 = nn.Sequential( + nn.Dropout(0.1), nn.Conv2D(inter_channels, nclass, 1)) + 
+ def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + B, C, H, W = x.shape + inp = self.inp.detach() + + inp = self.fc1(inp) + inp = self.fc2(inp).unsqueeze(axis=0).transpose((0, 2, 1))\ + .expand((B, 256, self.nclass)) + + out = self.conv5(x) + + out, se_out = self.gloru(out, inp) + out = self.conv6(out) + return out, se_out + + +class GlobalReasonUnit(nn.Layer): + """ + The original paper refers to: + Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks" (https://arxiv.org/abs/1811.12814) + """ + + def __init__(self, in_channels: int, num_state: int = 256, num_node: int = 84, nclass: int = 59): + super().__init__() + self.num_state = num_state + self.conv_theta = nn.Conv2D( + in_channels, num_node, kernel_size=1, stride=1, padding=0) + self.conv_phi = nn.Conv2D( + in_channels, num_state, kernel_size=1, stride=1, padding=0) + self.graph = GraphLayer(num_state, num_node, nclass) + self.extend_dim = nn.Conv2D( + num_state, in_channels, kernel_size=1, bias_attr=False) + + self.bn = layers.SyncBatchNorm(in_channels) + + def forward(self, x: paddle.Tensor, inp:paddle.Tensor) -> List[paddle.Tensor]: + B = self.conv_theta(x) + sizeB = B.shape + B = B.reshape((sizeB[0], sizeB[1], -1)) + + sizex = x.shape + x_reduce = self.conv_phi(x) + x_reduce = x_reduce.reshape((sizex[0], -1, sizex[2] * sizex[3]))\ + .transpose((0, 2, 1)) + + V = paddle.bmm(B, x_reduce).transpose((0, 2, 1)) + V = paddle.divide( + V, paddle.to_tensor([sizex[2] * sizex[3]], dtype='float32')) + + class_node, new_V = self.graph(inp, V) + D = B.reshape((sizeB[0], -1, sizeB[2] * sizeB[3])).transpose((0, 2, 1)) + Y = paddle.bmm(D, new_V.transpose((0, 2, 1))) + Y = Y.transpose((0, 2, 1)).reshape((sizex[0], self.num_state, \ + sizex[2], -1)) + Y = self.extend_dim(Y) + Y = self.bn(Y) + out = Y + x + + return out, class_node + + +class GraphLayer(nn.Layer): + def __init__(self, num_state: int, num_node: int, num_class: int): + super().__init__() + self.vis_gcn = GCN(num_state, num_node) + self.word_gcn = GCN(num_state, num_class) + self.transfer = GraphTransfer(num_state) + self.gamma_vis = paddle.zeros([num_node]) + self.gamma_word = paddle.zeros([num_class]) + self.gamma_vis = paddle.create_parameter( + shape=self.gamma_vis.shape, + dtype=str(self.gamma_vis.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.gamma_vis)) + self.gamma_word = paddle.create_parameter( + shape=self.gamma_word.shape, + dtype=str(self.gamma_word.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.gamma_word)) + + def forward(self, inp: paddle.Tensor, vis_node: paddle.Tensor) -> List[paddle.Tensor]: + inp = self.word_gcn(inp) + new_V = self.vis_gcn(vis_node) + class_node, vis_node = self.transfer(inp, new_V) + + class_node = self.gamma_word * inp + class_node + new_V = self.gamma_vis * vis_node + new_V + return class_node, new_V + + +class GCN(nn.Layer): + def __init__(self, num_state: int = 128, num_node: int = 64, bias: bool = False): + super().__init__() + self.conv1 = nn.Conv1D( + num_node, + num_node, + kernel_size=1, + padding=0, + stride=1, + groups=1, + ) + self.relu = nn.ReLU() + self.conv2 = nn.Conv1D( + num_state, + num_state, + kernel_size=1, + padding=0, + stride=1, + groups=1, + bias_attr=bias) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + h = self.conv1(x.transpose((0, 2, 1))).transpose((0, 2, 1)) + h = h + x + h = self.relu(h) + h = self.conv2(h) + return h + + +class GraphTransfer(nn.Layer): + """Transfer vis graph to class node, transfer class node to vis feature""" + + 
def __init__(self, in_dim: int): + super().__init__() + self.channle_in = in_dim + self.query_conv = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1) + self.key_conv = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1) + self.value_conv_vis = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.value_conv_word = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.softmax_vis = nn.Softmax(axis=-1) + self.softmax_word = nn.Softmax(axis=-2) + + def forward(self, word: paddle.Tensor, vis_node: paddle.Tensor) -> List[paddle.Tensor]: + m_batchsize, C, Nc = word.shape + m_batchsize, C, Nn = vis_node.shape + + proj_query = self.query_conv(word).reshape((m_batchsize, -1, Nc))\ + .transpose((0, 2, 1)) + proj_key = self.key_conv(vis_node).reshape((m_batchsize, -1, Nn)) + + energy = paddle.bmm(proj_query, proj_key) + attention_vis = self.softmax_vis(energy).transpose((0, 2, 1)) + attention_word = self.softmax_word(energy) + + proj_value_vis = self.value_conv_vis(vis_node).reshape((m_batchsize, -1, + Nn)) + proj_value_word = self.value_conv_word(word).reshape((m_batchsize, -1, + Nc)) + + class_out = paddle.bmm(proj_value_vis, attention_vis) + node_out = paddle.bmm(proj_value_word, attention_word) + return class_out, node_out \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/resnet.py b/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..d6e376ddca8c01569f1f20d0e25ec3e9fa513922 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_ade20k/resnet.py @@ -0,0 +1,137 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
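+
+# ResNet50-vd backbone for GINet (stage depths 3-4-6-3). Structure mirrors the
+# ResNet101-vd variant: three 3x3 stem convs, dilated convolutions (rates 2 and 4,
+# with multi_grid in the final stage) in the last two stages, and a forward()
+# that returns every residual block output as a feature list.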
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import ginet_resnet50vd_ade20k.layers as L + + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = L.ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + act=None, + name=name + "_branch2b") + + if not shortcut: + self.short = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + + return y + + +class ResNet50_vd(nn.Layer): + def __init__(self, + multi_grid: tuple = (1, 2, 4)): + super(ResNet50_vd, self).__init__() + depth = [3, 4, 6, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + self.feat_channels = [c * 4 for c in num_filters] + dilation_dict = {2: 2, 3: 4} + self.conv1_1 = L.ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = L.ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = L.ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + name="conv1_3") + self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) + self.stage_list = [] + + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + L.BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name, + dilation=dilation_rate)) + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + return feat_list \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/README.md b/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..849f47627fa1e5c3c2150188981e9aff32737ae8 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/README.md @@ -0,0 +1,185 @@ +# ginet_resnet50vd_cityscapes + +|模型名称|ginet_resnet50vd_cityscapes| +| :--- | :---: | +|类别|图像-图像分割| +|网络|ginet_resnet50vd| +|数据集|Cityscapes| 
+|是否支持Fine-tuning|是| +|模型大小|214MB| +|指标|-| +|最新更新日期|2021-12-14| + +## 一、模型基本信息 + + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[ginet](https://arxiv.org/pdf/2009.06160) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install ginet_resnet50vd_cityscapes + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet50vd_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ginet_resnet50vd_cityscapes模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='ginet_resnet50vd_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet50vd_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m ginet_resnet50vd_cityscapes + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def 
cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ginet_resnet50vd_cityscapes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/README_en.md b/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..b265ee908f2476008405d2f548f8f029a81775a0 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/README_en.md @@ -0,0 +1,185 @@ +# ginet_resnet50vd_cityscapes + +|Module Name|ginet_resnet50vd_cityscapes| +| :--- | :---: | +|Category|Image Segmentation| +|Network|ginet_resnet50vd| +|Dataset|Cityscapes| +|Fine-tuning supported or not|Yes| +|Module Size|214MB| +|Data indicators|-| +|Latest update date|2021-12-14| + +## I. Basic Information + +- ### Application Effect Display + - Sample results: +

+ +

+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [ginet](https://arxiv.org/pdf/2009.06160) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install ginet_resnet50vd_cityscapes + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet50vd_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the ginet_resnet50vd_cityscapes model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='ginet_resnet50vd_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet50vd_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m ginet_resnet50vd_cityscapes + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ginet_resnet50vd_cityscapes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/layers.py b/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..7e46219fd671ed9834795c9881292eed787b990d --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/layers.py @@ -0,0 +1,345 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
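+
+# Building blocks used by the GINet backbone and head: SyncBatchNorm is a small
+# factory that falls back to nn.BatchNorm2D on CPU, since nn.SyncBatchNorm has no
+# CPU kernel. ConvBNLayer implements conv + (Sync)BatchNorm + optional activation,
+# with an optional "vd"-style 2x2 average pool on its input; BottleneckBlock is the
+# 1x1-3x3-1x1 residual block with optional dilation; ConvBN / ConvBNReLU /
+# SeparableConvBNReLU are thin convenience wrappers; Activation resolves an
+# activation name to the matching paddle.nn layer; ASPPModule implements atrous
+# spatial pyramid pooling.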
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + 
out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + + Returns: + A callable object of Activation. + + Raises: + KeyError: When parameter `act` is not in the optional range. + + Examples: + + from paddleseg.models.common.activation import Activation + + relu = Activation("relu") + print(relu) + # + + sigmoid = Activation("sigmoid") + print(sigmoid) + # + + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. 
Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool= False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/module.py b/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/module.py new file mode 100644 index 0000000000000000000000000000000000000000..1dac751bca852b3ee9ae247248b19c878d44365e --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/module.py @@ -0,0 +1,309 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule +from paddleseg.utils import utils +from paddleseg.models import layers + +from ginet_resnet50vd_cityscapes.resnet import ResNet50_vd + + +@moduleinfo( + name="ginet_resnet50vd_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="GINetResnet50 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class GINetResNet50(nn.Layer): + """ + The GINetResNet50 implementation based on PaddlePaddle. + The original article refers to + Wu, Tianyi, Yu Lu, Yu Zhu, Chuang Zhang, Ming Wu, Zhanyu Ma, and Guodong Guo. "GINet: Graph interaction network for scene parsing." 
In European Conference on Computer Vision, pp. 34-51. Springer, Cham, 2020. + (https://arxiv.org/pdf/2009.06160). + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): Values in the tuple indicate the indices of output of backbone. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. + If true, auxiliary loss will be added after LearningToDownsample module. Default: False. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.. Default: False. + jpu (bool, optional)): whether to use jpu unit in the base forward. Default:True. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes: int = 19, + backbone_indices: Tuple[int]=(0, 1, 2, 3), + enable_auxiliary_loss: bool = True, + align_corners: bool = True, + jpu: bool = True, + pretrained: str = None): + super(GINetResNet50, self).__init__() + self.nclass = num_classes + self.aux = enable_auxiliary_loss + self.jpu = jpu + + self.backbone = ResNet50_vd() + self.backbone_indices = backbone_indices + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + self.jpu = layers.JPU([512, 1024, 2048], width=512) if jpu else None + self.head = GIHead(in_channels=2048, nclass=num_classes) + + if self.aux: + self.auxlayer = layers.AuxLayer( + 1024, 1024 // 4, num_classes, bias_attr=False) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def base_forward(self, x: paddle.Tensor) -> paddle.Tensor: + feat_list = self.backbone(x) + c1, c2, c3, c4 = [feat_list[i] for i in self.backbone_indices] + + if self.jpu: + return self.jpu(c1, c2, c3, c4) + else: + return c1, c2, c3, c4 + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + _, _, h, w = x.shape + _, _, c3, c4 = self.base_forward(x) + + logit_list = [] + x, _ = self.head(c4) + logit_list.append(x) + + if self.aux: + auxout = self.auxlayer(c3) + + logit_list.append(auxout) + + return [ + F.interpolate( + logit, (h, w), + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + + +class GIHead(nn.Layer): + """The Graph Interaction Network head.""" + + def __init__(self, in_channels: int, nclass: int): + super().__init__() + self.nclass = nclass + inter_channels = in_channels // 4 + self.inp = paddle.zeros(shape=(nclass, 300), dtype='float32') + self.inp = paddle.create_parameter( + shape=self.inp.shape, + dtype=str(self.inp.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.inp)) + + self.fc1 = nn.Sequential( + nn.Linear(300, 128), nn.BatchNorm1D(128), nn.ReLU()) + self.fc2 = nn.Sequential( + nn.Linear(128, 256), nn.BatchNorm1D(256), nn.ReLU()) + self.conv5 = layers.ConvBNReLU( + in_channels, + inter_channels, + 3, + padding=1, + bias_attr=False, + stride=1) + + self.gloru = GlobalReasonUnit( + in_channels=inter_channels, + num_state=256, + num_node=84, + nclass=nclass) + self.conv6 = nn.Sequential( + nn.Dropout(0.1), nn.Conv2D(inter_channels, nclass, 1)) + + def 
forward(self, x: paddle.Tensor) -> paddle.Tensor: + B, C, H, W = x.shape + inp = self.inp.detach() + + inp = self.fc1(inp) + inp = self.fc2(inp).unsqueeze(axis=0).transpose((0, 2, 1))\ + .expand((B, 256, self.nclass)) + + out = self.conv5(x) + + out, se_out = self.gloru(out, inp) + out = self.conv6(out) + return out, se_out + + +class GlobalReasonUnit(nn.Layer): + """ + The original paper refers to: + Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks" (https://arxiv.org/abs/1811.12814) + """ + + def __init__(self, in_channels: int, num_state: int = 256, num_node: int = 84, nclass: int = 59): + super().__init__() + self.num_state = num_state + self.conv_theta = nn.Conv2D( + in_channels, num_node, kernel_size=1, stride=1, padding=0) + self.conv_phi = nn.Conv2D( + in_channels, num_state, kernel_size=1, stride=1, padding=0) + self.graph = GraphLayer(num_state, num_node, nclass) + self.extend_dim = nn.Conv2D( + num_state, in_channels, kernel_size=1, bias_attr=False) + + self.bn = layers.SyncBatchNorm(in_channels) + + def forward(self, x: paddle.Tensor, inp: paddle.Tensor) -> paddle.Tensor: + B = self.conv_theta(x) + sizeB = B.shape + B = B.reshape((sizeB[0], sizeB[1], -1)) + + sizex = x.shape + x_reduce = self.conv_phi(x) + x_reduce = x_reduce.reshape((sizex[0], -1, sizex[2] * sizex[3]))\ + .transpose((0, 2, 1)) + + V = paddle.bmm(B, x_reduce).transpose((0, 2, 1)) + V = paddle.divide( + V, paddle.to_tensor([sizex[2] * sizex[3]], dtype='float32')) + + class_node, new_V = self.graph(inp, V) + D = B.reshape((sizeB[0], -1, sizeB[2] * sizeB[3])).transpose((0, 2, 1)) + Y = paddle.bmm(D, new_V.transpose((0, 2, 1))) + Y = Y.transpose((0, 2, 1)).reshape((sizex[0], self.num_state, \ + sizex[2], -1)) + Y = self.extend_dim(Y) + Y = self.bn(Y) + out = Y + x + + return out, class_node + + +class GraphLayer(nn.Layer): + def __init__(self, num_state: int, num_node: int, num_class: int): + super().__init__() + self.vis_gcn = GCN(num_state, num_node) + self.word_gcn = GCN(num_state, num_class) + self.transfer = GraphTransfer(num_state) + self.gamma_vis = paddle.zeros([num_node]) + self.gamma_word = paddle.zeros([num_class]) + self.gamma_vis = paddle.create_parameter( + shape=self.gamma_vis.shape, + dtype=str(self.gamma_vis.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.gamma_vis)) + self.gamma_word = paddle.create_parameter( + shape=self.gamma_word.shape, + dtype=str(self.gamma_word.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.gamma_word)) + + def forward(self, inp: paddle.Tensor, vis_node: paddle.Tensor) -> List[paddle.Tensor]: + inp = self.word_gcn(inp) + new_V = self.vis_gcn(vis_node) + class_node, vis_node = self.transfer(inp, new_V) + + class_node = self.gamma_word * inp + class_node + new_V = self.gamma_vis * vis_node + new_V + return class_node, new_V + + +class GCN(nn.Layer): + def __init__(self, num_state: int = 128, num_node: int = 64, bias: bool = False): + super().__init__() + self.conv1 = nn.Conv1D( + num_node, + num_node, + kernel_size=1, + padding=0, + stride=1, + groups=1, + ) + self.relu = nn.ReLU() + self.conv2 = nn.Conv1D( + num_state, + num_state, + kernel_size=1, + padding=0, + stride=1, + groups=1, + bias_attr=bias) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + h = self.conv1(x.transpose((0, 2, 1))).transpose((0, 2, 1)) + h = h + x + h = self.relu(h) + h = self.conv2(h) + return h + + +class GraphTransfer(nn.Layer): + """Transfer vis graph to class node, transfer class node to vis feature""" + + def __init__(self, 
in_dim: int): + super().__init__() + self.channle_in = in_dim + self.query_conv = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1) + self.key_conv = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1) + self.value_conv_vis = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.value_conv_word = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.softmax_vis = nn.Softmax(axis=-1) + self.softmax_word = nn.Softmax(axis=-2) + + def forward(self, word: paddle.Tensor, vis_node: paddle.Tensor) -> List[paddle.Tensor]: + m_batchsize, C, Nc = word.shape + m_batchsize, C, Nn = vis_node.shape + + proj_query = self.query_conv(word).reshape((m_batchsize, -1, Nc))\ + .transpose((0, 2, 1)) + proj_key = self.key_conv(vis_node).reshape((m_batchsize, -1, Nn)) + + energy = paddle.bmm(proj_query, proj_key) + attention_vis = self.softmax_vis(energy).transpose((0, 2, 1)) + attention_word = self.softmax_word(energy) + + proj_value_vis = self.value_conv_vis(vis_node).reshape((m_batchsize, -1, + Nn)) + proj_value_word = self.value_conv_word(word).reshape((m_batchsize, -1, + Nc)) + + class_out = paddle.bmm(proj_value_vis, attention_vis) + node_out = paddle.bmm(proj_value_word, attention_word) + return class_out, node_out \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/resnet.py b/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..d526b26991ff72083d7431971608b8a489f60df9 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_cityscapes/resnet.py @@ -0,0 +1,137 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
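+
+# A minimal usage sketch of the backbone defined below (the 512x512 input shape is
+# illustrative only): ResNet50_vd returns the outputs of its four residual stages,
+# which GINetResNet50 consumes via backbone_indices.
+#
+#   import paddle
+#   from ginet_resnet50vd_cityscapes.resnet import ResNet50_vd
+#
+#   backbone = ResNet50_vd()
+#   feats = backbone(paddle.randn([1, 3, 512, 512]))
+#   # four feature maps with 256/512/1024/2048 channels; the dilated last two stages
+#   # keep the output stride at 8, e.g. [1, 256, 128, 128] ... [1, 2048, 64, 64]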
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import ginet_resnet50vd_cityscapes.layers as L + + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = L.ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + act=None, + name=name + "_branch2b") + + if not shortcut: + self.short = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + + return y + + +class ResNet50_vd(nn.Layer): + def __init__(self, + multi_grid: tuple = (1, 2, 4)): + super(ResNet50_vd, self).__init__() + depth = [3, 4, 6, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + self.feat_channels = [c * 4 for c in num_filters] + dilation_dict = {2: 2, 3: 4} + self.conv1_1 = L.ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = L.ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = L.ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + name="conv1_3") + self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) + self.stage_list = [] + + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + L.BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name, + dilation=dilation_rate)) + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + return feat_list \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_voc/README.md b/modules/image/semantic_segmentation/ginet_resnet50vd_voc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e0f1d605c5f8f87c1ad56d6c12b3a1384a514720 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_voc/README.md @@ -0,0 +1,185 @@ +# ginet_resnet50vd_voc + +|模型名称|ginet_resnet50vd_voc| +| :--- | :---: | +|类别|图像-图像分割| +|网络|ginet_resnet50vd| +|数据集|PascalVOC2012| +|是否支持Fine-tuning|是| 
+|模型大小|214MB| +|指标|-| +|最新更新日期|2021-12-14| + +## 一、模型基本信息 + + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[ginet](https://arxiv.org/pdf/2009.06160) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install ginet_resnet50vd_voc + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet50vd_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ginet_resnet50vd_voc模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='ginet_resnet50vd_voc', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet50vd_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m ginet_resnet50vd_voc + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = 
cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ginet_resnet50vd_voc" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_voc/README_en.md b/modules/image/semantic_segmentation/ginet_resnet50vd_voc/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..71bba22353984fa84150ed687c9432db6ba0da65 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_voc/README_en.md @@ -0,0 +1,185 @@ +# ginet_resnet50vd_voc + +|Module Name|ginet_resnet50vd_voc| +| :--- | :---: | +|Category|Image Segmentation| +|Network|ginet_resnet50vd| +|Dataset|PascalVOC2012| +|Fine-tuning supported or not|Yes| +|Module Size|214MB| +|Data indicators|-| +|Latest update date|2021-12-14| + +## I. Basic Information + +- ### Application Effect Display + - Sample results: +

+ +

+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [ginet](https://arxiv.org/pdf/2009.06160) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install ginet_resnet50vd_voc + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet50vd_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the ginet_resnet50vd_voc model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='ginet_resnet50vd_voc', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ginet_resnet50vd_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m ginet_resnet50vd_voc + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ginet_resnet50vd_voc" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_voc/layers.py b/modules/image/semantic_segmentation/ginet_resnet50vd_voc/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..7e46219fd671ed9834795c9881292eed787b990d --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_voc/layers.py @@ -0,0 +1,345 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
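+
+# Note: SyncBatchNorm below is a factory function rather than a layer class; it
+# returns nn.BatchNorm2D when paddle runs on CPU (nn.SyncBatchNorm has no CPU
+# kernel) and nn.SyncBatchNorm otherwise. Activation maps a lowercase name onto the
+# matching paddle.nn activation layer, for example (illustrative only):
+#
+#   act = Activation('relu')       # behaves like nn.ReLU()
+#   identity = Activation(None)    # no-op pass-through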
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + 
out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + + Returns: + A callable object of Activation. + + Raises: + KeyError: When parameter `act` is not in the optional range. + + Examples: + + from paddleseg.models.common.activation import Activation + + relu = Activation("relu") + print(relu) + # + + sigmoid = Activation("sigmoid") + print(sigmoid) + # + + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. 
Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool= False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_voc/module.py b/modules/image/semantic_segmentation/ginet_resnet50vd_voc/module.py new file mode 100644 index 0000000000000000000000000000000000000000..fed27ebf3a07794343c5841dc5c31b51e46f6544 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_voc/module.py @@ -0,0 +1,309 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule +from paddleseg.utils import utils +from paddleseg.models import layers + +from ginet_resnet50vd_voc.resnet import ResNet50_vd + + +@moduleinfo( + name="ginet_resnet50vd_voc", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="GINetResnet50 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class GINetResNet50(nn.Layer): + """ + The GINetResNet50 implementation based on PaddlePaddle. + The original article refers to + Wu, Tianyi, Yu Lu, Yu Zhu, Chuang Zhang, Ming Wu, Zhanyu Ma, and Guodong Guo. "GINet: Graph interaction network for scene parsing." 
In European Conference on Computer Vision, pp. 34-51. Springer, Cham, 2020. + (https://arxiv.org/pdf/2009.06160). + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): Values in the tuple indicate the indices of output of backbone. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. + If true, auxiliary loss will be added after LearningToDownsample module. Default: False. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.. Default: False. + jpu (bool, optional)): whether to use jpu unit in the base forward. Default:True. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes: int = 21, + backbone_indices: Tuple[int]=(0, 1, 2, 3), + enable_auxiliary_loss:bool = True, + align_corners: bool = True, + jpu: bool = True, + pretrained: str = None): + super(GINetResNet50, self).__init__() + self.nclass = num_classes + self.aux = enable_auxiliary_loss + self.jpu = jpu + + self.backbone = ResNet50_vd() + self.backbone_indices = backbone_indices + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + self.jpu = layers.JPU([512, 1024, 2048], width=512) if jpu else None + self.head = GIHead(in_channels=2048, nclass=num_classes) + + if self.aux: + self.auxlayer = layers.AuxLayer( + 1024, 1024 // 4, num_classes, bias_attr=False) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def base_forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + c1, c2, c3, c4 = [feat_list[i] for i in self.backbone_indices] + + if self.jpu: + return self.jpu(c1, c2, c3, c4) + else: + return c1, c2, c3, c4 + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + _, _, h, w = x.shape + _, _, c3, c4 = self.base_forward(x) + + logit_list = [] + x, _ = self.head(c4) + logit_list.append(x) + + if self.aux: + auxout = self.auxlayer(c3) + + logit_list.append(auxout) + + return [ + F.interpolate( + logit, (h, w), + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + + +class GIHead(nn.Layer): + """The Graph Interaction Network head.""" + + def __init__(self, in_channels: int, nclass: int): + super().__init__() + self.nclass = nclass + inter_channels = in_channels // 4 + self.inp = paddle.zeros(shape=(nclass, 300), dtype='float32') + self.inp = paddle.create_parameter( + shape=self.inp.shape, + dtype=str(self.inp.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.inp)) + + self.fc1 = nn.Sequential( + nn.Linear(300, 128), nn.BatchNorm1D(128), nn.ReLU()) + self.fc2 = nn.Sequential( + nn.Linear(128, 256), nn.BatchNorm1D(256), nn.ReLU()) + self.conv5 = layers.ConvBNReLU( + in_channels, + inter_channels, + 3, + padding=1, + bias_attr=False, + stride=1) + + self.gloru = GlobalReasonUnit( + in_channels=inter_channels, + num_state=256, + num_node=84, + nclass=nclass) + self.conv6 = nn.Sequential( + nn.Dropout(0.1), nn.Conv2D(inter_channels, nclass, 1)) + + def 
forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + B, C, H, W = x.shape + inp = self.inp.detach() + + inp = self.fc1(inp) + inp = self.fc2(inp).unsqueeze(axis=0).transpose((0, 2, 1))\ + .expand((B, 256, self.nclass)) + + out = self.conv5(x) + + out, se_out = self.gloru(out, inp) + out = self.conv6(out) + return out, se_out + + +class GlobalReasonUnit(nn.Layer): + """ + The original paper refers to: + Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks" (https://arxiv.org/abs/1811.12814) + """ + + def __init__(self, in_channels: int, num_state: int = 256, num_node: int = 84, nclass: int = 59): + super().__init__() + self.num_state = num_state + self.conv_theta = nn.Conv2D( + in_channels, num_node, kernel_size=1, stride=1, padding=0) + self.conv_phi = nn.Conv2D( + in_channels, num_state, kernel_size=1, stride=1, padding=0) + self.graph = GraphLayer(num_state, num_node, nclass) + self.extend_dim = nn.Conv2D( + num_state, in_channels, kernel_size=1, bias_attr=False) + + self.bn = layers.SyncBatchNorm(in_channels) + + def forward(self, x: paddle.Tensor, inp: paddle.Tensor) -> List[paddle.Tensor]: + B = self.conv_theta(x) + sizeB = B.shape + B = B.reshape((sizeB[0], sizeB[1], -1)) + + sizex = x.shape + x_reduce = self.conv_phi(x) + x_reduce = x_reduce.reshape((sizex[0], -1, sizex[2] * sizex[3]))\ + .transpose((0, 2, 1)) + + V = paddle.bmm(B, x_reduce).transpose((0, 2, 1)) + V = paddle.divide( + V, paddle.to_tensor([sizex[2] * sizex[3]], dtype='float32')) + + class_node, new_V = self.graph(inp, V) + D = B.reshape((sizeB[0], -1, sizeB[2] * sizeB[3])).transpose((0, 2, 1)) + Y = paddle.bmm(D, new_V.transpose((0, 2, 1))) + Y = Y.transpose((0, 2, 1)).reshape((sizex[0], self.num_state, \ + sizex[2], -1)) + Y = self.extend_dim(Y) + Y = self.bn(Y) + out = Y + x + + return out, class_node + + +class GraphLayer(nn.Layer): + def __init__(self, num_state: int, num_node: int, num_class: int): + super().__init__() + self.vis_gcn = GCN(num_state, num_node) + self.word_gcn = GCN(num_state, num_class) + self.transfer = GraphTransfer(num_state) + self.gamma_vis = paddle.zeros([num_node]) + self.gamma_word = paddle.zeros([num_class]) + self.gamma_vis = paddle.create_parameter( + shape=self.gamma_vis.shape, + dtype=str(self.gamma_vis.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.gamma_vis)) + self.gamma_word = paddle.create_parameter( + shape=self.gamma_word.shape, + dtype=str(self.gamma_word.numpy().dtype), + default_initializer=paddle.nn.initializer.Assign(self.gamma_word)) + + def forward(self, inp: paddle.Tensor, vis_node: paddle.Tensor) -> List[paddle.Tensor]: + inp = self.word_gcn(inp) + new_V = self.vis_gcn(vis_node) + class_node, vis_node = self.transfer(inp, new_V) + + class_node = self.gamma_word * inp + class_node + new_V = self.gamma_vis * vis_node + new_V + return class_node, new_V + + +class GCN(nn.Layer): + def __init__(self, num_state: int = 128, num_node: int = 64, bias: bool = False): + super().__init__() + self.conv1 = nn.Conv1D( + num_node, + num_node, + kernel_size=1, + padding=0, + stride=1, + groups=1, + ) + self.relu = nn.ReLU() + self.conv2 = nn.Conv1D( + num_state, + num_state, + kernel_size=1, + padding=0, + stride=1, + groups=1, + bias_attr=bias) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + h = self.conv1(x.transpose((0, 2, 1))).transpose((0, 2, 1)) + h = h + x + h = self.relu(h) + h = self.conv2(h) + return h + + +class GraphTransfer(nn.Layer): + """Transfer vis graph to class node, transfer class node to vis feature""" + + def 
__init__(self, in_dim: int): + super().__init__() + self.channle_in = in_dim + self.query_conv = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1) + self.key_conv = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1) + self.value_conv_vis = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.value_conv_word = nn.Conv1D( + in_channels=in_dim, out_channels=in_dim, kernel_size=1) + self.softmax_vis = nn.Softmax(axis=-1) + self.softmax_word = nn.Softmax(axis=-2) + + def forward(self, word: paddle.Tensor, vis_node: paddle.Tensor) -> List[paddle.Tensor]: + m_batchsize, C, Nc = word.shape + m_batchsize, C, Nn = vis_node.shape + + proj_query = self.query_conv(word).reshape((m_batchsize, -1, Nc))\ + .transpose((0, 2, 1)) + proj_key = self.key_conv(vis_node).reshape((m_batchsize, -1, Nn)) + + energy = paddle.bmm(proj_query, proj_key) + attention_vis = self.softmax_vis(energy).transpose((0, 2, 1)) + attention_word = self.softmax_word(energy) + + proj_value_vis = self.value_conv_vis(vis_node).reshape((m_batchsize, -1, + Nn)) + proj_value_word = self.value_conv_word(word).reshape((m_batchsize, -1, + Nc)) + + class_out = paddle.bmm(proj_value_vis, attention_vis) + node_out = paddle.bmm(proj_value_word, attention_word) + return class_out, node_out \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ginet_resnet50vd_voc/resnet.py b/modules/image/semantic_segmentation/ginet_resnet50vd_voc/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..79f648ef9f3381b41852a8010381a6087d6b7f72 --- /dev/null +++ b/modules/image/semantic_segmentation/ginet_resnet50vd_voc/resnet.py @@ -0,0 +1,137 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import ginet_resnet50vd_voc.layers as L + + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = L.ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + act=None, + name=name + "_branch2b") + + if not shortcut: + self.short = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + + return y + + +class ResNet50_vd(nn.Layer): + def __init__(self, + multi_grid: tuple = (1, 2, 4)): + super(ResNet50_vd, self).__init__() + depth = [3, 4, 6, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + self.feat_channels = [c * 4 for c in num_filters] + dilation_dict = {2: 2, 3: 4} + self.conv1_1 = L.ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + name="conv1_1") + self.conv1_2 = L.ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + name="conv1_2") + self.conv1_3 = L.ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + name="conv1_3") + self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) + self.stage_list = [] + + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + L.BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name, + dilation=dilation_rate)) + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + return feat_list \ No newline at end of file diff --git a/modules/image/semantic_segmentation/humanseg_lite/README.md b/modules/image/semantic_segmentation/humanseg_lite/README.md index effab0ff515694b2e376711a097c76ab564fdcbe..67472e1818aae31ef2d78b09410b1646a7bc388f 100644 --- a/modules/image/semantic_segmentation/humanseg_lite/README.md +++ b/modules/image/semantic_segmentation/humanseg_lite/README.md @@ -48,7 +48,7 @@ ``` hub run humanseg_lite --input_path "/PATH/TO/IMAGE" ``` -- ### 2、代码示例 +- ### 2、预测代码示例 - 图片分割及视频分割代码示例: @@ -72,7 +72,7 @@ import numpy as np 
import paddlehub as hub - human_seg = hub.Module('humanseg_lite') + human_seg = hub.Module(name='humanseg_lite') cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') fps = cap_video.get(cv2.CAP_PROP_FPS) save_path = 'humanseg_lite_video.avi' diff --git a/modules/image/semantic_segmentation/humanseg_lite/README_en.md b/modules/image/semantic_segmentation/humanseg_lite/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..e37ba0123129939cd84601293d5d8b1e536b93ad --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_lite/README_en.md @@ -0,0 +1,255 @@ +# humanseg_lite + +|Module Name |humanseg_lite| +| :--- | :---: | +|Category |Image segmentation| +|Network|shufflenet| +|Dataset|Baidu self-built dataset| +|Fine-tuning supported or not|No| +|Module Size|541k| +|Data indicators|-| +|Latest update date|2021-02-26| + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +

+ +- ### Module Introduction + + - HumanSeg_lite is based on ShuffleNetV2 network. The network size is only 541K. It is suitable for selfie portrait segmentation and can be segmented in real time on the mobile terminal. + + - For more information, please refer to:[humanseg_lite](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/HumanSeg) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install humanseg_lite + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ``` + hub run humanseg_lite --input_path "/PATH/TO/IMAGE" + + ``` + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + - Image segmentation and video segmentation example: + - ```python + import cv2 + import paddlehub as hub + + human_seg = hub.Module(name='humanseg_lite') + im = cv2.imread('/PATH/TO/IMAGE') + res = human_seg.segment(images=[im],visualization=True) + print(res[0]['data']) + human_seg.video_segment('/PATH/TO/VIDEO') + human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') + + ``` + - Video prediction example: + + - ```python + import cv2 + import numpy as np + import paddlehub as hub + + human_seg = hub.Module('humanseg_lite') + cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') + fps = cap_video.get(cv2.CAP_PROP_FPS) + save_path = 'humanseg_lite_video.avi' + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) + prev_gray = None + prev_cfd = None + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + + cap_video.release() + cap_out.release() + + ``` + +- ### 3、API + + - ```python + def segment(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_lite_output') + ``` + + - Prediction API, generating segmentation result. + + - **Parameter** + + * images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR. + * paths (list\[str\]): image path. + * batch\_size (int): batch size. + * use\_gpu (bool): use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + * visualization (bool): Whether to save the results as picture files. + * output\_dir (str): save path of images, humanseg_lite_output by default. + + - **Return** + + * res (list\[dict\]): The list of recognition results, where each element is dict and each field is: + * save\_path (str, optional): Save path of the result. + * data (numpy.ndarray): The result of portrait segmentation. 
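  - For local use of the returned `data` matte, the following sketch composites the person onto a white background (it assumes `data` is a single-channel matte with values in [0, 255]; the image path is a placeholder):

    ```python
    import cv2
    import numpy as np
    import paddlehub as hub

    human_seg = hub.Module(name='humanseg_lite')
    im = cv2.imread('/PATH/TO/IMAGE')
    res = human_seg.segment(images=[im])

    # Assumption: the matte is a 2D array in [0, 255]; normalize it to [0, 1]
    # and use it as a per-pixel alpha, as in the video example above.
    matting = res[0]['data'].astype(np.float32) / 255.0
    matting = np.repeat(matting[:, :, np.newaxis], 3, axis=2)
    bg = np.ones_like(im, dtype=np.float32) * 255
    comb = (matting * im + (1 - matting) * bg).astype(np.uint8)
    cv2.imwrite('humanseg_lite_white_bg.jpg', comb)
    ```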
+ + - ```python + def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): + ``` + - Prediction API, used to segment video portraits frame by frame. + + - **Parameter** + + * frame_org (numpy.ndarray): single frame for prediction,ndarray.shape is in the format [H, W, C], BGR. + * frame_id (int): The number of the current frame. + * prev_gray (numpy.ndarray): Grayscale image of the previous network input. + * prev_cfd (numpy.ndarray): The fusion image from optical flow and the prediction result from previous frame. + * use\_gpu (bool): use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + + + - **Return** + + * img_matting (numpy.ndarray): The result of portrait segmentation. + * cur_gray (numpy.ndarray): Grayscale image of the current network input. + * optflow_map (numpy.ndarray): The fusion image from optical flow and the prediction result from current frame. + + + - ```python + def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_lite_video_result'): + ``` + + - Prediction API to produce video segmentation result. + + - **Parameter** + + * video\_path (str): Video path for segmentation。If None, the video will be obtained from the local camera, and a window will display the online segmentation result. + * use\_gpu (bool): use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + * save\_dir (str): save path of video. + + + - ```python + def save_inference_model(dirname='humanseg_lite_model', + model_filename=None, + params_filename=None, + combined=True) + ``` + + + - Save the model to the specified path. + + - **Parameters** + + * dirname: Save path. + * model\_filename: model file name,defalt is \_\_model\_\_ + * params\_filename: parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) + * combined: Whether to save the parameters to a unified file. + + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of for human segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + hub serving start -m humanseg_lite + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # Send an HTTP request + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/humanseg_lite" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) + rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) + cv2.imwrite("segment_human_lite.png", rgba) + ``` + + +## V. 
Release Note + +- 1.0.0 + + First release + +- 1.1.0 + + Added video portrait segmentation interface + + Added video stream portrait segmentation interface +* 1.1.1 + + Fix memory leakage problem of on cudnn 8.0.4 diff --git a/modules/image/semantic_segmentation/humanseg_mobile/README.md b/modules/image/semantic_segmentation/humanseg_mobile/README.md index 2e65c49b47a6c8751c4581bef5a7258e872cd078..188234ed27f826c9f1bf99454616237a3e102fb6 100644 --- a/modules/image/semantic_segmentation/humanseg_mobile/README.md +++ b/modules/image/semantic_segmentation/humanseg_mobile/README.md @@ -52,7 +52,7 @@ ``` hub run humanseg_mobile --input_path "/PATH/TO/IMAGE" ``` -- ### 2、代码示例 +- ### 2、预测代码示例 - 图片分割及视频分割代码示例: @@ -76,7 +76,7 @@ import numpy as np import paddlehub as hub - human_seg = hub.Module('humanseg_mobile') + human_seg = hub.Module(name='humanseg_mobile') cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') fps = cap_video.get(cv2.CAP_PROP_FPS) save_path = 'humanseg_mobile_video.avi' diff --git a/modules/image/semantic_segmentation/humanseg_mobile/README_en.md b/modules/image/semantic_segmentation/humanseg_mobile/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..7af902ceda26f503c00311a2d9da445ea500cbeb --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_mobile/README_en.md @@ -0,0 +1,256 @@ +# humanseg_mobile + +|Module Name |humanseg_mobile| +| :--- | :---: | +|Category |Image segmentation| +|Network|hrnet| +|Dataset|Baidu self-built dataset| +|Fine-tuning supported or not|No| +|Module Size|5.8M| +|Data indicators|-| +|Latest update date|2021-02-26| + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +

+ +- ### Module Introduction + + - HumanSeg_mobile is based on HRNet_w18_small_v1 network. The network size is only 5.8M. It is suitable for selfie portrait segmentation and can be segmented in real time on the mobile terminal. + + - For more information, please refer to:[humanseg_mobile](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/HumanSeg) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install humanseg_mobile + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ``` + hub run humanseg_mobile --input_path "/PATH/TO/IMAGE" + + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + + +- ### 2、Prediction Code Example + - Image segmentation and video segmentation example: + ```python + import cv2 + import paddlehub as hub + + human_seg = hub.Module(name='humanseg_mobile') + im = cv2.imread('/PATH/TO/IMAGE') + res = human_seg.segment(images=[im],visualization=True) + print(res[0]['data']) + human_seg.video_segment('/PATH/TO/VIDEO') + human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') + + ``` + - Video prediction example: + + ```python + import cv2 + import numpy as np + import paddlehub as hub + + human_seg = hub.Module('humanseg_mobile') + cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') + fps = cap_video.get(cv2.CAP_PROP_FPS) + save_path = 'humanseg_mobile_video.avi' + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) + prev_gray = None + prev_cfd = None + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + + cap_video.release() + cap_out.release() + + ``` + +- ### 3、API + + ```python + def segment(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_mobile_output') + ``` + + - Prediction API, generating segmentation result. + + - **Parameter** + + * images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR. + * paths (list\[str\]): image path. + * batch\_size (int): batch size. + * use\_gpu (bool): use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + * visualization (bool): Whether to save the results as picture files. + * output\_dir (str): save path of images, humanseg_mobile_output by default. + + - **Return** + + * res (list\[dict\]): The list of recognition results, where each element is dict and each field is: + * save\_path (str, optional): Save path of the result. 
+ * data (numpy.ndarray): The result of portrait segmentation. + + ```python + def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): + ``` + + - Prediction API, used to segment video portraits frame by frame. + + - **Parameter** + + * frame_org (numpy.ndarray): single frame for prediction,ndarray.shape is in the format [H, W, C], BGR. + * frame_id (int): The number of the current frame. + * prev_gray (numpy.ndarray): Grayscale image of the previous network input. + * prev_cfd (numpy.ndarray): The fusion image from optical flow and the prediction result from previous frame. + * use\_gpu (bool): Use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + + + - **Return** + + * img_matting (numpy.ndarray): The result of portrait segmentation. + * cur_gray (numpy.ndarray): Grayscale image of the current network input. + * optflow_map (numpy.ndarray): The fusion image from optical flow and the prediction result from current frame. + + + ```python + def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_mobile_video_result'): + ``` + + - Prediction API to produce video segmentation result. + + - **Parameter** + + * video\_path (str): Video path for segmentation。If None, the video will be obtained from the local camera, and a window will display the online segmentation result. + * use\_gpu (bool): Use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + * save\_dir (str): save path of video. + + + ```python + def save_inference_model(dirname='humanseg_mobile_model', + model_filename=None, + params_filename=None, + combined=True) + ``` + + + - Save the model to the specified path. + + - **Parameters** + + * dirname: Save path. + * model\_filename: Model file name,defalt is \_\_model\_\_ + * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) + * combined: Whether to save the parameters to a unified file. + + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of for human segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m humanseg_mobile + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. 
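  - If port 8866 is already occupied, the service can be started on another port (assuming the `-p`/`--port` option of `hub serving start` is available in your PaddleHub version); the URL used in Step 2 must then point to the same port:

    - ```shell
      $ hub serving start -m humanseg_mobile -p 8867
      ```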
+ +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # Send an HTTP request + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/humanseg_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) + rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) + cv2.imwrite("segment_human_mobile.png", rgba) + ``` + + +## V. Release Note + +- 1.0.0 + + First release + +- 1.1.0 + + Added video portrait split interface + + Added video stream portrait segmentation interface +* 1.1.1 + + Fix the video memory leakage problem of on cudnn 8.0.4 diff --git a/modules/image/semantic_segmentation/humanseg_server/README.md b/modules/image/semantic_segmentation/humanseg_server/README.md index 8845cb82cd109e6ddfb7b92f01f607333dada588..35e19365cc9f0b6c034ab6012faf5f7355fceaa3 100644 --- a/modules/image/semantic_segmentation/humanseg_server/README.md +++ b/modules/image/semantic_segmentation/humanseg_server/README.md @@ -51,7 +51,7 @@ ``` hub run humanseg_server --input_path "/PATH/TO/IMAGE" ``` -- ### 2、代码示例 +- ### 2、预测代码示例 - 图片分割及视频分割代码示例: @@ -75,7 +75,7 @@ import numpy as np import paddlehub as hub - human_seg = hub.Module('humanseg_server') + human_seg = hub.Module(name='humanseg_server') cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') fps = cap_video.get(cv2.CAP_PROP_FPS) save_path = 'humanseg_server_video.avi' diff --git a/modules/image/semantic_segmentation/humanseg_server/README_en.md b/modules/image/semantic_segmentation/humanseg_server/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..052b37e2af72d9090de2e2950ee2284695cba695 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_server/README_en.md @@ -0,0 +1,255 @@ +# humanseg_server + +|Module Name |humanseg_server| +| :--- | :---: | +|Category |Image segmentation| +|Network|hrnet| +|Dataset|Baidu self-built dataset| +|Fine-tuning supported or not|No| +|Module Size|159MB| +|Data indicators|-| +|Latest update date|2021-02-26| + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: +

+ +- ### Module Introduction + + - HumanSeg-server model is trained by Baidu self-built dataset, which can be used for portrait segmentation. + + - For more information, please refer to:[humanseg_server](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/HumanSeg) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install humanseg_server + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III. Module API Prediction + +- ### 1、Command line Prediction + + - ``` + hub run humanseg_server --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_en/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + - Image segmentation and video segmentation example: + ```python + import cv2 + import paddlehub as hub + + human_seg = hub.Module(name='humanseg_server') + im = cv2.imread('/PATH/TO/IMAGE') + res = human_seg.segment(images=[im],visualization=True) + print(res[0]['data']) + human_seg.video_segment('/PATH/TO/VIDEO') + human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') + + ``` + - Video prediction example: + + ```python + import cv2 + import numpy as np + import paddlehub as hub + + human_seg = hub.Module('humanseg_server') + cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') + fps = cap_video.get(cv2.CAP_PROP_FPS) + save_path = 'humanseg_server_video.avi' + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) + prev_gray = None + prev_cfd = None + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + + cap_video.release() + cap_out.release() + + ``` + +- ### 3、API + + ```python + def segment(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_server_output') + ``` + + - Prediction API, generating segmentation result. + + - **Parameter** + + * images (list\[numpy.ndarray\]): Image data, ndarray.shape is in the format [H, W, C], BGR. + * paths (list\[str\]): Image path. + * batch\_size (int): Batch size. + * use\_gpu (bool): Use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + * visualization (bool): Whether to save the results as picture files. + * output\_dir (str): Save path of images, humanseg_server_output by default. + + - **Return** + + * res (list\[dict\]): The list of recognition results, where each element is dict and each field is: + * save\_path (str, optional): Save path of the result. + * data (numpy.ndarray): The result of portrait segmentation. 
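  - A short example of batch prediction from image paths, based on the parameters documented above (the paths are placeholders):

    ```python
    import paddlehub as hub

    human_seg = hub.Module(name='humanseg_server')
    # Predict several images at once and also save visualizations under output_dir.
    results = human_seg.segment(paths=['/PATH/TO/IMAGE_1', '/PATH/TO/IMAGE_2'],
                                batch_size=2,
                                visualization=True,
                                output_dir='humanseg_server_output')
    for res in results:
        # save_path is only present when the visualization is written to disk.
        print(res.get('save_path'), res['data'].shape)
    ```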
+ + ```python + def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): + ``` + + - Prediction API, used to segment video portraits frame by frame. + + - **Parameter** + + * frame_org (numpy.ndarray): Single frame for prediction,ndarray.shape is in the format [H, W, C], BGR. + * frame_id (int): The number of the current frame. + * prev_gray (numpy.ndarray): Grayscale image of the previous network input. + * prev_cfd (numpy.ndarray): The fusion image from optical flow and the prediction result from previous frame. + * use\_gpu (bool): Use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + + + - **Return** + + * img_matting (numpy.ndarray): The result of portrait segmentation. + * cur_gray (numpy.ndarray): Grayscale image of the current network input. + * optflow_map (numpy.ndarray): The fusion image from optical flow and the prediction result from current frame. + + + ```python + def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_server_video_result'): + ``` + + - Prediction API to produce video segmentation result. + + - **Parameter** + + * video\_path (str): Video path for segmentation。If None, the video will be obtained from the local camera, and a window will display the online segmentation result. + * use\_gpu (bool): Use GPU or not. **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + * save\_dir (str): Save path of video. + + + ```python + def save_inference_model(dirname='humanseg_server_model', + model_filename=None, + params_filename=None, + combined=True) + ``` + + + - Save the model to the specified path. + + - **Parameters** + + * dirname: Save path. + * model\_filename: Model file name,defalt is \_\_model\_\_ + * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) + * combined: Whether to save the parameters to a unified file. + + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of for human segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m humanseg_server + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # Send an HTTP request + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/humanseg_server" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) + rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) + cv2.imwrite("segment_human_server.png", rgba) + ``` + + +## V. 
Release Note + +- 1.0.0 + + First release + +- 1.1.0 + + Added video portrait segmentation interface + + Added video stream portrait segmentation interface + +* 1.1.1 + + Fix memory leakage problem of on cudnn 8.0.4 diff --git a/modules/image/text_recognition/Vehicle_License_Plate_Recognition/README.md b/modules/image/text_recognition/Vehicle_License_Plate_Recognition/README.md index e30a0ef6eeb9fb80feec73420bcaa653430952ee..4540fc73ba04eeb65bf8840132a9d6daeada7b0d 100644 --- a/modules/image/text_recognition/Vehicle_License_Plate_Recognition/README.md +++ b/modules/image/text_recognition/Vehicle_License_Plate_Recognition/README.md @@ -43,7 +43,7 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/text_recognition/Vehicle_License_Plate_Recognition/README_en.md b/modules/image/text_recognition/Vehicle_License_Plate_Recognition/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..7331cd855f0f1be1b788b2e2589e645dcc3eb4b7 --- /dev/null +++ b/modules/image/text_recognition/Vehicle_License_Plate_Recognition/README_en.md @@ -0,0 +1,123 @@ +# Vehicle_License_Plate_Recognition + +|Module Name|Vehicle_License_Plate_Recognition| +| :--- | :---: | +|Category|text recognition| +|Network|-| +|Dataset|CCPD| +|Fine-tuning supported or not|No| +|Module Size|111MB| +|Latest update date|2021-03-22| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+
+

+ + +- ### Module Introduction + + - Vehicle_License_Plate_Recognition is a module for licence plate recognition, trained on CCPD dataset. This model can detect the position of licence plate and recognize the contents. + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.4 + + - paddleocr >= 2.0.2 + +- ### 2、Installation + + - ```shell + $ hub install Vehicle_License_Plate_Recognition + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="Vehicle_License_Plate_Recognition") + result = model.plate_recognition(images=[cv2.imread('/PATH/TO/IMAGE')]) + ``` + +- ### 2、API + + - ```python + def plate_recognition(images) + ``` + + - Prediction API. + + - **Parameters** + + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + + + - **Return** + - results(list(dict{'license', 'bbox'})): The list of recognition results, where each element is dict. + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of text recognition. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m Vehicle_License_Plate_Recognition + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/Vehicle_License_Plate_Recognition" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install Vehicle_License_Plate_Recognition==1.0.0 + ``` diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README_en.md b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README_en.md index 679b2a0598933d4c5450adca1c997e1a4c323ef4..2ed262867ec0131f343f77d3ccb948aefaefbdfe 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README_en.md +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README_en.md @@ -35,9 +35,9 @@ ## II. Installation -- ### 1、Environmental dependence +- ### 1、Environmental dependence - - paddlepaddle >= 1.7.2 + - paddlepaddle >= 1.7.2 - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) @@ -48,7 +48,7 @@ - ```shell $ pip install shapely pyclipper ``` - - **This Module relies on the third-party libraries shapely and pyclipper. 
Please install shapely and pyclipper before using this Module.** + - **This Module relies on the third-party libraries shapely and pyclipper. Please install shapely and pyclipper before using this Module.** - ### 2、Installation @@ -87,7 +87,7 @@ - ```python __init__(text_detector_module=None, enable_mkldnn=False) ``` - + - Construct the ChineseOCRDBCRNN object - **Parameter** @@ -192,7 +192,7 @@ * 1.1.1 Supports recognition of spaces in text. - + * 1.1.2 Fixed an issue where only 30 fields can be detected. diff --git a/modules/image/text_recognition/german_ocr_db_crnn_mobile/README_en.md b/modules/image/text_recognition/german_ocr_db_crnn_mobile/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..431f7dbb256a16a90834b75ea50c5ed1c48abc85 --- /dev/null +++ b/modules/image/text_recognition/german_ocr_db_crnn_mobile/README_en.md @@ -0,0 +1,162 @@ +# german_ocr_db_crnn_mobile + +|Module Name|german_ocr_db_crnn_mobile| +| :--- | :---: | +|Category|text recognition| +|Network|Differentiable Binarization+CRNN| +|Dataset|icdar2015Dataset| +|Fine-tuning supported or not|No| +|Module Size|3.8MB| +|Latest update date|2021-02-26| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+
+

+ +- ### Module Introduction + - german_ocr_db_crnn_mobile Module is used to identify Germany characters in pictures. It first obtains the text box detected by [chinese_text_detection_db_mobile Module](), then identifies the Germany characters and carries out angle classification to these text boxes. CRNN(Convolutional Recurrent Neural Network) is adopted as the final recognition algorithm. This Module is an ultra-lightweight Germany OCR model that supports direct prediction. + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + - shapely + + - pyclipper + + - ```shell + $ pip install shapely pyclipper + ``` + - **This Module relies on the third-party libraries, shapely and pyclipper. Please install shapely and pyclipper before using this Module.** + +- ### 2、Installation + + - ```shell + $ hub install german_ocr_db_crnn_mobile + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run german_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="german_ocr_db_crnn_mobile", enable_mkldnn=True) # MKLDNN acceleration is only available on CPU + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def __init__(text_detector_module=None, enable_mkldnn=False) + ``` + - Construct the GenmanOCRDBCRNNMobile object + - **Parameters** + - text_detector_module(str): Name of text detection module in PaddleHub Module, if set to None, [chinese_text_detection_db_mobile Module]() will be used by default. It serves to detect the text in the picture. + - enable_mkldnn(bool): Whether to enable MKLDNN for CPU computing acceleration. This parameter is valid only when the CPU is running. The default is False. + + - ```python + def recognize_text(images=[], + paths=[], + use_gpu=False, + output_dir='ocr_result', + visualization=False, + box_thresh=0.5, + text_thresh=0.5, + angle_classification_thresh=0.9) + ``` + + - Prediction API, detecting the position of all Germany text in the input image. 
+ + - **Parameter** + - paths (list[str]): image path + - images (list[numpy.ndarray]): image data, ndarray.shape is in the format [H, W, C], BGR; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - box_thresh (float): The confidence threshold for text box detection; + - text_thresh (float): The confidence threshold for Germany text recognition; + - angle_classification_thresh(float): The confidence threshold for text angle classification + - visualization (bool): Whether to save the results as picture files; + - output_dir (str): save path of images; + - **Return** + - res (list[dict]): The list of recognition results, where each element is dict and each field is: + - data (list[dict]): recognition results, each element in the list is dict and each field is: + - text(str): Recognized texts + - confidence(float): The confidence of the results + - text_box_position(list): The pixel coordinates of the text box in the original picture, a 4*2 matrix representing the coordinates of the lower left, lower right, upper right and upper left vertices of the text box in turn, data is [] if there's no result + - save_path (str, optional): Save path of the result, save_path is '' if no image is saved. + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of text recognition. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m german_ocr_db_crnn_mobile + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/german_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install german_ocr_db_crnn_mobile==1.0.0 + ``` diff --git a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/README_en.md b/modules/image/text_recognition/japan_ocr_db_crnn_mobile/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..d518b9405f1a27df305df9e68a5199b683b82712 --- /dev/null +++ b/modules/image/text_recognition/japan_ocr_db_crnn_mobile/README_en.md @@ -0,0 +1,161 @@ +# japan_ocr_db_crnn_mobile + +|Module Name|japan_ocr_db_crnn_mobile| +| :--- | :---: | +|Category|text recognition| +|Network|Differentiable Binarization+CRNN| +|Dataset|icdar2015Dataset| +|Fine-tuning supported or not|No| +|Module Size|8MB| +|Latest update date|2021-04-15| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +

+
+

+ +- ### Module Introduction + + - japan_ocr_db_crnn_mobile Module is used to identify Japanese characters in pictures. It first obtains the text box detected by [chinese_text_detection_db_mobile Module](), then identifies the Japanese characters and carries out angle classification to these text boxes. CRNN(Convolutional Recurrent Neural Network) is adopted as the final recognition algorithm. This Module is an ultra-lightweight Japanese OCR model that supports direct prediction. +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + + - shapely + + - pyclipper + + - ```shell + $ pip install shapely pyclipper + ``` + - **This Module relies on the third-party libraries, shapely and pyclipper. Please install shapely and pyclipper before using this Module.** + +- ### 2、Installation + + - ```shell + $ hub install japan_ocr_db_crnn_mobile + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run japan_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="japan_ocr_db_crnn_mobile", enable_mkldnn=True) # MKLDNN acceleration is only available on CPU + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def __init__(text_detector_module=None, enable_mkldnn=False) + ``` + - Construct the JapanOCRDBCRNNMobile object + - **Parameters** + - text_detector_module(str): Name of text detection module in PaddleHub Module, if set to None, [chinese_text_detection_db_mobile Module]() will be used by default. It serves to detect the text in the picture. + - enable_mkldnn(bool): Whether to enable MKLDNN for CPU computing acceleration. This parameter is valid only when the CPU is running. The default is False. + + - ```python + def recognize_text(images=[], + paths=[], + use_gpu=False, + output_dir='ocr_result', + visualization=False, + box_thresh=0.5, + text_thresh=0.5, + angle_classification_thresh=0.9) + ``` + + - Prediction API, detecting the position of all Japanese text in the input image. 
+ - **Parameter** + - paths (list[str]): image path + - images (list[numpy.ndarray]): image data, ndarray.shape is in the format [H, W, C], BGR; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - box_thresh (float): The confidence threshold for text box detection; + - text_thresh (float): The confidence threshold for Japanese text recognition; + - angle_classification_thresh(float): The confidence threshold for text angle classification + - visualization (bool): Whether to save the results as picture files; + - output_dir (str): save path of images; + - **Return** + - res (list[dict]): The list of recognition results, where each element is dict and each field is: + - data (list[dict]): recognition results, each element in the list is dict and each field is: + - text(str): Recognized texts + - confidence(float): The confidence of the results + - text_box_position(list): The pixel coordinates of the text box in the original picture, a 4*2 matrix representing the coordinates of the lower left, lower right, upper right and upper left vertices of the text box in turn, data is [] if there's no result + - save_path (str, optional): Save path of the result, save_path is '' if no image is saved. + + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of text recognition. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m japan_ocr_db_crnn_mobile + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/japan_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + + - ```shell + $ hub install japan_ocr_db_crnn_mobile==1.0.0 + ``` diff --git a/modules/text/embedding/fasttext_crawl_target_word-word_dim300_en/README_en.md b/modules/text/embedding/fasttext_crawl_target_word-word_dim300_en/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..d199dcb21f62a053eb1c60a3e40b36b67faf466b --- /dev/null +++ b/modules/text/embedding/fasttext_crawl_target_word-word_dim300_en/README_en.md @@ -0,0 +1,178 @@ +# fasttext_crawl_target_word-word_dim300_en +|Module Name|fasttext_crawl_target_word-word_dim300_en| +| :--- | :---: | +|Category|Word Embedding| +|Network|fasttext| +|Dataset|crawl| +|Fine-tuning supported|No| +|Module Size|1.19GB| +|Vocab Size|2,000,002| +|Last update date|26 Feb, 2021| +|Data Indicators|-| + +## I. Basic Information + +- ### Module Introduction + + - PaddleHub provides several open source pretrained word embedding models. These embedding models are distinguished by the corpus, training methods and word embedding dimensions. 
For more informations, please refer to: [Summary of embedding models](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) + +## II. Installation + +- ### 1. Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [PaddleHub Installation Guide](../../../../docs/docs_ch/get_start/installation_en.rst) + +- ### 2. Installation + + - ```shell + $ hub install fasttext_crawl_target_word-word_dim300_en + ``` + + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_ch/get_start/windows_quickstart_en.md) | [Linux_Quickstart](../../../../docs/docs_ch/get_start/linux_quickstart_en.md) | [Mac_Quickstart](../../../../docs/docs_ch/get_start/mac_quickstart_en.md) + +## III. Module API Prediction + +- ### 1. Prediction Code Example + + - ``` + import paddlehub as hub + embedding = hub.Module(name='fasttext_crawl_target_word-word_dim300_en') + + # Get the embedding of the word + embedding.search("中国") + # Calculate the cosine similarity of two word vectors + embedding.cosine_sim("中国", "美国") + # Calculate the inner product of two word vectors + embedding.dot("中国", "美国") + ``` + +- ### 2、API + + - ```python + def __init__( + *args, + **kwargs + ) + ``` + + - Construct an embedding module object without parameters by default. + + - **Parameters** + - `*args`: Arguments specified by the user. + - `**kwargs`:Keyword arguments specified by the user. + + - More info[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) + + + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` + + - Return the embedding of one or multiple words. The input data type can be `str`, `List[str]` and `int`, represent word, multiple words and the embedding of specified word id accordingly. Word id is related to the model vocab, vocab can be obtained by the attribute of `vocab`. + + - **参数** + - `words`: input words or word id. + + + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` + + - Cosine similarity calculation. `word_a` and `word_b` should be in the voacb, or they will be replaced by `unknown_token`. + + - **参数** + - `word_a`: input word a. + - `word_b`: input word b. + + + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` + + - Inner product calculation. `word_a` and `word_b` should be in the voacb, or they will be replaced by `unknown_token`. + + - **参数** + - `word_a`: input word a. + - `word_b`: input word b. + + + - ```python + def get_vocab_path() + ``` + + - Get the path of the local vocab file. + + + - ```python + def get_tokenizer(*args, **kwargs) + ``` + + - Get the tokenizer of current model, it will return an instance of JiebaTokenizer, only supports the chinese embedding models currently. + + - **参数** + - `*args`: Arguments specified by the user. + - `**kwargs`: Keyword arguments specified by the user. + + - For more information about the arguments, please refer to[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) + + - For more information about the usage, please refer to[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of cosine similarity calculation. 
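- For reference, the cosine similarity served here is the inner product of the two word vectors normalized by their lengths. A small NumPy sketch with hypothetical 300-dimensional vectors (the module's `dot` and `cosine_sim` methods compute these quantities for you):

  ```python
  import numpy as np

  vec_a = np.random.rand(300)  # hypothetical word vectors
  vec_b = np.random.rand(300)

  inner = float(np.dot(vec_a, vec_b))                               # as in embedding.dot(word_a, word_b)
  cosine = inner / (np.linalg.norm(vec_a) * np.linalg.norm(vec_b))  # as in embedding.cosine_sim(word_a, word_b)
  print(inner, cosine)
  ```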
+ +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m fasttext_crawl_target_word-word_dim300_en + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set `CUDA_VISIBLE_DEVICES` environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + + # Specify the word pairs used to calculate the cosine similarity [[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + data = {"data": word_pairs} + # Send an HTTP request + url = "http://127.0.0.1:8866/predict/fasttext_crawl_target_word-word_dim300_en" + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## V. Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Model optimization + - ```shell + $ hub install fasttext_crawl_target_word-word_dim300_en==1.0.1 + ``` \ No newline at end of file diff --git a/modules/text/language_model/albert-base-v1/README.md b/modules/text/language_model/albert-base-v1/README.md new file mode 100644 index 0000000000000000000000000000000000000000..abef64ad567a5f1446e1a7286298d18d8049045b --- /dev/null +++ b/modules/text/language_model/albert-base-v1/README.md @@ -0,0 +1,173 @@ +# albert-base-v1 +|模型名称|albert-base-v1| +| :--- | :---: | +|类别|文本-语义模型| +|网络|albert-base-v1| +|数据集|-| +|是否支持Fine-tuning|是| +|模型大小|90MB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - ALBERT针对当前预训练模型参数量过大的问题,提出了以下改进方案: + + - 嵌入向量参数化的因式分解。ALBERT对词嵌入参数进行了因式分解,先将单词映射到一个低维的词嵌入空间E,然后再将其映射到高维的隐藏空间H。 + + - 跨层参数共享。ALBERT共享了层之间的全部参数。 + +更多详情请参考[ALBERT论文](https://arxiv.org/abs/1909.11942) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install albert-base-v1 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + +```python +import paddlehub as hub + +data = [ + ['这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般'], + ['怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片'], + ['作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。'], +] +label_map = {0: 'negative', 1: 'positive'} + +model = hub.Module( + name='albert-base-v1', + version='1.0.0', + task='seq-cls', + load_checkpoint='/path/to/parameters', + label_map=label_map) +results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False) +for idx, text in enumerate(data): + print('Data: {} \t Label: {}'.format(text, results[idx])) +``` + +详情可参考PaddleHub示例: +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) + +- ### 2、API + + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` + + - 创建Module对象(动态图组网版本) + + - **参数** + + - `task`: 任务名称,可为`seq-cls`(文本分类任务)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 
序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。
+      - `**kwargs`:用户额外指定的关键字字典类型的参数。
+
+  - ```python
+    def predict(
+        data,
+        max_seq_len=128,
+        batch_size=1,
+        use_gpu=False
+    )
+    ```
+
+    - **参数**
+
+      - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。
+      - `max_seq_len`:模型处理文本的最大长度。
+      - `batch_size`:模型批处理大小。
+      - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。
+
+    - **返回**
+
+      - `results`:list类型,不同任务类型的返回结果如下
+        - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\]
+        - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\]
+
+  - ```python
+    def get_embedding(
+        data,
+        use_gpu=False
+    )
+    ```
+
+    - 用于获取输入文本的句子粒度特征与字粒度特征
+
+    - **参数**
+
+      - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。
+      - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。
+
+    - **返回**
+
+      - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。
+
+## 四、服务部署
+
+- PaddleHub Serving可以部署一个在线获取预训练词向量的服务。
+
+- ### 第一步:启动PaddleHub Serving
+
+  - ```shell
+    $ hub serving start -m albert-base-v1
+    ```
+
+  - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。
+
+  - **NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。
+
+- ### 第二步:发送预测请求
+
+  - 配置好服务端后,以下几行代码即可实现发送预测请求,获取预测结果
+
+  - ```python
+    import requests
+    import json
+
+    # 指定用于获取embedding的文本[[text_1], [text_2], ... ]
+    text = [["今天是个好日子"], ["天气预报说今天要下雨"]]
+    # 以key的方式指定text传入预测方法时的参数,此例中为"data"
+    # 对应本地部署,则为module.get_embedding(data=text)
+    data = {"data": text}
+    # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip
+    url = "http://127.0.0.1:8866/predict/albert-base-v1"
+    # 指定post请求的headers为application/json方式
+    headers = {"Content-Type": "application/json"}
+
+    r = requests.post(url=url, headers=headers, data=json.dumps(data))
+    print(r.json())
+    ```
+
+## 五、更新历史
+
+* 1.0.0
+
+  初始发布
diff --git a/modules/text/language_model/albert-base-v1/__init__.py b/modules/text/language_model/albert-base-v1/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/modules/text/language_model/albert-base-v1/module.py b/modules/text/language_model/albert-base-v1/module.py
new file mode 100644
index 0000000000000000000000000000000000000000..b04b2a023566676420a6346d289440360a454766
--- /dev/null
+++ b/modules/text/language_model/albert-base-v1/module.py
@@ -0,0 +1,177 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
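+
+# PaddleHub module wrapping the albert-base-v1 pretrained model: depending on the
+# `task` argument it attaches a sequence-classification, token-classification or
+# text-matching head on top of AlbertModel (see the Albert class below).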
+import math +import os +from typing import Dict + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers.albert.modeling import AlbertForSequenceClassification +from paddlenlp.transformers.albert.modeling import AlbertForTokenClassification +from paddlenlp.transformers.albert.modeling import AlbertModel +from paddlenlp.transformers.albert.tokenizer import AlbertTokenizer + +from paddlehub.module.module import moduleinfo +from paddlehub.module.nlp_module import TransformerModule +from paddlehub.utils.log import logger + + +@moduleinfo(name="albert-base-v1", + version="1.0.0", + summary="", + author="Baidu", + author_email="", + type="nlp/semantic_model", + meta=TransformerModule) +class Albert(nn.Layer): + """ + ALBERT model + """ + + def __init__( + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, + ): + super(Albert, self).__init__() + if label_map: + self.label_map = label_map + self.num_classes = len(label_map) + else: + self.num_classes = num_classes + + if task == 'sequence_classification': + task = 'seq-cls' + logger.warning( + "current task name 'sequence_classification' was renamed to 'seq-cls', " + "'sequence_classification' has been deprecated and will be removed in the future.", ) + if task == 'seq-cls': + self.model = AlbertForSequenceClassification.from_pretrained(pretrained_model_name_or_path='albert-base-v1', + num_classes=self.num_classes, + **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task == 'token-cls': + self.model = AlbertForTokenClassification.from_pretrained(pretrained_model_name_or_path='albert-base-v1', + num_classes=self.num_classes, + **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) + elif task == 'text-matching': + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-base-v1', **kwargs) + self.dropout = paddle.nn.Dropout(0.1) + self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task is None: + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-base-v1', **kwargs) + else: + raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) + + self.task = task + + if load_checkpoint is not None and os.path.isfile(load_checkpoint): + state_dict = paddle.load(load_checkpoint) + self.set_state_dict(state_dict) + logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) + + def forward(self, + input_ids=None, + token_type_ids=None, + position_ids=None, + attention_mask=None, + query_input_ids=None, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_input_ids=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None, + seq_lengths=None, + labels=None): + + if self.task != 'text-matching': + result = self.model(input_ids, token_type_ids, position_ids, attention_mask) + else: + query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask) + title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask) + 
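+        # Dispatch on the configured task: 'seq-cls' and 'token-cls' turn the head logits into
+        # probabilities (plus loss and metrics when labels are given), while 'text-matching'
+        # mean-pools the masked query/title token embeddings and classifies the concatenation
+        # [query_mean, title_mean, |query_mean - title_mean|].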
+ if self.task == 'seq-cls': + logits = result + probs = F.softmax(logits, axis=1) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + elif self.task == 'token-cls': + logits = result + token_level_probs = F.softmax(logits, axis=-1) + preds = token_level_probs.argmax(axis=-1) + if labels is not None: + loss = self.criterion(logits, labels.unsqueeze(-1)) + num_infer_chunks, num_label_chunks, num_correct_chunks = \ + self.metric.compute(None, seq_lengths, preds, labels) + self.metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(), num_correct_chunks.numpy()) + _, _, f1_score = map(float, self.metric.accumulate()) + return token_level_probs, loss, {'f1_score': f1_score} + return token_level_probs + elif self.task == 'text-matching': + query_token_embedding, _ = query_result + query_token_embedding = self.dropout(query_token_embedding) + query_attention_mask = paddle.unsqueeze( + (query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + query_token_embedding = query_token_embedding * query_attention_mask + query_sum_embedding = paddle.sum(query_token_embedding, axis=1) + query_sum_mask = paddle.sum(query_attention_mask, axis=1) + query_mean = query_sum_embedding / query_sum_mask + + title_token_embedding, _ = title_result + title_token_embedding = self.dropout(title_token_embedding) + title_attention_mask = paddle.unsqueeze( + (title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + title_token_embedding = title_token_embedding * title_attention_mask + title_sum_embedding = paddle.sum(title_token_embedding, axis=1) + title_sum_mask = paddle.sum(title_attention_mask, axis=1) + title_mean = title_sum_embedding / title_sum_mask + + sub = paddle.abs(paddle.subtract(query_mean, title_mean)) + projection = paddle.concat([query_mean, title_mean, sub], axis=-1) + logits = self.classifier(projection) + probs = F.softmax(logits) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + else: + sequence_output, pooled_output = result + return sequence_output, pooled_output + + @staticmethod + def get_tokenizer(*args, **kwargs): + """ + Gets the tokenizer that is customized for this module. 
+ """ + return AlbertTokenizer.from_pretrained(pretrained_model_name_or_path='albert-base-v1', *args, **kwargs) diff --git a/modules/video/Video_editing/SkyAR/README.md b/modules/video/Video_editing/SkyAR/README.md index 7e6cb468f2220dcdc12d58cdf8be2986372d5f66..0b43e10fffa98d72a7b4c82406712250520da02a 100644 --- a/modules/video/Video_editing/SkyAR/README.md +++ b/modules/video/Video_editing/SkyAR/README.md @@ -2,9 +2,9 @@ |模型名称|SkyAR| | :--- | :---: | -|类别|图像-图像分割| +|类别|视频-视频编辑| |网络|UNet| -|数据集|UNet| +|数据集|-| |是否支持Fine-tuning|否| |模型大小|206MB| |指标|-| @@ -71,7 +71,7 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 ```python import paddlehub as hub @@ -79,8 +79,8 @@ model = hub.Module(name='SkyAR') model.MagicSky( - video_path=[path to input video path], - save_path=[path to save video path] + video_path="/PATH/TO/VIDEO", + save_path="/PATH/TO/SAVE/RESULT" ) ``` - ### 2、API diff --git a/modules/video/Video_editing/SkyAR/README_en.md b/modules/video/Video_editing/SkyAR/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..1b122baa1fcf903c8c47aa0303c35ef0f01bdfe8 --- /dev/null +++ b/modules/video/Video_editing/SkyAR/README_en.md @@ -0,0 +1,124 @@ +# SkyAR + +|Module Name|SkyAR| +| :--- | :---: | +|Category|Video editing| +|Network|UNet| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size|206MB| +|Data indicators|-| +|Latest update date|2021-02-26| + +## I. Basic Information + +- ### Application Effect Display + + - Sample results: + * Input video: + + ![Input video](https://img-blog.csdnimg.cn/20210126142046572.gif) + + * Jupiter: + + ![Jupiter](https://img-blog.csdnimg.cn/20210125211435619.gif) + * Rainy day: + + ![Rainy day](https://img-blog.csdnimg.cn/2021012521152492.gif) + * Galaxy: + + ![Galaxy](https://img-blog.csdnimg.cn/20210125211523491.gif) + * Ninth area spacecraft: + + ![Ninth area spacecraft](https://img-blog.csdnimg.cn/20210125211520955.gif) + + * Input video: + + ![Input video](https://img-blog.csdnimg.cn/20210126142038716.gif) + * Floating castle: + + ![Floating castle](https://img-blog.csdnimg.cn/20210125211514997.gif) + * Thunder and lightning: + + ![Thunder and lightning](https://img-blog.csdnimg.cn/20210125211433591.gif) + + * Super moon: + + ![Super moon](https://img-blog.csdnimg.cn/20210125211417524.gif) + +- ### Module Introduction + + - SkyAR is based on [Castle in the Sky: Dynamic Sky Replacement and Harmonization in Videos](https://arxiv.org/abs/2010.11800). It mainly consists of three parts: sky matting network, motion estimation and image fusion. + + - For more information, please refer to:[SkyAR](https://github.com/jiupinjia/SkyAR) + + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $hub install SkyAR + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III. 
Module API Prediction
+
+- ### 1. Prediction Code Example
+
+  ```python
+  import paddlehub as hub
+
+  model = hub.Module(name='SkyAR')
+
+  model.MagicSky(
+      video_path="/PATH/TO/VIDEO",
+      save_path="/PATH/TO/SAVE/RESULT"
+  )
+  ```
+- ### 2. API
+
+  ```python
+  def MagicSky(
+      video_path, save_path, config='jupiter',
+      is_rainy=False, preview_frames_num=0, is_video_sky=False, is_show=False,
+      skybox_img=None, skybox_video=None, rain_cap_path=None,
+      halo_effect=True, auto_light_matching=False,
+      relighting_factor=0.8, recoloring_factor=0.5, skybox_center_crop=0.5
+  )
+  ```
+
+  - **Parameters**
+
+    * video_path (str): path to the input video.
+    * save_path (str): path to save the output video.
+    * config (str): SkyBox configuration; all preset configurations are as follows: `['cloudy', 'district9ship', 'floatingcastle', 'galaxy', 'jupiter', 'rainy', 'sunny', 'sunset', 'supermoon', 'thunderstorm']`. If you use a custom SkyBox, please set it to None.
+    * skybox_img (str): custom SkyBox image path.
+    * skybox_video (str): custom SkyBox video path.
+    * is_video_sky (bool): whether the custom SkyBox is a video.
+    * rain_cap_path (str): path to a custom rain video.
+    * is_rainy (bool): whether the sky is raining.
+    * halo_effect (bool): whether to enable the halo effect.
+    * auto_light_matching (bool): whether to enable automatic brightness matching.
+    * relighting_factor (float): relighting factor.
+    * recoloring_factor (float): recoloring factor.
+    * skybox_center_crop (float): SkyBox center crop factor.
+    * preview_frames_num (int): set the number of preview frames.
+    * is_show (bool): whether to preview graphically.
+
+
+## IV. Release Note
+
+- 1.0.0
+
+  First release
diff --git a/paddlehub/server/server.py b/paddlehub/server/server.py
index 64d8f61ca7177a810336f4b14cac67b662e15611..840549c24c62e3438b1fc37ffa4fe1c198f7a4dc 100644
--- a/paddlehub/server/server.py
+++ b/paddlehub/server/server.py
@@ -159,7 +159,7 @@ class CacheUpdater(threading.Thread):
         if version:
             payload['version'] = version
         api_url = uri_path(hubconf.server, 'search')
-        cache_path = os.path.join("~")
+        cache_path = os.path.join("~")
         hub_name = cache_config.hub_name
         if os.path.exists(cache_path):
             extra = {"command": command, "mtime": os.stat(cache_path).st_mtime, "hub_name": hub_name}