提交 037f2ad5 编写于 作者: H haoyuying

revise openpose conflict

import paddle
import paddlehub as hub
if __name__ == '__main__':
place = paddle.CUDAPlace(0)
paddle.disable_static()
model = hub.Module(name='yolov3_darknet53_pascalvoc', is_train=False)
model.eval()
model.predict(imgpath="4026.jpeg", filelist="/PATH/TO/JSON/FILE")
import paddle
import paddlehub as hub
import paddle.nn as nn
from paddlehub.finetune.trainer import Trainer
from paddlehub.datasets.pascalvoc import DetectionData
import paddlehub.process.detect_transforms as T
if __name__ == "__main__":
paddle.disable_static()
transform = T.Compose([
T.RandomDistort(),
T.RandomExpand(fill=[0.485, 0.456, 0.406]),
T.RandomCrop(),
T.Resize(target_size=416),
T.RandomFlip(),
T.ShuffleBox(),
T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
train_reader = DetectionData(transform)
model = hub.Module(name='yolov3_darknet53_pascalvoc')
optimizer = paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters())
trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_det')
trainer.train(train_reader, epochs=5, batch_size=4, eval_dataset=train_reader, log_interval=1, save_interval=1)
```shell
$ hub install mobilenet_v2_imagenet_ssld==1.0.0
```
<p align="center">
<img src="http://bj.bcebos.com/ibox-thumbnail98/e7b22762cf42ab0e1e1fab6b8720938b?authorization=bce-auth-v1%2Ffbe74140929444858491fbf2b6bc0935%2F2020-04-08T11%3A49%3A16Z%2F1800%2F%2Faf385f56da3c8ee1298588939d93533a72203c079ae1187affa2da555b9898ea" hspace='5' width=800/> <br /> MobileNet 系列的网络结构
</p>
模型的详情可参考[论文](https://arxiv.org/pdf/1801.04381.pdf)
## 命令行预测
```
hub run mobilenet_v2_imagenet_ssld --input_path "/PATH/TO/IMAGE"
```
## API
```python
def get_expected_image_width()
```
返回预处理的图片宽度,也就是224。
```python
def get_expected_image_height()
```
返回预处理的图片高度,也就是224。
```python
def get_pretrained_images_mean()
```
返回预处理的图片均值,也就是 \[0.485, 0.456, 0.406\]
```python
def get_pretrained_images_std()
```
返回预处理的图片标准差,也就是 \[0.229, 0.224, 0.225\]
```python
def context(trainable=True, pretrained=True)
```
**参数**
* trainable (bool): 计算图的参数是否为可训练的;
* pretrained (bool): 是否加载默认的预训练模型。
**返回**
* inputs (dict): 计算图的输入,key 为 'image', value 为图片的张量;
* outputs (dict): 计算图的输出,key 为 'classification' 和 'feature_map',其相应的值为:
* classification (paddle.fluid.framework.Variable): 分类结果,也就是全连接层的输出;
* feature\_map (paddle.fluid.framework.Variable): 特征匹配,全连接层前面的那个张量。
* context\_prog(fluid.Program): 计算图,用于迁移学习。
```python
def classification(images=None,
paths=None,
batch_size=1,
use_gpu=False,
top_k=1):
```
**参数**
* images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
* paths (list\[str\]): 图片的路径;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU 来预测;
* top\_k (int): 返回预测结果的前 k 个。
**返回**
res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别动物的类别,value为置信度。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存在模型的目录名称
* model_filename: 模型文件名称,默认为\_\_model\_\_
* params_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
classifier = hub.Module(name="mobilenet_v2_imagenet_ssld")
result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = classifier.classification(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个在线动物识别服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m mobilenet_v2_imagenet_ssld
```
这样就完成了一个在线动物识别服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/mobilenet_v2_imagenet_ssld"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 查看代码
[PaddleClas](https://github.com/PaddlePaddle/PaddleClas)
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
# coding=utf-8
import os
import time
from collections import OrderedDict
import cv2
import numpy as np
from PIL import Image
__all__ = ['reader']
DATA_DIM = 224
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
def resize_short(img, target_size):
percent = float(target_size) / min(img.size[0], img.size[1])
resized_width = int(round(img.size[0] * percent))
resized_height = int(round(img.size[1] * percent))
img = img.resize((resized_width, resized_height), Image.LANCZOS)
return img
def crop_image(img, target_size, center):
width, height = img.size
size = target_size
if center == True:
w_start = (width - size) / 2
h_start = (height - size) / 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img.crop((w_start, h_start, w_end, h_end))
return img
def process_image(img):
img = resize_short(img, target_size=256)
img = crop_image(img, target_size=DATA_DIM, center=True)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
img -= img_mean
img /= img_std
return img
def reader(images=None, paths=None):
"""
Preprocess to yield image.
Args:
images (list[numpy.ndarray]): images data, shape of each is [H, W, C].
paths (list[str]): paths to images.
Yield:
each (collections.OrderedDict): info of original image, preprocessed image.
"""
component = list()
if paths:
for im_path in paths:
each = OrderedDict()
assert os.path.isfile(
im_path), "The {} isn't a valid file path.".format(im_path)
each['org_im_path'] = im_path
each['org_im'] = Image.open(im_path)
each['org_im_width'], each['org_im_height'] = each['org_im'].size
component.append(each)
if images is not None:
assert type(images), "images is a list."
for im in images:
each = OrderedDict()
each['org_im'] = Image.fromarray(im[:, :, ::-1])
each['org_im_path'] = 'ndarray_time={}'.format(
round(time.time(), 6) * 1e6)
each['org_im_width'], each['org_im_height'] = each['org_im'].size
component.append(each)
for element in component:
element['image'] = process_image(element['org_im'])
yield element
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = [
'MobileNetV2_x0_25', 'MobileNetV2_x0_5', 'MobileNetV2_x0_75',
'MobileNetV2_x1_0', 'MobileNetV2_x1_5', 'MobileNetV2_x2_0', 'MobileNetV2'
]
class MobileNetV2():
def __init__(self, scale=1.0):
self.scale = scale
def net(self, input, class_dim=1000):
scale = self.scale
bottleneck_params_list = [
(1, 16, 1, 1),
(6, 24, 2, 2),
(6, 32, 3, 2),
(6, 64, 4, 2),
(6, 96, 3, 1),
(6, 160, 3, 2),
(6, 320, 1, 1),
]
#conv1
input = self.conv_bn_layer(
input,
num_filters=int(32 * scale),
filter_size=3,
stride=2,
padding=1,
if_act=True,
name='conv1_1')
# bottleneck sequences
i = 1
in_c = int(32 * scale)
for layer_setting in bottleneck_params_list:
t, c, n, s = layer_setting
i += 1
input = self.invresi_blocks(
input=input,
in_c=in_c,
t=t,
c=int(c * scale),
n=n,
s=s,
name='conv' + str(i))
in_c = int(c * scale)
#last_conv
input = self.conv_bn_layer(
input=input,
num_filters=int(1280 * scale) if scale > 1.0 else 1280,
filter_size=1,
stride=1,
padding=0,
if_act=True,
name='conv9')
input = fluid.layers.pool2d(
input=input, pool_type='avg', global_pooling=True)
output = fluid.layers.fc(
input=input,
size=class_dim,
param_attr=ParamAttr(name='fc10_weights'),
bias_attr=ParamAttr(name='fc10_offset'))
return output, input
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
if_act=True,
name=None,
use_cudnn=True):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
bn_name = name + '_bn'
bn = fluid.layers.batch_norm(
input=conv,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
if if_act:
return fluid.layers.relu6(bn)
else:
return bn
def shortcut(self, input, data_residual):
return fluid.layers.elementwise_add(input, data_residual)
def inverted_residual_unit(self,
input,
num_in_filter,
num_filters,
ifshortcut,
stride,
filter_size,
padding,
expansion_factor,
name=None):
num_expfilter = int(round(num_in_filter * expansion_factor))
channel_expand = self.conv_bn_layer(
input=input,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name=name + '_expand')
bottleneck_conv = self.conv_bn_layer(
input=channel_expand,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=padding,
num_groups=num_expfilter,
if_act=True,
name=name + '_dwise',
use_cudnn=False)
linear_out = self.conv_bn_layer(
input=bottleneck_conv,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=False,
name=name + '_linear')
if ifshortcut:
out = self.shortcut(input=input, data_residual=linear_out)
return out
else:
return linear_out
def invresi_blocks(self, input, in_c, t, c, n, s, name=None):
first_block = self.inverted_residual_unit(
input=input,
num_in_filter=in_c,
num_filters=c,
ifshortcut=False,
stride=s,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + '_1')
last_residual_block = first_block
last_c = c
for i in range(1, n):
last_residual_block = self.inverted_residual_unit(
input=last_residual_block,
num_in_filter=last_c,
num_filters=c,
ifshortcut=True,
stride=1,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + '_' + str(i + 1))
return last_residual_block
def MobileNetV2_x0_25():
model = MobileNetV2(scale=0.25)
return model
def MobileNetV2_x0_5():
model = MobileNetV2(scale=0.5)
return model
def MobileNetV2_x0_75():
model = MobileNetV2(scale=0.75)
return model
def MobileNetV2_x1_0():
model = MobileNetV2(scale=1.0)
return model
def MobileNetV2_x1_5():
model = MobileNetV2(scale=1.5)
return model
def MobileNetV2_x2_0():
model = MobileNetV2(scale=2.0)
return model
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
import ast
import argparse
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.module.module import moduleinfo, runnable, serving
from paddlehub.common.paddle_helper import add_vars_prefix
from mobilenet_v2_imagenet_ssld.processor import postprocess, base64_to_cv2
from mobilenet_v2_imagenet_ssld.data_feed import reader
from mobilenet_v2_imagenet_ssld.mobilenet_v2 import MobileNetV2
@moduleinfo(
name="mobilenet_v2_imagenet_ssld",
type="CV/image_classification",
author="paddlepaddle",
author_email="paddle-dev@baidu.com",
summary=
"Mobilenet_V2 is a image classfication model, this module is trained with ImageNet-2012 dataset.",
version="1.0.0")
class MobileNetV2ImageNetSSLD(hub.Module):
def _initialize(self):
self.default_pretrained_model_path = os.path.join(
self.directory, "model")
label_file = os.path.join(self.directory, "label_list.txt")
with open(label_file, 'r', encoding='utf-8') as file:
self.label_list = file.read().split("\n")[:-1]
self._set_config()
def get_expected_image_width(self):
return 224
def get_expected_image_height(self):
return 224
def get_pretrained_images_mean(self):
im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3)
return im_mean
def get_pretrained_images_std(self):
im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3)
return im_std
def _set_config(self):
"""
predictor config setting
"""
cpu_config = AnalysisConfig(self.default_pretrained_model_path)
cpu_config.disable_glog_info()
cpu_config.disable_gpu()
self.cpu_predictor = create_paddle_predictor(cpu_config)
try:
_places = os.environ["CUDA_VISIBLE_DEVICES"]
int(_places[0])
use_gpu = True
except:
use_gpu = False
if use_gpu:
gpu_config = AnalysisConfig(self.default_pretrained_model_path)
gpu_config.disable_glog_info()
gpu_config.enable_use_gpu(
memory_pool_init_size_mb=1000, device_id=0)
self.gpu_predictor = create_paddle_predictor(gpu_config)
def context(self, trainable=True, pretrained=True):
"""context for transfer learning.
Args:
trainable (bool): Set parameters in program to be trainable.
pretrained (bool) : Whether to load pretrained model.
Returns:
inputs (dict): key is 'image', corresponding vaule is image tensor.
outputs (dict): key is :
'classification', corresponding value is the result of classification.
'feature_map', corresponding value is the result of the layer before the fully connected layer.
context_prog (fluid.Program): program for transfer learning.
"""
context_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(context_prog, startup_prog):
with fluid.unique_name.guard():
image = fluid.layers.data(
name="image", shape=[3, 224, 224], dtype="float32")
mobile_net = MobileNetV2()
output, feature_map = mobile_net.net(
input=image, class_dim=len(self.label_list))
name_prefix = '@HUB_{}@'.format(self.name)
inputs = {'image': name_prefix + image.name}
outputs = {
'classification': name_prefix + output.name,
'feature_map': name_prefix + feature_map.name
}
add_vars_prefix(context_prog, name_prefix)
add_vars_prefix(startup_prog, name_prefix)
global_vars = context_prog.global_block().vars
inputs = {
key: global_vars[value]
for key, value in inputs.items()
}
outputs = {
key: global_vars[value]
for key, value in outputs.items()
}
place = fluid.CPUPlace()
exe = fluid.Executor(place)
# pretrained
if pretrained:
def _if_exist(var):
b = os.path.exists(
os.path.join(self.default_pretrained_model_path,
var.name))
return b
fluid.io.load_vars(
exe,
self.default_pretrained_model_path,
context_prog,
predicate=_if_exist)
else:
exe.run(startup_prog)
# trainable
for param in context_prog.global_block().iter_parameters():
param.trainable = trainable
return inputs, outputs, context_prog
def save_inference_model(self,
dirname,
model_filename=None,
params_filename=None,
combined=True):
if combined:
model_filename = "__model__" if not model_filename else model_filename
params_filename = "__params__" if not params_filename else params_filename
place = fluid.CPUPlace()
exe = fluid.Executor(place)
program, feeded_var_names, target_vars = fluid.io.load_inference_model(
dirname=self.default_pretrained_model_path, executor=exe)
fluid.io.save_inference_model(
dirname=dirname,
main_program=program,
executor=exe,
feeded_var_names=feeded_var_names,
target_vars=target_vars,
model_filename=model_filename,
params_filename=params_filename)
def classification(self,
images=None,
paths=None,
batch_size=1,
use_gpu=False,
top_k=1):
"""
API for image classification.
Args:
images (numpy.ndarray): data of images, shape of each is [H, W, C], color space must be BGR.
paths (list[str]): The paths of images.
batch_size (int): batch size.
use_gpu (bool): Whether to use gpu.
top_k (int): Return top k results.
Returns:
res (list[dict]): The classfication results.
"""
if use_gpu:
try:
_places = os.environ["CUDA_VISIBLE_DEVICES"]
int(_places[0])
except:
raise RuntimeError(
"Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
)
all_data = list()
for yield_data in reader(images, paths):
all_data.append(yield_data)
total_num = len(all_data)
loop_num = int(np.ceil(total_num / batch_size))
res = list()
for iter_id in range(loop_num):
batch_data = list()
handle_id = iter_id * batch_size
for image_id in range(batch_size):
try:
batch_data.append(all_data[handle_id + image_id])
except:
pass
# feed batch image
batch_image = np.array([data['image'] for data in batch_data])
batch_image = PaddleTensor(batch_image.copy())
predictor_output = self.gpu_predictor.run([
batch_image
]) if use_gpu else self.cpu_predictor.run([batch_image])
out = postprocess(
data_out=predictor_output[0].as_ndarray(),
label_list=self.label_list,
top_k=top_k)
res += out
return res
@serving
def serving_method(self, images, **kwargs):
"""
Run as a service.
"""
images_decode = [base64_to_cv2(image) for image in images]
results = self.classification(images=images_decode, **kwargs)
return results
@runnable
def run_cmd(self, argvs):
"""
Run as a command.
"""
self.parser = argparse.ArgumentParser(
description="Run the {} module.".format(self.name),
prog='hub run {}'.format(self.name),
usage='%(prog)s',
add_help=True)
self.arg_input_group = self.parser.add_argument_group(
title="Input options", description="Input data. Required")
self.arg_config_group = self.parser.add_argument_group(
title="Config options",
description=
"Run configuration for controlling module behavior, not required.")
self.add_module_config_arg()
self.add_module_input_arg()
args = self.parser.parse_args(argvs)
results = self.classification(
paths=[args.input_path],
batch_size=args.batch_size,
use_gpu=args.use_gpu)
return results
def add_module_config_arg(self):
"""
Add the command config options.
"""
self.arg_config_group.add_argument(
'--use_gpu',
type=ast.literal_eval,
default=False,
help="whether use GPU or not.")
self.arg_config_group.add_argument(
'--batch_size',
type=ast.literal_eval,
default=1,
help="batch size.")
self.arg_config_group.add_argument(
'--top_k',
type=ast.literal_eval,
default=1,
help="Return top k results.")
def add_module_input_arg(self):
"""
Add the command input options.
"""
self.arg_input_group.add_argument(
'--input_path', type=str, help="path to image.")
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2d, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d
from paddlehub.module.module import moduleinfo
from paddlehub.module.cv_module import ImageClassifierModule
class ConvBNLayer(nn.Layer):
"""Basic conv bn layer."""
def __init__(self,
num_channels: int,
filter_size: int,
num_filters: int,
stride: int,
padding: int,
num_groups: int = 1,
name: str = None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2d(in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
self._batch_norm = BatchNorm(num_filters,
param_attr=ParamAttr(name=name + "_bn_scale"),
bias_attr=ParamAttr(name=name + "_bn_offset"),
moving_mean_name=name + "_bn_mean",
moving_variance_name=name + "_bn_variance")
def forward(self, inputs: paddle.Tensor, if_act: bool = True):
y = self._conv(inputs)
y = self._batch_norm(y)
if if_act:
y = F.relu6(y)
return y
class InvertedResidualUnit(nn.Layer):
"""Inverted Residual unit."""
def __init__(self, num_channels: int, num_in_filter: int, num_filters: int, stride: int, filter_size: int,
padding: int, expansion_factor: int, name: str):
super(InvertedResidualUnit, self).__init__()
num_expfilter = int(round(num_in_filter * expansion_factor))
self._expand_conv = ConvBNLayer(num_channels=num_channels,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
name=name + "_expand")
self._bottleneck_conv = ConvBNLayer(num_channels=num_expfilter,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=padding,
num_groups=num_expfilter,
name=name + "_dwise")
self._linear_conv = ConvBNLayer(num_channels=num_expfilter,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
name=name + "_linear")
def forward(self, inputs: paddle.Tensor, ifshortcut: bool):
y = self._expand_conv(inputs, if_act=True)
y = self._bottleneck_conv(y, if_act=True)
y = self._linear_conv(y, if_act=False)
if ifshortcut:
y = paddle.elementwise_add(inputs, y)
return y
class InversiBlocks(nn.Layer):
"""Inverted residual block composed by inverted residual unit."""
def __init__(self, in_c: int, t: int, c: int, n: int, s: int, name: str):
super(InversiBlocks, self).__init__()
self._first_block = InvertedResidualUnit(num_channels=in_c,
num_in_filter=in_c,
num_filters=c,
stride=s,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + "_1")
self._block_list = []
for i in range(1, n):
block = self.add_sublayer(name + "_" + str(i + 1),
sublayer=InvertedResidualUnit(num_channels=c,
num_in_filter=c,
num_filters=c,
stride=1,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + "_" + str(i + 1)))
self._block_list.append(block)
def forward(self, inputs: paddle.Tensor):
y = self._first_block(inputs, ifshortcut=False)
for block in self._block_list:
y = block(y, ifshortcut=True)
return y
@moduleinfo(name="mobilenet_v2_imagenet_ssld",
type="cv/classification",
author="paddlepaddle",
author_email="",
summary="mobilenet_v2_imagenet_ssld is a classification model, "
"this module is trained with Imagenet dataset.",
version="1.1.0",
meta=ImageClassifierModule)
class MobileNet(nn.Layer):
"""MobileNetV2"""
def __init__(self, class_dim: int = 1000, load_checkpoint: str = None):
super(MobileNet, self).__init__()
self.class_dim = class_dim
bottleneck_params_list = [(1, 16, 1, 1), (6, 24, 2, 2), (6, 32, 3, 2), (6, 64, 4, 2), (6, 96, 3, 1),
(6, 160, 3, 2), (6, 320, 1, 1)]
self.conv1 = ConvBNLayer(num_channels=3,
num_filters=int(32),
filter_size=3,
stride=2,
padding=1,
name="conv1_1")
self.block_list = []
i = 1
in_c = int(32)
for layer_setting in bottleneck_params_list:
t, c, n, s = layer_setting
i += 1
block = self.add_sublayer("conv" + str(i),
sublayer=InversiBlocks(in_c=in_c, t=t, c=int(c), n=n, s=s, name="conv" + str(i)))
self.block_list.append(block)
in_c = int(c)
self.out_c = 1280
self.conv9 = ConvBNLayer(num_channels=in_c,
num_filters=self.out_c,
filter_size=1,
stride=1,
padding=0,
name="conv9")
self.pool2d_avg = AdaptiveAvgPool2d(1)
self.out = Linear(self.out_c,
class_dim,
weight_attr=ParamAttr(name="fc10_weights"),
bias_attr=ParamAttr(name="fc10_offset"))
if load_checkpoint is not None:
model_dict = paddle.load(load_checkpoint)[0]
self.set_dict(model_dict)
print("load custom checkpoint success")
else:
checkpoint = os.path.join(self.directory, 'mobilenet_v2_ssld.pdparams.pdparams')
if not os.path.exists(checkpoint):
os.system(
'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/mobilenet_v2_ssld.pdparams -O ' +
checkpoint)
model_dict = paddle.load(checkpoint)[0]
self.set_dict(model_dict)
print("load pretrained checkpoint success")
def forward(self, inputs: paddle.Tensor):
y = self.conv1(inputs, if_act=True)
for block in self.block_list:
y = block(y)
y = self.conv9(y, if_act=True)
y = self.pool2d_avg(y)
y = paddle.reshape(y, shape=[-1, self.out_c])
y = self.out(y)
return y
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import base64
import cv2
import os
import numpy as np
def base64_to_cv2(b64str):
data = base64.b64decode(b64str.encode('utf8'))
data = np.fromstring(data, np.uint8)
data = cv2.imdecode(data, cv2.IMREAD_COLOR)
return data
def softmax(x):
orig_shape = x.shape
if len(x.shape) > 1:
tmp = np.max(x, axis=1)
x -= tmp.reshape((x.shape[0], 1))
x = np.exp(x)
tmp = np.sum(x, axis=1)
x /= tmp.reshape((x.shape[0], 1))
else:
tmp = np.max(x)
x -= tmp
x = np.exp(x)
tmp = np.sum(x)
x /= tmp
return x
def postprocess(data_out, label_list, top_k):
"""
Postprocess output of network, one image at a time.
Args:
data_out (numpy.ndarray): output data of network.
label_list (list): list of label.
top_k (int): Return top k results.
"""
output = []
for result in data_out:
result_i = softmax(result)
output_i = {}
indexs = np.argsort(result_i)[::-1][0:top_k]
for index in indexs:
label = label_list[index].split(',')[0]
output_i[label] = float(result_i[index])
output.append(output_i)
return output
```shell
$ hub install mobilenet_v3_large_imagenet_ssld==1.0.0
```
## 命令行预测
```
hub run mobilenet_v3_large_imagenet_ssld --input_path "/PATH/TO/IMAGE"
```
## API
```python
def get_expected_image_width()
```
返回预处理的图片宽度,也就是224。
```python
def get_expected_image_height()
```
返回预处理的图片高度,也就是224。
```python
def get_pretrained_images_mean()
```
返回预处理的图片均值,也就是 \[0.485, 0.456, 0.406\]
```python
def get_pretrained_images_std()
```
返回预处理的图片标准差,也就是 \[0.229, 0.224, 0.225\]
```python
def context(trainable=True, pretrained=True)
```
**参数**
* trainable (bool): 计算图的参数是否为可训练的;
* pretrained (bool): 是否加载默认的预训练模型。
**返回**
* inputs (dict): 计算图的输入,key 为 'image', value 为图片的张量;
* outputs (dict): 计算图的输出,key 为 'classification' 和 'feature_map',其相应的值为:
* classification (paddle.fluid.framework.Variable): 分类结果,也就是全连接层的输出;
* feature\_map (paddle.fluid.framework.Variable): 特征匹配,全连接层前面的那个张量。
* context\_prog(fluid.Program): 计算图,用于迁移学习。
```python
def classification(images=None,
paths=None,
batch_size=1,
use_gpu=False,
top_k=1):
```
**参数**
* images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
* paths (list\[str\]): 图片的路径;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU 来预测;
* top\_k (int): 返回预测结果的前 k 个。
**返回**
res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别动物的类别,value为置信度。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存在模型的目录名称
* model_filename: 模型文件名称,默认为\_\_model\_\_
* params_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
classifier = hub.Module(name="mobilenet_v3_large_imagenet_ssld")
result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = classifier.classification(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个在线动物识别服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m mobilenet_v3_large_imagenet_ssld
```
这样就完成了一个在线动物识别服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/mobilenet_v3_large_imagenet_ssld"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 查看代码
[PaddleClas](https://github.com/PaddlePaddle/PaddleClas)
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
# coding=utf-8
import os
import time
from collections import OrderedDict
import cv2
import numpy as np
from PIL import Image
__all__ = ['reader']
DATA_DIM = 224
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
def resize_short(img, target_size):
percent = float(target_size) / min(img.size[0], img.size[1])
resized_width = int(round(img.size[0] * percent))
resized_height = int(round(img.size[1] * percent))
img = img.resize((resized_width, resized_height), Image.LANCZOS)
return img
def crop_image(img, target_size, center):
width, height = img.size
size = target_size
if center == True:
w_start = (width - size) / 2
h_start = (height - size) / 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img.crop((w_start, h_start, w_end, h_end))
return img
def process_image(img):
img = resize_short(img, target_size=256)
img = crop_image(img, target_size=DATA_DIM, center=True)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
img -= img_mean
img /= img_std
return img
def reader(images=None, paths=None):
"""
Preprocess to yield image.
Args:
images (list[numpy.ndarray]): images data, shape of each is [H, W, C].
paths (list[str]): paths to images.
Yield:
each (collections.OrderedDict): info of original image, preprocessed image.
"""
component = list()
if paths:
for im_path in paths:
each = OrderedDict()
assert os.path.isfile(
im_path), "The {} isn't a valid file path.".format(im_path)
each['org_im_path'] = im_path
each['org_im'] = Image.open(im_path)
each['org_im_width'], each['org_im_height'] = each['org_im'].size
component.append(each)
if images is not None:
assert type(images), "images is a list."
for im in images:
each = OrderedDict()
each['org_im'] = Image.fromarray(im[:, :, ::-1])
each['org_im_path'] = 'ndarray_time={}'.format(
round(time.time(), 6) * 1e6)
each['org_im_width'], each['org_im_height'] = each['org_im'].size
component.append(each)
for element in component:
element['image'] = process_image(element['org_im'])
yield element
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = [
'MobileNetV3', 'MobileNetV3_small_x0_35', 'MobileNetV3_small_x0_5',
'MobileNetV3_small_x0_75', 'MobileNetV3_small_x1_0',
'MobileNetV3_small_x1_25', 'MobileNetV3_large_x0_35',
'MobileNetV3_large_x0_5', 'MobileNetV3_large_x0_75',
'MobileNetV3_large_x1_0', 'MobileNetV3_large_x1_25'
]
class MobileNetV3():
def __init__(self, scale=1.0, model_name='small'):
self.scale = scale
self.inplanes = 16
if model_name == "large":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, 'relu', 1],
[3, 64, 24, False, 'relu', 2],
[3, 72, 24, False, 'relu', 1],
[5, 72, 40, True, 'relu', 2],
[5, 120, 40, True, 'relu', 1],
[5, 120, 40, True, 'relu', 1],
[3, 240, 80, False, 'hard_swish', 2],
[3, 200, 80, False, 'hard_swish', 1],
[3, 184, 80, False, 'hard_swish', 1],
[3, 184, 80, False, 'hard_swish', 1],
[3, 480, 112, True, 'hard_swish', 1],
[3, 672, 112, True, 'hard_swish', 1],
[5, 672, 160, True, 'hard_swish', 2],
[5, 960, 160, True, 'hard_swish', 1],
[5, 960, 160, True, 'hard_swish', 1],
]
self.cls_ch_squeeze = 960
self.cls_ch_expand = 1280
elif model_name == "small":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, 'relu', 2],
[3, 72, 24, False, 'relu', 2],
[3, 88, 24, False, 'relu', 1],
[5, 96, 40, True, 'hard_swish', 2],
[5, 240, 40, True, 'hard_swish', 1],
[5, 240, 40, True, 'hard_swish', 1],
[5, 120, 48, True, 'hard_swish', 1],
[5, 144, 48, True, 'hard_swish', 1],
[5, 288, 96, True, 'hard_swish', 2],
[5, 576, 96, True, 'hard_swish', 1],
[5, 576, 96, True, 'hard_swish', 1],
]
self.cls_ch_squeeze = 576
self.cls_ch_expand = 1280
else:
raise NotImplementedError("mode[" + model_name +
"_model] is not implemented!")
def net(self, input, class_dim=1000):
scale = self.scale
inplanes = self.inplanes
cfg = self.cfg
cls_ch_squeeze = self.cls_ch_squeeze
cls_ch_expand = self.cls_ch_expand
#conv1
conv = self.conv_bn_layer(
input,
filter_size=3,
num_filters=self.make_divisible(inplanes * scale),
stride=2,
padding=1,
num_groups=1,
if_act=True,
act='hard_swish',
name='conv1')
i = 0
inplanes = self.make_divisible(inplanes * scale)
for layer_cfg in cfg:
conv = self.residual_unit(
input=conv,
num_in_filter=inplanes,
num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
num_out_filter=self.make_divisible(scale * layer_cfg[2]),
act=layer_cfg[4],
stride=layer_cfg[5],
filter_size=layer_cfg[0],
use_se=layer_cfg[3],
name='conv' + str(i + 2))
inplanes = self.make_divisible(scale * layer_cfg[2])
i += 1
conv = self.conv_bn_layer(
input=conv,
filter_size=1,
num_filters=self.make_divisible(scale * cls_ch_squeeze),
stride=1,
padding=0,
num_groups=1,
if_act=True,
act='hard_swish',
name='conv_last')
conv = fluid.layers.pool2d(
input=conv, pool_type='avg', global_pooling=True, use_cudnn=False)
conv = fluid.layers.conv2d(
input=conv,
num_filters=cls_ch_expand,
filter_size=1,
stride=1,
padding=0,
act=None,
param_attr=ParamAttr(name='last_1x1_conv_weights'),
bias_attr=False)
conv = fluid.layers.hard_swish(conv)
drop = fluid.layers.dropout(x=conv, dropout_prob=0.2)
out = fluid.layers.fc(
input=drop,
size=class_dim,
param_attr=ParamAttr(name='fc_weights'),
bias_attr=ParamAttr(name='fc_offset'))
return out, drop
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
num_groups=1,
if_act=True,
act=None,
name=None,
use_cudnn=True,
res_last_bn_init=False):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
bn_name = name + '_bn'
bn = fluid.layers.batch_norm(
input=conv,
param_attr=ParamAttr(
name=bn_name + "_scale",
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0)),
bias_attr=ParamAttr(
name=bn_name + "_offset",
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0)),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
if if_act:
if act == 'relu':
bn = fluid.layers.relu(bn)
elif act == 'hard_swish':
bn = fluid.layers.hard_swish(bn)
return bn
def make_divisible(self, v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
def se_block(self, input, num_out_filter, ratio=4, name=None):
num_mid_filter = num_out_filter // ratio
pool = fluid.layers.pool2d(
input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
conv1 = fluid.layers.conv2d(
input=pool,
filter_size=1,
num_filters=num_mid_filter,
act='relu',
param_attr=ParamAttr(name=name + '_1_weights'),
bias_attr=ParamAttr(name=name + '_1_offset'))
conv2 = fluid.layers.conv2d(
input=conv1,
filter_size=1,
num_filters=num_out_filter,
act='hard_sigmoid',
param_attr=ParamAttr(name=name + '_2_weights'),
bias_attr=ParamAttr(name=name + '_2_offset'))
scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
return scale
def residual_unit(self,
input,
num_in_filter,
num_mid_filter,
num_out_filter,
stride,
filter_size,
act=None,
use_se=False,
name=None):
conv0 = self.conv_bn_layer(
input=input,
filter_size=1,
num_filters=num_mid_filter,
stride=1,
padding=0,
if_act=True,
act=act,
name=name + '_expand')
conv1 = self.conv_bn_layer(
input=conv0,
filter_size=filter_size,
num_filters=num_mid_filter,
stride=stride,
padding=int((filter_size - 1) // 2),
if_act=True,
act=act,
num_groups=num_mid_filter,
use_cudnn=False,
name=name + '_depthwise')
if use_se:
conv1 = self.se_block(
input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
conv2 = self.conv_bn_layer(
input=conv1,
filter_size=1,
num_filters=num_out_filter,
stride=1,
padding=0,
if_act=False,
name=name + '_linear',
res_last_bn_init=True)
if num_in_filter != num_out_filter or stride != 1:
return conv2
else:
return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
def MobileNetV3_small_x0_35():
model = MobileNetV3(model_name='small', scale=0.35)
return model
def MobileNetV3_small_x0_5():
model = MobileNetV3(model_name='small', scale=0.5)
return model
def MobileNetV3_small_x0_75():
model = MobileNetV3(model_name='small', scale=0.75)
return model
def MobileNetV3_small_x1_0():
model = MobileNetV3(model_name='small', scale=1.0)
return model
def MobileNetV3_small_x1_25():
model = MobileNetV3(model_name='small', scale=1.25)
return model
def MobileNetV3_large_x0_35():
model = MobileNetV3(model_name='large', scale=0.35)
return model
def MobileNetV3_large_x0_5():
model = MobileNetV3(model_name='large', scale=0.5)
return model
def MobileNetV3_large_x0_75():
model = MobileNetV3(model_name='large', scale=0.75)
return model
def MobileNetV3_large_x1_0():
model = MobileNetV3(model_name='large', scale=1.0)
return model
def MobileNetV3_large_x1_25():
model = MobileNetV3(model_name='large', scale=1.25)
return model
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import base64
import cv2
import os
import numpy as np
def base64_to_cv2(b64str):
data = base64.b64decode(b64str.encode('utf8'))
data = np.fromstring(data, np.uint8)
data = cv2.imdecode(data, cv2.IMREAD_COLOR)
return data
def softmax(x):
orig_shape = x.shape
if len(x.shape) > 1:
tmp = np.max(x, axis=1)
x -= tmp.reshape((x.shape[0], 1))
x = np.exp(x)
tmp = np.sum(x, axis=1)
x /= tmp.reshape((x.shape[0], 1))
else:
tmp = np.max(x)
x -= tmp
x = np.exp(x)
tmp = np.sum(x)
x /= tmp
return x
def postprocess(data_out, label_list, top_k):
"""
Postprocess output of network, one image at a time.
Args:
data_out (numpy.ndarray): output data of network.
label_list (list): list of label.
top_k (int): Return top k results.
"""
output = []
for result in data_out:
result_i = softmax(result)
output_i = {}
indexs = np.argsort(result_i)[::-1][0:top_k]
for index in indexs:
label = label_list[index].split(',')[0]
output_i[label] = float(result_i[index])
output.append(output_i)
return output
```shell
$ hub install mobilenet_v2_animals==1.0.0
```
## 命令行预测
```
hub run mobilenet_v2_animals --input_path "/PATH/TO/IMAGE"
```
## API
```python
def get_expected_image_width()
```
返回预处理的图片宽度,也就是224。
```python
def get_expected_image_height()
```
返回预处理的图片高度,也就是224。
```python
def get_pretrained_images_mean()
```
返回预处理的图片均值,也就是 \[0.485, 0.456, 0.406\]
```python
def get_pretrained_images_std()
```
返回预处理的图片标准差,也就是 \[0.229, 0.224, 0.225\]
```python
def context(trainable=True, pretrained=True)
```
**参数**
* trainable (bool): 计算图的参数是否为可训练的;
* pretrained (bool): 是否加载默认的预训练模型。
**返回**
* inputs (dict): 计算图的输入,key 为 'image', value 为图片的张量;
* outputs (dict): 计算图的输出,key 为 'classification' 和 'feature_map',其相应的值为:
* classification (paddle.fluid.framework.Variable): 分类结果,也就是全连接层的输出;
* feature\_map (paddle.fluid.framework.Variable): 特征匹配,全连接层前面的那个张量。
* context\_prog(fluid.Program): 计算图,用于迁移学习。
```python
def classification(images=None,
paths=None,
batch_size=1,
use_gpu=False,
top_k=1):
```
**参数**
* images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
* paths (list\[str\]): 图片的路径;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU 来预测;
* top\_k (int): 返回预测结果的前 k 个。
**返回**
res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别动物的类别,value为置信度。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存在模型的目录名称
* model_filename: 模型文件名称,默认为\_\_model\_\_
* params_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
classifier = hub.Module(name="mobilenet_v2_animals")
result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = classifier.classification(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个在线动物识别服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m mobilenet_v2_animals
```
这样就完成了一个在线动物识别服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/mobilenet_v2_animals"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 查看代码
[PaddleClas](https://github.com/PaddlePaddle/PaddleClas)
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
# coding=utf-8
import os
import time
from collections import OrderedDict
import cv2
import numpy as np
from PIL import Image
__all__ = ['reader']
DATA_DIM = 224
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
def resize_short(img, target_size):
percent = float(target_size) / min(img.size[0], img.size[1])
resized_width = int(round(img.size[0] * percent))
resized_height = int(round(img.size[1] * percent))
img = img.resize((resized_width, resized_height), Image.LANCZOS)
return img
def crop_image(img, target_size, center):
width, height = img.size
size = target_size
if center == True:
w_start = (width - size) / 2
h_start = (height - size) / 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img.crop((w_start, h_start, w_end, h_end))
return img
def process_image(img):
img = resize_short(img, target_size=256)
img = crop_image(img, target_size=DATA_DIM, center=True)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
img -= img_mean
img /= img_std
return img
def reader(images=None, paths=None):
"""
Preprocess to yield image.
Args:
images (list[numpy.ndarray]): images data, shape of each is [H, W, C].
paths (list[str]): paths to images.
Yield:
each (collections.OrderedDict): info of original image, preprocessed image.
"""
component = list()
if paths:
for im_path in paths:
each = OrderedDict()
assert os.path.isfile(
im_path), "The {} isn't a valid file path.".format(im_path)
each['org_im_path'] = im_path
each['org_im'] = Image.open(im_path)
each['org_im_width'], each['org_im_height'] = each['org_im'].size
component.append(each)
if images is not None:
assert type(images), "images is a list."
for im in images:
each = OrderedDict()
each['org_im'] = Image.fromarray(im[:, :, ::-1])
each['org_im_path'] = 'ndarray_time={}'.format(
round(time.time(), 6) * 1e6)
each['org_im_width'], each['org_im_height'] = each['org_im'].size
component.append(each)
for element in component:
element['image'] = process_image(element['org_im'])
yield element
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = [
'MobileNetV3', 'MobileNetV3_small_x0_35', 'MobileNetV3_small_x0_5',
'MobileNetV3_small_x0_75', 'MobileNetV3_small_x1_0',
'MobileNetV3_small_x1_25', 'MobileNetV3_large_x0_35',
'MobileNetV3_large_x0_5', 'MobileNetV3_large_x0_75',
'MobileNetV3_large_x1_0', 'MobileNetV3_large_x1_25'
]
class MobileNetV3():
def __init__(self, scale=1.0, model_name='small'):
self.scale = scale
self.inplanes = 16
if model_name == "large":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, 'relu', 1],
[3, 64, 24, False, 'relu', 2],
[3, 72, 24, False, 'relu', 1],
[5, 72, 40, True, 'relu', 2],
[5, 120, 40, True, 'relu', 1],
[5, 120, 40, True, 'relu', 1],
[3, 240, 80, False, 'hard_swish', 2],
[3, 200, 80, False, 'hard_swish', 1],
[3, 184, 80, False, 'hard_swish', 1],
[3, 184, 80, False, 'hard_swish', 1],
[3, 480, 112, True, 'hard_swish', 1],
[3, 672, 112, True, 'hard_swish', 1],
[5, 672, 160, True, 'hard_swish', 2],
[5, 960, 160, True, 'hard_swish', 1],
[5, 960, 160, True, 'hard_swish', 1],
]
self.cls_ch_squeeze = 960
self.cls_ch_expand = 1280
elif model_name == "small":
self.cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, 'relu', 2],
[3, 72, 24, False, 'relu', 2],
[3, 88, 24, False, 'relu', 1],
[5, 96, 40, True, 'hard_swish', 2],
[5, 240, 40, True, 'hard_swish', 1],
[5, 240, 40, True, 'hard_swish', 1],
[5, 120, 48, True, 'hard_swish', 1],
[5, 144, 48, True, 'hard_swish', 1],
[5, 288, 96, True, 'hard_swish', 2],
[5, 576, 96, True, 'hard_swish', 1],
[5, 576, 96, True, 'hard_swish', 1],
]
self.cls_ch_squeeze = 576
self.cls_ch_expand = 1280
else:
raise NotImplementedError("mode[" + model_name +
"_model] is not implemented!")
def net(self, input, class_dim=1000):
scale = self.scale
inplanes = self.inplanes
cfg = self.cfg
cls_ch_squeeze = self.cls_ch_squeeze
cls_ch_expand = self.cls_ch_expand
#conv1
conv = self.conv_bn_layer(
input,
filter_size=3,
num_filters=self.make_divisible(inplanes * scale),
stride=2,
padding=1,
num_groups=1,
if_act=True,
act='hard_swish',
name='conv1')
i = 0
inplanes = self.make_divisible(inplanes * scale)
for layer_cfg in cfg:
conv = self.residual_unit(
input=conv,
num_in_filter=inplanes,
num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
num_out_filter=self.make_divisible(scale * layer_cfg[2]),
act=layer_cfg[4],
stride=layer_cfg[5],
filter_size=layer_cfg[0],
use_se=layer_cfg[3],
name='conv' + str(i + 2))
inplanes = self.make_divisible(scale * layer_cfg[2])
i += 1
conv = self.conv_bn_layer(
input=conv,
filter_size=1,
num_filters=self.make_divisible(scale * cls_ch_squeeze),
stride=1,
padding=0,
num_groups=1,
if_act=True,
act='hard_swish',
name='conv_last')
conv = fluid.layers.pool2d(
input=conv, pool_type='avg', global_pooling=True, use_cudnn=False)
conv = fluid.layers.conv2d(
input=conv,
num_filters=cls_ch_expand,
filter_size=1,
stride=1,
padding=0,
act=None,
param_attr=ParamAttr(name='last_1x1_conv_weights'),
bias_attr=False)
conv = fluid.layers.hard_swish(conv)
drop = fluid.layers.dropout(x=conv, dropout_prob=0.2)
out = fluid.layers.fc(
input=drop,
size=class_dim,
param_attr=ParamAttr(name='fc_weights'),
bias_attr=ParamAttr(name='fc_offset'))
return out, drop
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
num_groups=1,
if_act=True,
act=None,
name=None,
use_cudnn=True,
res_last_bn_init=False):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
bn_name = name + '_bn'
bn = fluid.layers.batch_norm(
input=conv,
param_attr=ParamAttr(
name=bn_name + "_scale",
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0)),
bias_attr=ParamAttr(
name=bn_name + "_offset",
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0)),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
if if_act:
if act == 'relu':
bn = fluid.layers.relu(bn)
elif act == 'hard_swish':
bn = fluid.layers.hard_swish(bn)
return bn
def make_divisible(self, v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
def se_block(self, input, num_out_filter, ratio=4, name=None):
num_mid_filter = num_out_filter // ratio
pool = fluid.layers.pool2d(
input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
conv1 = fluid.layers.conv2d(
input=pool,
filter_size=1,
num_filters=num_mid_filter,
act='relu',
param_attr=ParamAttr(name=name + '_1_weights'),
bias_attr=ParamAttr(name=name + '_1_offset'))
conv2 = fluid.layers.conv2d(
input=conv1,
filter_size=1,
num_filters=num_out_filter,
act='hard_sigmoid',
param_attr=ParamAttr(name=name + '_2_weights'),
bias_attr=ParamAttr(name=name + '_2_offset'))
scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
return scale
def residual_unit(self,
input,
num_in_filter,
num_mid_filter,
num_out_filter,
stride,
filter_size,
act=None,
use_se=False,
name=None):
conv0 = self.conv_bn_layer(
input=input,
filter_size=1,
num_filters=num_mid_filter,
stride=1,
padding=0,
if_act=True,
act=act,
name=name + '_expand')
conv1 = self.conv_bn_layer(
input=conv0,
filter_size=filter_size,
num_filters=num_mid_filter,
stride=stride,
padding=int((filter_size - 1) // 2),
if_act=True,
act=act,
num_groups=num_mid_filter,
use_cudnn=False,
name=name + '_depthwise')
if use_se:
conv1 = self.se_block(
input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
conv2 = self.conv_bn_layer(
input=conv1,
filter_size=1,
num_filters=num_out_filter,
stride=1,
padding=0,
if_act=False,
name=name + '_linear',
res_last_bn_init=True)
if num_in_filter != num_out_filter or stride != 1:
return conv2
else:
return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
def MobileNetV3_small_x0_35():
model = MobileNetV3(model_name='small', scale=0.35)
return model
def MobileNetV3_small_x0_5():
model = MobileNetV3(model_name='small', scale=0.5)
return model
def MobileNetV3_small_x0_75():
model = MobileNetV3(model_name='small', scale=0.75)
return model
def MobileNetV3_small_x1_0():
model = MobileNetV3(model_name='small', scale=1.0)
return model
def MobileNetV3_small_x1_25():
model = MobileNetV3(model_name='small', scale=1.25)
return model
def MobileNetV3_large_x0_35():
model = MobileNetV3(model_name='large', scale=0.35)
return model
def MobileNetV3_large_x0_5():
model = MobileNetV3(model_name='large', scale=0.5)
return model
def MobileNetV3_large_x0_75():
model = MobileNetV3(model_name='large', scale=0.75)
return model
def MobileNetV3_large_x1_0():
model = MobileNetV3(model_name='large', scale=1.0)
return model
def MobileNetV3_large_x1_25():
model = MobileNetV3(model_name='large', scale=1.25)
return model
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import base64
import cv2
import os
import numpy as np
def base64_to_cv2(b64str):
data = base64.b64decode(b64str.encode('utf8'))
data = np.fromstring(data, np.uint8)
data = cv2.imdecode(data, cv2.IMREAD_COLOR)
return data
def softmax(x):
orig_shape = x.shape
if len(x.shape) > 1:
tmp = np.max(x, axis=1)
x -= tmp.reshape((x.shape[0], 1))
x = np.exp(x)
tmp = np.sum(x, axis=1)
x /= tmp.reshape((x.shape[0], 1))
else:
tmp = np.max(x)
x -= tmp
x = np.exp(x)
tmp = np.sum(x)
x /= tmp
return x
def postprocess(data_out, label_list, top_k):
"""
Postprocess output of network, one image at a time.
Args:
data_out (numpy.ndarray): output data of network.
label_list (list): list of label.
top_k (int): Return top k results.
"""
output = []
for result in data_out:
result_i = softmax(result)
output_i = {}
indexs = np.argsort(result_i)[::-1][0:top_k]
for index in indexs:
label = label_list[index].split(',')[0]
output_i[label] = float(result_i[index])
output.append(output_i)
return output
<p align="center">
<img src="http://bj.bcebos.com/ibox-thumbnail98/77fa9b7003e4665867855b2b65216519?authorization=bce-auth-v1%2Ffbe74140929444858491fbf2b6bc0935%2F2020-04-08T11%3A05%3A10Z%2F1800%2F%2F1df0ecb4a52adefeae240c9e2189e8032560333e399b3187ef1a76e4ffa5f19f" hspace='5' width=800/> <br /> ResNet 系列的网络结构
</p>
模型的详情可参考[论文](https://arxiv.org/pdf/1812.01187.pdf)
## API
```python
def get_expected_image_width()
```
返回预处理的图片宽度,也就是224。
```python
def get_expected_image_height()
```
返回预处理的图片高度,也就是224。
```python
def get_pretrained_images_mean()
```
返回预处理的图片均值,也就是 \[0.485, 0.456, 0.406\]
```python
def get_pretrained_images_std()
```
返回预处理的图片标准差,也就是 \[0.229, 0.224, 0.225\]
```python
def context(trainable=True, pretrained=True)
```
**参数**
* trainable (bool): 计算图的参数是否为可训练的;
* pretrained (bool): 是否加载默认的预训练模型。
**返回**
* inputs (dict): 计算图的输入,key 为 'image', value 为图片的张量;
* outputs (dict): 计算图的输出,key 为 'feature\_map', value为全连接层前面的那个张量。
* context\_prog(fluid.Program): 计算图,用于迁移学习。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存在模型的目录名称
* model_filename: 模型文件名称,默认为\_\_model\_\_
* params_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
classifier = hub.Module(name="resnet50_vd_10w")
input_dict, output_dict, program = classifier.context(trainable=True)
```
### 查看代码
[PaddleClas](https://github.com/PaddlePaddle/PaddleClas)
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
# coding=utf-8
import os
import time
from collections import OrderedDict
import cv2
import numpy as np
from PIL import Image
__all__ = ['reader']
DATA_DIM = 224
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
def resize_short(img, target_size):
percent = float(target_size) / min(img.size[0], img.size[1])
resized_width = int(round(img.size[0] * percent))
resized_height = int(round(img.size[1] * percent))
img = img.resize((resized_width, resized_height), Image.LANCZOS)
return img
def crop_image(img, target_size, center):
width, height = img.size
size = target_size
if center == True:
w_start = (width - size) / 2
h_start = (height - size) / 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img.crop((w_start, h_start, w_end, h_end))
return img
def process_image(img):
img = resize_short(img, target_size=256)
img = crop_image(img, target_size=DATA_DIM, center=True)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
img -= img_mean
img /= img_std
return img
def reader(images=None, paths=None):
"""
Preprocess to yield image.
Args:
images (list[numpy.ndarray]): images data, shape of each is [H, W, C].
paths (list[str]): paths to images.
Yield:
each (collections.OrderedDict): info of original image, preprocessed image.
"""
component = list()
if paths:
for im_path in paths:
each = OrderedDict()
assert os.path.isfile(
im_path), "The {} isn't a valid file path.".format(im_path)
each['org_im_path'] = im_path
each['org_im'] = Image.open(im_path)
each['org_im_width'], each['org_im_height'] = each['org_im'].size
component.append(each)
if images is not None:
assert type(images), "images is a list."
for im in images:
each = OrderedDict()
each['org_im'] = Image.fromarray(im[:, :, ::-1])
each['org_im_path'] = 'ndarray_time={}'.format(
round(time.time(), 6) * 1e6)
each['org_im_width'], each['org_im_height'] = each['org_im'].size
component.append(each)
for element in component:
element['image'] = process_image(element['org_im'])
yield element
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -15,6 +15,7 @@
import os
import math
import random
import copy
from typing import Callable
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册