update mobilenet_v3_large_imagenet_ssld

e081f3fb · haoyuying · GitHub · c07d1ffe · c07d1ffe · c07d1ffe
6 changed files
--- a/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/README.md
+++ b/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/README.md
-```shell
-$ hub install mobilenet_v3_large_imagenet_ssld==1.0.0
-```
-
-## 命令行预测
-
-```
-hub run mobilenet_v3_large_imagenet_ssld --input_path "/PATH/TO/IMAGE"
-```
-
-## API
-
-```python
-def get_expected_image_width()
-```
-
-返回预处理的图片宽度，也就是224。
-
-```python
-def get_expected_image_height()
-```
-
-返回预处理的图片高度，也就是224。
-
-```python
-def get_pretrained_images_mean()
-```
-
-返回预处理的图片均值，也就是 \[0.485, 0.456, 0.406\]。
-
-```python
-def get_pretrained_images_std()
-```
-
-返回预处理的图片标准差，也就是 \[0.229, 0.224, 0.225\]。
-
-
-```python
-def context(trainable=True, pretrained=True)
-```
-
-**参数**
-
-* trainable (bool): 计算图的参数是否为可训练的；
-* pretrained (bool): 是否加载默认的预训练模型。
-
-**返回**
-
-* inputs (dict): 计算图的输入，key 为 'image', value 为图片的张量；
-* outputs (dict): 计算图的输出，key 为 'classification' 和 'feature_map'，其相应的值为：
-    * classification (paddle.fluid.framework.Variable): 分类结果，也就是全连接层的输出；
-    * feature\_map (paddle.fluid.framework.Variable): 特征图，即全连接层之前的那个张量。
-* context\_prog(fluid.Program): 计算图，用于迁移学习。
-
-```python
-def classification(images=None,
-                   paths=None,
-                   batch_size=1,
-                   use_gpu=False,
-                   top_k=1):
-```
-
-**参数**
-
-* images (list\[numpy.ndarray\]): 图片数据，每一个图片数据的shape 均为 \[H, W, C\]，颜色空间为 BGR；
-* paths (list\[str\]): 图片的路径；
-* batch\_size (int): batch 的大小；
-* use\_gpu (bool): 是否使用 GPU 来预测；
-* top\_k (int): 返回预测结果的前 k 个。
-
-**返回**
-
-res (list\[dict\]): 分类结果，列表的每一个元素均为字典，其中 key 为预测的类别标签，value 为置信度。
-
-```python
-def save_inference_model(dirname,
-                         model_filename=None,
-                         params_filename=None,
-                         combined=True)
-```
-
-将模型保存到指定路径。
-
-**参数**
-
-* dirname: 存放模型的目录名称
-* model_filename: 模型文件名称，默认为\_\_model\_\_
-* params_filename: 参数文件名称，默认为\_\_params\_\_(仅当`combined`为True时生效)
-* combined: 是否将参数保存到统一的一个文件中
-
-## 代码示例
-
-```python
-import paddlehub as hub
-import cv2
-
-classifier = hub.Module(name="mobilenet_v3_large_imagenet_ssld")
-
-result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')])
-# or
-# result = classifier.classification(paths=['/PATH/TO/IMAGE'])
-```
-
-## 服务部署
-
-PaddleHub Serving可以部署一个在线图像分类服务。
-
-## 第一步：启动PaddleHub Serving
-
-运行启动命令：
-```shell
-$ hub serving start -m mobilenet_v3_large_imagenet_ssld
-```
-
-这样就完成了一个在线图像分类服务化API的部署，默认端口号为8866。
-
-**NOTE:** 如使用GPU预测，则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量；否则无需设置。
-
-## 第二步：发送预测请求
-
-配置好服务端后，以下数行代码即可实现发送预测请求并获取预测结果。
-
-```python
-import requests
-import json
-import cv2
-import base64
-
-
-def cv2_to_base64(image):
-    # JPEG-encode the image, then base64-encode the raw bytes so that the
-    # result can be embedded in a JSON request body.
-    data = cv2.imencode('.jpg', image)[1]
-    # tobytes() replaces the deprecated ndarray.tostring() alias, which newer
-    # NumPy releases no longer provide.
-    return base64.b64encode(data.tobytes()).decode('utf8')
-
-
-# 发送HTTP请求
-data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
-headers = {"Content-type": "application/json"}
-url = "http://127.0.0.1:8866/predict/mobilenet_v3_large_imagenet_ssld"
-r = requests.post(url=url, headers=headers, data=json.dumps(data))
-
-# 打印预测结果
-print(r.json()["results"])
-```
-
-### 查看代码
-
-[PaddleClas](https://github.com/PaddlePaddle/PaddleClas)
-
-### 依赖
-
-paddlepaddle >= 1.6.2
-
-paddlehub >= 1.6.0
--- a/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/data_feed.py
+++ b/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/data_feed.py
-# coding=utf-8
-import os
-import time
-from collections import OrderedDict
-
-import cv2
-import numpy as np
-from PIL import Image
-
-# Names exported by `from data_feed import *`: only the `reader` generator.
-__all__ = ['reader']
-
-# Side length (in pixels) of the square crop produced by the preprocessing.
-DATA_DIM = 224
-# Per-channel mean and standard deviation used for normalisation, shaped
-# (3, 1, 1) so they broadcast over a channel-first (C, H, W) array.
-img_mean = np.array([0.485, 0.456, 0.406], dtype='float64').reshape(3, 1, 1)
-img_std = np.array([0.229, 0.224, 0.225], dtype='float64').reshape(3, 1, 1)
-
-
-def resize_short(img, target_size):
-    """
-    Resize an image so that its shorter side becomes ``target_size``.
-
-    Both sides are scaled by the same factor and rounded to the nearest
-    integer, so the aspect ratio is preserved up to rounding.
-
-    Args:
-        img: image object exposing ``size`` as (width, height) and a
-            ``resize`` method (a PIL image in this module).
-        target_size (int): desired length of the shorter side.
-
-    Returns:
-        The resized image, resampled with ``Image.LANCZOS``.
-    """
-    width, height = img.size[0], img.size[1]
-    ratio = float(target_size) / min(width, height)
-    new_size = (int(round(width * ratio)), int(round(height * ratio)))
-    return img.resize(new_size, Image.LANCZOS)
-
-
-def crop_image(img, target_size, center):
-    """
-    Cut a ``target_size`` x ``target_size`` square out of an image.
-
-    Args:
-        img: image object exposing ``size`` as (width, height) and a
-            ``crop`` method taking a (left, upper, right, lower) box.
-        target_size (int): side length of the square crop.
-        center (bool): if true, crop around the image centre; otherwise pick
-            the top-left corner uniformly at random.
-
-    Returns:
-        The cropped image as returned by ``img.crop``.
-    """
-    width, height = img.size
-    if center:
-        # Floor division keeps the box coordinates integral. True division
-        # would hand float offsets (e.g. 16.5) to ``crop`` under Python 3.
-        w_start = (width - target_size) // 2
-        h_start = (height - target_size) // 2
-    else:
-        # randint's upper bound is exclusive, hence the +1.
-        w_start = np.random.randint(0, width - target_size + 1)
-        h_start = np.random.randint(0, height - target_size + 1)
-    box = (w_start, h_start, w_start + target_size, h_start + target_size)
-    return img.crop(box)
-
-
-def process_image(img):
-    """
-    Turn an image into a normalised, channel-first float32 array.
-
-    Steps: resize the shorter side to 256, centre-crop to
-    ``DATA_DIM`` x ``DATA_DIM``, convert to RGB if needed, scale pixel values
-    by 1/255, then subtract ``img_mean`` and divide by ``img_std``.
-
-    Args:
-        img: image accepted by ``resize_short`` / ``crop_image`` and exposing
-            ``mode`` and ``convert`` (a PIL image in this module).
-
-    Returns:
-        numpy.ndarray: the preprocessed image with axes ordered (C, H, W).
-    """
-    cropped = crop_image(
-        resize_short(img, target_size=256), target_size=DATA_DIM, center=True)
-    rgb = cropped if cropped.mode == 'RGB' else cropped.convert('RGB')
-    chw = np.array(rgb).astype('float32').transpose((2, 0, 1)) / 255
-    # In-place arithmetic keeps the array float32 even though the statistics
-    # are stored as float64.
-    chw -= img_mean
-    chw /= img_std
-    return chw
-
-
-def reader(images=None, paths=None):
-    """
-    Preprocess images and yield them one at a time.
-
-    Images named by ``paths`` are handled first, then the in-memory
-    ``images``. All inputs are loaded and validated on the first iteration,
-    before anything is yielded.
-
-    Args:
-        images (list[numpy.ndarray]): image arrays, each of shape [H, W, C].
-            The channel axis is reversed before the array is handed to PIL.
-        paths (list[str]): paths to image files.
-
-    Yield:
-        each (collections.OrderedDict): original image ('org_im'), its
-            path or a synthetic timestamp name ('org_im_path'), its width and
-            height ('org_im_width', 'org_im_height') and the preprocessed
-            array ('image').
-
-    Raises:
-        AssertionError: if a path is not an existing file, or if ``images``
-            is not a list or tuple.
-    """
-    component = []
-    if paths:
-        for im_path in paths:
-            each = OrderedDict()
-            assert os.path.isfile(
-                im_path), "The {} isn't a valid file path.".format(im_path)
-            each['org_im_path'] = im_path
-            each['org_im'] = Image.open(im_path)
-            each['org_im_width'], each['org_im_height'] = each['org_im'].size
-            component.append(each)
-    if images is not None:
-        # The previous check, ``assert type(images)``, was always true because
-        # a type object is truthy; test the container type for real.
-        assert isinstance(images, (list, tuple)), "images is a list."
-        for im in images:
-            each = OrderedDict()
-            # Reverse the channel axis before building the PIL image.
-            each['org_im'] = Image.fromarray(im[:, :, ::-1])
-            # No file name exists for in-memory data, so a timestamp-based
-            # placeholder is stored instead.
-            each['org_im_path'] = 'ndarray_time={}'.format(
-                round(time.time(), 6) * 1e6)
-            each['org_im_width'], each['org_im_height'] = each['org_im'].size
-            component.append(each)
-
-    for element in component:
-        element['image'] = process_image(element['org_im'])
-        yield element
--- a/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/label_list.txt
+++ b/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/label_list.txt
--- a/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/mobilenet_v3.py
+++ b/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/mobilenet_v3.py
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle.fluid as fluid
-from paddle.fluid.initializer import MSRA
-from paddle.fluid.param_attr import ParamAttr
-
-# Public names of this module: the network class followed by one factory per
-# (size, width scale) combination.
-__all__ = [
-    'MobileNetV3',
-    'MobileNetV3_small_x0_35',
-    'MobileNetV3_small_x0_5',
-    'MobileNetV3_small_x0_75',
-    'MobileNetV3_small_x1_0',
-    'MobileNetV3_small_x1_25',
-    'MobileNetV3_large_x0_35',
-    'MobileNetV3_large_x0_5',
-    'MobileNetV3_large_x0_75',
-    'MobileNetV3_large_x1_0',
-    'MobileNetV3_large_x1_25',
-]
-
-
-class MobileNetV3():
-    """
-    Builder for a MobileNetV3 classification graph using paddle.fluid layers.
-
-    The constructor only stores the configuration; the layers are created
-    when ``net`` is called. Most parameters are given explicit names derived
-    from the layer name, so statement order and naming should be treated as
-    part of the contract with any pretrained weights (NOTE(review): confirm
-    against the checkpoint this module loads).
-    """
-
-    def __init__(self, scale=1.0, model_name='small'):
-        """
-        Select the block configuration.
-
-        Args:
-            scale (float): multiplier applied to channel counts in ``net``
-                (each result is passed through ``make_divisible``).
-            model_name (str): either "large" or "small".
-
-        Raises:
-            NotImplementedError: for any other ``model_name``.
-        """
-        self.scale = scale
-        # Base channel count of the first convolution, before scaling.
-        self.inplanes = 16
-        # Each cfg row describes one residual unit. As consumed by ``net``:
-        #   [0] k   - depthwise filter size
-        #   [1] exp - number of expanded (middle) channels, before scaling
-        #   [2] c   - number of output channels, before scaling
-        #   [3] se  - whether an SE block is inserted
-        #   [4] nl  - activation name ('relu' or 'hard_swish')
-        #   [5] s   - stride of the depthwise convolution
-        if model_name == "large":
-            self.cfg = [
-                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, False, 'relu', 1],
-                [3, 64, 24, False, 'relu', 2],
-                [3, 72, 24, False, 'relu', 1],
-                [5, 72, 40, True, 'relu', 2],
-                [5, 120, 40, True, 'relu', 1],
-                [5, 120, 40, True, 'relu', 1],
-                [3, 240, 80, False, 'hard_swish', 2],
-                [3, 200, 80, False, 'hard_swish', 1],
-                [3, 184, 80, False, 'hard_swish', 1],
-                [3, 184, 80, False, 'hard_swish', 1],
-                [3, 480, 112, True, 'hard_swish', 1],
-                [3, 672, 112, True, 'hard_swish', 1],
-                [5, 672, 160, True, 'hard_swish', 2],
-                [5, 960, 160, True, 'hard_swish', 1],
-                [5, 960, 160, True, 'hard_swish', 1],
-            ]
-            # Channels of the last 1x1 conv+BN (scaled in ``net``) and of the
-            # 1x1 conv applied after global pooling (not scaled).
-            self.cls_ch_squeeze = 960
-            self.cls_ch_expand = 1280
-        elif model_name == "small":
-            self.cfg = [
-                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, True, 'relu', 2],
-                [3, 72, 24, False, 'relu', 2],
-                [3, 88, 24, False, 'relu', 1],
-                [5, 96, 40, True, 'hard_swish', 2],
-                [5, 240, 40, True, 'hard_swish', 1],
-                [5, 240, 40, True, 'hard_swish', 1],
-                [5, 120, 48, True, 'hard_swish', 1],
-                [5, 144, 48, True, 'hard_swish', 1],
-                [5, 288, 96, True, 'hard_swish', 2],
-                [5, 576, 96, True, 'hard_swish', 1],
-                [5, 576, 96, True, 'hard_swish', 1],
-            ]
-            self.cls_ch_squeeze = 576
-            self.cls_ch_expand = 1280
-        else:
-            raise NotImplementedError("mode[" + model_name +
-                                      "_model] is not implemented!")
-
-    def net(self, input, class_dim=1000):
-        """
-        Build the network on top of ``input``.
-
-        Args:
-            input: input variable fed to the first convolution.
-            class_dim (int): output size of the final fully connected layer.
-
-        Returns:
-            tuple: ``(out, drop)`` where ``out`` is the fully connected
-            output and ``drop`` is the dropout output that feeds it.
-        """
-        scale = self.scale
-        inplanes = self.inplanes
-        cfg = self.cfg
-        cls_ch_squeeze = self.cls_ch_squeeze
-        cls_ch_expand = self.cls_ch_expand
-        #conv1
-        conv = self.conv_bn_layer(
-            input,
-            filter_size=3,
-            num_filters=self.make_divisible(inplanes * scale),
-            stride=2,
-            padding=1,
-            num_groups=1,
-            if_act=True,
-            act='hard_swish',
-            name='conv1')
-        i = 0
-        inplanes = self.make_divisible(inplanes * scale)
-        # One residual unit per cfg row; units are named conv2, conv3, ...
-        for layer_cfg in cfg:
-            conv = self.residual_unit(
-                input=conv,
-                num_in_filter=inplanes,
-                num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
-                num_out_filter=self.make_divisible(scale * layer_cfg[2]),
-                act=layer_cfg[4],
-                stride=layer_cfg[5],
-                filter_size=layer_cfg[0],
-                use_se=layer_cfg[3],
-                name='conv' + str(i + 2))
-            # The next unit's input width is this unit's output width.
-            inplanes = self.make_divisible(scale * layer_cfg[2])
-            i += 1
-
-        conv = self.conv_bn_layer(
-            input=conv,
-            filter_size=1,
-            num_filters=self.make_divisible(scale * cls_ch_squeeze),
-            stride=1,
-            padding=0,
-            num_groups=1,
-            if_act=True,
-            act='hard_swish',
-            name='conv_last')
-        conv = fluid.layers.pool2d(
-            input=conv, pool_type='avg', global_pooling=True, use_cudnn=False)
-        # 1x1 convolution without bias or batch norm, followed by hard_swish.
-        conv = fluid.layers.conv2d(
-            input=conv,
-            num_filters=cls_ch_expand,
-            filter_size=1,
-            stride=1,
-            padding=0,
-            act=None,
-            param_attr=ParamAttr(name='last_1x1_conv_weights'),
-            bias_attr=False)
-        conv = fluid.layers.hard_swish(conv)
-        drop = fluid.layers.dropout(x=conv, dropout_prob=0.2)
-        out = fluid.layers.fc(
-            input=drop,
-            size=class_dim,
-            param_attr=ParamAttr(name='fc_weights'),
-            bias_attr=ParamAttr(name='fc_offset'))
-        return out, drop
-
-    def conv_bn_layer(self,
-                      input,
-                      filter_size,
-                      num_filters,
-                      stride,
-                      padding,
-                      num_groups=1,
-                      if_act=True,
-                      act=None,
-                      name=None,
-                      use_cudnn=True,
-                      res_last_bn_init=False):
-        """
-        Bias-free conv2d followed by batch norm and an optional activation.
-
-        Args:
-            input: input variable.
-            filter_size, num_filters, stride, padding: forwarded to conv2d.
-            num_groups (int): forwarded to conv2d as ``groups``.
-            if_act (bool): whether to apply an activation after batch norm.
-            act (str): 'relu' or 'hard_swish'; any other value leaves the
-                batch-norm output unactivated even when ``if_act`` is true.
-            name (str): prefix for parameter names. It is concatenated with
-                suffixes, so the default ``None`` would fail here.
-            use_cudnn (bool): forwarded to conv2d.
-            res_last_bn_init (bool): accepted but not read in this method.
-
-        Returns:
-            The batch-norm output, activated if requested.
-        """
-        conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=padding,
-            groups=num_groups,
-            act=None,
-            use_cudnn=use_cudnn,
-            param_attr=ParamAttr(name=name + '_weights'),
-            bias_attr=False)
-        bn_name = name + '_bn'
-        # Scale and offset carry an L2 regularizer with coefficient 0.0
-        # (presumably to exempt batch-norm parameters from weight decay).
-        bn = fluid.layers.batch_norm(
-            input=conv,
-            param_attr=ParamAttr(
-                name=bn_name + "_scale",
-                regularizer=fluid.regularizer.L2DecayRegularizer(
-                    regularization_coeff=0.0)),
-            bias_attr=ParamAttr(
-                name=bn_name + "_offset",
-                regularizer=fluid.regularizer.L2DecayRegularizer(
-                    regularization_coeff=0.0)),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
-        if if_act:
-            if act == 'relu':
-                bn = fluid.layers.relu(bn)
-            elif act == 'hard_swish':
-                bn = fluid.layers.hard_swish(bn)
-        return bn
-
-    def make_divisible(self, v, divisor=8, min_value=None):
-        """
-        Round ``v`` to a multiple of ``divisor``.
-
-        The value is rounded to the nearest multiple, clamped from below to
-        ``min_value`` (``divisor`` when not given), and bumped up by one
-        ``divisor`` if rounding left it below 90% of ``v``.
-
-        Returns:
-            int: the adjusted channel count.
-        """
-        if min_value is None:
-            min_value = divisor
-        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
-        if new_v < 0.9 * v:
-            new_v += divisor
-        return new_v
-
-    def se_block(self, input, num_out_filter, ratio=4, name=None):
-        """
-        Rescale ``input`` by gates computed from its globally pooled values.
-
-        Global average pooling is followed by a 1x1 conv with relu down to
-        ``num_out_filter // ratio`` channels and a 1x1 conv with hard_sigmoid
-        back to ``num_out_filter`` channels; the result multiplies ``input``.
-
-        Args:
-            input: input variable.
-            num_out_filter (int): channel count of the second convolution.
-            ratio (int): reduction factor for the first convolution.
-            name (str): prefix for parameter names (must not be None).
-
-        Returns:
-            The element-wise product of ``input`` and the computed gates.
-        """
-        num_mid_filter = num_out_filter // ratio
-        pool = fluid.layers.pool2d(
-            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
-        conv1 = fluid.layers.conv2d(
-            input=pool,
-            filter_size=1,
-            num_filters=num_mid_filter,
-            act='relu',
-            param_attr=ParamAttr(name=name + '_1_weights'),
-            bias_attr=ParamAttr(name=name + '_1_offset'))
-        conv2 = fluid.layers.conv2d(
-            input=conv1,
-            filter_size=1,
-            num_filters=num_out_filter,
-            act='hard_sigmoid',
-            param_attr=ParamAttr(name=name + '_2_weights'),
-            bias_attr=ParamAttr(name=name + '_2_offset'))
-        # NOTE(review): axis=0 presumably aligns conv2 with input starting at
-        # the first dimension - confirm against the elementwise_mul docs.
-        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
-        return scale
-
-    def residual_unit(self,
-                      input,
-                      num_in_filter,
-                      num_mid_filter,
-                      num_out_filter,
-                      stride,
-                      filter_size,
-                      act=None,
-                      use_se=False,
-                      name=None):
-        """
-        Build one block: 1x1 expand, depthwise conv, optional SE, 1x1 linear.
-
-        Args:
-            input: input variable.
-            num_in_filter (int): channel count of ``input``; only used to
-                decide whether the shortcut is added.
-            num_mid_filter (int): channels of the expand and depthwise convs.
-            num_out_filter (int): channels of the final 1x1 conv.
-            stride (int): stride of the depthwise conv.
-            filter_size (int): filter size of the depthwise conv.
-            act (str): activation for the expand and depthwise convs.
-            use_se (bool): whether to apply ``se_block`` after the depthwise
-                conv.
-            name (str): prefix for layer names (must not be None).
-
-        Returns:
-            The block output. ``input`` is added to it only when the channel
-            count is unchanged and ``stride`` is 1.
-        """
-
-        conv0 = self.conv_bn_layer(
-            input=input,
-            filter_size=1,
-            num_filters=num_mid_filter,
-            stride=1,
-            padding=0,
-            if_act=True,
-            act=act,
-            name=name + '_expand')
-
-        # groups == num_filters, i.e. one filter per channel.
-        conv1 = self.conv_bn_layer(
-            input=conv0,
-            filter_size=filter_size,
-            num_filters=num_mid_filter,
-            stride=stride,
-            padding=int((filter_size - 1) // 2),
-            if_act=True,
-            act=act,
-            num_groups=num_mid_filter,
-            use_cudnn=False,
-            name=name + '_depthwise')
-        if use_se:
-            conv1 = self.se_block(
-                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
-
-        # Final projection has no activation.
-        conv2 = self.conv_bn_layer(
-            input=conv1,
-            filter_size=1,
-            num_filters=num_out_filter,
-            stride=1,
-            padding=0,
-            if_act=False,
-            name=name + '_linear',
-            res_last_bn_init=True)
-        if num_in_filter != num_out_filter or stride != 1:
-            return conv2
-        else:
-            return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
-
-
-def MobileNetV3_small_x0_35():
-    """Return a MobileNetV3 builder for the 'small' config at scale 0.35."""
-    return MobileNetV3(scale=0.35, model_name='small')
-
-
-def MobileNetV3_small_x0_5():
-    """Return a MobileNetV3 builder for the 'small' config at scale 0.5."""
-    return MobileNetV3(scale=0.5, model_name='small')
-
-
-def MobileNetV3_small_x0_75():
-    """Return a MobileNetV3 builder for the 'small' config at scale 0.75."""
-    return MobileNetV3(scale=0.75, model_name='small')
-
-
-def MobileNetV3_small_x1_0():
-    """Return a MobileNetV3 builder for the 'small' config at scale 1.0."""
-    return MobileNetV3(scale=1.0, model_name='small')
-
-
-def MobileNetV3_small_x1_25():
-    """Return a MobileNetV3 builder for the 'small' config at scale 1.25."""
-    return MobileNetV3(scale=1.25, model_name='small')
-
-
-def MobileNetV3_large_x0_35():
-    """Return a MobileNetV3 builder for the 'large' config at scale 0.35."""
-    return MobileNetV3(scale=0.35, model_name='large')
-
-
-def MobileNetV3_large_x0_5():
-    """Return a MobileNetV3 builder for the 'large' config at scale 0.5."""
-    return MobileNetV3(scale=0.5, model_name='large')
-
-
-def MobileNetV3_large_x0_75():
-    """Return a MobileNetV3 builder for the 'large' config at scale 0.75."""
-    return MobileNetV3(scale=0.75, model_name='large')
-
-
-def MobileNetV3_large_x1_0():
-    """Return a MobileNetV3 builder for the 'large' config at scale 1.0."""
-    return MobileNetV3(scale=1.0, model_name='large')
-
-
-def MobileNetV3_large_x1_25():
-    """Return a MobileNetV3 builder for the 'large' config at scale 1.25."""
-    return MobileNetV3(scale=1.25, model_name='large')
--- a/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/module.py
+++ b/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/module.py
--- a/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/processor.py
+++ b/hub_module/modules/image/classification/mobilenet_v3_large_imagenet_ssld/processor.py
-# coding=utf-8
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import base64
-import cv2
-import os
-
-import numpy as np
-
-
-def base64_to_cv2(b64str):
-    """
-    Decode a base64 string holding an encoded image into an image array.
-
-    Args:
-        b64str (str): base64 text of an encoded image file (e.g. JPEG).
-
-    Returns:
-        The result of ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.
-    """
-    raw = base64.b64decode(b64str.encode('utf8'))
-    # np.frombuffer replaces np.fromstring, whose binary mode is deprecated.
-    buf = np.frombuffer(raw, np.uint8)
-    return cv2.imdecode(buf, cv2.IMREAD_COLOR)
-
-
-def softmax(x):
-    """
-    Compute a numerically stable softmax without modifying the input.
-
-    For arrays with more than one dimension the softmax is taken along
-    axis 1; otherwise it is taken over all elements.
-
-    Args:
-        x (numpy.ndarray): input scores.
-
-    Returns:
-        numpy.ndarray: array of the same shape as ``x`` whose values sum to 1
-        along the normalised axis.
-    """
-    x = np.asarray(x)
-    axis = 1 if x.ndim > 1 else None
-    # Subtracting the maximum avoids overflow in exp. Building a new array
-    # here (rather than ``x -= ...``) leaves the caller's data untouched.
-    shifted = x - np.max(x, axis=axis, keepdims=True)
-    exps = np.exp(shifted)
-    return exps / np.sum(exps, axis=axis, keepdims=True)
-
-
-def postprocess(data_out, label_list, top_k):
-    """
-    Convert raw network outputs into per-image top-k label/score mappings.
-
-    Args:
-        data_out (numpy.ndarray): network output, one row of scores per image.
-        label_list (list): label strings indexed by class id; only the text
-            before the first comma of each entry is used as the key.
-        top_k (int): number of highest-scoring classes to keep per image.
-
-    Returns:
-        list[dict]: one dict per image mapping label to softmax score, in
-        descending score order.
-    """
-    results = []
-    for scores in data_out:
-        probs = softmax(scores)
-        best = np.argsort(probs)[::-1][0:top_k]
-        results.append({
-            label_list[idx].split(',')[0]: float(probs[idx])
-            for idx in best
-        })
-    return results