diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md index 0ad42e87a4b93e1764331057702670e39e4dc7ad..c481bb47ceec72bb1eb573dd8d85ca13a59c24a2 100644 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md @@ -100,20 +100,13 @@ - save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在) - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 - + - dirname: 模型保存路径
## 四、服务部署 @@ -166,6 +159,10 @@ 修复numpy数据读取问题 +* 1.1.0 + + 移除 fluid api + - ```shell - $ hub install yolov3_resnet50_vd_coco2017==1.0.2 + $ hub install yolov3_resnet50_vd_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README_en.md b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README_en.md index 7bb7b10aee81292114a8c01b9bd776de3a2d44f0..2f9b46bd89e7185b49bb1c687b80f9ed01c38dfa 100644 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README_en.md +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README_en.md @@ -99,19 +99,13 @@ - save\_path (str, optional): output path for saving results - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: save model path ## IV.Server Deployment @@ -165,6 +159,10 @@ Fix the problem of reading numpy +* 1.1.0 + + Remove fluid api + - ```shell - $ hub install yolov3_resnet50_vd_coco2017==1.0.2 + $ hub install yolov3_resnet50_vd_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py index cdd037d89255d36cc4a3eb8eb0a4e46ad69b4cfd..7e1101dd860301ca17a5cf8222493e1977c819f2 100644 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py @@ -6,44 +6,43 @@ import argparse import os from functools import partial +import paddle import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix -from yolov3_resnet50_vd_coco2017.resnet import ResNet -from yolov3_resnet50_vd_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_resnet50_vd_coco2017.data_feed import reader -from yolov3_resnet50_vd_coco2017.yolo_head import MultiClassNMS, YOLOv3Head +from .processor import load_label_info, postprocess, base64_to_cv2 +from .data_feed import reader @moduleinfo( name="yolov3_resnet50_vd_coco2017", - version="1.0.2", + version="1.1.0", type="CV/object_detection", summary= "Baidu's YOLOv3 model for object detection with backbone ResNet50, trained with dataset coco2017.", author="paddlepaddle", author_email="paddle-dev@baidu.com") -class YOLOv3ResNet50Coco2017(hub.Module): - def _initialize(self): +class YOLOv3ResNet50Coco2017: + def __init__(self): self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_resnet50_model") + self.directory, "yolov3_resnet50_model", "model") self.label_names = load_label_info( os.path.join(self.directory, "label_file.txt")) self._set_config() - + def _set_config(self): """ predictor config setting. """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -52,110 +51,10 @@ class YOLOv3ResNet50Coco2017(hub.Module): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = ResNet( - norm_type='sync_bn', - freeze_at=0, - freeze_norm=False, - norm_decay=0., - dcn_v2_stages=[5], - depth=50, - variant='d', - feature_maps=[3, 4, 5]) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head(num_classes=80) - # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - # outputs - outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog + self.gpu_predictor = create_predictor(gpu_config) def object_detection(self, paths=None, @@ -198,54 +97,33 @@ class YOLOv3ResNet50Coco2017(hub.Module): paths = paths if paths else list() data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + batch_reader = paddle.batch(data_reader, batch_size=batch_size) res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 0]))) + input_handle = predictor.get_input_handle(input_names[1]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 1]))) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + output = postprocess(paths=paths, + images=images, + data_out=output_handle, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) res.extend(output) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/name_adapter.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/name_adapter.py deleted file mode 100644 index bebf8bdeeec3aa76357d95cc52ba5a009e19d46f..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/name_adapter.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding=utf-8 - - -class NameAdapter(object): - """Fix the backbones variable names for pretrained weight""" - - def __init__(self, model): - super(NameAdapter, self).__init__() - self.model = model - - @property - def model_type(self): - return getattr(self.model, '_model_type', '') - - @property - def variant(self): - return getattr(self.model, 'variant', '') - - def fix_conv_norm_name(self, name): - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - # the naming rule is same as pretrained weight - if self.model_type == 'SEResNeXt': - bn_name = name + "_bn" - return bn_name - - def fix_shortcut_name(self, name): - if self.model_type == 'SEResNeXt': - name = 'conv' + name + '_prj' - return name - - def fix_bottleneck_name(self, name): - if self.model_type == 'SEResNeXt': - conv_name1 = 'conv' + name + '_x1' - conv_name2 = 'conv' + name + '_x2' - conv_name3 = 'conv' + name + '_x3' - shortcut_name = name - else: - conv_name1 = name + "_branch2a" - conv_name2 = name + "_branch2b" - conv_name3 = name + "_branch2c" - shortcut_name = name + "_branch1" - return conv_name1, conv_name2, conv_name3, shortcut_name - - def fix_layer_warp_name(self, stage_num, count, i): - name = 'res' + str(stage_num) - if count > 10 and stage_num == 4: - if i == 0: - conv_name = name + "a" - else: - conv_name = name + "b" + str(i) - else: - conv_name = name + chr(ord("a") + i) - if self.model_type == 'SEResNeXt': - conv_name = str(stage_num + 2) + '_' + str(i + 1) - return conv_name - - def fix_c1_stage_name(self): - return "res_conv1" if self.model_type == 'ResNeXt' else "conv1" diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py deleted file mode 100644 index 599b8dfa07b6372f25990649212b232c523e19de..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! - "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax( - theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape( - t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') - return output diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py index 1039e3e48d66ace28762008ebed3d08da5f702d6..dd2aea11af6ad31874c7b825db58bc39850dbbce 100644 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py @@ -101,7 +101,7 @@ def postprocess(paths, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): The paths of images. @@ -126,9 +126,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + lod = data_out.lod()[0] + results = data_out.copy_to_cpu() check_dir(output_dir) diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py deleted file mode 100644 index 4bd6fb61e9a51c70b6197b25a7cf788c2ca5b790..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. - freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. - """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: - res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/test.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/test.py new file mode 100644 index 0000000000000000000000000000000000000000..c70c92380362d2326b032383b6f08aaae8724501 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="yolov3_resnet50_vd_coco2017") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + AttributeError, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py deleted file mode 100644 index 7428fb4c281507c30918e12a04753d559346cf7b..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name = weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." - - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. - - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred