# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import ast
import argparse
from collections import OrderedDict
from functools import partial
from math import ceil

import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable, serving
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser
from paddlehub.common.paddle_helper import add_vars_prefix

from faster_rcnn_resnet50_coco2017.processor import load_label_info, postprocess, base64_to_cv2
from faster_rcnn_resnet50_coco2017.data_feed import test_reader, padding_minibatch
from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5
from faster_rcnn_resnet50_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, RPNHead
from faster_rcnn_resnet50_coco2017.bbox_head import MultiClassNMS, BBoxHead, SmoothL1Loss
from faster_rcnn_resnet50_coco2017.bbox_assigner import BBoxAssigner
from faster_rcnn_resnet50_coco2017.roi_extractor import RoIAlign


@moduleinfo(
    name="faster_rcnn_resnet50_coco2017",
    version="1.1.0",
    type="cv/object_detection",
    summary="Baidu's Faster R-CNN model for object detection with backbone ResNet50, trained with dataset COCO2017",
    author="paddlepaddle",
    author_email="paddle-dev@baidu.com")
class FasterRCNNResNet50(hub.Module):
    def _initialize(self):
        # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333]
        self.default_pretrained_model_path = os.path.join(self.directory, "faster_rcnn_resnet50_model")
        self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt"))
        self._set_config()

    def _set_config(self):
        """
        predictor config setting
        """
        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
        cpu_config.disable_glog_info()
        cpu_config.disable_gpu()
        self.cpu_predictor = create_paddle_predictor(cpu_config)

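        # Only build a GPU predictor when CUDA_VISIBLE_DEVICES names at least one
        # device; otherwise fall back to the CPU predictor created above.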
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
            use_gpu = True
        except:
            use_gpu = False
        if use_gpu:
            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
            gpu_config.disable_glog_info()
            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
            self.gpu_predictor = create_paddle_predictor(gpu_config)

    def context(self, num_classes=81, trainable=True, pretrained=True, phase='train'):
        """
        Extract the head features of the pretrained Faster R-CNN model so that transfer learning can be performed.

        Args:
            num_classes (int): number of categories
            trainable (bool): whether to set parameters trainable.
            pretrained (bool): whether to load default pretrained model.
            phase (str): optional choices are 'train' and 'predict'.

        Returns:
             inputs (dict): the input variables.
             outputs (dict): the output variables.
             context_prog (Program): the program to execute transfer learning.
        """
        context_prog = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(context_prog, startup_program):
            with fluid.unique_name.guard():
                image = fluid.layers.data(name='image', shape=[-1, 3, -1, -1], dtype='float32')
                # backbone
                backbone = ResNet(norm_type='affine_channel', depth=50, feature_maps=4, freeze_at=2)
                body_feats = backbone(image)

                # var_prefix
                var_prefix = '@HUB_{}@'.format(self.name)
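                # im_info is expected to hold [resized_height, resized_width, scale]
                # and im_shape the original image shape, following the usual
                # PaddleDetection Faster R-CNN input convention.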
                im_info = fluid.layers.data(name='im_info', shape=[3], dtype='float32', lod_level=0)
                im_shape = fluid.layers.data(name='im_shape', shape=[3], dtype='float32', lod_level=0)
                body_feat_names = list(body_feats.keys())
                # rpn_head: RPNHead
                rpn_head = self.rpn_head()
                rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
                # train
                if phase == 'train':
                    gt_bbox = fluid.layers.data(name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
                    is_crowd = fluid.layers.data(name='is_crowd', shape=[1], dtype='int32', lod_level=1)
                    gt_class = fluid.layers.data(name='gt_class', shape=[1], dtype='int32', lod_level=1)
                    rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
                    # bbox_assigner: BBoxAssigner
                    bbox_assigner = self.bbox_assigner(num_classes)
                    outs = fluid.layers.generate_proposal_labels(
                        rpn_rois=rois,
                        gt_classes=gt_class,
                        is_crowd=is_crowd,
                        gt_boxes=gt_bbox,
                        im_info=im_info,
                        batch_size_per_im=bbox_assigner.batch_size_per_im,
                        fg_fraction=bbox_assigner.fg_fraction,
                        fg_thresh=bbox_assigner.fg_thresh,
                        bg_thresh_hi=bbox_assigner.bg_thresh_hi,
                        bg_thresh_lo=bbox_assigner.bg_thresh_lo,
                        bbox_reg_weights=bbox_assigner.bbox_reg_weights,
                        class_nums=bbox_assigner.class_nums,
                        use_random=bbox_assigner.use_random)
                    rois = outs[0]

                body_feat = body_feats[body_feat_names[-1]]
                # roi_extractor: RoIAlign
                roi_extractor = self.roi_extractor()
                roi_feat = fluid.layers.roi_align(
                    input=body_feat,
                    rois=rois,
                    pooled_height=roi_extractor.pooled_height,
                    pooled_width=roi_extractor.pooled_width,
                    spatial_scale=roi_extractor.spatial_scale,
                    sampling_ratio=roi_extractor.sampling_ratio)
                # head_feat
                bbox_head = self.bbox_head(num_classes)
                head_feat = bbox_head.head(roi_feat)
                if isinstance(head_feat, OrderedDict):
                    head_feat = list(head_feat.values())[0]
                if phase == 'train':
                    inputs = {
                        'image': var_prefix + image.name,
                        'im_info': var_prefix + im_info.name,
                        'im_shape': var_prefix + im_shape.name,
                        'gt_class': var_prefix + gt_class.name,
                        'gt_bbox': var_prefix + gt_bbox.name,
                        'is_crowd': var_prefix + is_crowd.name
                    }
                    outputs = {
                        'head_features': var_prefix + head_feat.name,
                        'rpn_cls_loss': var_prefix + rpn_loss['rpn_cls_loss'].name,
                        'rpn_reg_loss': var_prefix + rpn_loss['rpn_reg_loss'].name,
                        'generate_proposal_labels': [var_prefix + var.name for var in outs]
                    }
                elif phase == 'predict':
                    pred = bbox_head.get_prediction(roi_feat, rois, im_info, im_shape)
                    inputs = {
                        'image': var_prefix + image.name,
                        'im_info': var_prefix + im_info.name,
                        'im_shape': var_prefix + im_shape.name
                    }
                    outputs = {
                        'head_features': var_prefix + head_feat.name,
                        'rois': var_prefix + rois.name,
                        'bbox_out': var_prefix + pred.name
                    }
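                # Prefix every variable name with '@HUB_<module_name>@' so that the
                # module's variables do not collide with those of the caller's program.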
                add_vars_prefix(context_prog, var_prefix)
                add_vars_prefix(startup_program, var_prefix)

                global_vars = context_prog.global_block().vars
                inputs = {key: global_vars[value] for key, value in inputs.items()}
                outputs = {
                    key: global_vars[value] if not isinstance(value, list) else [global_vars[var] for var in value]
                    for key, value in outputs.items()
                }

                for param in context_prog.global_block().iter_parameters():
                    param.trainable = trainable

                place = fluid.CPUPlace()
                exe = fluid.Executor(place)
                exe.run(startup_program)
                if pretrained:

                    def _if_exist(var):
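                        # 81 = 80 COCO categories + background. When a different
                        # num_classes is requested, skip loading the final cls_score /
                        # bbox_pred weights so they can be re-initialized for the new
                        # label set.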
                        if num_classes != 81:
                            if 'bbox_pred' in var.name or 'cls_score' in var.name:
                                return False
                        return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name))

                    fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist)
                return inputs, outputs, context_prog

    def rpn_head(self):
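        # RPN configuration for the ResNet-C4 backbone above: anchors of 5 sizes and
        # 3 aspect ratios generated on a stride-16 feature map.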
        return RPNHead(
            anchor_generator=AnchorGenerator(
                anchor_sizes=[32, 64, 128, 256, 512],
                aspect_ratios=[0.5, 1.0, 2.0],
                stride=[16.0, 16.0],
                variance=[1.0, 1.0, 1.0, 1.0]),
            rpn_target_assign=RPNTargetAssign(
                rpn_batch_size_per_im=256,
                rpn_fg_fraction=0.5,
                rpn_negative_overlap=0.3,
                rpn_positive_overlap=0.7,
                rpn_straddle_thresh=0.0),
            train_proposal=GenerateProposals(min_size=0.0, nms_thresh=0.7, post_nms_top_n=12000, pre_nms_top_n=2000),
            test_proposal=GenerateProposals(min_size=0.0, nms_thresh=0.7, post_nms_top_n=6000, pre_nms_top_n=1000))

    def roi_extractor(self):
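        # spatial_scale = 0.0625 = 1/16, matching the stride of the C4 feature map.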
        return RoIAlign(resolution=14, sampling_ratio=0, spatial_scale=0.0625)

    def bbox_head(self, num_classes):
        return BBoxHead(
            head=ResNetC5(depth=50, norm_type='affine_channel'),
            nms=MultiClassNMS(keep_top_k=100, nms_threshold=0.5, score_threshold=0.05),
            bbox_loss=SmoothL1Loss(),
            num_classes=num_classes)

    def bbox_assigner(self, num_classes):
        return BBoxAssigner(
            batch_size_per_im=512,
            bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
            bg_thresh_hi=0.5,
            bg_thresh_lo=0.0,
            fg_fraction=0.25,
            fg_thresh=0.5,
            class_nums=num_classes)

    def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
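        """
        Re-save the loaded inference model to ``dirname``; when ``combined`` is True,
        all parameters are combined into a single params file.
        """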
        if combined:
            model_filename = "__model__" if not model_filename else model_filename
            params_filename = "__params__" if not params_filename else params_filename
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)

        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
            dirname=self.default_pretrained_model_path, executor=exe)

        fluid.io.save_inference_model(
            dirname=dirname,
            main_program=program,
            executor=exe,
            feeded_var_names=feeded_var_names,
            target_vars=target_vars,
            model_filename=model_filename,
            params_filename=params_filename)

    def object_detection(self,
                         paths=None,
                         images=None,
                         data=None,
                         use_gpu=False,
                         batch_size=1,
                         output_dir='detection_result',
                         score_thresh=0.5,
                         visualization=True):
        """API of Object Detection.

        Args:
            paths (list[str]): The paths of images.
            images (list[numpy.ndarray]): Image data, with shape [H, W, C] for each image.
            batch_size (int): batch size.
            use_gpu (bool): Whether to use gpu.
            output_dir (str): The path to store output images.
            visualization (bool): Whether to save image or not.
            score_thresh (float): Confidence threshold below which detections are discarded.

        Returns:
            res (list[dict]): The detection results. Keys include 'data' and 'save_path'; the corresponding values are:
                data (dict): The result of object detection. Keys include 'left', 'top', 'right', 'bottom', 'label' and 'confidence'; the corresponding values are:
                    left (float): The X coordinate of the upper left corner of the bounding box;
                    top (float): The Y coordinate of the upper left corner of the bounding box;
                    right (float): The X coordinate of the lower right corner of the bounding box;
                    bottom (float): The Y coordinate of the lower right corner of the bounding box;
                    label (str): The label of detection result;
                    confidence (float): The confidence of detection result.
                save_path (str, optional): The path to save output images.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            except:
                raise RuntimeError(
                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
                )
        paths = paths if paths else list()
        if data and 'image' in data:
            paths += data['image']

        all_images = list()
        for yield_return in test_reader(paths, images):
            all_images.append(yield_return)

        images_num = len(all_images)
        loop_num = ceil(images_num / batch_size)
        res = []
        for iter_id in range(loop_num):
            batch_data = []
            handle_id = iter_id * batch_size
            for image_id in range(batch_size):
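                # The last batch may hold fewer than batch_size images; indexing
                # past the end is simply skipped.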
                try:
                    batch_data.append(all_images[handle_id + image_id])
                except IndexError:
                    pass

            padding_image, padding_info, padding_shape = padding_minibatch(batch_data)
            padding_image_tensor = PaddleTensor(padding_image.copy())
            padding_info_tensor = PaddleTensor(padding_info.copy())
            padding_shape_tensor = PaddleTensor(padding_shape.copy())
            feed_list = [padding_image_tensor, padding_info_tensor, padding_shape_tensor]
            if use_gpu:
                data_out = self.gpu_predictor.run(feed_list)
            else:
                data_out = self.cpu_predictor.run(feed_list)
            output = postprocess(
                paths=paths,
                images=images,
                data_out=data_out,
                score_thresh=score_thresh,
                label_names=self.label_names,
                output_dir=output_dir,
                handle_id=handle_id,
                visualization=visualization)
            res += output
        return res

    def add_module_config_arg(self):
        """
        Add the command config options
        """
        self.arg_config_group.add_argument(
            '--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not")

        self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction")

    def add_module_input_arg(self):
        """
        Add the command input options
        """
        self.arg_input_group.add_argument('--input_path', type=str, default=None, help="input data")

        self.arg_input_group.add_argument('--input_file', type=str, default=None, help="file containing input data")

    def check_input_data(self, args):
        input_data = []
        if args.input_path:
            input_data = [args.input_path]
        elif args.input_file:
            if not os.path.exists(args.input_file):
                raise RuntimeError("File %s is not exist." % args.input_file)
            else:
                input_data = txt_parser.parse(args.input_file, use_strip=True)
        return input_data

    @serving
    def serving_method(self, images, **kwargs):
        """
        Run as a service.
        """
        images_decode = [base64_to_cv2(image) for image in images]
        results = self.object_detection(images=images_decode, **kwargs)
        return results

    @runnable
    def run_cmd(self, argvs):
        self.parser = argparse.ArgumentParser(
            description="Run the {}".format(self.name),
            prog="hub run {}".format(self.name),
            usage='%(prog)s',
            add_help=True)
        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
        self.arg_config_group = self.parser.add_argument_group(
            title="Config options", description="Run configuration for controlling module behavior, not required.")
        self.add_module_config_arg()

        self.add_module_input_arg()
        args = self.parser.parse_args(argvs)
        input_data = self.check_input_data(args)
        if len(input_data) == 0:
            self.parser.print_help()
            exit(1)
        else:
            for image_path in input_data:
                if not os.path.exists(image_path):
                    raise RuntimeError("File %s or %s is not exist." % image_path)
        return self.object_detection(paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size)