# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import ast
import argparse
from collections import OrderedDict
from functools import partial
from math import ceil

import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable, serving
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser
from paddlehub.common.paddle_helper import add_vars_prefix

from faster_rcnn_resnet50_coco2017.processor import load_label_info, postprocess, base64_to_cv2
from faster_rcnn_resnet50_coco2017.data_feed import test_reader, padding_minibatch
from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5
from faster_rcnn_resnet50_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, RPNHead
from faster_rcnn_resnet50_coco2017.bbox_head import MultiClassNMS, BBoxHead, SmoothL1Loss
from faster_rcnn_resnet50_coco2017.bbox_assigner import BBoxAssigner
from faster_rcnn_resnet50_coco2017.roi_extractor import RoIAlign


@moduleinfo(
    name="faster_rcnn_resnet50_coco2017",
    version="1.1.0",
    type="cv/object_detection",
    summary=
    "Baidu's Faster R-CNN model for object detection, with a ResNet50 backbone, trained on the COCO2017 dataset",
    author="paddlepaddle",
    author_email="paddle-dev@baidu.com")
class FasterRCNNResNet50(hub.Module):
    def _initialize(self):
        # Default pretrained model: Faster R-CNN with a ResNet50 backbone; the input tensor shape is [3, 800, 1333].
        self.default_pretrained_model_path = os.path.join(
            self.directory, "faster_rcnn_resnet50_model")
        self.label_names = load_label_info(
            os.path.join(self.directory, "label_file.txt"))
        self._set_config()

    def _set_config(self):
        """
        predictor config setting
        """
        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
        cpu_config.disable_glog_info()
        cpu_config.disable_gpu()
        self.cpu_predictor = create_paddle_predictor(cpu_config)

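        # Build a GPU predictor only when CUDA_VISIBLE_DEVICES points at a
        # usable device id; otherwise only the CPU predictor is available.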
        try:
            _places = os.environ["CUDA_VISIBLE_DEVICES"]
            int(_places[0])
            use_gpu = True
        except Exception:
            use_gpu = False
        if use_gpu:
            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
            gpu_config.disable_glog_info()
            gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
            self.gpu_predictor = create_paddle_predictor(gpu_config)

    def context(self,
                num_classes=81,
                trainable=True,
                pretrained=True,
                phase='train'):
        """
        Distill the head features so as to perform transfer learning.

        Args:
            num_classes (int): number of categories.
            trainable (bool): whether to set parameters trainable.
            pretrained (bool): whether to load default pretrained model.
            phase (str): optional choices are 'train' and 'predict'.

        Returns:
             inputs (dict): the input variables.
             outputs (dict): the output variables.
             context_prog (Program): the program to execute transfer learning.
        """
        context_prog = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(context_prog, startup_program):
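            # Variables and parameters created under these guards belong to
            # context_prog; unique_name.guard keeps parameter names consistent
            # so that the pretrained weights can be loaded by name below.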
            with fluid.unique_name.guard():
                image = fluid.layers.data(
                    name='image', shape=[-1, 3, -1, -1], dtype='float32')
                # backbone
                backbone = ResNet(
                    norm_type='affine_channel',
                    depth=50,
                    feature_maps=4,
                    freeze_at=2)
                body_feats = backbone(image)

                # var_prefix
                var_prefix = '@HUB_{}@'.format(self.name)
                im_info = fluid.layers.data(
                    name='im_info', shape=[3], dtype='float32', lod_level=0)
                im_shape = fluid.layers.data(
                    name='im_shape', shape=[3], dtype='float32', lod_level=0)
                body_feat_names = list(body_feats.keys())
                # rpn_head: RPNHead
                rpn_head = self.rpn_head()
                rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
                # train
                if phase == 'train':
                    gt_bbox = fluid.layers.data(
                        name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
                    is_crowd = fluid.layers.data(
                        name='is_crowd', shape=[1], dtype='int32', lod_level=1)
                    gt_class = fluid.layers.data(
                        name='gt_class', shape=[1], dtype='int32', lod_level=1)
                    rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
                    # bbox_assigner: BBoxAssigner
                    bbox_assigner = self.bbox_assigner(num_classes)
                    outs = fluid.layers.generate_proposal_labels(
                        rpn_rois=rois,
                        gt_classes=gt_class,
                        is_crowd=is_crowd,
                        gt_boxes=gt_bbox,
                        im_info=im_info,
                        batch_size_per_im=bbox_assigner.batch_size_per_im,
                        fg_fraction=bbox_assigner.fg_fraction,
                        fg_thresh=bbox_assigner.fg_thresh,
                        bg_thresh_hi=bbox_assigner.bg_thresh_hi,
                        bg_thresh_lo=bbox_assigner.bg_thresh_lo,
                        bbox_reg_weights=bbox_assigner.bbox_reg_weights,
                        class_nums=bbox_assigner.class_nums,
                        use_random=bbox_assigner.use_random)
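                    # generate_proposal_labels samples RoIs and their training
                    # targets; the first output is the sampled RoIs used below.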
                    rois = outs[0]

                body_feat = body_feats[body_feat_names[-1]]
                # roi_extractor: RoIAlign
                roi_extractor = self.roi_extractor()
                roi_feat = fluid.layers.roi_align(
                    input=body_feat,
                    rois=rois,
                    pooled_height=roi_extractor.pooled_height,
                    pooled_width=roi_extractor.pooled_width,
                    spatial_scale=roi_extractor.spatial_scale,
                    sampling_ratio=roi_extractor.sampling_ratio)
                # head_feat
                bbox_head = self.bbox_head(num_classes)
                head_feat = bbox_head.head(roi_feat)
                if isinstance(head_feat, OrderedDict):
                    head_feat = list(head_feat.values())[0]
                if phase == 'train':
                    inputs = {
                        'image': var_prefix + image.name,
                        'im_info': var_prefix + im_info.name,
                        'im_shape': var_prefix + im_shape.name,
                        'gt_class': var_prefix + gt_class.name,
                        'gt_bbox': var_prefix + gt_bbox.name,
                        'is_crowd': var_prefix + is_crowd.name
                    }
                    outputs = {
                        'head_features':
                        var_prefix + head_feat.name,
                        'rpn_cls_loss':
                        var_prefix + rpn_loss['rpn_cls_loss'].name,
                        'rpn_reg_loss':
                        var_prefix + rpn_loss['rpn_reg_loss'].name,
                        'generate_proposal_labels':
                        [var_prefix + var.name for var in outs]
                    }
                elif phase == 'predict':
                    pred = bbox_head.get_prediction(roi_feat, rois, im_info,
                                                    im_shape)
                    inputs = {
                        'image': var_prefix + image.name,
                        'im_info': var_prefix + im_info.name,
                        'im_shape': var_prefix + im_shape.name
                    }
                    outputs = {
                        'head_features': var_prefix + head_feat.name,
                        'rois': var_prefix + rois.name,
                        'bbox_out': var_prefix + pred.name
                    }
                add_vars_prefix(context_prog, var_prefix)
                add_vars_prefix(startup_program, var_prefix)

                global_vars = context_prog.global_block().vars
                inputs = {
                    key: global_vars[value]
                    for key, value in inputs.items()
                }
                outputs = {
                    key: global_vars[value] if not isinstance(value, list) else
                    [global_vars[var] for var in value]
                    for key, value in outputs.items()
                }

                for param in context_prog.global_block().iter_parameters():
                    param.trainable = trainable

                place = fluid.CPUPlace()
                exe = fluid.Executor(place)
                exe.run(startup_program)
                if pretrained:

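                    # When num_classes differs from the COCO default (81), skip
                    # the class-dependent layers so they keep their fresh
                    # initialization instead of the pretrained values.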
                    def _if_exist(var):
                        if num_classes != 81:
                            if 'bbox_pred' in var.name or 'cls_score' in var.name:
                                return False
                        return os.path.exists(
                            os.path.join(self.default_pretrained_model_path,
                                         var.name))

                    fluid.io.load_vars(
                        exe,
                        self.default_pretrained_model_path,
                        predicate=_if_exist)
                return inputs, outputs, context_prog

    def rpn_head(self):
        return RPNHead(
            anchor_generator=AnchorGenerator(
                anchor_sizes=[32, 64, 128, 256, 512],
                aspect_ratios=[0.5, 1.0, 2.0],
                stride=[16.0, 16.0],
                variance=[1.0, 1.0, 1.0, 1.0]),
            rpn_target_assign=RPNTargetAssign(
                rpn_batch_size_per_im=256,
                rpn_fg_fraction=0.5,
                rpn_negative_overlap=0.3,
                rpn_positive_overlap=0.7,
                rpn_straddle_thresh=0.0),
            train_proposal=GenerateProposals(
                min_size=0.0,
                nms_thresh=0.7,
                post_nms_top_n=2000,
                pre_nms_top_n=12000),
            test_proposal=GenerateProposals(
                min_size=0.0,
                nms_thresh=0.7,
                post_nms_top_n=1000,
                pre_nms_top_n=6000))

    def roi_extractor(self):
        return RoIAlign(resolution=14, sampling_ratio=0, spatial_scale=0.0625)

    def bbox_head(self, num_classes):
        return BBoxHead(
            head=ResNetC5(depth=50, norm_type='affine_channel'),
            nms=MultiClassNMS(
                keep_top_k=100, nms_threshold=0.5, score_threshold=0.05),
            bbox_loss=SmoothL1Loss(),
            num_classes=num_classes)

    def bbox_assigner(self, num_classes):
        return BBoxAssigner(
            batch_size_per_im=512,
            bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
            bg_thresh_hi=0.5,
            bg_thresh_lo=0.0,
            fg_fraction=0.25,
            fg_thresh=0.5,
            class_nums=num_classes)

    def save_inference_model(self,
                             dirname,
                             model_filename=None,
                             params_filename=None,
                             combined=True):
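        """
        Save the inference model to the specified directory.

        Args:
            dirname (str): the directory to save the inference model.
            model_filename (str): name of the saved model file; defaults to '__model__' when combined is True.
            params_filename (str): name of the saved parameters file; defaults to '__params__' when combined is True.
            combined (bool): whether to save all parameters into a single file.
        """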
        if combined:
            model_filename = "__model__" if not model_filename else model_filename
            params_filename = "__params__" if not params_filename else params_filename
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)

        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
            dirname=self.default_pretrained_model_path, executor=exe)

        fluid.io.save_inference_model(
            dirname=dirname,
            main_program=program,
            executor=exe,
            feeded_var_names=feeded_var_names,
            target_vars=target_vars,
            model_filename=model_filename,
            params_filename=params_filename)

    def object_detection(self,
                         paths=None,
                         images=None,
                         data=None,
                         use_gpu=False,
                         batch_size=1,
                         output_dir='detection_result',
                         score_thresh=0.5,
                         visualization=True):
        """API of Object Detection.

        Args:
            paths (list[str]): the paths of images to detect.
            images (list[numpy.ndarray]): image data, each with shape [H, W, C].
            data (dict): if it contains the key 'image', the corresponding list of image paths is appended to paths.
            use_gpu (bool): whether to use GPU.
            batch_size (int): batch size.
            output_dir (str): the directory to store output images.
            score_thresh (float): confidence threshold for keeping detections.
            visualization (bool): whether to save the output images.

        Returns:
            res (list[dict]): the COCO2017 detection results. Keys include 'data' and 'save_path', where:
                data (dict): the object detection result, with keys 'left', 'top', 'right', 'bottom', 'label' and 'confidence', where:
                    left (float): the X coordinate of the upper-left corner of the bounding box;
                    top (float): the Y coordinate of the upper-left corner of the bounding box;
                    right (float): the X coordinate of the lower-right corner of the bounding box;
                    bottom (float): the Y coordinate of the lower-right corner of the bounding box;
                    label (str): the label of the detected object;
                    confidence (float): the confidence of the detection result.
                save_path (str, optional): the path of the saved output image.
        """
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
            except Exception:
                raise RuntimeError(
                    "Environment variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES to the id of a CUDA device."
                )
        paths = paths if paths else list()
        if data and 'image' in data:
            paths += data['image']

        all_images = list()
        for yield_return in test_reader(paths, images):
            all_images.append(yield_return)

        images_num = len(all_images)
        loop_num = ceil(images_num / batch_size)
        res = []
        for iter_id in range(loop_num):
            batch_data = []
            handle_id = iter_id * batch_size
            for image_id in range(batch_size):
                try:
                    batch_data.append(all_images[handle_id + image_id])
                except IndexError:
                    pass

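            # Pad the images in this mini-batch to a common shape so that they
            # can be fed to the predictor as a single tensor.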
            padding_image, padding_info, padding_shape = padding_minibatch(
                batch_data)
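            # Wrap the padded arrays as PaddleTensor inputs for the predictor.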
            padding_image_tensor = PaddleTensor(padding_image.copy())
            padding_info_tensor = PaddleTensor(padding_info.copy())
            padding_shape_tensor = PaddleTensor(padding_shape.copy())
            feed_list = [
                padding_image_tensor, padding_info_tensor, padding_shape_tensor
            ]
            if use_gpu:
                data_out = self.gpu_predictor.run(feed_list)
            else:
                data_out = self.cpu_predictor.run(feed_list)
            output = postprocess(
                paths=paths,
                images=images,
                data_out=data_out,
                score_thresh=score_thresh,
                label_names=self.label_names,
                output_dir=output_dir,
                handle_id=handle_id,
                visualization=visualization)
            res += output
        return res

    def add_module_config_arg(self):
        """
        Add the command-line config options
        """
        self.arg_config_group.add_argument(
            '--use_gpu',
            type=ast.literal_eval,
            default=False,
            help="whether to use GPU or not")

        self.arg_config_group.add_argument(
            '--batch_size',
            type=int,
            default=1,
            help="batch size for prediction")

    def add_module_input_arg(self):
        """
        Add the command-line input options
        """
        self.arg_input_group.add_argument(
            '--input_path', type=str, default=None, help="path of the input image")

        self.arg_input_group.add_argument(
            '--input_file',
            type=str,
            default=None,
            help="file containing input data")

    def check_input_data(self, args):
        input_data = []
        if args.input_path:
            input_data = [args.input_path]
        elif args.input_file:
            if not os.path.exists(args.input_file):
                raise RuntimeError("File %s does not exist." % args.input_file)
            else:
                input_data = txt_parser.parse(args.input_file, use_strip=True)
        return input_data

    @serving
    def serving_method(self, images, **kwargs):
        """
        Run as a service.
        """
        images_decode = [base64_to_cv2(image) for image in images]
        results = self.object_detection(images=images_decode, **kwargs)
        return results

    @runnable
    def run_cmd(self, argvs):
        self.parser = argparse.ArgumentParser(
            description="Run the {}".format(self.name),
            prog="hub run {}".format(self.name),
            usage='%(prog)s',
            add_help=True)
        self.arg_input_group = self.parser.add_argument_group(
            title="Input options", description="Input data. Required")
        self.arg_config_group = self.parser.add_argument_group(
            title="Config options",
            description=
            "Run configuration for controlling module behavior, not required.")
        self.add_module_config_arg()

        self.add_module_input_arg()
        args = self.parser.parse_args(argvs)
        input_data = self.check_input_data(args)
        if len(input_data) == 0:
            self.parser.print_help()
            exit(1)
        else:
            for image_path in input_data:
                if not os.path.exists(image_path):
                    raise RuntimeError(
                        "File %s does not exist." % image_path)
        return self.object_detection(
            paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size)
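

# Example usage (a minimal sketch): load the module through PaddleHub and run
# object_detection on a local image. The image path below is a placeholder.
#
#     import paddlehub as hub
#
#     detector = hub.Module(name="faster_rcnn_resnet50_coco2017")
#     results = detector.object_detection(
#         paths=["/path/to/image.jpg"],  # placeholder path
#         use_gpu=False,
#         batch_size=1,
#         score_thresh=0.5,
#         visualization=True)
#     for res in results:
#         print(res['data'])           # detected boxes, labels and confidences
#         print(res.get('save_path'))  # path of the rendered image, if saved
#
#     # For transfer learning, context() returns the input/output variables and
#     # the program that holds the detection graph:
#     inputs, outputs, program = detector.context(
#         num_classes=81, trainable=True, pretrained=True, phase='train')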