Commit b00deb54 authored by Kaipeng Deng, committed by GitHub

Add Yolov3 model based on PascalVOC and add VOC metrics in Python. (#2801)

* add voc_eval and yolo_darknet_voc

* add yolov3_darknet_voc in MODEL_ZOO

* fix default im_size

* fix MODEL_ZOO note

* fix is_bbox_normalized

* extract map to map_utils.py

* update yolov3_darknet_voc mixup

* add yolov3_r34_voc

* add yolov3_mobilenet_v1_voc

* fix drop empty in VAL mode

* use cfg.num_classes

* assert metric valid

* allow difficult to be None

* add comment for bbox_eval

* num_classes in retinanet
Parent 2a5fd326
......@@ -10,6 +10,7 @@ log_smooth_window: 20
snapshot_iter: 10000
metric: COCO
save_dir: output
num_classes: 81
RetinaNet:
backbone: ResNet
......@@ -38,7 +39,6 @@ RetinaHead:
prior_prob: 0.01
base_scale: 4
num_scales_per_octave: 3
num_classes: 81
anchor_generator:
aspect_ratios: [1.0, 2.0, 0.5]
variance: [1.0, 1.0, 1.0, 1.0]
......
......@@ -10,6 +10,7 @@ log_smooth_window: 20
snapshot_iter: 10000
metric: COCO
save_dir: output
num_classes: 81
RetinaNet:
backbone: ResNet
......@@ -38,7 +39,6 @@ RetinaHead:
prior_prob: 0.01
base_scale: 4
num_scales_per_octave: 3
num_classes: 81
anchor_generator:
aspect_ratios: [1.0, 2.0, 0.5]
variance: [1.0, 1.0, 1.0, 1.0]
......
......@@ -30,6 +30,7 @@ SSD:
MobileNet:
norm_decay: 0.
conv_group_scale: 1
conv_learning_rate: 0.1
extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
with_extra_blocks: true
......
......@@ -10,6 +10,7 @@ snapshot_iter: 2000
metric: COCO
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar
weights: output/yolov3_darknet/model_final
num_classes: 80
YOLOv3:
backbone: DarkNet
......@@ -35,7 +36,6 @@ YOLOv3Head:
nms_top_k: 1000
normalized: false
score_threshold: 0.01
num_classes: 80
LearningRate:
base_lr: 0.001
......
architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
use_gpu: true
max_iters: 70000
log_smooth_window: 20
save_dir: output
snapshot_iter: 2000
metric: VOC
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar
weights: output/yolov3_darknet_voc/model_final
num_classes: 20
YOLOv3:
backbone: DarkNet
yolo_head: YOLOv3Head
DarkNet:
norm_type: sync_bn
norm_decay: 0.
depth: 53
YOLOv3Head:
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
norm_decay: 0.
ignore_thresh: 0.7
label_smooth: false
nms:
background_label: -1
keep_top_k: 100
nms_threshold: 0.45
nms_top_k: 1000
normalized: false
score_threshold: 0.01
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 55000
- 62000
- !LinearWarmup
start_factor: 0.
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0005
type: L2
YoloTrainFeed:
batch_size: 8
dataset:
dataset_dir: dataset/voc
annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
image_dir: VOCdevkit/VOC_all/JPEGImages
use_default_label: true
num_workers: 8
bufsize: 128
use_process: true
mixup_epoch: 250
YoloEvalFeed:
batch_size: 8
dataset:
dataset_dir: dataset/voc
annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
image_dir: VOCdevkit/VOC_all/JPEGImages
use_default_label: true
YoloTestFeed:
batch_size: 1
dataset:
use_default_label: true
......@@ -10,6 +10,7 @@ snapshot_iter: 2000
metric: COCO
pretrain_weights: http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar
weights: output/yolov3_mobilenet_v1/model_final
num_classes: 80
YOLOv3:
backbone: MobileNet
......@@ -36,7 +37,6 @@ YOLOv3Head:
nms_top_k: 1000
normalized: false
score_threshold: 0.01
num_classes: 80
LearningRate:
base_lr: 0.001
......
architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
use_gpu: true
max_iters: 70000
log_smooth_window: 20
save_dir: output
snapshot_iter: 2000
metric: VOC
pretrain_weights: http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar
weights: output/yolov3_mobilenet_v1_voc/model_final
num_classes: 20
YOLOv3:
backbone: MobileNet
yolo_head: YOLOv3Head
MobileNet:
norm_type: sync_bn
norm_decay: 0.
conv_group_scale: 1
with_extra_blocks: false
YOLOv3Head:
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
norm_decay: 0.
ignore_thresh: 0.7
label_smooth: false
nms:
background_label: -1
keep_top_k: 100
nms_threshold: 0.45
nms_top_k: 1000
normalized: false
score_threshold: 0.01
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 55000
- 62000
- !LinearWarmup
start_factor: 0.
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0005
type: L2
YoloTrainFeed:
batch_size: 8
dataset:
dataset_dir: dataset/voc
annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
image_dir: VOCdevkit/VOC_all/JPEGImages
use_default_label: true
num_workers: 8
bufsize: 128
use_process: true
mixup_epoch: 250
YoloEvalFeed:
batch_size: 8
dataset:
dataset_dir: dataset/voc
annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
image_dir: VOCdevkit/VOC_all/JPEGImages
use_default_label: true
YoloTestFeed:
batch_size: 1
dataset:
use_default_label: true
......@@ -10,6 +10,7 @@ snapshot_iter: 2000
metric: COCO
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar
weights: output/yolov3_r34/model_final
num_classes: 80
YOLOv3:
backbone: ResNet
......@@ -38,7 +39,6 @@ YOLOv3Head:
nms_top_k: 1000
normalized: false
score_threshold: 0.01
num_classes: 80
LearningRate:
base_lr: 0.001
......
architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
use_gpu: true
max_iters: 70000
log_smooth_window: 20
save_dir: output
snapshot_iter: 2000
metric: VOC
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar
weights: output/yolov3_r34_voc/model_final
num_classes: 20
YOLOv3:
backbone: ResNet
yolo_head: YOLOv3Head
ResNet:
norm_type: sync_bn
freeze_at: 0
freeze_norm: false
norm_decay: 0.
depth: 34
feature_maps: [3, 4, 5]
YOLOv3Head:
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
norm_decay: 0.
ignore_thresh: 0.7
label_smooth: false
nms:
background_label: -1
keep_top_k: 100
nms_threshold: 0.45
nms_top_k: 1000
normalized: false
score_threshold: 0.01
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 55000
- 62000
- !LinearWarmup
start_factor: 0.
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0005
type: L2
YoloTrainFeed:
batch_size: 8
dataset:
dataset_dir: dataset/voc
annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
image_dir: VOCdevkit/VOC_all/JPEGImages
use_default_label: true
num_workers: 8
bufsize: 128
use_process: true
mixup_epoch: 250
YoloEvalFeed:
batch_size: 8
dataset:
dataset_dir: dataset/voc
annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
image_dir: VOCdevkit/VOC_all/JPEGImages
use_default_label: true
YoloTestFeed:
batch_size: 1
dataset:
use_default_label: true
......@@ -69,6 +69,20 @@ The backbone models pretrained on ImageNet are available. All backbone models ar
| ResNet34 | 416 | 8 | 270e | 34.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) |
| ResNet34 | 320 | 8 | 270e | 31.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) |
### Yolo v3 on Pascal VOC
| Backbone | Size | Image/gpu | Lr schd | Box AP | Download |
| :----------- | :--: | :-----: | :-----: | :----: | :-------: |
| DarkNet53 | 608 | 8 | 270e | 83.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet_voc.tar) |
| DarkNet53 | 416 | 8 | 270e | 83.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet_voc.tar) |
| DarkNet53 | 320 | 8 | 270e | 82.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet_voc.tar) |
| MobileNet-V1 | 608 | 8 | 270e | 76.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar) |
| MobileNet-V1 | 416 | 8 | 270e | 76.7 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar) |
| MobileNet-V1 | 320 | 8 | 270e | 75.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar) |
| ResNet34 | 608 | 8 | 270e | 82.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34_voc.tar) |
| ResNet34 | 416 | 8 | 270e | 81.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34_voc.tar) |
| ResNet34 | 320 | 8 | 270e | 80.1 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34_voc.tar) |
**NOTE**: Yolo v3 is trained on 8 GPUs with a total batch size of 64 for 270 epochs. Yolo v3 training data augmentations: mixup,
random color distortion, random cropping, random expansion, random interpolation method, random flipping. Yolo v3 uses randomly
reshaped minibatches in training (see the sketch below), so inference can be performed on different image sizes with the same model weights, and we provide evaluation
......
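The "randomly reshaped minibatch" above means one input size is drawn per batch. A minimal sketch of the idea (illustrative only, not the repo's actual `RandomShape` operator; the size list and helper name are assumptions):

```python
import numpy as np
import cv2  # assumed available; any resize routine works

# Illustrative sketch: draw one square size per minibatch and
# resize every image in that batch to it.
CANDIDATE_SIZES = [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]

def random_shape_batch(images):
    """images: list of HxWx3 uint8 arrays -> resized copies."""
    target = int(np.random.choice(CANDIDATE_SIZES))
    return [cv2.resize(im, (target, target)) for im in images]
```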
......@@ -30,7 +30,8 @@ from ppdet.data.transform.operators import (
Permute)
from ppdet.data.transform.arrange_sample import (ArrangeRCNN, ArrangeTestRCNN,
ArrangeSSD, ArrangeTestSSD,
ArrangeYOLO, ArrangeTestYOLO)
ArrangeYOLO, ArrangeEvalYOLO,
ArrangeTestYOLO)
__all__ = [
'PadBatch', 'MultiScale', 'RandomShape', 'DataSet', 'CocoDataSet',
......@@ -891,7 +892,8 @@ class YoloEvalFeed(DataFeed):
def __init__(self,
dataset=CocoDataSet(COCO_VAL_ANNOTATION,
COCO_VAL_IMAGE_DIR).__dict__,
fields=['image', 'im_shape', 'im_id'],
fields=['image', 'im_size', 'im_id', 'gt_box',
'gt_label', 'is_difficult'],
image_shape=[3, 608, 608],
sample_transforms=[
DecodeImage(to_rgb=True),
......@@ -912,7 +914,7 @@ class YoloEvalFeed(DataFeed):
num_workers=8,
num_max_boxes=50,
use_process=False):
sample_transforms.append(ArrangeTestYOLO())
sample_transforms.append(ArrangeEvalYOLO())
super(YoloEvalFeed, self).__init__(
dataset,
fields,
......@@ -926,7 +928,6 @@ class YoloEvalFeed(DataFeed):
with_background=with_background,
num_workers=num_workers,
use_process=use_process)
self.num_max_boxes = num_max_boxes
self.mode = 'VAL'
self.bufsize = 128
......@@ -938,7 +939,7 @@ class YoloTestFeed(DataFeed):
def __init__(self,
dataset=SimpleDataSet(COCO_VAL_ANNOTATION,
COCO_VAL_IMAGE_DIR).__dict__,
fields=['image', 'im_shape', 'im_id'],
fields=['image', 'im_size', 'im_id'],
image_shape=[3, 608, 608],
sample_transforms=[
DecodeImage(to_rgb=True),
......@@ -974,6 +975,5 @@ class YoloTestFeed(DataFeed):
with_background=with_background,
num_workers=num_workers,
use_process=use_process)
self.num_max_boxes = num_max_boxes
self.mode = 'TEST'
self.bufsize = 128
......@@ -63,7 +63,10 @@ class Reader(object):
worker_args = {k.lower(): v for k, v in worker_args.items()}
mapped_ds = map(sc, mapper, worker_args)
batched_ds = batch(mapped_ds, batchsize, drop_last)
# In VAL mode, gt_bbox, gt_label can be empty, and should
# not be dropped
batched_ds = batch(mapped_ds, batchsize, drop_last,
drop_empty=(mode!="VAL"))
trans_conf = {k.lower(): v for k, v in self._trans_conf[mode].items()}
need_keys = {
......
......@@ -105,7 +105,7 @@ def map(ds, mapper, worker_args=None):
return MappedDataset(ds, mapper)
def batch(ds, batchsize, drop_last=False):
def batch(ds, batchsize, drop_last=False, drop_empty=True):
"""
Batch data samples to batches
Args:
......@@ -116,7 +116,10 @@ def batch(ds, batchsize, drop_last=False):
a batched dataset
"""
return BatchedDataset(ds, batchsize, drop_last=drop_last)
return BatchedDataset(ds,
batchsize,
drop_last=drop_last,
drop_empty=drop_empty)
def batch_map(ds, config):
......
......@@ -228,10 +228,44 @@ class ArrangeYOLO(BaseOperator):
return outs
@register_op
class ArrangeEvalYOLO(BaseOperator):
"""
Transform dict to the tuple format needed for evaluation.
"""
def __init__(self):
super(ArrangeEvalYOLO, self).__init__()
def __call__(self, sample, context=None):
"""
Args:
sample: a dict which contains image
info and annotation info.
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items:
(image, im_shape, im_id, gt_bbox,
gt_class, difficult)
"""
im = sample['image']
if len(sample['gt_bbox']) != len(sample['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
im_id = sample['im_id']
h = sample['h']
w = sample['w']
im_shape = np.array((h, w))
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
difficult = sample['difficult']
outs = (im, im_shape, im_id, gt_bbox, gt_class, difficult)
return outs
@register_op
class ArrangeTestYOLO(BaseOperator):
"""
Transform dict to the tuple format needed for training.
Transform dict to the tuple format needed for inference.
"""
def __init__(self):
......
......@@ -66,12 +66,14 @@ class BatchedDataset(ProxiedDataset):
ds (instance of Dataset): dataset to be batched
batchsize (int): sample number for each batch
drop_last (bool): drop the last samples when not enough for one batch
drop_empty (bool): drop samples that have an empty field
"""
def __init__(self, ds, batchsize, drop_last=False):
def __init__(self, ds, batchsize, drop_last=False, drop_empty=True):
super(BatchedDataset, self).__init__(ds)
self._batchsz = batchsize
self._drop_last = drop_last
self._drop_empty = drop_empty
def next(self):
"""proxy to self._ds.next"""
......@@ -95,7 +97,7 @@ class BatchedDataset(ProxiedDataset):
for _ in range(self._batchsz):
try:
out = self._ds.next()
while has_empty(out):
while self._drop_empty and has_empty(out):
out = self._ds.next()
batch.append(out)
except StopIteration:
......
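To make the VAL-mode behavior concrete, here is a self-contained toy (not the repo's actual `Dataset` classes) showing what `drop_empty` changes:

```python
# Toy sketch of drop_empty semantics; names and data are invented.
def has_empty(sample):
    # mimic ppdet's check: any zero-length field counts as empty
    return any(hasattr(f, '__len__') and len(f) == 0 for f in sample)

samples = [('img0', [0.1]), ('img1', []), ('img2', [0.3])]

def make_batches(samples, batchsize, drop_empty=True):
    batches, batch = [], []
    for s in samples:
        if drop_empty and has_empty(s):
            continue  # TRAIN mode: skip samples whose gt fields are empty
        batch.append(s)
        if len(batch) == batchsize:
            batches.append(batch)
            batch = []
    if batch:
        batches.append(batch)
    return batches

print(len(make_batches(samples, 2)))                    # 1 -- 'img1' dropped
print(len(make_batches(samples, 2, drop_empty=False)))  # 2 -- VAL keeps 'img1'
```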
......@@ -41,6 +41,7 @@ class YOLOv3Head(object):
nms (object): an instance of `MultiClassNMS`
"""
__inject__ = ['nms']
__shared__ = ['num_classes']
def __init__(self,
norm_decay=0.,
......@@ -277,13 +278,13 @@ class YOLOv3Head(object):
return sum(losses)
def get_prediction(self, input, im_shape):
def get_prediction(self, input, im_size):
"""
Get prediction result of YOLOv3 network
Args:
input (list): List of Variables, output of backbone stages
im_shape (Variable): Variable of shape([h, w]) of each image
im_size (Variable): Variable of size([h, w]) of each image
Returns:
pred (Variable): The prediction result after non-max suppress.
......@@ -298,7 +299,7 @@ class YOLOv3Head(object):
for i, output in enumerate(outputs):
box, score = fluid.layers.yolo_box(
x=output,
img_size=im_shape,
img_size=im_size,
anchors=self.mask_anchors[i],
class_num=self.num_classes,
conf_thresh=self.nms.score_threshold,
......
......@@ -59,8 +59,8 @@ class YOLOv3(object):
gt_score)
}
else:
im_shape = feed_vars['im_shape']
return self.yolo_head.get_prediction(body_feats, im_shape)
im_size = feed_vars['im_size']
return self.yolo_head.get_prediction(body_feats, im_size)
def train(self, feed_vars):
return self.build(feed_vars, mode='train')
......
......@@ -42,12 +42,14 @@ class MobileNet(object):
norm_type='bn',
norm_decay=0.,
conv_group_scale=1,
conv_learning_rate=1.0,
with_extra_blocks=False,
extra_block_filters=[[256, 512], [128, 256], [128, 256],
[64, 128]]):
self.norm_type = norm_type
self.norm_decay = norm_decay
self.conv_group_scale = conv_group_scale
self.conv_learning_rate = conv_learning_rate
self.with_extra_blocks = with_extra_blocks
self.extra_block_filters = extra_block_filters
......@@ -62,7 +64,7 @@ class MobileNet(object):
use_cudnn=True,
name=None):
parameter_attr = ParamAttr(
learning_rate=0.1,
learning_rate=self.conv_learning_rate,
initializer=fluid.initializer.MSRA(),
name=name + "_weights")
conv = fluid.layers.conv2d(
......
......@@ -32,7 +32,8 @@ feed_var_def = [
{'name': 'gt_mask', 'shape': [2], 'dtype': 'float32', 'lod_level': 3},
{'name': 'is_difficult', 'shape': [1], 'dtype': 'int32', 'lod_level': 1},
{'name': 'gt_score', 'shape': [1], 'dtype': 'float32', 'lod_level': 0},
{'name': 'im_shape', 'shape': [3], 'dtype': 'float32', 'lod_level': 0},
{'name': 'im_shape', 'shape': [3], 'dtype': 'float32', 'lod_level': 0},
{'name': 'im_size', 'shape': [2], 'dtype': 'int32', 'lod_level': 0},
]
# yapf: enable
......@@ -47,7 +48,8 @@ def create_feed(feed, use_pyreader=True):
'lod_level': 0
}
# YOLO var dim is fixed
# tensor padding with 0 is used instead of LoD tensor when
# num_max_boxes is set
if getattr(feed, 'num_max_boxes', None) is not None:
feed_var_map['gt_label']['shape'] = [feed.num_max_boxes]
feed_var_map['gt_score']['shape'] = [feed.num_max_boxes]
......@@ -55,8 +57,6 @@ def create_feed(feed, use_pyreader=True):
feed_var_map['gt_label']['lod_level'] = 0
feed_var_map['gt_score']['lod_level'] = 0
feed_var_map['gt_box']['lod_level'] = 0
feed_var_map['im_shape']['shape'] = [2]
feed_var_map['im_shape']['dtype'] = 'int32'
feed_vars = OrderedDict([(key, fluid.layers.data(
name=feed_var_map[key]['name'],
......
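The new comment on padding can be made concrete with a small sketch (shapes follow the `gt_box` feed var above; the box values are invented):

```python
import numpy as np

# Illustrative: with num_max_boxes set, per-image gt boxes are padded
# with zeros to a fixed [num_max_boxes, 4] tensor (lod_level 0) instead
# of being packed into a variable-length LoD tensor.
num_max_boxes = 50
gt = np.array([[10, 10, 50, 50], [20, 20, 40, 60]], dtype=np.float32)
padded = np.zeros((num_max_boxes, 4), dtype=np.float32)
padded[:len(gt)] = gt  # rows beyond the real boxes stay all-zero
```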
......@@ -21,6 +21,8 @@ import numpy as np
import paddle.fluid as fluid
from ppdet.utils.voc_eval import bbox_eval as voc_bbox_eval
__all__ = ['parse_fetches', 'eval_run', 'eval_results']
logger = logging.getLogger(__name__)
......@@ -88,7 +90,13 @@ def eval_run(exe, compile_program, pyreader, keys, values, cls):
return results
def eval_results(results, feed, metric, resolution=None, output_file=None):
def eval_results(results,
feed,
metric,
num_classes,
resolution=None,
is_bbox_normalized=False,
output_file=None):
"""Evaluation for evaluation program results"""
if metric == 'COCO':
from ppdet.utils.coco_eval import proposal_eval, bbox_eval, mask_eval
......@@ -110,5 +118,9 @@ def eval_results(results, feed, metric, resolution=None, output_file=None):
output = '{}_mask.json'.format(output_file)
mask_eval(results, anno_file, output, resolution)
else:
res = np.mean(results[-1]['accum_map'][0])
logger.info('Test mAP: {}'.format(res))
if 'accum_map' in results[-1]:
res = np.mean(results[-1]['accum_map'][0])
logger.info('mAP: {:.2f}'.format(res * 100.))
elif 'bbox' in results[0]:
voc_bbox_eval(results, num_classes,
is_bbox_normalized=is_bbox_normalized)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import sys
import numpy as np
import logging
logger = logging.getLogger(__name__)
__all__ = [
'bbox_area', 'jaccard_overlap', 'DetectionMAP'
]
def bbox_area(bbox, is_bbox_normalized):
"""
Calculate area of a bounding box
"""
norm = 1. - float(is_bbox_normalized)
width = bbox[2] - bbox[0] + norm
height = bbox[3] - bbox[1] + norm
return width * height
def jaccard_overlap(pred, gt, is_bbox_normalized=False):
"""
Calculate jaccard overlap ratio between two bounding boxes
"""
if pred[0] >= gt[2] or pred[2] <= gt[0] or \
pred[1] >= gt[3] or pred[3] <= gt[1]:
return 0.
inter_xmin = max(pred[0], gt[0])
inter_ymin = max(pred[1], gt[1])
inter_xmax = min(pred[2], gt[2])
inter_ymax = min(pred[3], gt[3])
inter_size = bbox_area([inter_xmin, inter_ymin,
inter_xmax, inter_ymax],
is_bbox_normalized)
pred_size = bbox_area(pred, is_bbox_normalized)
gt_size = bbox_area(gt, is_bbox_normalized)
overlap = float(inter_size) / (
pred_size + gt_size - inter_size)
return overlap
class DetectionMAP(object):
"""
Calculate detection mean average precision.
Currently supports two types: 11point and integral
Args:
class_num (int): the class number.
overlap_thresh (float): The threshold of overlap
ratio between prediction bounding box and
ground truth bounding box for deciding
true/false positive. Default 0.5.
map_type (str): calculation method of mean average
precision, currently support '11point' and
'integral'. Default '11point'.
is_bbox_normalized (bool): whether bounding boxes
are normalized to range [0, 1]. Default False.
evaluate_difficult (bool): whether to evaluate
difficult bounding boxes. Default False.
"""
def __init__(self,
class_num,
overlap_thresh=0.5,
map_type='11point',
is_bbox_normalized=False,
evaluate_difficult=False):
self.class_num = class_num
self.overlap_thresh = overlap_thresh
assert map_type in ['11point', 'integral'], \
"map_type currently only support '11point' "\
"and 'integral'"
self.map_type = map_type
self.is_bbox_normalized = is_bbox_normalized
self.evaluate_difficult = evaluate_difficult
self.reset()
def update(self, bbox, gt_box, gt_label, difficult=None):
"""
Update metric statistics from given prediction and ground
truth information.
"""
if difficult is None:
difficult = np.zeros_like(gt_label)
# record class gt count
for gtl, diff in zip(gt_label, difficult):
if self.evaluate_difficult or int(diff) == 0:
self.class_gt_counts[int(gtl[0])] += 1
# record class score positive
visited = [False] * len(gt_label)
for b in bbox:
label, score, xmin, ymin, xmax, ymax = b.tolist()
pred = [xmin, ymin, xmax, ymax]
max_idx = -1
max_overlap = -1.0
for i, gl in enumerate(gt_label):
if int(gl) == int(label):
overlap = jaccard_overlap(pred, gt_box[i],
self.is_bbox_normalized)
if overlap > max_overlap:
max_overlap = overlap
max_idx = i
if max_overlap > self.overlap_thresh:
if self.evaluate_difficult or \
int(difficult[max_idx]) == 0:
if not visited[max_idx]:
self.class_score_poss[
int(label)].append([score, 1.0])
visited[max_idx] = True
else:
self.class_score_poss[
int(label)].append([score, 0.0])
else:
self.class_score_poss[
int(label)].append([score, 0.0])
def reset(self):
"""
Reset metric statistics
"""
self.class_score_poss = [[] for _ in range(self.class_num)]
self.class_gt_counts = [0] * self.class_num
self.mAP = None
def accumulate(self):
"""
Accumulate metric results and calculate mAP
"""
mAP = 0.
valid_cnt = 0
for score_pos, count in zip(self.class_score_poss,
self.class_gt_counts):
if count == 0 or len(score_pos) == 0:
continue
accum_tp_list, accum_fp_list = \
self._get_tp_fp_accum(score_pos)
precision = []
recall = []
for ac_tp, ac_fp in zip(accum_tp_list, accum_fp_list):
precision.append(float(ac_tp) / (ac_tp + ac_fp))
recall.append(float(ac_tp) / count)
if self.map_type == '11point':
max_precisions = [0.] * 11
start_idx = len(precision) - 1
for j in range(10, -1, -1):
for i in range(start_idx, -1, -1):
if recall[i] < float(j) / 10.:
start_idx = i
if j > 0:
max_precisions[j - 1] = max_precisions[j]
break
else:
if max_precisions[j] < precision[i]:
max_precisions[j] = precision[i]
mAP += sum(max_precisions) / 11.
valid_cnt += 1
elif self.map_type == 'integral':
import math
ap = 0.
prev_recall = 0.
for i in range(len(precision)):
recall_gap = math.fabs(recall[i] - prev_recall)
if recall_gap > 1e-6:
ap += precision[i] * recall_gap
prev_recall = recall[i]
mAP += ap
valid_cnt += 1
else:
logger.error("Unspported mAP type {}".format(map_type))
sys.exit(1)
self.mAP = mAP / float(valid_cnt) if valid_cnt > 0 else mAP
def get_map(self):
"""
Get mAP result
"""
if self.mAP is None:
logger.error("mAP is not calculated.")
return self.mAP
def _get_tp_fp_accum(self, score_pos_list):
"""
Calculate accumulated true/false positive results from
[score, pos] records
"""
sorted_list = sorted(score_pos_list,
key=lambda s: s[0],
reverse=True)
accum_tp = 0
accum_fp = 0
accum_tp_list = []
accum_fp_list = []
for (score, pos) in sorted_list:
accum_tp += int(pos)
accum_tp_list.append(accum_tp)
accum_fp += 1 - int(pos)
accum_fp_list.append(accum_fp)
return accum_tp_list, accum_fp_list
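A minimal usage sketch of the new `DetectionMAP` class with dummy boxes. For `map_type='11point'`, AP is the mean of interpolated precisions max_{r' >= r} p(r') at recalls r = 0, 0.1, ..., 1.0. The import path is an assumption based on the `from .map_utils import DetectionMAP` line in voc_eval.py below:

```python
import numpy as np
from ppdet.utils.map_utils import DetectionMAP  # assumed module location

# Two classes, one image. bbox rows: [label, score, xmin, ymin, xmax, ymax]
metric = DetectionMAP(class_num=2, overlap_thresh=0.5, map_type='11point')

bbox = np.array([[0, 0.8, 10, 10, 50, 50],
                 [1, 0.7, 60, 60, 90, 90]], dtype=np.float32)
gt_box = np.array([[12, 12, 48, 48],
                   [55, 55, 95, 95]], dtype=np.float32)
gt_label = np.array([[0], [1]], dtype=np.int32)

metric.update(bbox, gt_box, gt_label)  # difficult defaults to all zeros
metric.accumulate()
print(metric.get_map())                # 1.0 here: each det matches its gt
```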
......@@ -22,16 +22,83 @@ import sys
import numpy as np
from ..data.source.voc_loader import pascalvoc_label
from .map_utils import DetectionMAP
from .coco_eval import bbox2out
import logging
logger = logging.getLogger(__name__)
__all__ = [
'bbox2out', 'get_category_info'
'bbox_eval', 'bbox2out', 'get_category_info'
]
def bbox_eval(results,
class_num,
overlap_thresh=0.5,
map_type='11point',
is_bbox_normalized=False,
evaluate_difficult=False):
"""
Bounding box evaluation for VOC dataset
Args:
results (list): prediction bounding box results.
class_num (int): evaluation class number.
overlap_thresh (float): the positive threshold of
bbox overlap
map_type (string): method for mAP calculation,
can only be '11point' or 'integral'
is_bbox_normalized (bool): whether bbox is normalized
to range [0, 1].
evaluate_difficult (bool): whether to evaluate
difficult gt bbox.
"""
assert 'bbox' in results[0]
logger.info("Start evaluate...")
detection_map = DetectionMAP(class_num=class_num,
overlap_thresh=overlap_thresh,
map_type=map_type,
is_bbox_normalized=is_bbox_normalized,
evaluate_difficult=evaluate_difficult)
for t in results:
bboxes = t['bbox'][0]
bbox_lengths = t['bbox'][1][0]
if bboxes is None or bboxes.shape == (1, 1):
continue
gt_boxes = t['gt_box'][0]
gt_box_lengths = t['gt_box'][1][0]
gt_labels = t['gt_label'][0]
assert len(gt_boxes) == len(gt_labels)
difficults = t['is_difficult'][0] if not evaluate_difficult \
else None
if not evaluate_difficult:
assert len(gt_labels) == len(difficults)
bbox_idx = 0
gt_box_idx = 0
for i in range(len(bbox_lengths)):
bbox_num = bbox_lengths[i]
gt_box_num = gt_box_lengths[i]
bbox = bboxes[bbox_idx: bbox_idx + bbox_num]
gt_box = gt_boxes[gt_box_idx: gt_box_idx + gt_box_num]
gt_label = gt_labels[gt_box_idx: gt_box_idx + gt_box_num]
difficult = None if difficults is None else \
difficults[gt_box_idx: gt_box_idx + gt_box_num]
detection_map.update(bbox, gt_box, gt_label, difficult)
bbox_idx += bbox_num
gt_box_idx += gt_box_num
logger.info("Accumulating evaluatation results...")
detection_map.accumulate()
logger.info("mAP({:.2f}, {}) = {:.2f}".format(overlap_thresh,
map_type, 100. * detection_map.get_map()))
def get_category_info(anno_file=None,
with_background=True,
use_default_label=False):
......
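The `results` argument to `bbox_eval` is a list of fetch dicts whose values are `(ndarray, LoD)` pairs. A dummy single-batch example (all coordinates and scores invented) that exercises the slicing above:

```python
import logging
import numpy as np
from ppdet.utils.voc_eval import bbox_eval

logging.basicConfig(level=logging.INFO)  # so the mAP log line shows

# One fetched batch of two images; every value is a (ndarray, LoD) pair.
results = [{
    'bbox': (np.array([[0, 0.9, 10, 10, 50, 50],    # image 1: 1 det
                       [1, 0.6, 20, 20, 40, 60],    # image 2: 2 dets
                       [1, 0.3, 25, 25, 45, 65]], dtype=np.float32),
             [[1, 2]]),                             # dets per image
    'gt_box': (np.array([[12, 12, 48, 48],
                         [22, 22, 42, 62]], dtype=np.float32),
               [[1, 1]]),                           # gt boxes per image
    'gt_label': (np.array([[0], [1]], dtype=np.int32), [[1, 1]]),
    'is_difficult': (np.array([[0], [0]], dtype=np.int32), [[1, 1]]),
}]

bbox_eval(results, class_num=20)  # logs mAP(0.50, 11point) = 100.00
```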
......@@ -93,18 +93,29 @@ def main():
if 'weights' in cfg:
checkpoint.load_pretrain(exe, eval_prog, cfg.weights)
assert cfg.metric in ['COCO', 'VOC'], \
"unknown metric type {}".format(cfg.metric)
extra_keys = []
if 'metric' in cfg and cfg.metric == 'COCO':
if cfg.metric == 'COCO':
extra_keys = ['im_info', 'im_id', 'im_shape']
if cfg.metric == 'VOC':
extra_keys = ['gt_box', 'gt_label', 'is_difficult']
keys, values, cls = parse_fetches(fetches, eval_prog, extra_keys)
# whether output bbox is normalized in model output layer
is_bbox_normalized = False
if hasattr(model, 'is_bbox_normalized') and \
callable(model.is_bbox_normalized):
is_bbox_normalized = model.is_bbox_normalized()
results = eval_run(exe, compile_program, pyreader, keys, values, cls)
# evaluation
resolution = None
if 'mask' in results[0]:
resolution = model.mask_head.resolution
eval_results(results, eval_feed, cfg.metric, resolution, FLAGS.output_file)
eval_results(results, eval_feed, cfg.metric, cfg.num_classes,
resolution, is_bbox_normalized, FLAGS.output_file)
if __name__ == '__main__':
......
......@@ -169,6 +169,8 @@ def main():
save_infer_model(FLAGS, exe, feed_vars, test_fetches, infer_prog)
# parse infer fetches
assert cfg.metric in ['COCO', 'VOC'], \
"unknown metric type {}".format(cfg.metric)
extra_keys = []
if cfg['metric'] == 'COCO':
extra_keys = ['im_info', 'im_id', 'im_shape']
......
......@@ -156,6 +156,12 @@ def main():
elif cfg.pretrain_weights:
checkpoint.load_pretrain(exe, train_prog, cfg.pretrain_weights)
# whether output bbox is normalized in model output layer
is_bbox_normalized = False
if hasattr(model, 'is_bbox_normalized') and \
callable(model.is_bbox_normalized):
is_bbox_normalized = model.is_bbox_normalized()
train_stats = TrainingStats(cfg.log_smooth_window, train_keys)
train_pyreader.start()
start_time = time.time()
......@@ -191,8 +197,8 @@ def main():
resolution = None
if 'mask' in results[0]:
resolution = model.mask_head.resolution
eval_results(results, eval_feed, cfg.metric, resolution,
FLAGS.output_file)
eval_results(results, eval_feed, cfg.metric, cfg.num_classes,
resolution, is_bbox_normalized, FLAGS.output_file)
train_pyreader.reset()
......