未验证 提交 b4727677 编写于 作者: W wangxinxin08 提交者: GitHub

refactor s2anet (#6604)

* refactor s2anet to support batch_size > 1

* fix problem of inference

* support batch_size > 1 for training

* fix empty results

* fix dota eval

* fix configs of s2anet_head

* modify s2anet_spine_1x to 73 mAP
上级 42a4d707
...@@ -13,6 +13,7 @@ EvalDataset: ...@@ -13,6 +13,7 @@ EvalDataset:
image_dir: trainval_split/images image_dir: trainval_split/images
anno_path: trainval_split/s2anet_trainval_paddle_coco.json anno_path: trainval_split/s2anet_trainval_paddle_coco.json
dataset_dir: dataset/DOTA_1024_s2anet/ dataset_dir: dataset/DOTA_1024_s2anet/
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_rbox']
TestDataset: TestDataset:
!ImageFolder !ImageFolder
......
...@@ -13,6 +13,7 @@ EvalDataset: ...@@ -13,6 +13,7 @@ EvalDataset:
image_dir: images image_dir: images
anno_path: annotations/valid.json anno_path: annotations/valid.json
dataset_dir: dataset/spine_coco dataset_dir: dataset/spine_coco
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_rbox']
TestDataset: TestDataset:
!ImageFolder !ImageFolder
......
...@@ -7,8 +7,7 @@ weights: output/s2anet_r50_fpn_1x_dota/model_final.pdparams ...@@ -7,8 +7,7 @@ weights: output/s2anet_r50_fpn_1x_dota/model_final.pdparams
S2ANet: S2ANet:
backbone: ResNet backbone: ResNet
neck: FPN neck: FPN
s2anet_head: S2ANetHead head: S2ANetHead
s2anet_bbox_post_process: S2ANetBBoxPostProcess
ResNet: ResNet:
depth: 50 depth: 50
...@@ -33,23 +32,21 @@ S2ANetHead: ...@@ -33,23 +32,21 @@ S2ANetHead:
stacked_convs: 2 stacked_convs: 2
feat_in: 256 feat_in: 256
feat_out: 256 feat_out: 256
num_classes: 15
align_conv_type: 'AlignConv' # AlignConv Conv align_conv_type: 'AlignConv' # AlignConv Conv
align_conv_size: 3 align_conv_size: 3
use_sigmoid_cls: True use_sigmoid_cls: True
reg_loss_weight: [1.0, 1.0, 1.0, 1.0, 1.1]
RBoxAssigner: cls_loss_weight: [1.1, 1.05]
pos_iou_thr: 0.5
neg_iou_thr: 0.4
min_iou_thr: 0.0
ignore_iof_thr: -2
S2ANetBBoxPostProcess:
nms_pre: 2000 nms_pre: 2000
min_bbox_size: 0.0
nms: nms:
name: MultiClassNMS name: MultiClassNMS
keep_top_k: -1 keep_top_k: -1
score_threshold: 0.05 score_threshold: 0.05
nms_threshold: 0.1 nms_threshold: 0.1
normalized: False normalized: False
RBoxAssigner:
pos_iou_thr: 0.5
neg_iou_thr: 0.4
min_iou_thr: 0.0
ignore_iof_thr: -2
worker_num: 0 worker_num: 4
TrainReader: TrainReader:
sample_transforms: sample_transforms:
- Decode: {} - Decode: {}
- Rbox2Poly: {} - Rbox2Poly: {}
# Resize can process rbox - RandomRFlip: {}
- Resize: {target_size: [1024, 1024], interp: 2, keep_ratio: False} - RResize: {target_size: [1024, 1024], keep_ratio: True, interp: 2}
- RandomFlip: {prob: 0.5} - Poly2RBox: {rbox_type: 'le135'}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms: batch_transforms:
- PadBatch: {pad_to_stride: 32} - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
batch_size: 1 - Permute: {}
- PadRGT: {}
- PadBatch: {pad_to_stride: 32}
batch_size: 2
shuffle: true shuffle: true
drop_last: true drop_last: true
EvalReader: EvalReader:
sample_transforms: sample_transforms:
- Decode: {} - Decode: {}
- Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True} - RResize: {target_size: [1024, 1024], keep_ratio: True, interp: 2}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {} - Permute: {}
batch_transforms: batch_transforms:
- PadBatch: {pad_to_stride: 32} - PadBatch: {pad_to_stride: 32}
batch_size: 1 batch_size: 2
shuffle: false shuffle: false
drop_last: false drop_last: false
collate_batch: false
TestReader: TestReader:
sample_transforms: sample_transforms:
- Decode: {} - Decode: {}
- Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True} - Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {} - Permute: {}
batch_transforms: batch_transforms:
- PadBatch: {pad_to_stride: 32} - PadBatch: {pad_to_stride: 32}
batch_size: 1 batch_size: 1
shuffle: false shuffle: false
drop_last: false drop_last: false
...@@ -7,23 +7,19 @@ _BASE_: [ ...@@ -7,23 +7,19 @@ _BASE_: [
] ]
weights: output/s2anet_1x_spine/model_final weights: output/s2anet_1x_spine/model_final
pretrain_weights: https://paddledet.bj.bcebos.com/models/s2anet_alignconv_2x_dota.pdparams
# for 8 card # for 8 card
LearningRate: LearningRate:
base_lr: 0.01 base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [7, 10]
- !LinearWarmup
start_factor: 0.3333333333333333
epochs: 5
S2ANetHead: S2ANetHead:
anchor_strides: [8, 16, 32, 64, 128]
anchor_scales: [4]
anchor_ratios: [1.0]
anchor_assign: RBoxAssigner
stacked_convs: 2
feat_in: 256
feat_out: 256
num_classes: 9
align_conv_type: 'AlignConv' # AlignConv Conv
align_conv_size: 3
use_sigmoid_cls: True
reg_loss_weight: [1.0, 1.0, 1.0, 1.0, 1.05] reg_loss_weight: [1.0, 1.0, 1.0, 1.0, 1.05]
cls_loss_weight: [1.05, 1.0] cls_loss_weight: [1.05, 1.0]
reg_loss_type: 'l1'
...@@ -8,19 +8,3 @@ _BASE_: [ ...@@ -8,19 +8,3 @@ _BASE_: [
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/s2anet_alignconv_2x_dota/model_final weights: output/s2anet_alignconv_2x_dota/model_final
S2ANetHead:
anchor_strides: [8, 16, 32, 64, 128]
anchor_scales: [4]
anchor_ratios: [1.0]
anchor_assign: RBoxAssigner
stacked_convs: 2
feat_in: 256
feat_out: 256
num_classes: 15
align_conv_type: 'AlignConv' # AlignConv Conv
align_conv_size: 3
use_sigmoid_cls: True
reg_loss_weight: [1.0, 1.0, 1.0, 1.0, 1.1]
cls_loss_weight: [1.1, 1.05]
reg_loss_type: 'l1'
...@@ -16,16 +16,4 @@ ResNet: ...@@ -16,16 +16,4 @@ ResNet:
num_stages: 4 num_stages: 4
S2ANetHead: S2ANetHead:
anchor_strides: [8, 16, 32, 64, 128] align_conv_type: 'Conv'
anchor_scales: [4]
anchor_ratios: [1.0]
anchor_assign: RBoxAssigner
stacked_convs: 2
feat_in: 256
feat_out: 256
num_classes: 15
align_conv_type: 'Conv' # AlignConv Conv
align_conv_size: 3
use_sigmoid_cls: True
reg_loss_weight: [1.0, 1.0, 1.0, 1.0, 1.1]
cls_loss_weight: [1.1, 1.05]
...@@ -180,7 +180,6 @@ class COCODataSet(DetDataset): ...@@ -180,7 +180,6 @@ class COCODataSet(DetDataset):
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32) gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
if is_rbox_anno: if is_rbox_anno:
gt_rbox = np.zeros((num_bbox, 5), dtype=np.float32) gt_rbox = np.zeros((num_bbox, 5), dtype=np.float32)
gt_theta = np.zeros((num_bbox, 1), dtype=np.int32)
gt_class = np.zeros((num_bbox, 1), dtype=np.int32) gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
is_crowd = np.zeros((num_bbox, 1), dtype=np.int32) is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
gt_poly = [None] * num_bbox gt_poly = [None] * num_bbox
......
...@@ -16,11 +16,13 @@ from . import operators ...@@ -16,11 +16,13 @@ from . import operators
from . import batch_operators from . import batch_operators
from . import keypoint_operators from . import keypoint_operators
from . import mot_operators from . import mot_operators
from . import rotated_operators
from .operators import * from .operators import *
from .batch_operators import * from .batch_operators import *
from .keypoint_operators import * from .keypoint_operators import *
from .mot_operators import * from .mot_operators import *
from .rotated_operators import *
__all__ = [] __all__ = []
__all__ += registered_ops __all__ += registered_ops
......
...@@ -48,6 +48,7 @@ __all__ = [ ...@@ -48,6 +48,7 @@ __all__ = [
'Gt2GFLTarget', 'Gt2GFLTarget',
'Gt2CenterNetTarget', 'Gt2CenterNetTarget',
'PadGT', 'PadGT',
'PadRGT',
] ]
...@@ -109,12 +110,6 @@ class PadBatch(BaseOperator): ...@@ -109,12 +110,6 @@ class PadBatch(BaseOperator):
padding_segm[:, :im_h, :im_w] = gt_segm padding_segm[:, :im_h, :im_w] = gt_segm
data['gt_segm'] = padding_segm data['gt_segm'] = padding_segm
if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
# ploy to rbox
polys = data['gt_rbox2poly']
rbox = bbox_utils.poly2rbox(polys)
data['gt_rbox'] = rbox
return samples return samples
...@@ -981,12 +976,6 @@ class PadMaskBatch(BaseOperator): ...@@ -981,12 +976,6 @@ class PadMaskBatch(BaseOperator):
padding_mask[:im_h, :im_w] = 1. padding_mask[:im_h, :im_w] = 1.
data['pad_mask'] = padding_mask data['pad_mask'] = padding_mask
if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
# ploy to rbox
polys = data['gt_rbox2poly']
rbox = bbox_utils.poly2rbox(polys)
data['gt_rbox'] = rbox
return samples return samples
...@@ -1122,3 +1111,57 @@ class PadGT(BaseOperator): ...@@ -1122,3 +1111,57 @@ class PadGT(BaseOperator):
pad_diff[:num_gt] = sample['difficult'] pad_diff[:num_gt] = sample['difficult']
sample['difficult'] = pad_diff sample['difficult'] = pad_diff
return samples return samples
@register_op
class PadRGT(BaseOperator):
    """
    Zero-pad the ground-truth arrays of every sample in a batch to one length.

    The common length is the largest `gt_bbox` count found in the batch.
    `gt_class` and `gt_bbox` are always padded; `gt_score`, `is_crowd`,
    `difficult`, `gt_poly` and `gt_rbox` are padded only when present.

    Args:
        return_gt_mask (bool): If true, store `pad_gt_mask` in each sample,
            where 1 marks a real box and 0 marks a padded slot.
    """

    def __init__(self, return_gt_mask=True):
        super(PadRGT, self).__init__()
        self.return_gt_mask = return_gt_mask

    def pad_field(self, sample, field, num_gt):
        """Replace `sample[name]` by a zero array of `shape` holding the
        original rows in its first `num_gt` slots; no-op if the key is absent.

        `field` is a `[name, shape, dtype]` triple.
        """
        key, padded_shape, padded_dtype = field
        if key not in sample:
            return
        padded = np.zeros(padded_shape, dtype=padded_dtype)
        if num_gt > 0:
            padded[:num_gt] = sample[key]
        sample[key] = padded

    def __call__(self, samples, context=None):
        num_max_boxes = max([len(item['gt_bbox']) for item in samples])

        # (key, trailing dimension, dtype) of the optional per-box fields.
        optional_fields = (
            ('gt_score', 1, np.float32),
            ('is_crowd', 1, np.int32),
            ('difficult', 1, np.int32),
            ('gt_poly', 8, np.float32),
            ('gt_rbox', 5, np.float32), )

        for sample in samples:
            if self.return_gt_mask:
                sample['pad_gt_mask'] = np.zeros(
                    (num_max_boxes, 1), dtype=np.float32)
            # Nothing to pad when no sample in the batch has a box.
            if num_max_boxes == 0:
                continue

            num_gt = len(sample['gt_bbox'])
            class_buf = np.zeros((num_max_boxes, 1), dtype=np.int32)
            bbox_buf = np.zeros((num_max_boxes, 4), dtype=np.float32)
            if num_gt > 0:
                class_buf[:num_gt] = sample['gt_class']
                bbox_buf[:num_gt] = sample['gt_bbox']
            sample['gt_class'] = class_buf
            sample['gt_bbox'] = bbox_buf

            # Mark the slots that hold real boxes.
            if 'pad_gt_mask' in sample:
                sample['pad_gt_mask'][:num_gt] = 1

            for key, last_dim, dtype in optional_fields:
                self.pad_field(sample, [key, (num_max_boxes, last_dim), dtype],
                               num_gt)

        return samples
...@@ -492,3 +492,72 @@ def get_border(border, size): ...@@ -492,3 +492,72 @@ def get_border(border, size):
while size - border // i <= border // i: while size - border // i <= border // i:
i *= 2 i *= 2
return border // i return border // i
def norm_angle(angle, range=(-np.pi / 4, np.pi)):
    """Wrap an angle into a half-open interval.

    Args:
        angle: angle (scalar or numpy array), in the same unit as `range`.
        range: two-element sequence `(start, period)`. The result lies in
            `[start, start + period)`. The default maps radians into
            `[-pi / 4, 3 * pi / 4)`.

    Returns:
        The wrapped angle, `(angle - start) % period + start`.
    """
    # The default is an immutable tuple so it cannot be altered between calls;
    # the parameter keeps its original name for keyword-argument callers.
    start, period = range[0], range[1]
    return (angle - start) % period + start
def poly2rbox_le135(poly):
    """Convert a quadrilateral to a rotated box with the long edge as width.

    The angle is the direction of the longer of the edges leaving the first
    vertex, passed through `norm_angle` with its default range.

    Args:
        poly: [x1, y1, x2, y2, x3, y3, x4, y4]
    Returns:
        rbox: (cx, cy, w, h, angle)
    """
    coords = np.array(poly[:8], dtype=np.float32)
    x1, y1 = coords[0], coords[1]
    x2, y2 = coords[2], coords[3]
    x3, y3 = coords[4], coords[5]
    x4, y4 = coords[6], coords[7]

    # Lengths of the two consecutive edges (pt1 -> pt2) and (pt2 -> pt3).
    dx12, dy12 = x1 - x2, y1 - y2
    dx23, dy23 = x2 - x3, y2 - y3
    len12 = np.sqrt(dx12 * dx12 + dy12 * dy12)
    len23 = np.sqrt(dx23 * dx23 + dy23 * dy23)

    long_side = max(len12, len23)
    short_side = min(len12, len23)

    theta = 0
    if len12 > len23:
        # The edge pt1 -> pt2 is the longer one.
        theta = np.arctan2(float(y2 - y1), float(x2 - x1))
    elif len23 >= len12:
        # Otherwise use the edge pt1 -> pt4.
        theta = np.arctan2(float(y4 - y1), float(x4 - x1))
    theta = norm_angle(theta)

    # The centre is the midpoint of the diagonal pt1 - pt3.
    center_x = float(x1 + x3) / 2
    center_y = float(y1 + y3) / 2
    return center_x, center_y, long_side, short_side, theta
def poly2rbox_oc(poly):
    """Convert a polygon to a rotated box via `cv2.minAreaRect`.

    The returned angle is in radians and lies in [0, pi / 2): a 90 degree
    result is folded back to 0 with width and height swapped.

    Args:
        poly: [x1, y1, x2, y2, x3, y3, x4, y4]
    Returns:
        rbox: (cx, cy, w, h, angle)
    """
    pts = np.array(poly, dtype=np.float32).reshape((-1, 2))
    center, size, angle = cv2.minAreaRect(pts)
    cx, cy = center
    w, h = size

    # OpenCV changed its rotated-box definition in 4.5.1. A negative angle
    # means an older OpenCV that reports angles in [-90, 0).
    if angle < 0:
        angle, w, h = angle + 90, h, w

    # Normalise to [0, 90): turn -0.0 into 0.0 and fold 90 back to 0.
    if angle == -0.0:
        angle = 0.0
    if angle == 90.0:
        angle, w, h = 0.0, h, w

    return cx, cy, w, h, angle / 180 * np.pi
...@@ -41,7 +41,6 @@ import threading ...@@ -41,7 +41,6 @@ import threading
MUTEX = threading.Lock() MUTEX = threading.Lock()
from ppdet.core.workspace import serializable from ppdet.core.workspace import serializable
from ppdet.modeling import bbox_utils
from ..reader import Compose from ..reader import Compose
from .op_helper import (satisfy_sample_constraint, filter_and_process, from .op_helper import (satisfy_sample_constraint, filter_and_process,
...@@ -657,18 +656,6 @@ class RandomFlip(BaseOperator): ...@@ -657,18 +656,6 @@ class RandomFlip(BaseOperator):
bbox[:, 2] = width - oldx1 bbox[:, 2] = width - oldx1
return bbox return bbox
def apply_rbox(self, bbox, width):
    """Mirror the x coordinates (columns 0, 2, 4, 6) of each polygon row
    about `width`, then pass every row through
    `bbox_utils.get_best_begin_point_single`. Returns a list of rows."""
    # Snapshot all four x columns before writing anything back.
    x_columns = (0, 2, 4, 6)
    previous_x = [bbox[:, col].copy() for col in x_columns]
    for col, old_values in zip(x_columns, previous_x):
        bbox[:, col] = width - old_values
    reordered = []
    for row in bbox:
        reordered.append(bbox_utils.get_best_begin_point_single(row))
    return reordered
def apply(self, sample, context=None): def apply(self, sample, context=None):
"""Filp the image and bounding box. """Filp the image and bounding box.
Operators: Operators:
...@@ -700,10 +687,6 @@ class RandomFlip(BaseOperator): ...@@ -700,10 +687,6 @@ class RandomFlip(BaseOperator):
if 'gt_segm' in sample and sample['gt_segm'].any(): if 'gt_segm' in sample and sample['gt_segm'].any():
sample['gt_segm'] = sample['gt_segm'][:, :, ::-1] sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
if 'gt_rbox2poly' in sample and sample['gt_rbox2poly'].any():
sample['gt_rbox2poly'] = self.apply_rbox(sample['gt_rbox2poly'],
width)
sample['flipped'] = True sample['flipped'] = True
sample['image'] = im sample['image'] = im
return sample return sample
...@@ -841,16 +824,6 @@ class Resize(BaseOperator): ...@@ -841,16 +824,6 @@ class Resize(BaseOperator):
[im_scale_x, im_scale_y], [im_scale_x, im_scale_y],
[resize_w, resize_h]) [resize_w, resize_h])
# apply rbox
if 'gt_rbox2poly' in sample:
if np.array(sample['gt_rbox2poly']).shape[1] != 8:
logger.warning(
"gt_rbox2poly's length shoule be 8, but actually is {}".
format(len(sample['gt_rbox2poly'])))
sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
[im_scale_x, im_scale_y],
[resize_w, resize_h])
# apply polygon # apply polygon
if 'gt_poly' in sample and len(sample['gt_poly']) > 0: if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2], sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
...@@ -2111,33 +2084,6 @@ class Poly2Mask(BaseOperator): ...@@ -2111,33 +2084,6 @@ class Poly2Mask(BaseOperator):
return sample return sample
@register_op
class Rbox2Poly(BaseOperator):
    """
    Derive `gt_bbox` and `gt_rbox2poly` from the `gt_rbox` rows of a sample.
    """

    def __init__(self):
        super(Rbox2Poly, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_rbox' in sample
        assert sample['gt_rbox'].shape[1] == 5
        rboxes = sample['gt_rbox']
        center_x, center_y = rboxes[:, 0], rboxes[:, 1]
        box_w, box_h = rboxes[:, 2], rboxes[:, 3]
        # Box spanned by centre +/- half extent; the angle column is not used.
        left = center_x - box_w / 2.0
        top = center_y - box_h / 2.0
        right = center_x + box_w / 2.0
        bottom = center_y + box_h / 2.0
        sample['gt_bbox'] = np.stack([left, top, right, bottom], axis=1)
        sample['gt_rbox2poly'] = bbox_utils.rbox2poly_np(rboxes)
        return sample
@register_op @register_op
class AugmentHSV(BaseOperator): class AugmentHSV(BaseOperator):
""" """
...@@ -2456,16 +2402,6 @@ class RandomResizeCrop(BaseOperator): ...@@ -2456,16 +2402,6 @@ class RandomResizeCrop(BaseOperator):
[im_scale_x, im_scale_y], [im_scale_x, im_scale_y],
[resize_w, resize_h]) [resize_w, resize_h])
# apply rbox
if 'gt_rbox2poly' in sample:
if np.array(sample['gt_rbox2poly']).shape[1] != 8:
logger.warn(
"gt_rbox2poly's length shoule be 8, but actually is {}".
format(len(sample['gt_rbox2poly'])))
sample['gt_rbox2poly'] = self.apply_bbox(sample['gt_rbox2poly'],
[im_scale_x, im_scale_y],
[resize_w, resize_h])
# apply polygon # apply polygon
if 'gt_poly' in sample and len(sample['gt_poly']) > 0: if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2], sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from numbers import Number, Integral
import cv2
import numpy as np
import math
import copy
from .operators import register_op, BaseOperator
from .op_helper import poly2rbox_le135, poly2rbox_oc
from ppdet.modeling import bbox_utils
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@register_op
class RRotate(BaseOperator):
    """ Rotate the image together with `gt_poly`, and rebuild `gt_bbox`.

    Args:
        scale (float): rotate scale
        angle (float): rotate angle
        fill_value (int, tuple): fill color
        auto_bound (bool): whether auto bound or not
    """

    def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True):
        super(RRotate, self).__init__()
        self.scale = scale
        self.angle = angle
        self.fill_value = fill_value
        self.auto_bound = auto_bound

    def get_rotated_matrix(self, angle, scale, h, w):
        """Return `(matrix, out_h, out_w)` for rotating an `h` x `w` image."""
        center = ((w - 1) * 0.5, (h - 1) * 0.5)
        matrix = cv2.getRotationMatrix2D(center, -angle, scale)

        # Extent of the rotated image, from the absolute matrix entries.
        abs_cos = np.abs(matrix[0, 0])
        abs_sin = np.abs(matrix[0, 1])
        new_w = h * abs_sin + w * abs_cos
        new_h = h * abs_cos + w * abs_sin
        n_w = int(np.round(new_w))
        n_h = int(np.round(new_h))

        if self.auto_bound:
            # Keep the original canvas size and rebuild the matrix with a
            # scale chosen from the rotated extent.
            ratio = min(w / n_w, h / n_h)
            return cv2.getRotationMatrix2D(center, -angle, ratio), h, w

        # Otherwise grow the canvas and shift the content by half the growth.
        matrix[0, 2] += (new_w - w) * 0.5
        matrix[1, 2] += (new_h - h) * 0.5
        return matrix, n_h, n_w

    def get_rect_from_pts(self, pts, h, w):
        """ get minimum rectangle of points, clipped to the image
        """
        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
        xs = pts[:, 0::2]
        ys = pts[:, 1::2]
        left = np.clip(np.min(xs, axis=1), 0, w)
        top = np.clip(np.min(ys, axis=1), 0, h)
        right = np.clip(np.max(xs, axis=1), 0, w)
        bottom = np.clip(np.max(ys, axis=1), 0, h)
        return np.stack([left, top, right, bottom], axis=-1)

    def apply_image(self, image, matrix, h, w):
        return cv2.warpAffine(
            image, matrix, (w, h), borderValue=self.fill_value)

    def apply_pts(self, pts, matrix, h, w):
        """Apply the affine `matrix` to rows of flattened (x, y) pairs."""
        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
        # Each row holds `row_len` values, i.e. row_len / 2 points.
        _, row_len = pts.shape
        # One column per point: shape (2, num_points).
        xy = pts.reshape(-1, 2).T
        # Append a row of ones to obtain homogeneous coordinates.
        ones_row = np.ones((1, xy.shape[1]), pts.dtype)
        homogeneous = np.concatenate((xy, ones_row), axis=0)
        transformed = np.matmul(matrix, homogeneous)
        return transformed[:2, :].T.reshape(-1, row_len)

    def apply(self, sample, context=None):
        image = sample['image']
        src_h, src_w = image.shape[:2]
        matrix, out_h, out_w = self.get_rotated_matrix(self.angle, self.scale,
                                                       src_h, src_w)
        sample['image'] = self.apply_image(image, matrix, out_h, out_w)
        polys = sample['gt_poly']
        # TODO: segment or keypoint to be processed
        if len(polys) > 0:
            rotated = self.apply_pts(polys, matrix, out_h, out_w)
            sample['gt_poly'] = rotated
            sample['gt_bbox'] = self.get_rect_from_pts(rotated, out_h, out_w)

        return sample
@register_op
class RandomRRotate(BaseOperator):
    """ Randomly rotate a sample by delegating to `RRotate`.

    Args:
        scale (float, tuple, list): rotate scale
        scale_mode (str): mode of scale, [range, value, None]
        angle (float, tuple, list): rotate angle
        angle_mode (str): mode of angle, [range, value, None]
        fill_value (float, tuple, list): fill value
        rotate_prob (float): probability of rotation
        auto_bound (bool): whether auto bound or not
    """

    def __init__(self,
                 scale=1.0,
                 scale_mode=None,
                 angle=0.,
                 angle_mode=None,
                 fill_value=0.,
                 rotate_prob=1.0,
                 auto_bound=True):
        super(RandomRRotate, self).__init__()
        self.scale = scale
        self.scale_mode = scale_mode
        self.angle = angle
        self.angle_mode = angle_mode
        self.fill_value = fill_value
        self.rotate_prob = rotate_prob
        self.auto_bound = auto_bound

    def get_angle(self, angle, angle_mode):
        """Return `angle` as is (no mode), a uniform draw from the
        `(low, high)` pair ('range'), or a random element ('value')."""
        assert not angle_mode or angle_mode in [
            'range', 'value'
        ], 'angle mode should be in [range, value, None]'
        if not angle_mode:
            return angle
        if angle_mode == 'range':
            lower, upper = angle
            return np.random.rand() * (upper - lower) + lower
        if angle_mode == 'value':
            return np.random.choice(angle)

    def get_scale(self, scale, scale_mode):
        """Return `scale` as is (no mode), a uniform draw from the
        `(low, high)` pair ('range'), or a random element ('value')."""
        assert not scale_mode or scale_mode in [
            'range', 'value'
        ], 'scale mode should be in [range, value, None]'
        if not scale_mode:
            return scale
        if scale_mode == 'range':
            lower, upper = scale
            return np.random.rand() * (upper - lower) + lower
        if scale_mode == 'value':
            return np.random.choice(scale)

    def apply(self, sample, context=None):
        # Leave the sample untouched when the draw exceeds `rotate_prob`.
        if np.random.rand() > self.rotate_prob:
            return sample

        chosen_angle = self.get_angle(self.angle, self.angle_mode)
        chosen_scale = self.get_scale(self.scale, self.scale_mode)
        op = RRotate(chosen_scale, chosen_angle, self.fill_value,
                     self.auto_bound)
        return op(sample)
@register_op
class Poly2RBox(BaseOperator):
    """ Polygon to Rotated Box, using new OpenCV definition since 4.5.1

    Args:
        filter_threshold (int, float): threshold to filter annotations
        filter_mode (str): filter mode, ['area', 'edge']
        rbox_type (str): rbox type, ['le135', 'oc']
    """

    def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'):
        super(Poly2RBox, self).__init__()

        def _should_drop(size):
            return self.filter(size, filter_threshold, filter_mode)

        self.filter_fn = _should_drop
        # Any value other than 'le135' selects the 'oc' converter.
        if rbox_type == 'le135':
            self.rbox_fn = poly2rbox_le135
        else:
            self.rbox_fn = poly2rbox_oc

    def filter(self, size, threshold, mode):
        """Return True when a box of `size` (w, h) should be discarded."""
        if mode == 'area' and size[0] * size[1] < threshold:
            return True
        if mode == 'edge' and min(size) < threshold:
            return True
        return False

    def get_rbox(self, polys):
        """Convert polygons to rotated boxes plus axis-aligned boxes.

        Returns `(rboxes, bboxes, valid_ids)`, where `valid_ids` lists the
        indices of the polygons that were kept.
        """
        kept, rbox_rows, bbox_rows = [], [], []
        for idx, poly in enumerate(polys):
            cx, cy, w, h, angle = self.rbox_fn(poly)
            if self.filter_fn((w, h)):
                continue
            kept.append(idx)
            rbox_rows.append(
                np.array(
                    [cx, cy, w, h, angle], dtype=np.float32))
            xs, ys = poly[0::2], poly[1::2]
            bbox_rows.append(
                np.array(
                    [min(xs), min(ys), max(xs), max(ys)], dtype=np.float32))

        if len(kept) == 0:
            # Keep well-formed empty arrays so downstream shapes stay valid.
            return (np.zeros(
                (0, 5), dtype=np.float32), np.zeros(
                    (0, 4), dtype=np.float32), kept)

        return np.stack(rbox_rows), np.stack(bbox_rows), kept

    def apply(self, sample, context=None):
        rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly'])
        sample['gt_rbox'] = rboxes
        sample['gt_bbox'] = bboxes
        # Drop the filtered annotations from every per-box field present.
        per_box_keys = ('gt_class', 'gt_score', 'gt_poly', 'is_crowd',
                        'difficult')
        for key in per_box_keys:
            if key in sample:
                sample[key] = sample[key][valid_ids]

        return sample
@register_op
class Poly2Array(BaseOperator):
    """ Convert `gt_poly` to a float32 np.array of shape (-1, 8) for rotated
    bboxes. Samples without `gt_poly` are returned unchanged.
    """

    def __init__(self):
        super(Poly2Array, self).__init__()

    def apply(self, sample, context=None):
        if 'gt_poly' in sample:
            polys = np.array(
                sample['gt_poly'], dtype=np.float32).reshape((-1, 8))
            # Report the actual shape, and only at debug level: this runs for
            # every sample, so an info-level dump of the polygon values would
            # flood the training log.
            logger.debug('gt_poly shape: {}'.format(polys.shape))
            sample['gt_poly'] = polys
        return sample
@register_op
class RResize(BaseOperator):
    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
        """
        Resize image to target size. if keep_ratio is True, the image is
        scaled by one factor so that it fits inside target_size;
        if keep_ratio is False, resize the image to target size(h, w)
        Args:
            target_size (int|list): image target size
            keep_ratio (bool): whether keep_ratio or not (required, no default)
            interp (int): the interpolation method
        Raises:
            TypeError: if target_size is neither an integer nor a sequence
        """
        super(RResize, self).__init__()
        self.keep_ratio = keep_ratio
        self.interp = interp
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
                format(type(target_size)))
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def apply_image(self, image, scale):
        im_scale_x, im_scale_y = scale

        return cv2.resize(
            image,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)

    def apply_pts(self, pts, scale, size):
        """Scale flattened (x, y) pairs in place and clip them to `size`
        given as (width, height). Returns the same array."""
        im_scale_x, im_scale_y = scale
        resize_w, resize_h = size
        pts[:, 0::2] *= im_scale_x
        pts[:, 1::2] *= im_scale_y
        pts[:, 0::2] = np.clip(pts[:, 0::2], 0, resize_w)
        pts[:, 1::2] = np.clip(pts[:, 1::2], 0, resize_h)
        return pts

    def apply(self, sample, context=None):
        """ Resize the image numpy, then rescale `gt_bbox` and `gt_poly`.

        Also writes `im_shape` and updates (or creates) `scale_factor`.

        Raises:
            TypeError: if the image is not a numpy array
            ValueError: if the image is not 3-dimensional
        """
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            # `ImageError` is not imported in this module, so raising it would
            # surface as a NameError; use the built-in ValueError instead.
            raise ValueError('{}: image is not 3-dimensional.'.format(self))

        # apply image
        im_shape = im.shape
        if self.keep_ratio:

            im_size_min = np.min(im_shape[0:2])
            im_size_max = np.max(im_shape[0:2])

            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)

            # Largest uniform scale for which both sides still fit.
            im_scale = min(target_size_min / im_size_min,
                           target_size_max / im_size_max)

            resize_h = im_scale * float(im_shape[0])
            resize_w = im_scale * float(im_shape[1])

            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / im_shape[0]
            im_scale_x = resize_w / im_shape[1]

        im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
        sample['image'] = im.astype(np.float32)
        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
        if 'scale_factor' in sample:
            # Compose with any scale applied by an earlier operator.
            scale_factor = sample['scale_factor']
            sample['scale_factor'] = np.asarray(
                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
                dtype=np.float32)
        else:
            sample['scale_factor'] = np.asarray(
                [im_scale_y, im_scale_x], dtype=np.float32)

        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'],
                                               [im_scale_x, im_scale_y],
                                               [resize_w, resize_h])

        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_pts(sample['gt_poly'],
                                               [im_scale_x, im_scale_y],
                                               [resize_w, resize_h])

        return sample
@register_op
class RandomRFlip(BaseOperator):
    def __init__(self, prob=0.5):
        """
        Randomly flip the image horizontally together with `gt_bbox` and
        `gt_poly`.
        Args:
            prob (float): the probability of flipping image
        Raises:
            TypeError: if prob is not a float
        """
        super(RandomRFlip, self).__init__()
        self.prob = prob
        if not (isinstance(self.prob, float)):
            raise TypeError("{}: input type is invalid.".format(self))

    def apply_image(self, image):
        return image[:, ::-1, :]

    def apply_pts(self, pts, width):
        """Mirror the x coordinates (even columns) in place:
        x -> width - x - 1. Returns the same array."""
        oldx = pts[:, 0::2].copy()
        pts[:, 0::2] = width - oldx - 1
        return pts

    def apply(self, sample, context=None):
        """Flip the image, bounding boxes and polygons.
        Operators:
            1. Flip the image numpy.
            2. Transform the bboxes' x coordinates.
            3. Transform the polygons' x coordinates.
        Output:
            sample: the image, `gt_bbox` and `gt_poly` in sample are flipped,
                and `flipped` is set to True. The sample is returned
                unchanged when the random draw is not below `prob`.
        """
        if np.random.uniform(0, 1) < self.prob:
            im = sample['image']
            height, width = im.shape[:2]
            im = self.apply_image(im)
            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
                flipped_bbox = self.apply_pts(sample['gt_bbox'], width)
                # Mirroring turns the left edge into the right edge, so the
                # x columns must be swapped to keep [xmin, ymin, xmax, ymax].
                # The fancy-indexed right-hand side is a copy, so the swap
                # is safe.
                flipped_bbox[:, [0, 2]] = flipped_bbox[:, [2, 0]]
                sample['gt_bbox'] = flipped_bbox
            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
                sample['gt_poly'] = self.apply_pts(sample['gt_poly'], width)

            sample['flipped'] = True
            sample['image'] = im
        return sample
@register_op
class VisibleRBox(BaseOperator):
    """
    In debug mode, visualize images according to `gt_poly`.
    (Currently only supported when not cropping and flipping image.)

    Args:
        output_dir (str): directory the annotated images are written to;
            created if it does not exist.
        is_normalized (bool): whether `gt_keypoint` coordinates are
            normalized and must be scaled by the sample's `w` / `h` before
            drawing. Defaults to False.
    """

    def __init__(self, output_dir='debug', is_normalized=False):
        # `os` is not imported at module level in this file.
        import os

        super(VisibleRBox, self).__init__()
        self.output_dir = output_dir
        # `apply` reads this attribute, so it has to be initialised here.
        self.is_normalized = is_normalized
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

    def apply(self, sample, context=None):
        # These names are used below but not imported at module level here.
        import os
        from PIL import Image, ImageDraw

        image = Image.fromarray(sample['image'].astype(np.uint8))
        out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
        width = sample['w']
        height = sample['h']
        # gt_poly = sample['gt_rbox']
        gt_poly = sample['gt_poly']
        gt_class = sample['gt_class']
        draw = ImageDraw.Draw(image)
        for i in range(gt_poly.shape[0]):
            x1, y1, x2, y2, x3, y3, x4, y4 = gt_poly[i]
            draw.line(
                [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
                width=2,
                fill='green')
            # draw label
            xmin = min(x1, x2, x3, x4)
            ymin = min(y1, y2, y3, y4)
            text = str(gt_class[i][0])
            if hasattr(draw, 'textsize'):
                tw, th = draw.textsize(text)
            else:
                # Fall back to `textbbox` when `textsize` is unavailable.
                left, top, right, bottom = draw.textbbox((0, 0), text)
                tw, th = right - left, bottom - top
            draw.rectangle(
                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))

        if 'gt_keypoint' in sample.keys():
            # Work on a copy so visualisation never alters the sample data.
            gt_keypoint = np.array(sample['gt_keypoint'], copy=True)
            if self.is_normalized:
                # Even columns are x (scaled by width), odd columns are y.
                for i in range(gt_keypoint.shape[1]):
                    if i % 2:
                        gt_keypoint[:, i] = gt_keypoint[:, i] * height
                    else:
                        gt_keypoint[:, i] = gt_keypoint[:, i] * width
            for i in range(gt_keypoint.shape[0]):
                keypoint = gt_keypoint[i]
                for j in range(int(keypoint.shape[0] / 2)):
                    # `round` may return a plain int, which has no `.astype`.
                    x1 = int(round(float(keypoint[2 * j])))
                    y1 = int(round(float(keypoint[2 * j + 1])))
                    draw.ellipse(
                        (x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
        save_path = os.path.join(self.output_dir, out_file_name)
        image.save(save_path, quality=95)
        return sample
@register_op
class Rbox2Poly(BaseOperator):
    """
    Derive `gt_bbox` and `gt_poly` from the `gt_rbox` rows of a sample.
    """

    def __init__(self):
        super(Rbox2Poly, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_rbox' in sample
        assert sample['gt_rbox'].shape[1] == 5
        rboxes = sample['gt_rbox']
        center_x, center_y = rboxes[:, 0], rboxes[:, 1]
        box_w, box_h = rboxes[:, 2], rboxes[:, 3]
        # Box spanned by centre +/- half extent; the angle column is not used.
        left = center_x - box_w / 2.0
        top = center_y - box_h / 2.0
        right = center_x + box_w / 2.0
        bottom = center_y + box_h / 2.0
        sample['gt_bbox'] = np.stack([left, top, right, bottom], axis=1)
        sample['gt_poly'] = bbox_utils.rbox2poly_np(rboxes)
        return sample
...@@ -138,8 +138,7 @@ def calc_rbox_iou(pred, gt_rbox): ...@@ -138,8 +138,7 @@ def calc_rbox_iou(pred, gt_rbox):
def prune_zero_padding(gt_box, gt_label, difficult=None): def prune_zero_padding(gt_box, gt_label, difficult=None):
valid_cnt = 0 valid_cnt = 0
for i in range(len(gt_box)): for i in range(len(gt_box)):
if gt_box[i, 0] == 0 and gt_box[i, 1] == 0 and \ if (gt_box[i] == 0).all():
gt_box[i, 2] == 0 and gt_box[i, 3] == 0:
break break
valid_cnt += 1 valid_cnt += 1
return (gt_box[:valid_cnt], gt_label[:valid_cnt], difficult[:valid_cnt] return (gt_box[:valid_cnt], gt_label[:valid_cnt], difficult[:valid_cnt]
...@@ -331,8 +330,9 @@ class DetectionMAP(object): ...@@ -331,8 +330,9 @@ class DetectionMAP(object):
num_columns = min(6, len(results_per_category) * 2) num_columns = min(6, len(results_per_category) * 2)
results_flatten = list(itertools.chain(*results_per_category)) results_flatten = list(itertools.chain(*results_per_category))
headers = ['category', 'AP'] * (num_columns // 2) headers = ['category', 'AP'] * (num_columns // 2)
results_2d = itertools.zip_longest( results_2d = itertools.zip_longest(* [
*[results_flatten[i::num_columns] for i in range(num_columns)]) results_flatten[i::num_columns] for i in range(num_columns)
])
table_data = [headers] table_data = [headers]
table_data += [result for result in results_2d] table_data += [result for result in results_2d]
table = AsciiTable(table_data) table = AsciiTable(table_data)
......
...@@ -347,22 +347,12 @@ class WiderFaceMetric(Metric): ...@@ -347,22 +347,12 @@ class WiderFaceMetric(Metric):
class RBoxMetric(Metric): class RBoxMetric(Metric):
def __init__(self, anno_file, **kwargs): def __init__(self, anno_file, **kwargs):
assert os.path.isfile(anno_file), \
"anno_file {} not a file".format(anno_file)
assert os.path.exists(anno_file), "anno_file {} not exists".format(
anno_file)
self.anno_file = anno_file self.anno_file = anno_file
self.gt_anno = json.load(open(self.anno_file)) self.clsid2catid, self.catid2name = get_categories('COCO', anno_file)
cats = self.gt_anno['categories'] self.catid2clsid = {v: k for k, v in self.clsid2catid.items()}
self.clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
self.catid2clsid = {cat['id']: i for i, cat in enumerate(cats)}
self.catid2name = {cat['id']: cat['name'] for cat in cats}
self.classwise = kwargs.get('classwise', False) self.classwise = kwargs.get('classwise', False)
self.output_eval = kwargs.get('output_eval', None) self.output_eval = kwargs.get('output_eval', None)
# TODO: bias should be unified
self.bias = kwargs.get('bias', 0)
self.save_prediction_only = kwargs.get('save_prediction_only', False) self.save_prediction_only = kwargs.get('save_prediction_only', False)
self.iou_type = kwargs.get('IouType', 'bbox')
self.overlap_thresh = kwargs.get('overlap_thresh', 0.5) self.overlap_thresh = kwargs.get('overlap_thresh', 0.5)
self.map_type = kwargs.get('map_type', '11point') self.map_type = kwargs.get('map_type', '11point')
self.evaluate_difficult = kwargs.get('evaluate_difficult', False) self.evaluate_difficult = kwargs.get('evaluate_difficult', False)
...@@ -379,7 +369,7 @@ class RBoxMetric(Metric): ...@@ -379,7 +369,7 @@ class RBoxMetric(Metric):
self.reset() self.reset()
def reset(self): def reset(self):
self.result_bbox = [] self.results = []
self.detection_map.reset() self.detection_map.reset()
def update(self, inputs, outputs): def update(self, inputs, outputs):
...@@ -389,35 +379,45 @@ class RBoxMetric(Metric): ...@@ -389,35 +379,45 @@ class RBoxMetric(Metric):
outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
im_id = inputs['im_id'] im_id = inputs['im_id']
outs['im_id'] = im_id.numpy() if isinstance(im_id, im_id = im_id.numpy() if isinstance(im_id, paddle.Tensor) else im_id
paddle.Tensor) else im_id outs['im_id'] = im_id
infer_results = get_infer_results( infer_results = get_infer_results(outs, self.clsid2catid)
outs, self.clsid2catid, bias=self.bias) infer_results = infer_results['bbox'] if 'bbox' in infer_results else []
self.result_bbox += infer_results[ self.results += infer_results
'bbox'] if 'bbox' in infer_results else [] if self.save_prediction_only:
bbox = [b['bbox'] for b in self.result_bbox] return
score = [b['score'] for b in self.result_bbox]
label = [b['category_id'] for b in self.result_bbox] gt_boxes = inputs['gt_rbox']
label = [self.catid2clsid[e] for e in label] gt_labels = inputs['gt_class']
gt_box = [ for i in range(len(gt_boxes)):
e['bbox'] for e in self.gt_anno['annotations'] gt_box = gt_boxes[i].numpy() if isinstance(
if e['image_id'] == outs['im_id'] gt_boxes[i], paddle.Tensor) else gt_boxes[i]
] gt_label = gt_labels[i].numpy() if isinstance(
gt_label = [ gt_labels[i], paddle.Tensor) else gt_labels[i]
e['category_id'] for e in self.gt_anno['annotations'] gt_box, gt_label, _ = prune_zero_padding(gt_box, gt_label)
if e['image_id'] == outs['im_id'] bbox = [
] res['bbox'] for res in infer_results
gt_label = [self.catid2clsid[e] for e in gt_label] if int(res['image_id']) == int(im_id[i])
self.detection_map.update(bbox, score, label, gt_box, gt_label) ]
score = [
res['score'] for res in infer_results
if int(res['image_id']) == int(im_id[i])
]
label = [
self.catid2clsid[int(res['category_id'])]
for res in infer_results
if int(res['image_id']) == int(im_id[i])
]
self.detection_map.update(bbox, score, label, gt_box, gt_label)
def accumulate(self): def accumulate(self):
if len(self.result_bbox) > 0: if len(self.results) > 0:
output = "bbox.json" output = "bbox.json"
if self.output_eval: if self.output_eval:
output = os.path.join(self.output_eval, output) output = os.path.join(self.output_eval, output)
with open(output, 'w') as f: with open(output, 'w') as f:
json.dump(self.result_bbox, f) json.dump(self.results, f)
logger.info('The bbox result is saved to bbox.json.') logger.info('The bbox result is saved to bbox.json.')
if self.save_prediction_only: if self.save_prediction_only:
......
...@@ -26,26 +26,21 @@ __all__ = ['S2ANet'] ...@@ -26,26 +26,21 @@ __all__ = ['S2ANet']
@register @register
class S2ANet(BaseArch): class S2ANet(BaseArch):
__category__ = 'architecture' __category__ = 'architecture'
__inject__ = [ __inject__ = ['head']
's2anet_head',
's2anet_bbox_post_process',
]
def __init__(self, backbone, neck, s2anet_head, s2anet_bbox_post_process): def __init__(self, backbone, neck, head):
""" """
S2ANet, see https://arxiv.org/pdf/2008.09397.pdf S2ANet, see https://arxiv.org/pdf/2008.09397.pdf
Args: Args:
backbone (object): backbone instance backbone (object): backbone instance
neck (object): `FPN` instance neck (object): `FPN` instance
s2anet_head (object): `S2ANetHead` instance head (object): `Head` instance
s2anet_bbox_post_process (object): `S2ANetBBoxPostProcess` instance
""" """
super(S2ANet, self).__init__() super(S2ANet, self).__init__()
self.backbone = backbone self.backbone = backbone
self.neck = neck self.neck = neck
self.s2anet_head = s2anet_head self.s2anet_head = head
self.s2anet_bbox_post_process = s2anet_bbox_post_process
@classmethod @classmethod
def from_config(cls, cfg, *args, **kwargs): def from_config(cls, cfg, *args, **kwargs):
...@@ -55,42 +50,28 @@ class S2ANet(BaseArch): ...@@ -55,42 +50,28 @@ class S2ANet(BaseArch):
out_shape = neck and neck.out_shape or backbone.out_shape out_shape = neck and neck.out_shape or backbone.out_shape
kwargs = {'input_shape': out_shape} kwargs = {'input_shape': out_shape}
s2anet_head = create(cfg['s2anet_head'], **kwargs) head = create(cfg['head'], **kwargs)
s2anet_bbox_post_process = create(cfg['s2anet_bbox_post_process'],
**kwargs)
return { return {'backbone': backbone, 'neck': neck, "head": head}
'backbone': backbone,
'neck': neck,
"s2anet_head": s2anet_head,
"s2anet_bbox_post_process": s2anet_bbox_post_process,
}
def _forward(self): def _forward(self):
body_feats = self.backbone(self.inputs) body_feats = self.backbone(self.inputs)
if self.neck is not None: if self.neck is not None:
body_feats = self.neck(body_feats) body_feats = self.neck(body_feats)
self.s2anet_head(body_feats)
if self.training: if self.training:
loss = self.s2anet_head.get_loss(self.inputs) loss = self.s2anet_head(body_feats, self.inputs)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss return loss
else: else:
im_shape = self.inputs['im_shape'] head_outs = self.s2anet_head(body_feats)
scale_factor = self.inputs['scale_factor']
nms_pre = self.s2anet_bbox_post_process.nms_pre
pred_scores, pred_bboxes = self.s2anet_head.get_prediction(nms_pre)
# post_process # post_process
pred_bboxes, bbox_num = self.s2anet_bbox_post_process(pred_scores, bboxes, bbox_num = self.s2anet_head.get_bboxes(head_outs)
pred_bboxes)
# rescale the prediction back to origin image # rescale the prediction back to origin image
pred_bboxes = self.s2anet_bbox_post_process.get_pred( im_shape = self.inputs['im_shape']
pred_bboxes, bbox_num, im_shape, scale_factor) scale_factor = self.inputs['scale_factor']
bboxes = self.s2anet_head.get_pred(bboxes, bbox_num, im_shape,
scale_factor)
# output # output
output = {'bbox': pred_bboxes, 'bbox_num': bbox_num} output = {'bbox': bboxes, 'bbox_num': bbox_num}
return output return output
def get_loss(self, ): def get_loss(self, ):
......
此差异已折叠。
...@@ -39,6 +39,81 @@ def _to_list(l): ...@@ -39,6 +39,81 @@ def _to_list(l):
return [l] return [l]
class AlignConv(nn.Layer):
    """Deformable convolution whose sampling offsets come from rotated anchors.

    Instead of learning the offsets, `get_offset` places the kernel sampling
    grid on each rotated anchor and hands the difference to the regular grid
    to `paddle.vision.ops.DeformConv2D`.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        kernel_size (int): side of the square kernel. Default 3.
        groups (int): `groups` argument forwarded to DeformConv2D. Default 1.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, groups=1):
        super(AlignConv, self).__init__()
        self.kernel_size = kernel_size
        self.align_conv = paddle.vision.ops.DeformConv2D(
            in_channels,
            out_channels,
            kernel_size=self.kernel_size,
            padding=(self.kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
            bias_attr=None)

    @paddle.no_grad()
    def get_offset(self, anchors, featmap_size, stride):
        """Compute the deformable-conv offsets that align the kernel to anchors.

        Args:
            anchors: [B, L, 5] xc,yc,w,h,angle
                (L must equal feat_h * feat_w for the final reshape to work)
            featmap_size: (feat_h, feat_w)
            stride: stride of the feature map w.r.t. the image, e.g. 8

        Returns:
            Tensor of shape [B, 2 * kernel_size * kernel_size, feat_h, feat_w];
            for every kernel point the y offset is stored before the x offset.
        """
        batch = anchors.shape[0]
        dtype = anchors.dtype
        feat_h, feat_w = featmap_size
        pad = (self.kernel_size - 1) // 2

        # relative kernel coordinates, e.g. -1, 0, 1 for a 3x3 kernel
        idx = paddle.arange(-pad, pad + 1, dtype=dtype)
        yy, xx = paddle.meshgrid(idx, idx)
        xx = paddle.reshape(xx, [-1])
        yy = paddle.reshape(yy, [-1])

        # get sampling locations of default conv
        xc = paddle.arange(0, feat_w, dtype=dtype)
        yc = paddle.arange(0, feat_h, dtype=dtype)
        yc, xc = paddle.meshgrid(yc, xc)
        xc = paddle.reshape(xc, [-1, 1])
        yc = paddle.reshape(yc, [-1, 1])
        x_conv = xc + xx
        y_conv = yc + yy

        # get sampling locations of anchors (in feature-map units)
        x_ctr, y_ctr, w, h, a = paddle.split(anchors, 5, axis=-1)
        x_ctr = x_ctr / stride
        y_ctr = y_ctr / stride
        w_s = w / stride
        h_s = h / stride
        cos, sin = paddle.cos(a), paddle.sin(a)
        # spread the kernel points over the anchor, then rotate and translate
        dw, dh = w_s / self.kernel_size, h_s / self.kernel_size
        x, y = dw * xx, dh * yy
        xr = cos * x - sin * y
        yr = sin * x + cos * y
        x_anchor, y_anchor = xr + x_ctr, yr + y_ctr

        # get offset field
        offset_x = x_anchor - x_conv
        offset_y = y_anchor - y_conv
        offset = paddle.stack([offset_y, offset_x], axis=-1)
        offset = offset.reshape(
            [batch, feat_h, feat_w, self.kernel_size * self.kernel_size * 2])
        offset = offset.transpose([0, 3, 1, 2])
        return offset

    def forward(self, x, refine_anchors, featmap_size, stride):
        """Apply the anchor-aligned deformable convolution followed by ReLU.

        Args:
            x: input feature map (presumably [B, C, feat_h, feat_w] -- confirm
                against the caller).
            refine_anchors: anchors passed to `get_offset`, [B, L, 5].
            featmap_size: (feat_h, feat_w)
            stride: stride of this feature level.

        Returns:
            Tensor: the aligned, ReLU-activated feature map.
        """
        # NOTE: the previous `paddle.shape(x)[0].numpy()` was unused and only
        # forced a device-to-host copy, so it has been dropped.
        offset = self.get_offset(refine_anchors, featmap_size, stride)
        if self.training:
            x = F.relu(self.align_conv(x, offset.detach()))
        else:
            x = F.relu(self.align_conv(x, offset))
        return x
class DeformableConvV2(nn.Layer): class DeformableConvV2(nn.Layer):
def __init__(self, def __init__(self,
in_channels, in_channels,
......
...@@ -27,8 +27,8 @@ except Exception: ...@@ -27,8 +27,8 @@ except Exception:
__all__ = [ __all__ = [
'BBoxPostProcess', 'MaskPostProcess', 'FCOSPostProcess', 'BBoxPostProcess', 'MaskPostProcess', 'FCOSPostProcess',
'S2ANetBBoxPostProcess', 'JDEBBoxPostProcess', 'CenterNetPostProcess', 'JDEBBoxPostProcess', 'CenterNetPostProcess', 'DETRBBoxPostProcess',
'DETRBBoxPostProcess', 'SparsePostProcess' 'SparsePostProcess'
] ]
...@@ -294,109 +294,6 @@ class FCOSPostProcess(object): ...@@ -294,109 +294,6 @@ class FCOSPostProcess(object):
return bbox_pred, bbox_num return bbox_pred, bbox_num
@register
class S2ANetBBoxPostProcess(nn.Layer):
    """Post-processing for S2ANet: NMS on rotated boxes followed by rescaling
    of the resulting polygons to the original image.
    """
    __shared__ = ['num_classes']
    __inject__ = ['nms']

    def __init__(self, num_classes=15, nms_pre=2000, min_bbox_size=0, nms=None):
        super(S2ANetBBoxPostProcess, self).__init__()
        self.num_classes = num_classes
        self.nms_pre = nms_pre
        self.min_bbox_size = min_bbox_size
        self.nms = nms
        self.origin_shape_list = []
        # Placeholder result (label -1, zero score, zero polygon) that is
        # returned when NMS yields nothing.
        placeholder_row = np.array(
            [[-1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
            dtype='float32')
        self.fake_pred_cls_score_bbox = paddle.to_tensor(placeholder_row)
        self.fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))

    def forward(self, pred_scores, pred_bboxes):
        """Run NMS on the rotated predictions.

        Args:
            pred_scores: [N, M] score
            pred_bboxes: [N, 5] xc, yc, w, h, a

        Returns:
            tuple: (pred_cls_score_bbox reshaped to [-1, 10], bbox_num)
        """
        # rboxes -> polygons, with a leading batch axis for the NMS op
        polys = paddle.unsqueeze(rbox2poly(pred_bboxes), axis=0)
        # scores [N, M] --> [M, N], again with a leading batch axis
        scores = paddle.unsqueeze(
            paddle.transpose(pred_scores, [1, 0]), axis=0)
        nms_out, bbox_num, _ = self.nms(polys, scores, self.num_classes)

        # Prevent empty bbox_pred from decode or NMS.
        # Bboxes and score before NMS may be empty due to the score threshold.
        nothing_kept = nms_out.shape[0] <= 0 or nms_out.shape[1] <= 1
        if nothing_kept:
            nms_out = self.fake_pred_cls_score_bbox
            bbox_num = self.fake_bbox_num

        return paddle.reshape(nms_out, [-1, 10]), bbox_num

    def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
        """
        Rescale and clip the polygons coming out of NMS to get the final
        prediction.

        Args:
            bboxes(Tensor): bboxes [N, 10]: label, score, then 8 polygon
                coordinates
            bbox_num(Tensor): bbox_num, number of boxes per image
            im_shape(Tensor): per-image input shape, 2 values per row
            scale_factor(Tensor): per-image (scale_y, scale_x)

        Returns:
            bbox_pred(Tensor): The output is the prediction with shape [N, 10]
                including labels, scores and polygons. The polygon
                coordinates correspond to the original image.
        """
        origin_shape = paddle.floor(im_shape / scale_factor + 0.5)

        # Repeat each image's shape and scale once per box of that image.
        per_box_shapes = []
        per_box_scales = []
        for img_idx in range(bbox_num.shape[0]):
            per_box_shapes.append(
                paddle.expand(origin_shape[img_idx:img_idx + 1, :],
                              [bbox_num[img_idx], 2]))
            # scale_factor: scale_y, scale_x
            scale_y = scale_factor[img_idx][0]
            scale_x = scale_factor[img_idx][1]
            xy_scale = paddle.concat([scale_x, scale_y] * 4)
            per_box_scales.append(
                paddle.expand(xy_scale, [bbox_num[img_idx], 8]))
        per_box_shapes = paddle.concat(per_box_shapes)
        per_box_scales = paddle.concat(per_box_scales)

        # bboxes: [N, 10], label, score, bbox
        label_and_score = bboxes[:, 0:2]
        polygons = bboxes[:, 2:].reshape([-1, 8])

        # rescale bbox to original image
        rescaled = polygons / per_box_scales
        origin_h = per_box_shapes[:, 0]
        origin_w = per_box_shapes[:, 1]
        zeros = paddle.zeros_like(origin_h)

        # Even columns are x (clipped by width), odd columns are y (height).
        clipped = []
        for col in range(8):
            limit = origin_w if col % 2 == 0 else origin_h
            clipped.append(
                paddle.maximum(
                    paddle.minimum(rescaled[:, col], limit - 1), zeros))
        polygons = paddle.stack(clipped, axis=-1)

        return paddle.concat([label_and_score, polygons], axis=1)
@register @register
class JDEBBoxPostProcess(nn.Layer): class JDEBBoxPostProcess(nn.Layer):
__shared__ = ['num_classes'] __shared__ = ['num_classes']
......
...@@ -19,10 +19,11 @@ import math ...@@ -19,10 +19,11 @@ import math
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
import numpy as np
from ppdet.core.workspace import register from ppdet.core.workspace import register
__all__ = ['AnchorGenerator', 'RetinaAnchorGenerator'] __all__ = ['AnchorGenerator', 'RetinaAnchorGenerator', 'S2ANetAnchorGenerator']
@register @register
...@@ -153,3 +154,113 @@ class RetinaAnchorGenerator(AnchorGenerator): ...@@ -153,3 +154,113 @@ class RetinaAnchorGenerator(AnchorGenerator):
strides=strides, strides=strides,
variance=variance, variance=variance,
offset=offset) offset=offset)
@register
class S2ANetAnchorGenerator(nn.Layer):
    """
    AnchorGenerator by paddle

    Generates axis-aligned base anchors for every scale/ratio combination,
    tiles them over a feature map and returns them as rotated boxes
    (x_ctr, y_ctr, w, h, angle).

    Args:
        base_size (int|float): side length of the base anchor.
        scales (list): anchor scales.
        ratios (list): anchor aspect ratios (h / w).
        scale_major (bool): ordering of the scale/ratio combinations in the
            flattened base-anchor list. Default True.
        ctr (tuple|None): (x, y) center of the base anchors; when None the
            center of the base_size square is used.
    """

    def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
        super(S2ANetAnchorGenerator, self).__init__()
        self.base_size = base_size
        self.scales = paddle.to_tensor(scales)
        self.ratios = paddle.to_tensor(ratios)
        self.scale_major = scale_major
        self.ctr = ctr
        self.base_anchors = self.gen_base_anchors()

    @property
    def num_base_anchors(self):
        """Number of base anchors placed at every feature-map location."""
        return self.base_anchors.shape[0]

    def gen_base_anchors(self):
        """Build the [A, 4] (x1, y1, x2, y2) base anchors.

        A is len(ratios) * len(scales). The original elementwise product
        `ratios * scales` made both `scale_major` branches identical and only
        worked for broadcast-compatible lengths; an outer product is used
        instead. For a single scale and a single ratio the result is the same.
        """
        w = self.base_size
        h = self.base_size
        if self.ctr is None:
            x_ctr = 0.5 * (w - 1)
            y_ctr = 0.5 * (h - 1)
        else:
            x_ctr, y_ctr = self.ctr

        h_ratios = paddle.sqrt(self.ratios)
        w_ratios = 1 / h_ratios
        if self.scale_major:
            # rows index ratios, columns index scales
            ws = (w * w_ratios.reshape([-1, 1]) *
                  self.scales.reshape([1, -1])).reshape([-1])
            hs = (h * h_ratios.reshape([-1, 1]) *
                  self.scales.reshape([1, -1])).reshape([-1])
        else:
            # rows index scales, columns index ratios
            ws = (w * self.scales.reshape([-1, 1]) *
                  w_ratios.reshape([1, -1])).reshape([-1])
            hs = (h * self.scales.reshape([-1, 1]) *
                  h_ratios.reshape([1, -1])).reshape([-1])

        base_anchors = paddle.stack(
            [
                x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
                x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
            ],
            axis=-1)
        base_anchors = paddle.round(base_anchors)
        return base_anchors

    def _meshgrid(self, x, y, row_major=True):
        """Flattened meshgrid of x and y; returns (xx, yy) when row_major,
        otherwise (yy, xx)."""
        yy, xx = paddle.meshgrid(y, x)
        yy = yy.reshape([-1])
        xx = xx.reshape([-1])
        if row_major:
            return xx, yy
        else:
            return yy, xx

    def forward(self, featmap_size, stride=16):
        """Tile the base anchors over a feature map.

        Args:
            featmap_size: (feat_h, feat_w)
            stride: stride of the feature map w.r.t. the image.

        Returns:
            Tensor of shape [feat_h * feat_w * num_base_anchors, 5] holding
            (x_ctr, y_ctr, w, h, angle); anchors of the same location are
            contiguous, which matches the ordering of `valid_flags`.
        """
        # featmap_size*stride project it to original area
        feat_h = featmap_size[0]
        feat_w = featmap_size[1]
        shift_x = paddle.arange(0, feat_w, 1, 'int32') * stride
        shift_y = paddle.arange(0, feat_h, 1, 'int32') * stride
        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
        shifts = paddle.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1)
        # Cast first so the addition is carried out in float32 explicitly.
        shifts = shifts.cast(paddle.float32)

        # [1, A, 4] + [H*W, 1, 4] -> [H*W, A, 4]; the former [A, 4] + [H*W, 4]
        # only broadcast correctly for a single base anchor.
        all_anchors = paddle.unsqueeze(
            self.base_anchors, axis=0) + paddle.unsqueeze(
                shifts, axis=1)
        all_anchors = all_anchors.cast(paddle.float32).reshape(
            [feat_h * feat_w * self.num_base_anchors, 4])
        all_anchors = self.rect2rbox(all_anchors)
        return all_anchors

    def valid_flags(self, featmap_size, valid_size):
        """Flag the anchors whose location lies inside the valid region.

        Args:
            featmap_size: (feat_h, feat_w)
            valid_size: (valid_h, valid_w), must not exceed featmap_size.

        Returns:
            int32 Tensor of shape [feat_h * feat_w * num_base_anchors] with 1
            for valid anchors and 0 otherwise.
        """
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        valid_x = paddle.zeros([feat_w], dtype='int32')
        valid_y = paddle.zeros([feat_h], dtype='int32')
        valid_x[:valid_w] = 1
        valid_y[:valid_h] = 1
        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
        valid = valid_xx & valid_yy
        valid = paddle.reshape(valid, [-1, 1])
        valid = paddle.expand(valid, [-1, self.num_base_anchors]).reshape([-1])
        return valid

    def rect2rbox(self, bboxes):
        """
        :param bboxes: shape (L, 4) (xmin, ymin, xmax, ymax)
        :return: dbboxes: shape (L, 5) (x_ctr, y_ctr, w, h, angle)
        """
        x1, y1, x2, y2 = paddle.split(bboxes, 4, axis=-1)

        x_ctr = (x1 + x2) / 2.0
        y_ctr = (y1 + y2) / 2.0
        edges1 = paddle.abs(x2 - x1)
        edges2 = paddle.abs(y2 - y1)

        # w is always the longer edge, h the shorter one
        rbox_w = paddle.maximum(edges1, edges2)
        rbox_h = paddle.minimum(edges1, edges2)

        # set angle: pi/2 when the box is taller than wide, otherwise 0
        inds = edges1 < edges2
        inds = paddle.cast(inds, paddle.float32)
        rboxes_angle = inds * np.pi / 2.0

        rboxes = paddle.concat(
            (x_ctr, y_ctr, rbox_w, rbox_h, rboxes_angle), axis=-1)
        return rboxes
...@@ -365,21 +365,11 @@ class RBoxAssigner(object): ...@@ -365,21 +365,11 @@ class RBoxAssigner(object):
def assign_anchor(self, def assign_anchor(self,
anchors, anchors,
gt_bboxes, gt_bboxes,
gt_lables, gt_labels,
pos_iou_thr, pos_iou_thr,
neg_iou_thr, neg_iou_thr,
min_iou_thr=0.0, min_iou_thr=0.0,
ignore_iof_thr=-2): ignore_iof_thr=-2):
"""
Args:
anchors:
gt_bboxes:[M, 5] rc,yc,w,h,angle
gt_lables:
Returns:
"""
assert anchors.shape[1] == 4 or anchors.shape[1] == 5 assert anchors.shape[1] == 4 or anchors.shape[1] == 5
assert gt_bboxes.shape[1] == 4 or gt_bboxes.shape[1] == 5 assert gt_bboxes.shape[1] == 4 or gt_bboxes.shape[1] == 5
anchors_xc_yc = anchors anchors_xc_yc = anchors
...@@ -428,12 +418,12 @@ class RBoxAssigner(object): ...@@ -428,12 +418,12 @@ class RBoxAssigner(object):
# (4) assign max_iou as pos_ids >=0 # (4) assign max_iou as pos_ids >=0
anchor_gt_bbox_iou_inds = anchor_gt_bbox_inds[gt_bbox_anchor_iou_inds] anchor_gt_bbox_iou_inds = anchor_gt_bbox_inds[gt_bbox_anchor_iou_inds]
# gt_bbox_anchor_iou_inds = np.logical_and(gt_bbox_anchor_iou_inds, anchor_gt_bbox_iou >= min_iou_thr) # gt_bbox_anchor_iou_inds = np.logical_and(gt_bbox_anchor_iou_inds, anchor_gt_bbox_iou >= min_iou_thr)
labels[gt_bbox_anchor_iou_inds] = gt_lables[anchor_gt_bbox_iou_inds] labels[gt_bbox_anchor_iou_inds] = gt_labels[anchor_gt_bbox_iou_inds]
# (5) assign >= pos_iou_thr as pos_ids # (5) assign >= pos_iou_thr as pos_ids
iou_pos_iou_thr_ids = anchor_gt_bbox_iou >= pos_iou_thr iou_pos_iou_thr_ids = anchor_gt_bbox_iou >= pos_iou_thr
iou_pos_iou_thr_ids_box_inds = anchor_gt_bbox_inds[iou_pos_iou_thr_ids] iou_pos_iou_thr_ids_box_inds = anchor_gt_bbox_inds[iou_pos_iou_thr_ids]
labels[iou_pos_iou_thr_ids] = gt_lables[iou_pos_iou_thr_ids_box_inds] labels[iou_pos_iou_thr_ids] = gt_labels[iou_pos_iou_thr_ids_box_inds]
return anchor_gt_bbox_inds, anchor_gt_bbox_iou, labels return anchor_gt_bbox_inds, anchor_gt_bbox_iou, labels
def __call__(self, anchors, gt_bboxes, gt_labels, is_crowd): def __call__(self, anchors, gt_bboxes, gt_labels, is_crowd):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册