未验证 提交 847c4c58 编写于 作者: W Wenyu 提交者: GitHub

Update box head (#6804) (#6823)

* add flag loss_normalize_pos

* add faster vitdet

* update faster vitdet
上级 32be7960
# Faster R-CNN detector with a VisionTransformer backbone and an FPN neck.
# Dataset, runtime, reader and optimizer defaults are inherited from the
# _BASE_ files; the keys below override or extend them.
# NOTE(review): nesting was restored from a flattened copy following the usual
# PaddleDetection component layout -- confirm against the upstream config.
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  './_base_/reader.yml',
  './_base_/optimizer_base_1x.yml'
]

weights: output/faster_rcnn_vit_base_fpn_cae_1x_coco/model_final

# runtime
log_iter: 100
snapshot_epoch: 1
find_unused_parameters: True

use_gpu: true
norm_type: sync_bn

OptimizerBuilder:
  optimizer:
    weight_decay: 0.05

# reader
worker_num: 2
TrainReader:
  batch_size: 1

# model
architecture: FasterRCNN

# Top-level wiring: each value names a component configured in its own
# section further down.
FasterRCNN:
  backbone: VisionTransformer
  neck: FPN
  rpn_head: RPNHead
  bbox_head: BBoxHead
  bbox_post_process: BBoxPostProcess

VisionTransformer:
  patch_size: 16
  embed_dim: 768
  depth: 12
  num_heads: 12
  mlp_ratio: 4
  qkv_bias: True
  drop_rate: 0.0
  drop_path_rate: 0.2
  init_values: 0.1
  final_norm: False
  use_rel_pos_bias: False
  use_sincos_pos_emb: True
  epsilon: 0.000001  # 1e-6
  out_indices: [3, 5, 7, 11]
  with_fpn: True
  pretrained: https://bj.bcebos.com/v1/paddledet/models/pretrained/vit_base_cae_pretrained.pdparams

FPN:
  out_channel: 256

RPNHead:
  anchor_generator:
    aspect_ratios: [0.5, 1.0, 2.0]
    # One anchor size per stride entry below (five of each).
    anchor_sizes: [[32], [64], [128], [256], [512]]
    strides: [4, 8, 16, 32, 64]
  rpn_target_assign:
    batch_size_per_im: 256
    fg_fraction: 0.5
    negative_overlap: 0.3
    positive_overlap: 0.7
    use_random: True
  train_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 2000
    post_nms_top_n: 1000
    topk_after_collect: True
  test_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 1000
    post_nms_top_n: 1000

BBoxHead:
  head: TwoFCHead
  roi_extractor:
    resolution: 7
    sampling_ratio: 0
    aligned: True
  bbox_assigner: BBoxAssigner

BBoxAssigner:
  batch_size_per_im: 512
  bg_thresh: 0.5
  fg_thresh: 0.5
  fg_fraction: 0.25
  use_random: True

TwoFCHead:
  out_channel: 1024

BBoxPostProcess:
  decode: RCNNBox
  nms:
    name: MultiClassNMS
    keep_top_k: 100
    score_threshold: 0.05
    nms_threshold: 0.5
...@@ -184,7 +184,8 @@ class BBoxHead(nn.Layer): ...@@ -184,7 +184,8 @@ class BBoxHead(nn.Layer):
with_pool=False, with_pool=False,
num_classes=80, num_classes=80,
bbox_weight=[10., 10., 5., 5.], bbox_weight=[10., 10., 5., 5.],
bbox_loss=None): bbox_loss=None,
loss_normalize_pos=False):
super(BBoxHead, self).__init__() super(BBoxHead, self).__init__()
self.head = head self.head = head
self.roi_extractor = roi_extractor self.roi_extractor = roi_extractor
...@@ -196,6 +197,7 @@ class BBoxHead(nn.Layer): ...@@ -196,6 +197,7 @@ class BBoxHead(nn.Layer):
self.num_classes = num_classes self.num_classes = num_classes
self.bbox_weight = bbox_weight self.bbox_weight = bbox_weight
self.bbox_loss = bbox_loss self.bbox_loss = bbox_loss
self.loss_normalize_pos = loss_normalize_pos
self.bbox_score = nn.Linear( self.bbox_score = nn.Linear(
in_channel, in_channel,
...@@ -250,8 +252,13 @@ class BBoxHead(nn.Layer): ...@@ -250,8 +252,13 @@ class BBoxHead(nn.Layer):
deltas = self.bbox_delta(feat) deltas = self.bbox_delta(feat)
if self.training: if self.training:
loss = self.get_loss(scores, deltas, targets, rois, loss = self.get_loss(
self.bbox_weight) scores,
deltas,
targets,
rois,
self.bbox_weight,
loss_normalize_pos=self.loss_normalize_pos)
return loss, bbox_feat return loss, bbox_feat
else: else:
pred = self.get_prediction(scores, deltas) pred = self.get_prediction(scores, deltas)
......
...@@ -250,7 +250,7 @@ class CascadeHead(BBoxHead): ...@@ -250,7 +250,7 @@ class CascadeHead(BBoxHead):
if self.training: if self.training:
deltas = deltas[paddle.arange(deltas.shape[0]), labels] deltas = deltas[paddle.arange(deltas.shape[0]), labels]
else: else:
deltas = deltas[(deltas * F.one_hot( deltas = deltas[((deltas + 10000) * F.one_hot(
labels, num_classes=self.num_classes).unsqueeze(-1) != 0 labels, num_classes=self.num_classes).unsqueeze(-1) != 0
).nonzero(as_tuple=True)].reshape( ).nonzero(as_tuple=True)].reshape(
[deltas.shape[0], 4]) [deltas.shape[0], 4])
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册