未验证 提交 4b053712 编写于 作者: C CodesFarmer 提交者: GitHub

Add IoU-loss for yolov3 (#192)

* add iou loss to yolov3
* modify the comment and delete redundant yml file
* fix the low speed bug in dropblock module
上级 97088feb
architecture: YOLOv3
use_gpu: true
max_iters: 55000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
metric: COCO
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_obj365_pretrained.tar
weights: output/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco/model_final
num_classes: 80
use_fine_grained_loss: true
YOLOv3:
backbone: ResNet
yolo_head: YOLOv3Head
use_fine_grained_loss: true
ResNet:
norm_type: sync_bn
freeze_at: 0
freeze_norm: false
norm_decay: 0.
depth: 50
feature_maps: [3, 4, 5]
variant: d
dcn_v2_stages: [5]
YOLOv3Head:
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
norm_decay: 0.
yolo_loss: YOLOv3Loss
nms:
background_label: -1
keep_top_k: 100
nms_threshold: 0.45
nms_top_k: 1000
normalized: false
score_threshold: 0.01
YOLOv3Loss:
batch_size: 8
ignore_thresh: 0.7
label_smooth: false
use_fine_grained_loss: true
iou_loss: IouLoss
IouLoss:
loss_weight: 2.5
max_height: 608
max_width: 608
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 40000
- 50000
- !LinearWarmup
start_factor: 0.
steps: 4000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0005
type: L2
_READER_: '../yolov3_reader.yml'
......@@ -18,8 +18,10 @@ from . import yolo_loss
from . import smooth_l1_loss
from . import giou_loss
from . import diou_loss
from . import iou_loss
from .yolo_loss import *
from .smooth_l1_loss import *
from .giou_loss import *
from .diou_loss import *
from .iou_loss import *
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import NumpyArrayInitializer
from paddle import fluid
from ppdet.core.workspace import register, serializable
__all__ = ['IouLoss']
@register
@serializable
class IouLoss(object):
"""
iou loss, see https://arxiv.org/abs/1908.03851
loss = 1.0 - iou * iou
Args:
loss_weight (float): iou loss weight, default is 2.5
max_height (int): max height of input to support random shape input
max_width (int): max width of input to support random shape input
"""
def __init__(self,
loss_weight=2.5,
max_height=608,
max_width=608):
self._loss_weight = loss_weight
self._MAX_HI = max_height
self._MAX_WI = max_width
def __call__(self, x, y, w, h, tx, ty, tw, th,
anchors, downsample_ratio, batch_size, eps=1.e-10):
'''
Args:
x | y | w | h ([Variables]): the output of yolov3 for encoded x|y|w|h
tx |ty |tw |th ([Variables]): the target of yolov3 for encoded x|y|w|h
anchors ([float]): list of anchors for current output layer
downsample_ratio (float): the downsample ratio for current output layer
batch_size (int): training batch size
eps (float): the decimal to prevent the denominator eqaul zero
'''
x1, y1, x2, y2 = self._bbox_transform(x, y, w, h, anchors,
downsample_ratio, batch_size, False)
x1g, y1g, x2g, y2g = self._bbox_transform(tx, ty, tw, th,
anchors, downsample_ratio, batch_size, True)
x2 = fluid.layers.elementwise_max(x1, x2)
y2 = fluid.layers.elementwise_max(y1, y2)
xkis1 = fluid.layers.elementwise_max(x1, x1g)
ykis1 = fluid.layers.elementwise_max(y1, y1g)
xkis2 = fluid.layers.elementwise_min(x2, x2g)
ykis2 = fluid.layers.elementwise_min(y2, y2g)
xc1 = fluid.layers.elementwise_min(x1, x1g)
yc1 = fluid.layers.elementwise_min(y1, y1g)
xc2 = fluid.layers.elementwise_max(x2, x2g)
yc2 = fluid.layers.elementwise_max(y2, y2g)
intsctk = (xkis2 - xkis1) * (ykis2 - ykis1)
intsctk = intsctk * fluid.layers.greater_than(
xkis2, xkis1) * fluid.layers.greater_than(ykis2, ykis1)
unionk = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsctk + eps
iouk = intsctk / unionk
loss_iou = 1. - iouk * iouk
loss_iou = loss_iou * self._loss_weight
return loss_iou
def _bbox_transform(self, dcx, dcy, dw, dh, anchors, downsample_ratio, batch_size, is_gt):
grid_x = int(self._MAX_WI / downsample_ratio)
grid_y = int(self._MAX_HI / downsample_ratio)
an_num = len(anchors) // 2
shape_fmp = fluid.layers.shape(dcx)
shape_fmp.stop_gradient = True
# generate the grid_w x grid_h center of feature map
idx_i = np.array([[i for i in range(grid_x)]])
idx_j = np.array([[j for j in range(grid_y)]]).transpose()
gi_np = np.repeat(idx_i, grid_y, axis=0)
gi_np = np.reshape(gi_np, newshape=[1, 1, grid_y, grid_x])
gi_np = np.tile(gi_np, reps=[batch_size, an_num, 1, 1])
gj_np = np.repeat(idx_j, grid_x, axis=1)
gj_np = np.reshape(gj_np, newshape=[1, 1, grid_y, grid_x])
gj_np = np.tile(gj_np, reps=[batch_size, an_num, 1, 1])
gi_max = self._create_tensor_from_numpy(gi_np.astype(np.float32))
gi = fluid.layers.crop(x=gi_max, shape=dcx)
gi.stop_gradient = True
gj_max = self._create_tensor_from_numpy(gj_np.astype(np.float32))
gj = fluid.layers.crop(x=gj_max, shape=dcx)
gj.stop_gradient = True
grid_x_act = fluid.layers.cast(shape_fmp[3], dtype="float32")
grid_x_act.stop_gradient = True
grid_y_act = fluid.layers.cast(shape_fmp[2], dtype="float32")
grid_y_act.stop_gradient = True
if is_gt:
cx = fluid.layers.elementwise_add(dcx, gi) / grid_x_act
cx.gradient = True
cy = fluid.layers.elementwise_add(dcy, gj) / grid_y_act
cy.gradient = True
else:
dcx_sig = fluid.layers.sigmoid(dcx)
cx = fluid.layers.elementwise_add(dcx_sig, gi) / grid_x_act
dcy_sig = fluid.layers.sigmoid(dcy)
cy = fluid.layers.elementwise_add(dcy_sig, gj) / grid_y_act
anchor_w_ = [anchors[i] for i in range(0, len(anchors)) if i % 2 == 0]
anchor_w_np = np.array(anchor_w_)
anchor_w_np = np.reshape(anchor_w_np, newshape=[1, an_num, 1, 1])
anchor_w_np = np.tile(anchor_w_np, reps=[batch_size, 1, grid_y, grid_x])
anchor_w_max = self._create_tensor_from_numpy(anchor_w_np.astype(np.float32))
anchor_w = fluid.layers.crop(x=anchor_w_max, shape=dcx)
anchor_w.stop_gradient = True
anchor_h_ = [anchors[i] for i in range(0, len(anchors)) if i % 2 == 1]
anchor_h_np = np.array(anchor_h_)
anchor_h_np = np.reshape(anchor_h_np, newshape=[1, an_num, 1, 1])
anchor_h_np = np.tile(anchor_h_np, reps=[batch_size, 1, grid_y, grid_x])
anchor_h_max = self._create_tensor_from_numpy(anchor_h_np.astype(np.float32))
anchor_h = fluid.layers.crop(x=anchor_h_max, shape=dcx)
anchor_h.stop_gradient = True
# e^tw e^th
exp_dw = fluid.layers.exp(dw)
exp_dh = fluid.layers.exp(dh)
pw = fluid.layers.elementwise_mul(exp_dw, anchor_w) / \
(grid_x_act * downsample_ratio)
ph = fluid.layers.elementwise_mul(exp_dh, anchor_h) / \
(grid_y_act * downsample_ratio)
if is_gt:
exp_dw.stop_gradient = True
exp_dh.stop_gradient = True
pw.stop_gradient = True
ph.stop_gradient = True
x1 = cx - 0.5 * pw
y1 = cy - 0.5 * ph
x2 = cx + 0.5 * pw
y2 = cy + 0.5 * ph
if is_gt:
x1.stop_gradient = True
y1.stop_gradient = True
x2.stop_gradient = True
y2.stop_gradient = True
return x1, y1, x2, y2
def _create_tensor_from_numpy(self, numpy_array):
paddle_array = fluid.layers.create_parameter(
attr=ParamAttr(),
shape=numpy_array.shape,
dtype=numpy_array.dtype,
default_initializer=NumpyArrayInitializer(numpy_array))
paddle_array.stop_gradient = True
return paddle_array
......@@ -34,17 +34,20 @@ class YOLOv3Loss(object):
use_fine_grained_loss (bool): whether use fine grained YOLOv3 loss
instead of fluid.layers.yolov3_loss
"""
__inject__ = ['iou_loss']
__shared__ = ['use_fine_grained_loss']
def __init__(self,
batch_size=8,
ignore_thresh=0.7,
label_smooth=True,
use_fine_grained_loss=False):
use_fine_grained_loss=False,
iou_loss=None):
self._batch_size = batch_size
self._ignore_thresh = ignore_thresh
self._label_smooth = label_smooth
self._use_fine_grained_loss = use_fine_grained_loss
self._iou_loss = iou_loss
def __call__(self, outputs, gt_box, gt_label, gt_score, targets, anchors,
anchor_masks, mask_anchors, num_classes, prefix_name):
......@@ -104,7 +107,10 @@ class YOLOv3Loss(object):
"YOLOv3 output layer number not equal target number"
downsample = 32
if self._iou_loss is None:
loss_xys, loss_whs, loss_objs, loss_clss = [], [], [], []
else:
loss_xys, loss_whs, loss_ious, loss_objs, loss_clss = [], [], [], [], []
for i, (output, target,
anchors) in enumerate(zip(outputs, targets, mask_anchors)):
an_num = len(anchors) // 2
......@@ -124,6 +130,12 @@ class YOLOv3Loss(object):
loss_w = fluid.layers.reduce_sum(loss_w, dim=[1, 2, 3])
loss_h = fluid.layers.abs(h - th) * tscale_tobj
loss_h = fluid.layers.reduce_sum(loss_h, dim=[1, 2, 3])
if self._iou_loss is not None:
loss_iou = self._iou_loss(x, y, w, h, tx, ty, tw, th,
anchors, downsample, self._batch_size)
loss_iou = loss_iou * tscale_tobj
loss_iou = fluid.layers.reduce_sum(loss_iou, dim=[1, 2, 3])
loss_ious.append(fluid.layers.reduce_mean(loss_iou))
loss_obj_pos, loss_obj_neg = self._calc_obj_loss(
output, obj, tobj, gt_box, self._batch_size, anchors,
......@@ -140,13 +152,15 @@ class YOLOv3Loss(object):
loss_clss.append(fluid.layers.reduce_mean(loss_cls))
downsample //= 2
return {
losses_all = {
"loss_xy": fluid.layers.sum(loss_xys),
"loss_wh": fluid.layers.sum(loss_whs),
"loss_obj": fluid.layers.sum(loss_objs),
"loss_cls": fluid.layers.sum(loss_clss),
}
if self._iou_loss is not None:
losses_all["loss_iou"] = fluid.layers.sum(loss_ious)
return losses_all
def _split_output(self, output, an_num, num_classes):
"""
......
......@@ -155,14 +155,12 @@ def DropBlock(input, block_size, keep_prob, is_test):
mask = 1.0 - mask_flag
elem_numel = fluid.layers.reduce_prod(input_shape)
elem_numel = fluid.layers.cast(elem_numel, dtype="float32")
elem_numel_tmp = fluid.layers.reshape(elem_numel, [1, 1, 1, 1])
elem_numel_m = fluid.layers.expand_as(elem_numel_tmp, input)
elem_numel_m = fluid.layers.cast(elem_numel, dtype="float32")
elem_numel_m.stop_gradient = True
elem_sum = fluid.layers.reduce_sum(mask)
elem_sum_tmp = fluid.layers.cast(elem_sum, dtype="float32")
elem_sum_tmp = fluid.layers.reshape(elem_sum_tmp, [1, 1, 1, 1])
elem_sum_m = fluid.layers.expand_as(elem_sum_tmp, input)
elem_sum_m = fluid.layers.cast(elem_sum, dtype="float32")
elem_sum_m.stop_gradient = True
output = input * mask * elem_numel_m / elem_sum_m
return output
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册