提交 aa9ff438 编写于 作者: D dengkaipeng

update yolov4

上级 8a95c4b2
......@@ -25,7 +25,7 @@ YOLOv4Head:
anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
background_label: -1
keep_top_k: -1
keep_top_k: 100
nms_threshold: 0.45
nms_top_k: -1
normalized: true
......@@ -40,21 +40,21 @@ YOLOv3Loss:
# size here should be set as same value as TrainReader.batch_size
batch_size: 8
ignore_thresh: 0.7
label_smooth: true
label_smooth: false
downsample: [8,16,32]
scale_x_y: [1.2, 1.1, 1.05]
iou_loss: IouLoss
match_score: true
ignore_class_score_thresh: 0.25
loss_weight: 0.07
max_height: 608
max_width: 608
ciou_term: true
loss_square: true
loss_square: false
base_lr: 0.0001
base_lr: 0.0013
- !PiecewiseDecay
gamma: 0.1
......@@ -77,8 +77,9 @@ OptimizerBuilder:
_READER_: '../yolov3_reader.yml'
fields: ['image', 'gt_bbox', 'gt_class', 'gt_score', 'im_id']
num_max_boxes: 50
fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
num_max_boxes: 90
use_fine_grained_loss: true
image_dir: train2017
......@@ -88,23 +89,25 @@ TrainReader:
- !DecodeImage
to_rgb: True
- !ColorDistort {}
- !RandomExpand
fill_value: [123.675, 116.28, 103.53]
- !RandomCrop {}
- !RandomFlipImage
is_normalized: false
with_mosaic: True
- !MosaicImage
offset: 0.3
mosaic_scale: [0.8, 1.0]
sample_scale: [0.3, 1.0]
sample_flip: 0.5
use_cv2: true
interp: 2
- !NormalizeBox {}
- !PadBox
num_max_boxes: 50
num_max_boxes: 90
- !BboxXYXY2XYWH {}
- !RandomShape
sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
random_inter: True
- !NormalizeImage
mean: [0.,0.,0.]
std: [1.,1.,1.]
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
is_scale: True
is_channel_first: false
- !Permute
......@@ -119,7 +122,10 @@ TrainReader:
[36, 75], [76, 55], [72, 146],
[142, 110], [192, 243], [459, 401]]
downsample_ratios: [8, 16, 32]
iou_thresh: 0.213
batch_size: 8
mosaic_prob: 0.3
mosaic_epoch: 200
shuffle: true
drop_last: true
worker_num: 8
architecture: YOLOv4
use_gpu: true
max_iters: 140000
max_iters: 70000
log_smooth_window: 20
save_dir: output
snapshot_iter: 1000
snapshot_iter: 2000
metric: VOC
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/CSPDarkNet53_pretrained.pdparams
weights: output/yolov4_cspdarknet_voc/model_final
num_classes: 20
use_fine_grained_loss: true
......@@ -38,29 +38,29 @@ YOLOv3Loss:
# The training batch size is configured by TrainReader.batch_size in
# configs/yolov3_reader.yml; the batch_size here must be set to the
# same value as TrainReader.batch_size.
batch_size: 4
batch_size: 8
ignore_thresh: 0.7
label_smooth: true
label_smooth: false
downsample: [8,16,32]
scale_x_y: [1.2, 1.1, 1.05]
iou_loss: IouLoss
match_score: true
ignore_class_score_thresh: 0.25
loss_weight: 0.07
max_height: 608
max_width: 608
ciou_term: true
loss_square: true
loss_square: false
base_lr: 0.0001
base_lr: 0.0013
- !PiecewiseDecay
gamma: 0.1
- 110000
- 130000
- 56000
- 62000
- !LinearWarmup
start_factor: 0.
steps: 1000
......@@ -77,8 +77,9 @@ OptimizerBuilder:
_READER_: '../yolov3_reader.yml'
fields: ['image', 'gt_bbox', 'gt_class', 'gt_score', 'im_id']
num_max_boxes: 50
fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
num_max_boxes: 90
use_fine_grained_loss: true
anno_path: trainval.txt
......@@ -87,23 +88,25 @@ TrainReader:
- !DecodeImage
to_rgb: True
- !ColorDistort {}
- !RandomExpand
fill_value: [123.675, 116.28, 103.53]
- !RandomCrop {}
- !RandomFlipImage
is_normalized: false
with_mosaic: True
- !MosaicImage
offset: 0.3
mosaic_scale: [0.8, 1.0]
sample_scale: [0.3, 1.0]
sample_flip: 0.5
use_cv2: true
interp: 2
- !NormalizeBox {}
- !PadBox
num_max_boxes: 50
num_max_boxes: 90
- !BboxXYXY2XYWH {}
- !RandomShape
sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
random_inter: True
- !NormalizeImage
mean: [0.,0.,0.]
std: [1.,1.,1.]
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
is_scale: True
is_channel_first: false
- !Permute
......@@ -118,7 +121,11 @@ TrainReader:
[36, 75], [76, 55], [72, 146],
[142, 110], [192, 243], [459, 401]]
downsample_ratios: [8, 16, 32]
batch_size: 4
num_classes: 20
iou_thresh: 0.213
batch_size: 8
mosaic_prob: 0.3
mosaic_epoch: 300
shuffle: true
drop_last: true
worker_num: 8
......@@ -141,10 +148,10 @@ EvalReader:
to_rgb: True
- !ResizeImage
target_size: 608
interp: 1
interp: 2
- !NormalizeImage
mean: [0., 0., 0.]
std: [1., 1., 1.]
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
is_scale: True
is_channel_first: false
- !PadBox
......@@ -152,12 +159,15 @@ EvalReader:
- !Permute
to_bgr: false
channel_first: True
batch_size: 4
batch_size: 8
drop_empty: false
worker_num: 8
bufsize: 16
image_shape: [3, 608, 608]
fields: ['image', 'im_size', 'im_id']
use_default_label: true
......@@ -169,8 +179,8 @@ TestReader:
target_size: 608
interp: 1
- !NormalizeImage
mean: [0., 0., 0.]
std: [1., 1., 1.]
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
is_scale: True
is_channel_first: false
- !Permute
......@@ -165,6 +165,7 @@ class Reader(object):
drop_last (bool): whether drop last batch or not. Default False.
drop_empty (bool): whether drop sample when it's gt is empty or not.
Default True.
mosaic_epoch (int): mosaic epoch number.
mixup_epoch (int): mixup epoch number. Default is -1, meaning
mixup is not used.
class_aware_sampling (bool): whether use class-aware sampling or not.
......@@ -190,6 +191,8 @@ class Reader(object):
......@@ -240,6 +243,8 @@ class Reader(object):
self._drop_empty = drop_empty
# sampling
self._mosaic_epoch = mosaic_epoch
self.mosaic_prob = mosaic_prob
self._mixup_epoch = mixup_epoch
self._class_aware_sampling = class_aware_sampling
......@@ -285,6 +290,11 @@ class Reader(object):
if self._shuffle:
if self._mosaic_epoch > 0 and len(self.indexes) < 4:
    # Mosaic sampling draws three extra samples next to the current one,
    # so it cannot work with fewer than four samples in the dataset.
    logger.info("Disable mosaic for dataset samples "
                "less than 4 samples")
    # Reset the private attribute: it is the one the guard above and the
    # sampling code compare against. Writing to `self.mosaic_epoch` would
    # only create an unrelated attribute and leave mosaic enabled.
    self._mosaic_epoch = -1
if self._mixup_epoch > 0 and len(self.indexes) < 2:
logger.debug("Disable mixup for dataset samples "
"less than 2 samples")
......@@ -338,6 +348,20 @@ class Reader(object):
if self._load_img:
sample['image'] = self._load_image(sample['im_file'])
if np.random.uniform(0, 1) < self.mosaic_prob:
if self._epoch < self._mosaic_epoch:
num = len(self.indexes)
mosaic_idx = np.random.randint(1, num, size=3)
for i in range(len(mosaic_idx)):
mosaic_idx[i] = self.indexes[(
mosaic_idx[i] + self._pos - 1) % num]
mosaic_name = 'mosaic' + str(i)
sample[mosaic_name] = copy.deepcopy(self._roidbs[
if self._load_img:
sample[mosaic_name]['image'] = self._load_image(
if self._epoch < self._mixup_epoch:
num = len(self.indexes)
mix_idx = np.random.randint(1, num)
......@@ -261,7 +261,8 @@ class Gt2YoloTarget(BaseOperator):
iou = jaccard_overlap(
[0., 0., gw, gh],
[0., 0., an_hw[mask_i, 0], an_hw[mask_i, 1]])
if iou > self.iou_thresh:
if iou > self.iou_thresh and target[idx, 5, gj,
gi] == 0.:
# x, y, w, h, scale
target[idx, 0, gj, gi] = gx * grid_w - gi
target[idx, 1, gj, gi] = gy * grid_h - gj
......@@ -89,7 +89,7 @@ class BaseOperator(object):
class DecodeImage(BaseOperator):
def __init__(self, to_rgb=True, with_mixup=False):
def __init__(self, to_rgb=True, with_mosaic=False, with_mixup=False):
""" Transform the image data to numpy format.
......@@ -99,9 +99,12 @@ class DecodeImage(BaseOperator):
super(DecodeImage, self).__init__()
self.to_rgb = to_rgb
self.with_mosaic = with_mosaic
self.with_mixup = with_mixup
if not isinstance(self.to_rgb, bool):
raise TypeError("{}: input type is invalid.".format(self))
if not isinstance(self.with_mosaic, bool):
raise TypeError("{}: input type is invalid.".format(self))
if not isinstance(self.with_mixup, bool):
raise TypeError("{}: input type is invalid.".format(self))
......@@ -139,6 +142,17 @@ class DecodeImage(BaseOperator):
# make default im_info with [h, w, 1]
sample['im_info'] = np.array(
[im.shape[0], im.shape[1], 1.], dtype=np.float32)
# decode mosaic
if self.with_mosaic and ('mosaic0' in sample or 'mosaic1' in sample or
'mosaic2' in sample):
if 'mosaic0' in sample:
if 'mosaic1' in sample:
if 'mosaic2' in sample:
# decode mixup image
if self.with_mixup and 'mixup' in sample:
self.__call__(sample['mixup'], context)
......@@ -1030,6 +1044,468 @@ class Permute(BaseOperator):
return samples
class MosaicImage(BaseOperator):
def __init__(self,
mosaic_scale=[0.5, 2.0],
sample_scale=[0.5, 2.0],
super(MosaicImage, self).__init__()
self.offset = offset
self.mosaic_prob = mosaic_prob
self.mosaic_scale = mosaic_scale
self.sample_scale = sample_scale
self.sample_flip = sample_flip
self.use_cv2 = use_cv2
self.interp = interp
self.crop = MosaicCrop()
if not (isinstance(self.mosaic_prob, float) and isinstance(
self.offset, float) and isinstance(self.mosaic_scale, list) and
isinstance(self.sample_scale, list) and
isinstance(self.sample_flip, float)):
raise TypeError("{}: input type is invalid.".format(self))
def _mosaic_img(self, img1, img2, img3, img4, h, w, cut_h, cut_w):
    """Stitch four image tiles into a single 2x2 mosaic.

    ``img1``/``img2`` are joined side by side to form the upper row and
    ``img3``/``img4`` to form the lower row; the two rows are then stacked
    along the first axis. ``h``, ``w``, ``cut_h`` and ``cut_w`` are accepted
    for signature compatibility but are not used by the stitching itself.

    Returns:
        The concatenated mosaic array.
    """
    upper_row = np.concatenate([img1, img2], axis=1)
    lower_row = np.concatenate([img3, img4], axis=1)
    return np.concatenate((upper_row, lower_row))
def _mosaic_gt_bbox(self, sample, cut_h, cut_w):
gt_bbox1 = sample['gt_bbox']
gt_bbox2 = sample['mosaic0']['gt_bbox']
gt_bbox3 = sample['mosaic1']['gt_bbox']
gt_bbox4 = sample['mosaic2']['gt_bbox']
new_gt_bbox = []
if len(gt_bbox1):
for box in gt_bbox1:
if len(gt_bbox2):
for box in gt_bbox2:
box[0] += cut_w
box[2] += cut_w
if len(gt_bbox3):
for box in gt_bbox3:
box[1] += cut_h
box[3] += cut_h
if len(gt_bbox4):
for box in gt_bbox4:
box[0] += cut_w
box[1] += cut_h
box[2] += cut_w
box[3] += cut_h
gt_bbox = np.array(new_gt_bbox)
return gt_bbox
def _mosaic_gt_score(self, sample):
gt_score1 = sample['gt_score']
gt_score2 = sample['mosaic0']['gt_score']
gt_score3 = sample['mosaic1']['gt_score']
gt_score4 = sample['mosaic2']['gt_score']
new_gt_score = []
if len(gt_score1):
for score in gt_score1:
if len(gt_score2):
for score in gt_score2:
if len(gt_score3):
for score in gt_score3:
if len(gt_score4):
for score in gt_score4:
gt_score = np.array(new_gt_score)
return gt_score
def _mosaic_gt_class(self, sample):
gt_class1 = sample['gt_class']
gt_class2 = sample['mosaic0']['gt_class']
gt_class3 = sample['mosaic1']['gt_class']
gt_class4 = sample['mosaic2']['gt_class']
new_gt_class = []
if len(gt_class1):
for cla in gt_class1:
if len(gt_class2):
for cla in gt_class2:
if len(gt_class3):
for cla in gt_class3:
if len(gt_class4):
for cla in gt_class4:
gt_class = np.array(new_gt_class)
return gt_class
def _mosaic_is_crowd(self, sample):
is_crowd1 = sample['is_crowd']
is_crowd2 = sample['mosaic0']['is_crowd']
is_crowd3 = sample['mosaic1']['is_crowd']
is_crowd4 = sample['mosaic2']['is_crowd']
new_is_crowd = []
if len(is_crowd1):
for crowd in is_crowd1:
if len(is_crowd2):
for crowd in is_crowd2:
if len(is_crowd3):
for crowd in is_crowd3:
if len(is_crowd4):
for crowd in is_crowd4:
is_crowd = np.array(new_is_crowd)
return is_crowd
def draw_bbox(self, img, gt_bbox, c=255):
    """Draw every box of ``gt_bbox`` onto ``img`` (debug visualisation).

    Args:
        img: image array handed to ``cv2.rectangle``; it is drawn on in place.
        gt_bbox: iterable of 4-element boxes, unpacked as the two opposite
            corners ``(x1, y1)`` and ``(x2, y2)`` of each rectangle.
        c (int): value of the third colour channel; the colour passed to
            OpenCV is ``(0, 0, c)``.

    Returns:
        The same ``img`` object, with the rectangles drawn.
    """
    for bbox in gt_bbox:
        # Box coordinates are usually floats (and are scaled by float
        # factors elsewhere), while cv2.rectangle expects integer pixel
        # coordinates, so convert explicitly before drawing.
        x1, y1, x2, y2 = (int(round(float(v))) for v in bbox)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, c), 2)
    return img
def sample_scale_fun(self, sample, sample_scale, min_h, min_w):
h = sample['h']
w = sample['w']
new_scale = sample_scale[:]
scale_min = max(min_h / h, min_w / w)
if scale_min > new_scale[1]:
scale = round(scale_min + 0.05, 1)
new_scale[0] = max(new_scale[0], scale_min)
scale = round(random.uniform(*new_scale) + 0.05, 1)
# scale = round(random.uniform(max(sample_scale[0], scale_min), sample_scale[1]), 1)
# int can not ensure new_h or new_w great than min_h or min_w
# new_h = int(sample['h'] * scale)
# new_w = int(sample['w'] * scale)
new_h = int(round(sample['h'] * scale + 0.5))
new_w = int(round(sample['w'] * scale + 0.5))
im = np.array(sample['image'])
if new_h < min_h or new_w < min_w:
print('!!scale error!!', scale, h, min_h, w, min_w)
if self.use_cv2:
im = cv2.resize(im, (new_w, new_h), interpolation=self.interp)
im = im.astype('uint8')
im = Image.fromarray(im)
im = im.resize((new_w, new_h), self.interp)
im = np.array(im)
sample['h'] = new_h
sample['w'] = new_w
sample['image'] = im
sample['gt_bbox'] = sample['gt_bbox'] * scale
return sample
def sample_flip_fun(self, sample, flip_prob):
    """Horizontally flip a sample's image and boxes with probability ``flip_prob``.

    Args:
        sample (dict): must provide ``'w'`` (image width), ``'gt_bbox'``
            (array whose columns 0 and 2 are the x1/x2 coordinates) and
            ``'image'`` (array flipped along its second axis).
        flip_prob (float): the flip is applied when a uniform draw from
            [0, 1] is smaller than this value.

    Returns:
        dict: the same ``sample`` object, updated in place when flipped.

    Raises:
        BboxError: if, after flipping, every box has ``x2 < x1``.
    """
    if random.uniform(0, 1) >= flip_prob:
        return sample

    w = sample['w']
    gt_bbox = sample['gt_bbox']
    # The emptiness test has to look at the number of rows. Comparing the
    # whole shape tuple with 0 is never true, which let an empty 1-D array
    # (e.g. ``np.array([])``) reach the column indexing below and crash.
    if gt_bbox.shape[0] != 0:
        old_x1 = gt_bbox[:, 0].copy()
        old_x2 = gt_bbox[:, 2].copy()
        # Mirror the x coordinates around the image width and keep them
        # inside [0, w - 1].
        gt_bbox[:, 0] = np.round(np.clip(w - old_x2 - 1, 0, w - 1), 2)
        gt_bbox[:, 2] = np.round(np.clip(w - old_x1 - 1, 0, w - 1), 2)
        if (gt_bbox[:, 2] < gt_bbox[:, 0]).all():
            m = "{}: invalid box, x2 should be greater than x1".format(self)
            raise BboxError(m)
        sample['gt_bbox'] = np.array(gt_bbox)
    # The image is flipped even when there are no boxes to update.
    sample['image'] = sample['image'][:, ::-1, :]
    return sample
def _org_img(self, sample):
    """Build a 2x2 preview of the four source images with their boxes drawn.

    The main sample and its ``'mosaic0'``, ``'mosaic1'`` and ``'mosaic2'``
    companions each contribute one tile: a copy of the image gets its
    ``'gt_bbox'`` boxes drawn via ``self.draw_bbox``. Every tile is then
    resized to 200x200 and the tiles are laid out as two rows of two.

    Returns:
        The concatenated preview image.
    """
    annotated = []
    for key in (None, 'mosaic0', 'mosaic1', 'mosaic2'):
        source = sample if key is None else sample[key]
        # Draw on a copy so the sample's own image stays untouched.
        canvas = source['image'].copy()
        annotated.append(self.draw_bbox(canvas, source['gt_bbox']))

    thumbs = [cv2.resize(tile, (200, 200)) for tile in annotated]
    upper_row = np.concatenate(thumbs[:2], axis=1)
    lower_row = np.concatenate(thumbs[2:], axis=1)
    return np.concatenate((upper_row, lower_row))
def __call__(self, sample, context=None):
if 'mosaic0' not in sample:
sample = self.crop(sample, 0, 0)
if self.sample_flip:
sample = self.sample_flip_fun(sample, self.sample_flip)
return sample
h = sample['h']
w = sample['w']
if self.mosaic_scale[0]:
scale = round(random.uniform(*self.mosaic_scale), 1)
new_h = int(h * scale)
new_w = int(w * scale)
cut_h = np.random.randint(h * self.offset, h * (1 - self.offset))
cut_w = np.random.randint(w * self.offset, w * (1 - self.offset))
# org_img = self._org_img(sample)
if self.sample_scale[0]:
sample = self.sample_scale_fun(sample, self.sample_scale, cut_h,
sample['mosaic0'] = self.sample_scale_fun(
sample['mosaic0'], self.sample_scale, cut_h, new_w - cut_w)
sample['mosaic1'] = self.sample_scale_fun(
sample['mosaic1'], self.sample_scale, new_h - cut_h, cut_w)
sample['mosaic2'] = self.sample_scale_fun(
sample['mosaic2'], self.sample_scale, new_h - cut_h,
new_w - cut_w)
if self.sample_flip:
sample = self.sample_flip_fun(sample, self.sample_flip)
sample['mosaic0'] = self.sample_flip_fun(sample['mosaic0'],
sample['mosaic1'] = self.sample_flip_fun(sample['mosaic1'],
sample['mosaic2'] = self.sample_flip_fun(sample['mosaic2'],
sample = self.crop(sample, width=cut_w, height=cut_h)
sample['mosaic0'] = self.crop(
sample['mosaic0'], width=new_w - cut_w, height=cut_h)
sample['mosaic1'] = self.crop(
sample['mosaic1'], width=cut_w, height=new_h - cut_h)
sample['mosaic2'] = self.crop(
sample['mosaic2'], width=new_w - cut_w, height=new_h - cut_h)
img = self._mosaic_img(sample['image'], sample['mosaic0']['image'],\
sample['mosaic1']['image'], sample['mosaic2']['image'], new_h, new_w, cut_h, cut_w)
gt_bbox = self._mosaic_gt_bbox(sample, cut_h, cut_w)
gt_score = self._mosaic_gt_score(sample)
gt_class = self._mosaic_gt_class(sample)
is_crowd = self._mosaic_is_crowd(sample)
# image = self.draw_bbox(img, gt_bbox)
# image = cv2.resize(image, (400, 400))
# image = np.concatenate([image, org_img], axis = 1)
# savename = '/mosaicbbox/' + sample['im_file']
# cv2.imwrite(savename, image)
sample['h'] = new_h
sample['w'] = new_w
sample['image'] = img
sample['gt_bbox'] = gt_bbox
sample['gt_class'] = gt_class
sample['gt_score'] = gt_score
sample['is_crowd'] = is_crowd
return sample
class MosaicCrop(object):
"""Random crop image and bboxes.
aspect_ratio (list): aspect ratio of cropped region.
in [min, max] format.
thresholds (list): iou thresholds for decide a valid bbox crop.
scaling (list): ratio between a cropped region and the original image.
in [min, max] format.
num_attempts (int): number of tries before giving up.
allow_no_crop (bool): allow return without actually cropping them.
cover_all_box (bool): ensure all bboxes are covered in the final crop.
def __init__(self,
aspect_ratio=[.5, 2.],
thresholds=[.0, .1, .3, .5, .7, .9],
scaling=[.3, 1.],
super(MosaicCrop, self).__init__()
self.aspect_ratio = aspect_ratio
self.thresholds = thresholds
self.scaling = scaling
self.num_attempts = num_attempts
self.allow_no_crop = allow_no_crop
self.cover_all_box = cover_all_box
def __call__(self, sample, width=0, height=0, context=None):
if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
if width:
sample['image'] = sample['image'][0:height, 0:width]
return sample
h = sample['h']
w = sample['w']
gt_bbox = sample['gt_bbox']
# NOTE Original method attempts to generate one candidate for each
# threshold then randomly sample one from the resulting list.
# Here a short circuit approach is taken, i.e., randomly choose a
# threshold and attempt to find a valid crop, and simply return the
# first one found.
# The probability is not exactly the same, kinda resembling the
# "Monty Hall" problem. Actually carrying out the attempts will affect
# observability (just like opening doors in the "Monty Hall" game).
thresholds = list(self.thresholds)
if self.allow_no_crop and not width:
for thresh in thresholds:
if thresh == 'no_crop':
return sample
found = False
for i in range(self.num_attempts):
if width:
if w < width or h < height:
raise Exception('!!image size is not enough!!', w,
width, h, height)
if w == width: crop_x = 0
else: crop_x = np.random.randint(0, w - width)
if h == height: crop_y = 0
else: crop_y = np.random.randint(0, h - height)
crop_box = [crop_x, crop_y, crop_x + width, crop_y + height]
scale = np.random.uniform(*self.scaling)
min_ar, max_ar = self.aspect_ratio
aspect_ratio = np.random.uniform(
max(min_ar, scale**2), min(max_ar, scale**-2))
crop_h = int(h * scale / np.sqrt(aspect_ratio))
crop_w = int(w * scale * np.sqrt(aspect_ratio))
crop_y = np.random.randint(0, h - crop_h)
crop_x = np.random.randint(0, w - crop_w)
crop_box = [
crop_x, crop_y, crop_x + crop_w, crop_y + crop_h
iou = self._iou_matrix(
gt_bbox, np.array(
[crop_box], dtype=np.float32))
if iou.max() < thresh:
if self.cover_all_box and iou.min() < thresh:
cropped_box, valid_ids = self._crop_box_with_center_constraint(
gt_bbox, np.array(
crop_box, dtype=np.float32))
if valid_ids.size > 0:
found = True
if found:
sample['image'] = self._crop_image(sample['image'], crop_box)
sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
sample['gt_class'] = np.take(
sample['gt_class'], valid_ids, axis=0)
sample['w'] = crop_box[2] - crop_box[0]
sample['h'] = crop_box[3] - crop_box[1]
if 'gt_score' in sample:
sample['gt_score'] = np.take(
sample['gt_score'], valid_ids, axis=0)
if 'is_crowd' in sample:
sample['is_crowd'] = np.take(
sample['is_crowd'], valid_ids, axis=0)
return sample
if width:
crop_box = [0, 0, width, height]
sample['image'] = self._crop_image(sample['image'], crop_box)
sample['gt_bbox'] = np.array([])
sample['gt_class'] = np.array([])
sample['w'] = crop_box[2] - crop_box[0]
sample['h'] = crop_box[3] - crop_box[1]
if 'gt_score' in sample:
sample['gt_score'] = np.array([])
if 'is_crowd' in sample:
sample['is_crowd'] = np.array([])
return sample
return sample
def _iou_matrix(self, a, b):
    """Compute the pairwise IoU between two sets of corner-format boxes.

    Args:
        a: array indexed as ``a[:, :2]`` (top-left) and ``a[:, 2:]``
            (bottom-right), one box per row.
        b: array with the same column layout as ``a``.

    Returns:
        Array whose entry ``[i, j]`` is the IoU of ``a[i]`` and ``b[j]``.
        A small constant is added to the denominator to avoid division
        by zero.
    """
    a_expanded = a[:, np.newaxis, :]
    inter_tl = np.maximum(a_expanded[..., :2], b[:, :2])
    inter_br = np.minimum(a_expanded[..., 2:], b[:, 2:])
    # Pairs that do not overlap on every axis contribute zero area.
    overlaps = (inter_tl < inter_br).all(axis=2)
    inter_area = np.prod(inter_br - inter_tl, axis=2) * overlaps

    a_area = np.prod(a[:, 2:] - a[:, :2], axis=1)
    b_area = np.prod(b[:, 2:] - b[:, :2], axis=1)
    union_area = a_area[:, np.newaxis] + b_area - inter_area
    return inter_area / (union_area + 1e-10)
def _crop_box_with_center_constraint(self, box, crop):
    """Clip boxes to a crop window and select those centred inside it.

    Args:
        box: array of boxes, columns ``[:2]`` being the top-left corner and
            ``[2:]`` the bottom-right corner.
        crop: 4-element crop window in the same corner layout.

    Returns:
        tuple: ``(clipped, valid_ids)`` where ``clipped`` holds every box
        clipped to the window and shifted into the window's coordinate
        frame, and ``valid_ids`` are the row indices of boxes whose centre
        lies inside the window and whose clipped extent is non-empty.
    """
    window_tl, window_br = crop[:2], crop[2:]

    clipped = box.copy()
    clipped[:, :2] = np.maximum(box[:, :2], window_tl)
    clipped[:, 2:] = np.minimum(box[:, 2:], window_br)
    # Both corners are translated by the window's top-left corner.
    clipped[:, :2] -= window_tl
    clipped[:, 2:] -= window_tl

    mid_points = (box[:, :2] + box[:, 2:]) / 2
    centre_inside = np.logical_and(window_tl <= mid_points,
                                   mid_points < window_br).all(axis=1)
    has_extent = (clipped[:, :2] < clipped[:, 2:]).all(axis=1)
    keep = np.logical_and(centre_inside, has_extent)
    return clipped, np.nonzero(keep)[0]
def _crop_image(self, img, crop):
    """Return the region of ``img`` selected by ``crop``.

    ``crop`` is unpacked as ``(x1, y1, x2, y2)``; rows ``y1:y2`` and columns
    ``x1:x2`` are taken, keeping the full third axis.
    """
    left, top, right, bottom = crop
    rows = slice(top, bottom)
    cols = slice(left, right)
    return img[rows, cols, :]
class MixupImage(BaseOperator):
def __init__(self, alpha=1.5, beta=1.5):
......@@ -115,8 +115,8 @@ class IouLoss(object):
cx = (x1 + x2) / 2
cy = (y1 + y2) / 2
w = (x2 - x1) + fluid.layers.cast((x2 - x1) == 0, 'float32')
h = (y2 - y1) + fluid.layers.cast((y2 - y1) == 0, 'float32')
w = x2 - x1
h = (y2 - y1) + fluid.layers.cast((y2 - y1) == 0, 'float32') * eps
cxg = (x1g + x2g) / 2
cyg = (y1g + y2g) / 2
......@@ -50,7 +50,7 @@ class YOLOv3Loss(object):
downsample=[32, 16, 8],
self._batch_size = batch_size
self._ignore_thresh = ignore_thresh
self._label_smooth = label_smooth
......@@ -59,7 +59,7 @@ class YOLOv3Loss(object):
self._iou_aware_loss = iou_aware_loss
self.downsample = downsample
self.scale_x_y = scale_x_y
self.match_score = match_score
self.ignore_class_score_thresh = ignore_class_score_thresh
def __call__(self, outputs, gt_box, gt_label, gt_score, targets, anchors,
anchor_masks, mask_anchors, num_classes, prefix_name):
......@@ -167,7 +167,7 @@ class YOLOv3Loss(object):
self.scale_x_y, Sequence) else self.scale_x_y[i]
loss_obj_pos, loss_obj_neg = self._calc_obj_loss(
output, obj, tobj, gt_box, self._batch_size, anchors,
num_classes, downsample, self._ignore_thresh, scale_x_y)
num_classes, downsample, self._ignore_thresh, scale_x_y, cls)
loss_cls = fluid.layers.sigmoid_cross_entropy_with_logits(cls, tcls)
loss_cls = fluid.layers.elementwise_mul(loss_cls, tobj, axis=0)
......@@ -277,7 +277,7 @@ class YOLOv3Loss(object):
return (tx, ty, tw, th, tscale, tobj, tcls)
def _calc_obj_loss(self, output, obj, tobj, gt_box, batch_size, anchors,
num_classes, downsample, ignore_thresh, scale_x_y):
num_classes, downsample, ignore_thresh, scale_x_y, cls):
# A prediction bbox overlap any gt_bbox over ignore_thresh,
# objectness loss will be ignored, process as follows:
......@@ -329,14 +329,16 @@ class YOLOv3Loss(object):
max_iou = fluid.layers.reduce_max(iou, dim=-1)
iou_mask = fluid.layers.cast(max_iou <= ignore_thresh, dtype="float32")
if self.match_score:
max_prob = fluid.layers.reduce_max(prob, dim=-1)
iou_mask = iou_mask * fluid.layers.cast(
max_prob <= 0.25, dtype="float32")
output_shape = fluid.layers.shape(output)
an_num = len(anchors) // 2
iou_mask = fluid.layers.reshape(iou_mask, (-1, an_num, output_shape[2],
if self.ignore_class_score_thresh > 0.:
max_cls = fluid.layers.reduce_max(fluid.layers.sigmoid(cls), dim=-1)
iou_mask = fluid.layers.elementwise_max(
max_cls <= self.ignore_class_score_thresh, dtype="float32"),
iou_mask.stop_gradient = True
# NOTE: tobj holds gt_score, obj_mask holds object existence mask
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册