提交 825cea1d 编写于 作者: D dengkaipeng

add augment

上级 aa9ff438
...@@ -90,6 +90,7 @@ TrainReader: ...@@ -90,6 +90,7 @@ TrainReader:
- !DecodeImage - !DecodeImage
to_rgb: True to_rgb: True
with_mosaic: True with_mosaic: True
with_mixup: True
- !MosaicImage - !MosaicImage
offset: 0.3 offset: 0.3
mosaic_scale: [0.8, 1.0] mosaic_scale: [0.8, 1.0]
...@@ -97,6 +98,15 @@ TrainReader: ...@@ -97,6 +98,15 @@ TrainReader:
sample_flip: 0.5 sample_flip: 0.5
use_cv2: true use_cv2: true
interp: 2 interp: 2
- !MixupImage
alpha: 1.5
beta: 1.5
- !ColorDistort {}
- !RandomExpand
fill_value: [123.675, 116.28, 103.53]
- !RandomCrop {}
- !RandomFlipImage
is_normalized: false
- !NormalizeBox {} - !NormalizeBox {}
- !PadBox - !PadBox
num_max_boxes: 90 num_max_boxes: 90
......
...@@ -89,6 +89,7 @@ TrainReader: ...@@ -89,6 +89,7 @@ TrainReader:
- !DecodeImage - !DecodeImage
to_rgb: True to_rgb: True
with_mosaic: True with_mosaic: True
with_mixup: True
- !MosaicImage - !MosaicImage
offset: 0.3 offset: 0.3
mosaic_scale: [0.8, 1.0] mosaic_scale: [0.8, 1.0]
...@@ -96,6 +97,15 @@ TrainReader: ...@@ -96,6 +97,15 @@ TrainReader:
sample_flip: 0.5 sample_flip: 0.5
use_cv2: true use_cv2: true
interp: 2 interp: 2
- !MixupImage
alpha: 1.5
beta: 1.5
- !ColorDistort {}
- !RandomExpand
fill_value: [123.675, 116.28, 103.53]
- !RandomCrop {}
- !RandomFlipImage
is_normalized: false
- !NormalizeBox {} - !NormalizeBox {}
- !PadBox - !PadBox
num_max_boxes: 90 num_max_boxes: 90
...@@ -124,6 +134,7 @@ TrainReader: ...@@ -124,6 +134,7 @@ TrainReader:
num_classes: 20 num_classes: 20
iou_thresh: 0.213 iou_thresh: 0.213
batch_size: 8 batch_size: 8
mixup_epoch: 250
mosaic_prob: 0.3 mosaic_prob: 0.3
mosaic_epoch: 300 mosaic_epoch: 300
shuffle: true shuffle: true
......
...@@ -87,6 +87,11 @@ class BaseOperator(object): ...@@ -87,6 +87,11 @@ class BaseOperator(object):
return str(self._id) return str(self._id)
def is_mosaiced(context):
    """Report whether the operator context flags the sample as mosaiced.

    Args:
        context: The context object handed to an operator's ``__call__``.
            Anything that is not a dict (including ``None``) is treated as
            "not mosaiced".

    Returns:
        bool: True only when ``context`` is a dict whose ``'mosaic'`` entry
        is truthy; False otherwise.
    """
    if not isinstance(context, dict):
        return False
    # .get() handles the missing-key case in one lookup; bool() keeps the
    # predicate's result a strict True/False instead of leaking the stored
    # value to callers.
    return bool(context.get('mosaic'))
@register_op @register_op
class DecodeImage(BaseOperator): class DecodeImage(BaseOperator):
def __init__(self, to_rgb=True, with_mosaic=False, with_mixup=False): def __init__(self, to_rgb=True, with_mosaic=False, with_mixup=False):
...@@ -670,6 +675,7 @@ class RandomDistort(BaseOperator): ...@@ -670,6 +675,7 @@ class RandomDistort(BaseOperator):
def __call__(self, sample, context): def __call__(self, sample, context):
"""random distort the image""" """random distort the image"""
ops = [ ops = [
self.random_brightness, self.random_contrast, self.random_brightness, self.random_contrast,
self.random_saturation, self.random_hue self.random_saturation, self.random_hue
...@@ -795,6 +801,7 @@ class CropImage(BaseOperator): ...@@ -795,6 +801,7 @@ class CropImage(BaseOperator):
Returns: Returns:
sample: the image, bounding box are replaced. sample: the image, bounding box are replaced.
""" """
assert 'image' in sample, "image data not found" assert 'image' in sample, "image data not found"
im = sample['image'] im = sample['image']
gt_bbox = sample['gt_bbox'] gt_bbox = sample['gt_bbox']
...@@ -1279,9 +1286,10 @@ class MosaicImage(BaseOperator): ...@@ -1279,9 +1286,10 @@ class MosaicImage(BaseOperator):
def __call__(self, sample, context=None): def __call__(self, sample, context=None):
if 'mosaic0' not in sample: if 'mosaic0' not in sample:
sample = self.crop(sample, 0, 0) # sample = self.crop(sample, 0, 0)
if self.sample_flip: # if self.sample_flip:
sample = self.sample_flip_fun(sample, self.sample_flip) # sample = self.sample_flip_fun(sample, self.sample_flip)
context['mosaic'] = False
return sample return sample
h = sample['h'] h = sample['h']
w = sample['w'] w = sample['w']
...@@ -1346,6 +1354,7 @@ class MosaicImage(BaseOperator): ...@@ -1346,6 +1354,7 @@ class MosaicImage(BaseOperator):
sample.pop('mosaic1') sample.pop('mosaic1')
sample.pop('mosaic2') sample.pop('mosaic2')
context['mosaic'] = True
return sample return sample
...@@ -1533,6 +1542,9 @@ class MixupImage(BaseOperator): ...@@ -1533,6 +1542,9 @@ class MixupImage(BaseOperator):
return img.astype('uint8') return img.astype('uint8')
def __call__(self, sample, context=None): def __call__(self, sample, context=None):
if is_mosaiced(context):
return sample
if 'mixup' not in sample: if 'mixup' not in sample:
return sample return sample
factor = np.random.beta(self.alpha, self.beta) factor = np.random.beta(self.alpha, self.beta)
...@@ -2044,6 +2056,9 @@ class RandomCrop(BaseOperator): ...@@ -2044,6 +2056,9 @@ class RandomCrop(BaseOperator):
return crop_segms return crop_segms
def __call__(self, sample, context=None): def __call__(self, sample, context=None):
if is_mosaiced(context):
return sample
if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0: if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
return sample return sample
......
...@@ -55,7 +55,8 @@ class CSPDarkNet(object): ...@@ -55,7 +55,8 @@ class CSPDarkNet(object):
return fluid.layers.log(1 + expf) return fluid.layers.log(1 + expf)
def _mish(self, input): def _mish(self, input):
return input * fluid.layers.tanh(self._softplus(input)) return fluid.layers.mish(input)
# return input * fluid.layers.tanh(self._softplus(input))
def _conv_norm(self, def _conv_norm(self,
input, input,
......
...@@ -64,7 +64,8 @@ class IouLoss(object): ...@@ -64,7 +64,8 @@ class IouLoss(object):
downsample_ratio, downsample_ratio,
batch_size, batch_size,
ioup=None, ioup=None,
eps=1.e-10): eps=1.e-10,
scale_x_y=1.0):
''' '''
Args: Args:
x | y | w | h ([Variables]): the output of yolov3 for encoded x|y|w|h x | y | w | h ([Variables]): the output of yolov3 for encoded x|y|w|h
...@@ -75,9 +76,9 @@ class IouLoss(object): ...@@ -75,9 +76,9 @@ class IouLoss(object):
eps (float): the decimal to prevent the denominator equal zero eps (float): the decimal to prevent the denominator equal zero
''' '''
pred = self._bbox_transform(x, y, w, h, anchors, downsample_ratio, pred = self._bbox_transform(x, y, w, h, anchors, downsample_ratio,
batch_size, False) batch_size, False, scale_x_y)
gt = self._bbox_transform(tx, ty, tw, th, anchors, downsample_ratio, gt = self._bbox_transform(tx, ty, tw, th, anchors, downsample_ratio,
batch_size, True) batch_size, True, 1.0)
iouk = self._iou(pred, gt, ioup, eps) iouk = self._iou(pred, gt, ioup, eps)
if self.loss_square: if self.loss_square:
loss_iou = 1. - iouk * iouk loss_iou = 1. - iouk * iouk
...@@ -145,7 +146,7 @@ class IouLoss(object): ...@@ -145,7 +146,7 @@ class IouLoss(object):
return diou_term + ciou_term return diou_term + ciou_term
def _bbox_transform(self, dcx, dcy, dw, dh, anchors, downsample_ratio, def _bbox_transform(self, dcx, dcy, dw, dh, anchors, downsample_ratio,
batch_size, is_gt): batch_size, is_gt, scale_x_y):
grid_x = int(self._MAX_WI / downsample_ratio) grid_x = int(self._MAX_WI / downsample_ratio)
grid_y = int(self._MAX_HI / downsample_ratio) grid_y = int(self._MAX_HI / downsample_ratio)
an_num = len(anchors) // 2 an_num = len(anchors) // 2
...@@ -179,8 +180,11 @@ class IouLoss(object): ...@@ -179,8 +180,11 @@ class IouLoss(object):
cy.gradient = True cy.gradient = True
else: else:
dcx_sig = fluid.layers.sigmoid(dcx) dcx_sig = fluid.layers.sigmoid(dcx)
cx = fluid.layers.elementwise_add(dcx_sig, gi) / grid_x_act
dcy_sig = fluid.layers.sigmoid(dcy) dcy_sig = fluid.layers.sigmoid(dcy)
if abs(scale_x_y - 1.0) > 1e-6:
dcx_sig = scale_x_y * dcx_sig - 0.5 * (scale_x_y - 1.)
dcy_sig = scale_x_y * dcy_sig - 0.5 * (scale_x_y - 1.)
cx = fluid.layers.elementwise_add(dcx_sig, gi) / grid_x_act
cy = fluid.layers.elementwise_add(dcy_sig, gj) / grid_y_act cy = fluid.layers.elementwise_add(dcy_sig, gj) / grid_y_act
anchor_w_ = [anchors[i] for i in range(0, len(anchors)) if i % 2 == 0] anchor_w_ = [anchors[i] for i in range(0, len(anchors)) if i % 2 == 0]
......
...@@ -147,9 +147,13 @@ class YOLOv3Loss(object): ...@@ -147,9 +147,13 @@ class YOLOv3Loss(object):
loss_w = fluid.layers.reduce_sum(loss_w, dim=[1, 2, 3]) loss_w = fluid.layers.reduce_sum(loss_w, dim=[1, 2, 3])
loss_h = fluid.layers.abs(h - th) * tscale_tobj loss_h = fluid.layers.abs(h - th) * tscale_tobj
loss_h = fluid.layers.reduce_sum(loss_h, dim=[1, 2, 3]) loss_h = fluid.layers.reduce_sum(loss_h, dim=[1, 2, 3])
scale_x_y = self.scale_x_y if not isinstance(
self.scale_x_y, Sequence) else self.scale_x_y[i]
if self._iou_loss is not None: if self._iou_loss is not None:
loss_iou = self._iou_loss(x, y, w, h, tx, ty, tw, th, anchors, loss_iou = self._iou_loss(x, y, w, h, tx, ty, tw, th, anchors,
downsample, self._batch_size) downsample, self._batch_size,
scale_x_y)
loss_iou = loss_iou * tscale_tobj loss_iou = loss_iou * tscale_tobj
loss_iou = fluid.layers.reduce_sum(loss_iou, dim=[1, 2, 3]) loss_iou = fluid.layers.reduce_sum(loss_iou, dim=[1, 2, 3])
loss_ious.append(fluid.layers.reduce_mean(loss_iou)) loss_ious.append(fluid.layers.reduce_mean(loss_iou))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册