Commit 8ce95746 authored by wangxinxin08

finish data preprocess ops

Parent 2ae4ac30
@@ -12,22 +12,27 @@ TrainReader:
   sample_transforms:
     - !DecodeImage
       to_rgb: True
-      # with_mosaic: True
-    # - !MosaicImage
-      # offset: 0.3
-      # mosaic_scale: [0.8, 1.0]
-      # sample_scale: [0.8, 1.0]
-      # sample_flip: 0.5
-      # use_cv2: true
-      # interp: 2
-    - !NormalizeBox {}
+      with_mosaic: True
+    - !Mosaic
+      target_size: 640
+    - !RandomPerspective
+      degree: 0
+      translate: 0.1
+      scale: 0.5
+      shear: 0.0
+      perspective: 0.0
+      border: [-320, -320]
+    - !RandomFlipImage
+      prob: 0.5
+      is_normalized: false
+    - !RandomHSV
+      hgain: 0.015
+      sgain: 0.7
+      vgain: 0.4
     - !PadBox
       num_max_boxes: 50
     - !BboxXYXY2XYWH {}
   batch_transforms:
-    - !RandomShape
-      sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640]
-      random_inter: True
     - !NormalizeImage
       mean: [0.0, 0.0, 0.0]
       std: [1.0, 1.0, 1.0]
@@ -37,10 +42,6 @@ TrainReader:
       to_bgr: false
       channel_first: True
       # focus: false
-    - !Gt2YoloTarget
-      anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
-      anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
-                [59, 119], [116, 90], [156, 198], [373, 326]]
       downsample_ratios: [8, 16, 32]
   batch_size: 2
   mosaic_prob: 0.3
@@ -49,6 +50,9 @@ TrainReader:
   drop_last: true
   worker_num: 8
   bufsize: 16
+  target_size: 640
+  rect: false
+  pad: 0.5
   use_process: true

 EvalReader:
......
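The rewritten sample pipeline follows the YOLOv5-style augmentation recipe: mosaic stitching, random perspective, horizontal flip, and HSV jitter. For intuition, below is a minimal sketch of how an HSV-gain augmentation with the `hgain`/`sgain`/`vgain` values above is commonly implemented, assuming OpenCV and uint8 BGR input. It only illustrates the idea; it is not the repository's `RandomHSV` op.

```python
# A minimal sketch of a YOLOv5-style HSV jitter, for intuition only.
# NOT the repository's RandomHSV op; parameter names mirror the config above.
import cv2
import numpy as np


def random_hsv(img_bgr, hgain=0.015, sgain=0.7, vgain=0.4):
    """Jitter hue/saturation/value by random gains drawn from [1 - gain, 1 + gain]."""
    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    hue, sat, val = cv2.split(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV))

    x = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((x * r[0]) % 180).astype(np.uint8)        # OpenCV hue wraps at 180
    lut_sat = np.clip(x * r[1], 0, 255).astype(np.uint8)
    lut_val = np.clip(x * r[2], 0, 255).astype(np.uint8)

    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat),
                         cv2.LUT(val, lut_val)))
    return cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR)
```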
@@ -21,6 +21,7 @@ import copy
 import functools
 import collections
 import traceback
+import random
 import numpy as np
 import logging
...@@ -209,7 +210,8 @@ class Reader(object): ...@@ -209,7 +210,8 @@ class Reader(object):
memsize='3G', memsize='3G',
inputs_def=None, inputs_def=None,
devices_num=1, devices_num=1,
num_trainers=1): num_trainers=1,
mosaic=False):
self._dataset = dataset self._dataset = dataset
self._roidbs = self._dataset.get_roidb() self._roidbs = self._dataset.get_roidb()
if rect: if rect:
@@ -234,7 +236,8 @@ class Reader(object):
                 elif mini > 1:
                     shapes[i] = [1, 1 / mini]
-            batch_shapes = np.ceil(np.array(shapes) * target_size / stride + pad) * stride
+            batch_shapes = np.ceil(
+                np.array(shapes) * target_size / stride + pad) * stride
             new_roidbs = [self._roidbs[j] for j in irect]
             self._roidbs = new_roidbs
             for i, j in enumerate(bi):
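For a concrete sense of what this computes: with `rect` enabled, each batch gets a padded shape that preserves the batch's dominant aspect ratio, rounded up to a multiple of the stride. A small worked example of the `batch_shapes` formula, using the config's `target_size` and `pad` and assuming `stride = 32` (the largest downsample ratio); the aspect ratio is illustrative:

```python
# Worked example of the rect batch-shape formula above (illustrative values;
# stride = 32 is an assumption matching the largest downsample ratio).
import numpy as np

target_size, stride, pad = 640, 32, 0.5
shapes = np.array([[0.75, 1.0]])   # one batch whose images all have h/w = 0.75
batch_shapes = np.ceil(shapes * target_size / stride + pad) * stride
print(batch_shapes)  # [[512. 672.]] -> pad the whole batch to 512 x 672
# note: pad=0.5 adds half a stride of headroom, so the long side rounds up to 672
```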
@@ -243,6 +246,8 @@ class Reader(object):
         self._fields = copy.deepcopy(inputs_def[
             'fields']) if inputs_def else None
+        self.mosaic = mosaic
+
         # transform
         self._sample_transforms = Compose(sample_transforms,
                                           {'fields': self._fields})
@@ -387,6 +392,17 @@ class Reader(object):
             if self._load_img:
                 sample['image'] = self._load_image(sample['im_file'])
+            if self.mosaic:
+                sample['mosaic'] = []
+                for idx in [
+                        random.randint(0, len(self.indexes) - 1)
+                        for _ in range(3)
+                ]:
+                    rec = copy.deepcopy(self._roidbs[idx])
+                    if self._load_img:
+                        rec['image'] = self._load_image(rec['im_file'])
+                    sample['mosaic'].append(rec)
+
             if self._epoch < self._mixup_epoch:
                 num = len(self.indexes)
                 mix_idx = np.random.randint(1, num)
......
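With mosaic enabled, each base sample now carries three extra records in `sample['mosaic']`, and a downstream op stitches the four images into one training image. The sketch below shows the usual 2x2 stitching scheme (YOLOv4/v5-style) for orientation only; it is not the repository's `Mosaic` op, and `build_mosaic` and its `fill=114` default are hypothetical.

```python
# A minimal sketch of 2x2 mosaic stitching, for orientation only.
# NOT the repository's Mosaic op; build_mosaic and fill=114 are illustrative.
import random
import numpy as np


def build_mosaic(images, target_size=640, fill=114):
    """Stitch 4 HxWx3 uint8 images onto one (2*target_size) square canvas."""
    s = target_size
    canvas = np.full((2 * s, 2 * s, 3), fill, dtype=np.uint8)
    # random mosaic center, kept away from the canvas border
    xc, yc = (random.randint(s // 2, 3 * s // 2) for _ in range(2))
    for i, img in enumerate(images):
        h, w = img.shape[:2]
        if i == 0:    # top-left quadrant
            x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc
        elif i == 1:  # top-right quadrant
            x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, 2 * s), yc
        elif i == 2:  # bottom-left quadrant
            x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(yc + h, 2 * s)
        else:         # bottom-right quadrant
            x1, y1, x2, y2 = xc, yc, min(xc + w, 2 * s), min(yc + h, 2 * s)
        # paste a crop of the source image that fits the quadrant
        # (real mosaic ops usually pick the crop facing the mosaic center)
        canvas[y1:y2, x1:x2] = img[:y2 - y1, :x2 - x1]
    return canvas


# e.g. images = [sample['image']] + [rec['image'] for rec in sample['mosaic']]
# mosaic_img = build_mosaic(images, target_size=640)
```

A full implementation would also shift each record's ground-truth boxes by the same paste offsets and then clip them; the `clip_bbox` helper added below performs exactly that kind of clipping and filtering.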
@@ -462,3 +462,56 @@ def gaussian2D(shape, sigma_x=1, sigma_y=1):
                                                             sigma_y)))
     h[h < np.finfo(h.dtype).eps * h.max()] = 0
     return h
+
+
+def transform_bbox(bbox,
+                   label,
+                   M,
+                   w,
+                   h,
+                   area_thr=0.25,
+                   wh_thr=2,
+                   ar_thr=20,
+                   perspective=False):
+    """
+    Transform bboxes according to the transformation matrix M.
+    """
+    # warp the four corners of every bbox
+    n = len(bbox)
+    xy = np.ones((n * 4, 3), dtype=np.float32)
+    xy[:, :2] = bbox[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)
+    # xy = xy @ M.T
+    xy = np.matmul(xy, M.T)
+    if perspective:
+        xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)
+    else:
+        xy = xy[:, :2].reshape(n, 8)
+    # take the axis-aligned box enclosing the warped corners
+    x = xy[:, [0, 2, 4, 6]]
+    y = xy[:, [1, 3, 5, 7]]
+    new_bbox = np.concatenate(
+        (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+    # clip to the image and drop degenerate boxes
+    new_bbox, mask = clip_bbox(new_bbox, w, h, area_thr, wh_thr, ar_thr)
+    new_label = label[mask]
+    return new_bbox, new_label
+
+
+def clip_bbox(bbox, w, h, area_thr=0.25, wh_thr=2, ar_thr=20):
+    """
+    Clip bboxes to the image boundary (w, h) and filter out boxes that
+    become too small, too thin, or too elongated after clipping.
+    """
+    # area before clipping
+    area1 = (bbox[:, 2:4] - bbox[:, 0:2]).prod(1)
+    bbox[:, [0, 2]] = bbox[:, [0, 2]].clip(0, w)
+    bbox[:, [1, 3]] = bbox[:, [1, 3]].clip(0, h)
+    # area after clipping
+    area2 = (bbox[:, 2:4] - bbox[:, 0:2]).prod(1)
+    area_ratio = area2 / (area1 + 1e-16)
+    wh = bbox[:, 2:4] - bbox[:, 0:2]
+    ar_ratio = np.maximum(wh[:, 1] / (wh[:, 0] + 1e-16),
+                          wh[:, 0] / (wh[:, 1] + 1e-16))
+    mask = (area_ratio > area_thr) & (
+        (wh > wh_thr).all(1)) & (ar_ratio < ar_thr)
+    bbox = bbox[mask]
+    return bbox, mask
\ No newline at end of file
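For reference, a hypothetical usage of the new `transform_bbox` helper. The matrix `M` here is a simple rotation about the image center (in the actual pipeline it would come from the `RandomPerspective` op), and the box and label values are made up; `transform_bbox` is assumed to be in scope from the file above.

```python
# Hypothetical usage of transform_bbox; M, the box and the label are illustrative.
import numpy as np

w, h = 640, 640
theta = np.deg2rad(10)                        # rotate boxes by 10 degrees
R = np.eye(3, dtype=np.float32)
R[:2, :2] = [[np.cos(theta), -np.sin(theta)],
             [np.sin(theta), np.cos(theta)]]
T = np.eye(3, dtype=np.float32)               # rotate about the image center
T[:2, 2] = [w / 2, h / 2]
M = T @ R @ np.linalg.inv(T)

bbox = np.array([[100., 120., 300., 360.]], dtype=np.float32)  # xyxy boxes
label = np.array([[3.]], dtype=np.float32)                     # one class id per box
new_bbox, new_label = transform_bbox(bbox, label, M, w, h, perspective=False)
# boxes that survive the area/size/aspect-ratio filters, with matching labels
print(new_bbox, new_label)
```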
This diff is collapsed.