Commit 8ce95746 authored by: W wangxinxin08

finish data preprocess ops

Parent 2ae4ac30
......@@ -12,22 +12,27 @@ TrainReader:
sample_transforms:
- !DecodeImage
to_rgb: True
# with_mosaic: True
# - !MosaicImage
# offset: 0.3
# mosaic_scale: [0.8, 1.0]
# sample_scale: [0.8, 1.0]
# sample_flip: 0.5
# use_cv2: true
# interp: 2
- !NormalizeBox {}
with_mosaic: True
- !Mosaic
target_size: 640
- !RandomPerspective
degree: 0
translate: 0.1
scale: 0.5
shear: 0.0
perspective: 0.0
border: [-320, -320]
- !RandomFlipImage
prob: 0.5
is_normalized: false
- !RandomHSV
hgain: 0.015
sgain: 0.7
vgain: 0.4
- !PadBox
num_max_boxes: 50
- !BboxXYXY2XYWH {}
batch_transforms:
- !RandomShape
sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640]
random_inter: True
- !NormalizeImage
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
......@@ -37,10 +42,6 @@ TrainReader:
to_bgr: false
channel_first: True
# focus: false
- !Gt2YoloTarget
anchor_masks: [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
[59, 119], [116, 90], [156, 198], [373, 326]]
downsample_ratios: [8, 16, 32]
batch_size: 2
mosaic_prob: 0.3
......@@ -49,6 +50,9 @@ TrainReader:
drop_last: true
worker_num: 8
bufsize: 16
target_size: 640
rect: false
pad: 0.5
use_process: true
EvalReader:
......
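# illustration: the RandomHSV entry configured above (hgain 0.015, sgain 0.7,
# vgain 0.4) is a YOLOv5-style HSV color jitter. The sketch below shows what
# such an op typically does on a uint8 RGB image; the function name and the
# LUT-based implementation are assumptions, not the repo's actual operator.
import cv2
import numpy as np


def random_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4):
    # one random gain per channel, centered around 1.0
    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_RGB2HSV))
    x = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((x * r[0]) % 180).astype(img.dtype)   # OpenCV hue wraps at 180
    lut_sat = np.clip(x * r[1], 0, 255).astype(img.dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(img.dtype)
    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue),
                         cv2.LUT(sat, lut_sat),
                         cv2.LUT(val, lut_val)))
    return cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB)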
......@@ -21,6 +21,7 @@ import copy
import functools
import collections
import traceback
import random
import numpy as np
import logging
......@@ -209,7 +210,8 @@ class Reader(object):
memsize='3G',
inputs_def=None,
devices_num=1,
num_trainers=1):
num_trainers=1,
mosaic=False):
self._dataset = dataset
self._roidbs = self._dataset.get_roidb()
if rect:
......@@ -219,9 +221,9 @@ class Reader(object):
s = []
for i, rec in enumerate(self._roidbs):
s.append([rec['h'], rec['w']])
s = np.array(s)
ar = s[:, 0] / s[:, 1] # h / w
irect = ar.argsort()
ar = ar[irect]
......@@ -233,16 +235,19 @@ class Reader(object):
shapes[i] = [maxi, 1]
elif mini > 1:
shapes[i] = [1, 1 / mini]
batch_shapes = np.ceil(
np.array(shapes) * target_size / stride + pad) * stride
new_roidbs = [self._roidbs[j] for j in irect]
self._roidbs = new_roidbs
for i, j in enumerate(bi):
self._roidbs[i].update({'new_shape': batch_shapes[j]})
self._fields = copy.deepcopy(inputs_def[
'fields']) if inputs_def else None
self.mosaic = mosaic
# transform
self._sample_transforms = Compose(sample_transforms,
{'fields': self._fields})
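# illustration: the rect-training branch above, reproduced standalone. It sorts
# images by aspect ratio and gives every batch one padded shape that is a
# multiple of the network stride. The helper name, arguments, and return values
# are assumptions for this sketch; hw_list is a list of (h, w) pairs.
import numpy as np


def rect_batch_shapes(hw_list, batch_size, target_size=640, stride=32, pad=0.5):
    s = np.array(hw_list, dtype=np.float32)   # [[h, w], ...]
    ar = s[:, 0] / s[:, 1]                    # h / w
    order = ar.argsort()                      # sort images by aspect ratio
    ar = ar[order]
    bi = np.floor(np.arange(len(s)) / batch_size).astype(int)  # batch index per image
    nb = int(bi[-1]) + 1
    shapes = np.ones((nb, 2), dtype=np.float32)
    for i in range(nb):
        ari = ar[bi == i]
        mini, maxi = ari.min(), ari.max()
        if maxi < 1:          # every image wider than tall -> shrink height
            shapes[i] = [maxi, 1]
        elif mini > 1:        # every image taller than wide -> shrink width
            shapes[i] = [1, 1 / mini]
    batch_shapes = np.ceil(shapes * target_size / stride + pad) * stride
    return order, bi, batch_shapes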
......@@ -387,6 +392,17 @@ class Reader(object):
if self._load_img:
sample['image'] = self._load_image(sample['im_file'])
if self.mosaic:
sample['mosaic'] = []
for idx in [
random.randint(0, len(self.indexes) - 1)
for _ in range(3)
]:
rec = copy.deepcopy(self._roidbs[idx])
if self._load_img:
rec['image'] = self._load_image(rec['im_file'])
sample['mosaic'].append(rec)
if self._epoch < self._mixup_epoch:
num = len(self.indexes)
mix_idx = np.random.randint(1, num)
......
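# illustration: the three extra records gathered into sample['mosaic'] above
# are meant to be combined with the main image by the Mosaic op from the
# config. A rough sketch of a 4-image mosaic on a 2 x target_size canvas;
# for brevity it crops each image from its top-left corner, while the real op
# keeps the region adjacent to the mosaic center and shifts the gt boxes.
# Names here are illustrative, not the repo's implementation.
import random
import numpy as np


def mosaic4(images, target_size=640, fill=114):
    s = target_size
    canvas = np.full((2 * s, 2 * s, 3), fill, dtype=np.uint8)
    xc = random.randint(s // 2, 3 * s // 2)   # random mosaic center
    yc = random.randint(s // 2, 3 * s // 2)
    for i, img in enumerate(images[:4]):
        h, w = img.shape[:2]
        if i == 0:    # top-left quadrant
            x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc
        elif i == 1:  # top-right quadrant
            x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, 2 * s), yc
        elif i == 2:  # bottom-left quadrant
            x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(yc + h, 2 * s)
        else:         # bottom-right quadrant
            x1, y1, x2, y2 = xc, yc, min(xc + w, 2 * s), min(yc + h, 2 * s)
        canvas[y1:y2, x1:x2] = img[:y2 - y1, :x2 - x1]
    return canvas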
......@@ -462,3 +462,56 @@ def gaussian2D(shape, sigma_x=1, sigma_y=1):
sigma_y)))
h[h < np.finfo(h.dtype).eps * h.max()] = 0
return h


def transform_bbox(bbox,
                   label,
                   M,
                   w,
                   h,
                   area_thr=0.25,
                   wh_thr=2,
                   ar_thr=20,
                   perspective=False):
    """
    Transform bbox according to the transformation matrix M
    """
    # warp the four corners of every bbox
    n = len(bbox)
    xy = np.ones((n * 4, 3), dtype=np.float32)
    xy[:, :2] = bbox[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)
    # xy = xy @ M.T
    xy = np.matmul(xy, M.T)
    if perspective:
        xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)
    else:
        xy = xy[:, :2].reshape(n, 8)
    # new bboxes are the axis-aligned hulls of the warped corners
    x = xy[:, [0, 2, 4, 6]]
    y = xy[:, [1, 3, 5, 7]]
    new_bbox = np.concatenate(
        (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
    # clip boxes and drop degenerate ones
    new_bbox, mask = clip_bbox(new_bbox, w, h, area_thr, wh_thr, ar_thr)
    new_label = label[mask]
    return new_bbox, new_label


def clip_bbox(bbox, w, h, area_thr=0.25, wh_thr=2, ar_thr=20):
    """
    Clip bbox to the image of size (w, h) and filter out boxes that become
    too small, too thin, or too elongated after clipping.
    """
    # area before clipping
    area1 = (bbox[:, 2:4] - bbox[:, 0:2]).prod(1)
    # clip boxes to the image
    bbox[:, [0, 2]] = bbox[:, [0, 2]].clip(0, w)
    bbox[:, [1, 3]] = bbox[:, [1, 3]].clip(0, h)
    # area after clipping
    area2 = (bbox[:, 2:4] - bbox[:, 0:2]).prod(1)
    area_ratio = area2 / (area1 + 1e-16)
    wh = bbox[:, 2:4] - bbox[:, 0:2]
    ar_ratio = np.maximum(wh[:, 1] / (wh[:, 0] + 1e-16),
                          wh[:, 0] / (wh[:, 1] + 1e-16))
    mask = (area_ratio > area_thr) & (
        (wh > wh_thr).all(1)) & (ar_ratio < ar_thr)
    bbox = bbox[mask]
    return bbox, mask
\ No newline at end of file
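# usage example for transform_bbox / clip_bbox above, driving them with a plain
# rotation-plus-scale matrix. Building M via cv2.getRotationMatrix2D is only
# for illustration; the RandomPerspective op composes its own perspective,
# rotation, shear and translation matrices internally.
if __name__ == '__main__':
    import cv2
    h, w = 640, 640
    bbox = np.array([[100., 120., 300., 360.],
                     [10., 10., 50., 40.]], dtype=np.float32)
    label = np.array([0, 1])
    M = np.eye(3, dtype=np.float32)
    M[:2] = cv2.getRotationMatrix2D((w / 2, h / 2), 10, 0.9)  # rotate 10 deg, scale 0.9
    new_bbox, new_label = transform_bbox(bbox, label, M, w, h, area_thr=0.25)
    print(new_bbox, new_label)  # boxes warped, clipped, and filtered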