提交 5305956e 编写于 作者: D dengkaipeng

add gtscore

上级 2c3a3b36
......@@ -140,7 +140,7 @@ def rescale_box_in_input_image(boxes, im_shape, input_size):
boxes[boxes<0] = 0
return boxes
# NOTE(review): this span is a rendered diff, not runnable code. Indentation has
# been stripped, a line changed by the commit is shown as the old line followed
# by its replacement, and the "......@@" line is a hunk header standing in for
# source lines that are not shown here.
#
# box_crop: restrict ground-truth boxes to a crop window. The commit adds a
# per-box `scores` array that is masked the same way as `labels` and returned
# alongside them.
def box_crop(boxes, labels, crop, img_shape):
def box_crop(boxes, labels, scores, crop, img_shape):
# `crop` is unpacked as (x, y, w, h) and `img_shape` as (im_w, im_h), all as floats.
x, y, w, h = map(float, crop)
im_w, im_h = map(float, img_shape)
......@@ -160,10 +160,11 @@ def box_crop(boxes, labels, crop, img_shape):
# `mask` is first built in lines hidden by the hunk header above; here it is
# narrowed to boxes whose first coordinate pair is strictly below the second.
mask = np.logical_and(mask, (boxes[:, :2] < boxes[:, 2:]).all(axis=1))
# Rejected entries are zeroed rather than removed, so array lengths are unchanged.
boxes = boxes * np.expand_dims(mask.astype('float32'), axis=1)
labels = labels * mask.astype('float32')
scores = scores * mask.astype('float32')
# Convert each box from corner form to (center, size), divided by the crop's w / h.
boxes[:, 0], boxes[:, 2] = (boxes[:, 0] + boxes[:, 2]) / 2 / w, (boxes[:, 2] - boxes[:, 0]) / w
boxes[:, 1], boxes[:, 3] = (boxes[:, 1] + boxes[:, 3]) / 2 / h, (boxes[:, 3] - boxes[:, 1]) / h
# mask.sum() is the number of boxes that passed the mask.
return boxes, labels, mask.sum()
return boxes, labels, scores, mask.sum()
def get_yolo_detection(preds, anchors, class_num, img_width, img_height):
"""Get yolo box, confidence score, class label from Darknet53 output"""
......
......@@ -51,7 +51,7 @@ def random_distort(img):
return img
# NOTE(review): this span is a rendered diff, not runnable code. Indentation has
# been stripped, a line changed by the commit is shown as the old line followed
# by its replacement, and each "......@@" line is a hunk header standing in for
# source lines that are not shown here.
#
# random_crop: collect candidate crop windows, then pick candidates at random
# until one still contains at least one box. The commit threads a per-box
# `scores` array through the function and its return value.
def random_crop(img, boxes, labels, scales=[0.3, 1.0], max_ratio=2.0, constraints=None, max_trial=50):
def random_crop(img, boxes, labels, scores, scales=[0.3, 1.0], max_ratio=2.0, constraints=None, max_trial=50):
# NOTE(review): this early exit returns only two values, while every other
# return below yields (img, boxes, labels) before the commit and
# (img, boxes, labels, scores) after it. A caller that unpacks four values would
# fail with "not enough values to unpack" when `boxes` is empty. Confirm and
# return labels/scores here as well.
if len(boxes) == 0:
return img, boxes
......@@ -65,7 +65,7 @@ def random_crop(img, boxes, labels, scales=[0.3, 1.0], max_ratio=2.0, constraint
(0.0, 1.0)]
img = Image.fromarray(img)
# The commit stops converting the image size to floats here and instead wraps
# the two bare size divisions below in float().
w, h = map(float, img.size)
w, h = img.size
# The uncropped full image is always the first candidate.
crops = [(0, 0, w, h)]
for min_iou, max_iou in constraints:
for _ in range(max_trial):
......@@ -79,8 +79,8 @@ def random_crop(img, boxes, labels, scales=[0.3, 1.0], max_ratio=2.0, constraint
# Candidate window as a single (center_x, center_y, width, height) row, each
# value divided by the image size. The first two numerators already contain a
# `/ 2.0` term; the last two are the ones that gained an explicit float().
crop_box = np.array([[
(crop_x + crop_w / 2.0) / w,
(crop_y + crop_h / 2.0) / h,
crop_w / w,
crop_h /h
crop_w / float(w),
crop_h /float(h)
]])
iou = box_utils.box_iou_xywh(crop_box, boxes)
......@@ -90,14 +90,14 @@ def random_crop(img, boxes, labels, scales=[0.3, 1.0], max_ratio=2.0, constraint
# Draw candidates in random order without replacement.
while crops:
crop = crops.pop(np.random.randint(0, len(crops)))
crop_boxes, crop_labels, box_num = box_utils.box_crop(boxes, labels, crop, (w, h))
crop_boxes, crop_labels, crop_scores, box_num = box_utils.box_crop(boxes, labels, scores, crop, (w, h))
# Skip a candidate that leaves no box.
if box_num < 1:
continue
# Cut the window out and resize it back to the original image size.
img = img.crop((crop[0], crop[1], crop[0] + crop[2], crop[1] + crop[3])).resize(img.size, Image.LANCZOS)
img = np.asarray(img)
return img, crop_boxes, crop_labels
return img, crop_boxes, crop_labels, crop_scores
# No candidate kept a box: hand the inputs back with the image as an array.
img = np.asarray(img)
return img, boxes, labels
return img, boxes, labels, scores
def random_flip(img, gtboxes, thresh=0.5):
if random.random() > thresh:
......@@ -151,13 +151,15 @@ def random_expand(img, gtboxes, max_ratio=4., fill=None, keep_ratio=True, thresh
return out_img.astype('uint8'), gtboxes
# NOTE(review): this span is a rendered diff, not runnable code. Indentation has
# been stripped, a line changed by the commit is shown as the old line followed
# by its replacement, and each "......@@" line is a hunk header standing in for
# source lines that are not shown here.
#
# image_mixup: blend two images with a random weight and merge their
# annotations. The commit adds per-box scores, weighted by each image's share
# of the blend.
def image_mixup(img1, gtboxes1, gtlabels1, img2, gtboxes2, gtlabels2):
def image_mixup(img1, gtboxes1, gtlabels1, gtscores1, img2, gtboxes2, gtlabels2, gtscores2):
# Blend weight for image 1, drawn from Beta(1.5, 1.5) and clamped to [0, 1].
factor = np.random.beta(1.5, 1.5)
factor = max(0.0, min(1.0, factor))
# NOTE(review): the two early returns below still yield three values, although
# the final return of this function now yields four (gtscores added). A caller
# that unpacks four values would fail if either branch were taken. These
# branches are presumably rare, since they need the draw to land exactly on
# 0 or 1. Confirm and return gtscores1 / gtscores2 here too.
if factor >= 1.0:
return img1, gtboxes1, gtlabels1
if factor <= 0.0:
return img2, gtboxes2, gtlabels2
# Each image's scores are scaled by that image's weight in the blend.
gtscores1 = gtscores1 * factor
gtscores2 = gtscores2 * (1.0 - factor)
# The blended canvas is as large as the larger of the two images on each axis.
h = max(img1.shape[0], img2.shape[0])
w = max(img1.shape[1], img2.shape[1])
......@@ -166,10 +168,12 @@ def image_mixup(img1, gtboxes1, gtlabels1, img2, gtboxes2, gtlabels2):
# Image 2 is added into the top-left region of the canvas with weight (1 - factor).
img[:img2.shape[0], :img2.shape[1], :] += img2.astype('float32') * (1.0 - factor)
# Output arrays take the shape of image 1's inputs, which caps how many merged
# entries can be kept (see gt_num below). gtscores1 has already been multiplied
# by `factor` at this point.
gtboxes = np.zeros_like(gtboxes1)
gtlabels = np.zeros_like(gtlabels1)
gtscores = np.zeros_like(gtscores1)
# Keep only image-1 boxes whose columns 2 and 3 are both positive
# (presumably width and height -- TODO confirm the box layout).
gt_valid_mask1 = np.logical_and(gtboxes1[:, 2] > 0, gtboxes1[:, 3] > 0)
gtboxes1 = gtboxes1[gt_valid_mask1]
gtlabels1 = gtlabels1[gt_valid_mask1]
gtscores1 = gtscores1[gt_valid_mask1]
# Rescale from image-1 dimensions to the canvas dimensions.
gtboxes1[:, 0] = gtboxes1[:, 0] * img1.shape[1] / w
gtboxes1[:, 1] = gtboxes1[:, 1] * img1.shape[0] / h
gtboxes1[:, 2] = gtboxes1[:, 2] * img1.shape[1] / w
......@@ -178,23 +182,28 @@ def image_mixup(img1, gtboxes1, gtlabels1, img2, gtboxes2, gtlabels2):
# Same filtering and rescaling for image 2.
gt_valid_mask2 = np.logical_and(gtboxes2[:, 2] > 0, gtboxes2[:, 3] > 0)
gtboxes2 = gtboxes2[gt_valid_mask2]
gtlabels2 = gtlabels2[gt_valid_mask2]
gtscores2 = gtscores2[gt_valid_mask2]
gtboxes2[:, 0] = gtboxes2[:, 0] * img2.shape[1] / w
gtboxes2[:, 1] = gtboxes2[:, 1] * img2.shape[0] / h
gtboxes2[:, 2] = gtboxes2[:, 2] * img2.shape[1] / w
gtboxes2[:, 3] = gtboxes2[:, 3] * img2.shape[0] / h
# Image-1 entries come first, then image-2 entries.
gtboxes_all = np.concatenate((gtboxes1, gtboxes2), axis=0)
gtlabels_all = np.concatenate((gtlabels1, gtlabels2), axis=0)
gtscores_all = np.concatenate((gtscores1, gtscores2), axis=0)
# Copy as many merged entries as fit; anything beyond the output length is
# dropped, and unused slots stay zero.
gt_num = min(len(gtboxes), len(gtboxes_all))
gtboxes[:gt_num] = gtboxes_all[:gt_num]
gtlabels[:gt_num] = gtlabels_all[:gt_num]
return img.astype('uint8'), gtboxes, gtlabels
gtscores[:gt_num] = gtscores_all[:gt_num]
return img.astype('uint8'), gtboxes, gtlabels, gtscores
# NOTE(review): this span is a rendered diff, not runnable code. Indentation has
# been stripped and a line changed by the commit is shown as the old line
# followed by its replacement.
#
# image_augment: run the augmentation steps in a fixed order -- distort, expand,
# crop, interpolate to `size`, flip -- and cast the results. The commit adds
# `gtscores` as an input and as a fourth float32 output.
def image_augment(img, gtboxes, gtlabels, size, means=None):
def image_augment(img, gtboxes, gtlabels, gtscores, size, means=None):
img = random_distort(img)
# `means` is forwarded as the fill value for the expanded area.
img, gtboxes = random_expand(img, gtboxes, fill=means)
# random_crop is the only step here that receives and returns labels and scores.
img, gtboxes, gtlabels = random_crop(img, gtboxes, gtlabels)
img, gtboxes, gtlabels, gtscores = random_crop(img, gtboxes, gtlabels, gtscores)
img = random_interp(img, size)
img, gtboxes = random_flip(img, gtboxes)
# Image, boxes and scores are cast to float32; labels to int32.
return img.astype('float32'), gtboxes.astype('float32'), gtlabels.astype('int32')
return img.astype('float32'), gtboxes.astype('float32'), \
gtlabels.astype('int32'), gtscores.astype('float32')
......@@ -204,6 +204,7 @@ class YOLOv3(object):
x=out,
gtbox=self.gtbox,
gtlabel=self.gtlabel,
gtscore=self.gtscore,
anchors=anchors,
anchor_mask=anchor_mask,
class_num=class_num,
......@@ -232,11 +233,11 @@ class YOLOv3(object):
if self.use_pyreader and self.is_train:
self.py_reader = fluid.layers.py_reader(
capacity=64,
shapes = [[-1] + self.image_shape, [-1, cfg.max_box_num, 4], [-1, cfg.max_box_num]],
lod_levels=[0, 0, 0],
dtypes=['float32'] * 2 + ['int32'],
shapes = [[-1] + self.image_shape, [-1, cfg.max_box_num, 4], [-1, cfg.max_box_num], [-1, cfg.max_box_num]],
lod_levels=[0, 0, 0, 0],
dtypes=['float32'] * 2 + ['int32'] + ['float32'],
use_double_buffer=True)
self.image, self.gtbox, self.gtlabel = fluid.layers.read_file(self.py_reader)
self.image, self.gtbox, self.gtlabel, self.gtscore = fluid.layers.read_file(self.py_reader)
else:
self.image = fluid.layers.data(
name='image', shape=self.image_shape, dtype='float32'
......@@ -247,6 +248,9 @@ class YOLOv3(object):
self.gtlabel = fluid.layers.data(
name='gtlabel', shape=[cfg.max_box_num], dtype='int32'
)
self.gtscore = fluid.layers.data(
name='gtscore', shape=[cfg.max_box_num], dtype='float32'
)
self.im_shape = fluid.layers.data(
name="im_shape", shape=[2], dtype='int32')
self.im_id = fluid.layers.data(
......@@ -255,7 +259,7 @@ class YOLOv3(object):
# NOTE(review): rendered diff with stripped indentation; the last two lines are
# the old and the new version of the same return statement.
#
# feeds: list the input variables of the network. Outside training it returns
# image, image id and image shape; in training it returns image, boxes and
# labels, plus the ground-truth scores added by this commit.
def feeds(self):
if not self.is_train:
return [self.image, self.im_id, self.im_shape]
return [self.image, self.gtbox, self.gtlabel]
return [self.image, self.gtbox, self.gtlabel, self.gtscore]
# get_hyperparams: return the `hyperparams` attribute stored on the instance.
# (Rendered diff context; indentation has been stripped.)
def get_hyperparams(self):
return self.hyperparams
......
......@@ -41,7 +41,7 @@ class DataSetReader(object):
# cfg.data_dir = "dataset/coco"
# cfg.train_file_list = 'annotations/instances_val2017.json'
# cfg.train_data_dir = 'val2017'
cfg.dataset = "coco2017"
# cfg.dataset = "coco2017"
if 'coco2014' in cfg.dataset:
cfg.train_file_list = 'annotations/instances_train2014.json'
cfg.train_data_dir = 'train2014'
......@@ -170,16 +170,20 @@ class DataSetReader(object):
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
gt_boxes = img['gt_boxes'].copy()
gt_labels = img['gt_labels'].copy()
gt_scores = np.ones_like(gt_labels)
if mixup_img:
mixup_im = cv2.imread(mixup_img['image'])
mixup_im = cv2.cvtColor(mixup_im, cv2.COLOR_BGR2RGB)
mixup_gt_boxes = mixup_img['gt_boxes'].copy()
mixup_gt_labels = mixup_img['gt_labels'].copy()
im, gt_boxes, gt_labels = image_utils.image_mixup(im, gt_boxes, gt_labels, \
mixup_im, mixup_gt_boxes, mixup_gt_labels)
mixup_gt_scores = np.ones_like(mixup_gt_labels)
im, gt_boxes, gt_labels, gt_scores = image_utils.image_mixup(im, gt_boxes, \
gt_labels, gt_scores, mixup_im, mixup_gt_boxes, mixup_gt_labels, \
mixup_gt_scores)
im, gt_boxes, gt_labels, gt_scores = image_utils.image_augment(im, gt_boxes, gt_labels, gt_scores, size, mean)
im, gt_boxes, gt_labels = image_utils.image_augment(im, gt_boxes, gt_labels, size, mean)
# h, w, _ = im.shape
# im_scale_x = size / float(w)
# im_scale_y = size / float(h)
......@@ -190,7 +194,7 @@ class DataSetReader(object):
out_img = (im / 255.0 - mean) / std
out_img = out_img.transpose((2, 0, 1)).astype('float32')
return (out_img, gt_boxes, gt_labels)
return (out_img, gt_boxes, gt_labels, gt_scores)
def get_img_size(size, random_sizes=[]):
if len(random_sizes):
......@@ -222,9 +226,9 @@ class DataSetReader(object):
total_read_cnt += 1
if read_cnt % len(imgs) == 0 and shuffle:
np.random.shuffle(imgs)
im, gt_boxes, gt_labels = img_reader_with_augment(img, img_size, cfg.pixel_means, cfg.pixel_stds, mixup_img)
batch_out.append((im, gt_boxes, gt_labels))
# img_ids.append(img['id'])
im, gt_boxes, gt_labels, gt_scores = img_reader_with_augment(img, img_size, cfg.pixel_means, cfg.pixel_stds, mixup_img)
batch_out.append((im, gt_boxes, gt_labels, gt_scores))
# img_ids.append((img['id'], mixup_img['id'] if mixup_img else -1))
if len(batch_out) == batch_size:
# print("img_ids: ", img_ids)
......
......@@ -112,7 +112,7 @@ def parse_args():
# TRAIN TEST INFER
add_arg('input_size', int, 608, "Image input size of YOLOv3.")
add_arg('random_shape', bool, False, "Resize to random shape for train reader")
add_arg('no_mixup_iter', int, 4000, "Disable mixup in last N iter.")
add_arg('no_mixup_iter', int, 40000, "Disable mixup in last N iter.")
add_arg('valid_thresh', float, 0.01, "Valid confidence score for NMS.")
add_arg('nms_thresh', float, 0.45, "NMS threshold.")
add_arg('nms_topk', int, 400, "The number of boxes to perform NMS.")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册