Commit 6c19ddfd authored by dengkaipeng

use yolo_box op

Parent 635ca681
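The substance of this commit: the numpy-based YOLO post-processing helpers in box_utils (box decoding, per-class NMS, rescaling to the input image) are removed, and the network itself now emits final detections through Paddle's yolo_box and multiclass_nms operators, which eval.py and infer.py fetch as a single LoDTensor. The sketch below condenses the new prediction path shown in the model changes further down; the helper name build_pred and the surrounding setup are assumptions for illustration, while the operator arguments and cfg thresholds are taken from the diff.

```python
# A minimal sketch of the new prediction path, assuming PaddlePaddle 1.x fluid and
# the cfg thresholds used in this diff (valid_thresh, nms_thresh, nms_topk, nms_posk).
import paddle.fluid as fluid

def build_pred(head_outputs, mask_anchors_list, im_shape, class_num, cfg):
    boxes_list, scores_list = [], []
    downsample = 32  # the three detection heads sit at strides 32, 16, 8
    for i, (out, mask_anchors) in enumerate(zip(head_outputs, mask_anchors_list)):
        # yolo_box decodes one head into [N, M, 4] boxes (input-image coordinates)
        # and [N, M, class_num] scores, suppressing boxes whose objectness is
        # below conf_thresh.
        boxes, scores = fluid.layers.yolo_box(
            x=out,
            img_size=im_shape,
            anchors=mask_anchors,
            class_num=class_num,
            conf_thresh=cfg.valid_thresh,
            downsample_ratio=downsample,
            name="yolo_box" + str(i))
        boxes_list.append(boxes)
        # multiclass_nms expects scores shaped [N, class_num, M], hence the transpose.
        scores_list.append(fluid.layers.transpose(scores, perm=[0, 2, 1]))
        downsample //= 2
    yolo_boxes = fluid.layers.concat(boxes_list, axis=1)
    yolo_scores = fluid.layers.concat(scores_list, axis=2)
    # Returns a LoDTensor with one [label, score, x1, y1, x2, y2] row per kept box.
    return fluid.layers.multiclass_nms(
        bboxes=yolo_boxes,
        scores=yolo_scores,
        score_threshold=cfg.valid_thresh,
        nms_top_k=cfg.nms_topk,
        keep_top_k=cfg.nms_posk,
        nms_threshold=cfg.nms_thresh,
        background_label=-1,
        name="multiclass_nms")
```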
@@ -26,10 +26,6 @@ from matplotlib import pyplot as plt
from PIL import Image
def sigmoid(x):
"""Perform sigmoid to input numpy array"""
return 1.0 / (1.0 + np.exp(-1.0 * x))
def coco_anno_box_to_center_relative(box, img_height, img_width):
"""
Convert COCO annotations box with format [x1, y1, w, h] to
@@ -118,30 +114,6 @@ def box_iou_xyxy(box1, box2):
return inter_area / (b1_area + b2_area - inter_area)
def rescale_box_in_input_image(boxes, im_shape, input_size):
"""Scale (x1, x2, y1, y2) box of yolo output to input image"""
h, w = im_shape
# max_dim = max(h , w)
# boxes = boxes * max_dim / input_size
# dim_diff = np.abs(h - w)
# pad = dim_diff // 2
# if h <= w:
# boxes[:, 1] -= pad
# boxes[:, 3] -= pad
# else:
# boxes[:, 0] -= pad
# boxes[:, 2] -= pad
fx = w / input_size
fy = h / input_size
boxes[:, 0] *= fx
boxes[:, 1] *= fy
boxes[:, 2] *= fx
boxes[:, 3] *= fy
boxes[boxes<0] = 0
boxes[:, 2][boxes[:, 2] > (w - 1)] = w - 1
boxes[:, 3][boxes[:, 3] > (h - 1)] = h - 1
return boxes
def box_crop(boxes, labels, scores, crop, img_shape):
x, y, w, h = map(float, crop)
im_w, im_h = map(float, img_shape)
@@ -169,161 +141,6 @@ def box_crop(boxes, labels, scores, crop, img_shape):
return boxes, labels, scores, mask.sum()
def get_yolo_detection(preds, anchors, class_num, img_width, img_height):
"""Get yolo box, confidence score, class label from Darknet53 output"""
preds_n = np.array(preds)
n, c, h, w = preds_n.shape
anchor_num = len(anchors) // 2
preds_n = preds_n.reshape([n, anchor_num, class_num + 5, h, w]) \
.transpose((0, 1, 3, 4, 2))
preds_n[:, :, :, :, :2] = sigmoid(preds_n[:, :, :, :, :2])
preds_n[:, :, :, :, 4:] = sigmoid(preds_n[:, :, :, :, 4:])
pred_boxes = preds_n[:, :, :, :, :4]
pred_confs = preds_n[:, :, :, :, 4]
pred_scores = preds_n[:, :, :, :, 5:] * np.expand_dims(pred_confs, axis=4)
grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1))
grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w))
anchors = [(anchors[i], anchors[i+1]) for i in range(0, len(anchors), 2)]
anchors_s = np.array([(an_w, an_h) for an_w, an_h in anchors])
anchor_w = anchors_s[:, 0:1].reshape((1, anchor_num, 1, 1))
anchor_h = anchors_s[:, 1:2].reshape((1, anchor_num, 1, 1))
pred_boxes[:, :, :, :, 0] += grid_x
pred_boxes[:, :, :, :, 1] += grid_y
pred_boxes[:, :, :, :, 2] = np.exp(pred_boxes[:, :, :, :, 2]) * anchor_w
pred_boxes[:, :, :, :, 3] = np.exp(pred_boxes[:, :, :, :, 3]) * anchor_h
pred_boxes[:, :, :, :, 0] = pred_boxes[:, :, :, :, 0] * img_width / w
pred_boxes[:, :, :, :, 1] = pred_boxes[:, :, :, :, 1] * img_height / h
pred_boxes[:, :, :, :, 2] = pred_boxes[:, :, :, :, 2]
pred_boxes[:, :, :, :, 3] = pred_boxes[:, :, :, :, 3]
pred_boxes = box_xywh_to_xyxy(pred_boxes)
pred_boxes = np.tile(np.expand_dims(pred_boxes, axis=4), (1, 1, 1, 1, class_num, 1))
pred_labels = np.zeros_like(pred_scores) + np.arange(class_num)
return (
pred_boxes.reshape((n, -1, 4)),
pred_scores.reshape((n, -1)),
pred_labels.reshape((n, -1)),
)
def get_all_yolo_pred(outputs, yolo_anchors, yolo_classes, input_shape):
all_pred_boxes = []
all_pred_scores = []
all_pred_labels = []
for output, anchors, classes in zip(outputs, yolo_anchors, yolo_classes):
pred_boxes, pred_scores, pred_labels = get_yolo_detection(output, anchors, classes, input_shape[0], input_shape[1])
all_pred_boxes.append(pred_boxes)
all_pred_labels.append(pred_labels)
all_pred_scores.append(pred_scores)
pred_boxes = np.concatenate(all_pred_boxes, axis=1)
pred_scores = np.concatenate(all_pred_scores, axis=1)
pred_labels = np.concatenate(all_pred_labels, axis=1)
return (pred_boxes, pred_scores, pred_labels)
def calc_nms_box_new(pred_boxes, pred_scores, pred_labels, valid_thresh=0.01, nms_thresh=0.4, nms_topk=400, nms_posk=100):
output_boxes = np.empty((0, 4))
output_scores = np.empty(0)
output_labels = np.empty(0)
for boxes, labels, scores in zip(pred_boxes, pred_labels, pred_scores):
valid_mask = scores > valid_thresh
boxes = boxes[valid_mask]
scores = scores[valid_mask]
labels = labels[valid_mask]
score_sort_index = np.argsort(scores)[::-1]
boxes = boxes[score_sort_index][:nms_topk]
scores = scores[score_sort_index][:nms_topk]
labels = labels[score_sort_index][:nms_topk]
for c in np.unique(labels):
c_mask = labels == c
c_boxes = boxes[c_mask]
c_scores = scores[c_mask]
detect_boxes = []
detect_scores = []
detect_labels = []
while c_boxes.shape[0]:
detect_boxes.append(c_boxes[0])
detect_scores.append(c_scores[0])
detect_labels.append(c)
if c_boxes.shape[0] == 1:
break
iou = box_iou_xyxy(detect_boxes[-1].reshape((1, 4)), c_boxes[1:])
c_boxes = c_boxes[1:][iou < nms_thresh]
c_scores = c_scores[1:][iou < nms_thresh]
output_boxes = np.append(output_boxes, detect_boxes, axis=0)
output_scores = np.append(output_scores, detect_scores)
output_labels = np.append(output_labels, detect_labels)
return (output_boxes, output_scores, output_labels)
def calc_nms_box(pred_boxes, pred_confs, pred_labels, im_shape, input_size, valid_thresh=0.8, nms_thresh=0.4, nms_topk=400, nms_posk=100):
"""
Remove detections whose confidence score is under valid_thresh and perform
Non-Maximum Suppression on the filtered boxes
"""
_, box_num, class_num = pred_labels.shape
pred_boxes = box_xywh_to_xyxy(pred_boxes)
output_boxes = np.empty((0, 4))
output_scores = np.empty(0)
output_labels = np.empty((0))
for i, (boxes, confs, classes) in enumerate(zip(pred_boxes, pred_confs, pred_labels)):
conf_mask = confs > valid_thresh
if conf_mask.sum() == 0:
continue
boxes = boxes[conf_mask]
classes = classes[conf_mask]
confs = confs[conf_mask]
conf_sort_index = np.argsort(confs)[::-1]
boxes = boxes[conf_sort_index][:nms_topk]
classes = classes[conf_sort_index][:nms_topk]
confs = confs[conf_sort_index][:nms_topk]
cls_score = np.max(classes, axis=1)
cls_pred = np.argmax(classes, axis=1)
for c in np.unique(cls_pred):
c_mask = cls_pred == c
c_confs = confs[c_mask]
c_boxes = boxes[c_mask]
c_scores = cls_score[c_mask]
c_score_index = np.argsort(c_scores)
c_boxes_s = c_boxes[c_score_index[::-1]]
c_confs_s = c_confs[c_score_index[::-1]]
c_scores_s = c_scores[c_score_index[::-1]]
detect_boxes = []
detect_scores = []
detect_labels = []
while c_boxes_s.shape[0]:
detect_boxes.append(c_boxes_s[0])
detect_scores.append(c_scores_s[0])
detect_labels.append(c)
if c_boxes_s.shape[0] == 1:
break
iou = box_iou_xyxy(detect_boxes[-1].reshape((1, 4)), c_boxes_s[1:])
c_boxes_s = c_boxes_s[1:][iou < nms_thresh]
c_confs_s = c_confs_s[1:][iou < nms_thresh]
c_scores_s = c_scores_s[1:][iou < nms_thresh]
output_boxes = np.append(output_boxes, detect_boxes, axis=0)
output_scores = np.append(output_scores, detect_scores)
output_labels = np.append(output_labels, detect_labels)
output_boxes = output_boxes[:nms_posk]
output_scores = output_scores[:nms_posk]
output_labels = output_labels[:nms_posk]
output_boxes = rescale_box_in_input_image(output_boxes, im_shape, input_size)
return (output_boxes, output_scores, output_labels)
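For comparison, the removed numpy path chained the helpers above roughly as in the old eval.py code further down this diff; a minimal sketch, assuming batch_outputs holds the three raw head tensors and im_shape / input_size come from the reader:

```python
# Sketch of the CPU-side post-processing this commit deletes, kept here only to
# show what yolo_box + multiclass_nms now does inside the graph. Threshold values
# mirror the defaults of calc_nms_box_new above.
pred_boxes, pred_scores, pred_labels = get_all_yolo_pred(
    batch_outputs, yolo_anchors, yolo_classes, (input_size, input_size))
boxes, scores, labels = calc_nms_box_new(
    pred_boxes, pred_scores, pred_labels,
    valid_thresh=0.01, nms_thresh=0.4)
boxes = rescale_box_in_input_image(boxes, im_shape, input_size)
```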
def draw_boxes_on_image(image_path, boxes, scores, labels, label_names, score_thresh=0.5):
image = np.array(Image.open(image_path))
plt.figure()
@@ -20,7 +20,6 @@ import time
import numpy as np
import paddle
import paddle.fluid as fluid
import box_utils
import reader
import models
from utility import print_arguments, parse_args
@@ -64,6 +63,8 @@ def eval():
def get_pred_result(boxes, scores, labels, im_id):
result = []
for box, score, label in zip(boxes, scores, labels):
if score < 0.05:
continue
x1, y1, x2, y2 = box
w = x2 - x1 + 1
h = y2 - y1 + 1
@@ -72,41 +73,41 @@ def eval():
res = {
'image_id': im_id,
'category_id': label_ids[int(label)],
'bbox': bbox,
'score': score
'bbox': map(float, bbox),
'score': float(score)
}
result.append(res)
return result
dts_res = []
fetch_list = outputs
fetch_list = [outputs]
total_time = 0
for batch_id, batch_data in enumerate(test_reader()):
start_time = time.time()
batch_outputs = exe.run(
fetch_list=[v.name for v in fetch_list],
feed=feeder.feed(batch_data),
return_numpy=False)
for data, outputs in zip(batch_data, batch_outputs):
im_id = data[1]
im_shape = data[2]
pred_boxes, pred_scores, pred_labels = box_utils.get_all_yolo_pred(
batch_outputs, yolo_anchors, yolo_classes, (input_size, input_size))
boxes, scores, labels = box_utils.calc_nms_box_new(pred_boxes, pred_scores, pred_labels,
cfg.valid_thresh, cfg.nms_thresh)
boxes = box_utils.rescale_box_in_input_image(boxes, im_shape, input_size)
return_numpy=False,
use_program_cache=True)
lod = batch_outputs[0].lod()[0]
nmsed_boxes = np.array(batch_outputs[0])
if nmsed_boxes.shape[1] != 6:
continue
for i in range(len(lod) - 1):
im_id = batch_data[i][1]
start = lod[i]
end = lod[i + 1]
if start == end:
continue
nmsed_box = nmsed_boxes[start:end, :]
labels = nmsed_box[:, 0]
scores = nmsed_box[:, 1]
boxes = nmsed_box[:, 2:6]
dts_res += get_pred_result(boxes, scores, labels, im_id)
end_time = time.time()
print("batch id: {}, time: {}".format(batch_id, end_time - start_time))
total_time += (end_time - start_time)
if cfg.debug:
if '2014' in cfg.dataset:
img_name = "COCO_val2014_{:012d}.jpg".format(im_id)
box_utils.draw_boxes_on_image(os.path.join("./dataset/coco/val2014", img_name), boxes, scores, labels, label_names)
if '2017' in cfg.dataset:
img_name = "{:012d}.jpg".format(im_id)
box_utils.draw_boxes_on_image(os.path.join("./dataset/coco/val2017", img_name), boxes, scores, labels, label_names)
total_time += end_time - start_time
with open("yolov3_result.json", 'w') as outfile:
json.dump(dts_res, outfile)
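The fetched multiclass_nms output above is a LoDTensor: its level-0 LoD records where each image's detections start and end, and every row is [label, score, x1, y1, x2, y2]. A minimal standalone sketch of unpacking it (variable names are illustrative):

```python
import numpy as np

# batch_outputs[0] is the LoDTensor fetched from multiclass_nms for one batch.
lod = batch_outputs[0].lod()[0]     # e.g. [0, 3, 3, 7]: image 0 owns rows 0:3,
nmsed = np.array(batch_outputs[0])  # image 1 kept nothing, image 2 owns rows 3:7
for i in range(len(lod) - 1):
    start, end = lod[i], lod[i + 1]
    if start == end:                # no detections survived NMS for this image
        continue
    rows = nmsed[start:end]
    labels, scores, boxes = rows[:, 0], rows[:, 1], rows[:, 2:6]
```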
@@ -34,7 +34,8 @@ def infer():
fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
# yapf: enable
feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
fetch_list = outputs
fetch_list = [outputs]
# fetch_list = outputs
image_names = []
if cfg.image_name is not None:
image_names.append(cfg.image_name)
@@ -50,13 +51,14 @@ def infer():
outputs = exe.run(
fetch_list=[v.name for v in fetch_list],
feed=feeder.feed(data),
return_numpy=True)
return_numpy=False)
bboxes = np.array(outputs[0])
if bboxes.shape[1] != 6:
print("No object found in {}".format(image_name))
labels = bboxes[:, 0].astype('int32')
scores = bboxes[:, 1].astype('float32')
boxes = bboxes[:, 2:].astype('float32')
pred_boxes, pred_scores, pred_labels = box_utils.get_all_yolo_pred(outputs, yolo_anchors,
yolo_classes, (input_size, input_size))
boxes, scores, labels = box_utils.calc_nms_box_new(pred_boxes, pred_scores, pred_labels,
cfg.valid_thresh, cfg.nms_thresh)
boxes = box_utils.rescale_box_in_input_image(boxes, im_shape, input_size)
path = os.path.join(cfg.image_path, image_name)
box_utils.draw_boxes_on_image(path, boxes, scores, labels, label_names, cfg.draw_thresh)
@@ -99,6 +99,8 @@ class YOLOv3(object):
self.use_random = use_random
self.outputs = []
self.losses = []
self.boxes = []
self.scores = []
self.downsample = 32
def build_model(self):
@@ -213,6 +215,18 @@ class YOLOv3(object):
# use_label_smooth=False,
name="yolo_loss"+str(i))
self.losses.append(fluid.layers.reduce_mean(loss))
else:
boxes, scores = fluid.layers.yolo_box(
x=out,
img_size=self.im_shape,
anchors=mask_anchors,
class_num=class_num,
conf_thresh=cfg.valid_thresh,
downsample_ratio=self.downsample,
name="yolo_box"+str(i))
self.boxes.append(boxes)
self.scores.append(fluid.layers.transpose(scores, perm=[0, 2, 1]))
self.downsample //= 2
layer_outputs.append(out)
@@ -221,7 +235,17 @@ class YOLOv3(object):
return sum(self.losses)
def get_pred(self):
return self.outputs
yolo_boxes = fluid.layers.concat(self.boxes, axis=1)
yolo_scores = fluid.layers.concat(self.scores, axis=2)
return fluid.layers.multiclass_nms(
bboxes=yolo_boxes,
scores=yolo_scores,
score_threshold=cfg.valid_thresh,
nms_top_k=cfg.nms_topk,
keep_top_k=cfg.nms_posk,
nms_threshold=cfg.nms_thresh,
background_label=-1,
name="multiclass_nms")
def get_yolo_anchors(self):
return self.yolo_anchors
@@ -156,7 +156,7 @@ class DataSetReader(object):
h, w, _ = im.shape
im_scale_x = size / float(w)
im_scale_y = size / float(h)
out_img = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=cv2.INTER_LINEAR)
out_img = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=cv2.INTER_CUBIC)
mean = np.array(mean).reshape((1, 1, -1))
std = np.array(std).reshape((1, 1, -1))
out_img = (out_img / 255.0 - mean) / std