提交 de4504d6 编写于 作者: B baiyfbupt

refine infer.py

上级 894e7ac0
# saved model
model/ model/
# pretrained model
pretrained/
# used data and label
data/ data/
label/ label/
pretrained/
# log and swap files
*.swp *.swp
*.log
# infer
infer_results/
...@@ -3,6 +3,7 @@ import time ...@@ -3,6 +3,7 @@ import time
import numpy as np import numpy as np
import argparse import argparse
import functools import functools
import datetime
from PIL import Image from PIL import Image
from PIL import ImageDraw from PIL import ImageDraw
...@@ -11,7 +12,7 @@ import paddle.fluid as fluid ...@@ -11,7 +12,7 @@ import paddle.fluid as fluid
import reader import reader
from pyramidbox import PyramidBox from pyramidbox import PyramidBox
from utility import add_arguments, print_arguments from utility import add_arguments, print_arguments
from paddle.fluid.framework import Program, Parameter, default_main_program, Variable
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser) add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable # yapf: disable
...@@ -28,65 +29,282 @@ add_arg('resize_w', int, 0, "The resized image height.") ...@@ -28,65 +29,282 @@ add_arg('resize_w', int, 0, "The resized image height.")
def draw_bounding_box_on_image(image_path, nms_out, confs_threshold):
    """Draw detections on an image and save it under ./infer_results/.

    Args:
        image_path: Path of the source image. Its last two '/'-separated
            components are used as "<image_class>/<image_name>" for the
            output location.
        nms_out: Iterable of detections, each unpacking to
            (xmin, ymin, xmax, ymax, score). The coordinates are used
            directly as drawing coordinates (no rescaling is applied).
        confs_threshold: Detections scoring below this value are skipped.
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    for dt in nms_out:
        xmin, ymin, xmax, ymax, score = dt
        if score < confs_threshold:
            continue
        (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
        draw.line(
            [(left, top), (left, bottom), (right, bottom), (right, top),
             (left, top)],
            width=4,
            fill='red')

    # Work with the interpreter's native str type. The previous
    # `.encode('utf-8')` calls produced bytes on Python 3 (str + bytes
    # raises TypeError) and failed on non-ASCII Python 2 byte strings.
    if not isinstance(image_path, str):
        if isinstance(image_path, bytes):
            image_path = image_path.decode('utf-8')  # Python 3 bytes
        else:
            image_path = image_path.encode('utf-8')  # Python 2 unicode
    image_name = image_path.split('/')[-1]
    image_class = image_path.split('/')[-2]

    out_dir = './infer_results/' + image_class
    # Image.save does not create missing directories.
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    print("image with bbox drawed saved as {}".format(image_name))
    image.save(out_dir + '/' + image_name)
def write_to_txt(image_path, f, nms_out):
    """Write the detections of one image to an open text file.

    Output layout, one item per line:
        <image_class>/<image_name>
        <number of detections>
        <xmin> <ymin> <width> <height> <score>   (one line per detection)

    Args:
        image_path: Path of the image; its last two '/'-separated
            components give the class directory and the file name.
        f: Writable text file object.
        nms_out: Array with one row per detection, laid out as
            (xmin, ymin, xmax, ymax, score).
    """
    # Work with the interpreter's native str type. The previous
    # `.encode('utf-8')` calls produced bytes on Python 3 (which cannot be
    # concatenated with str or formatted with '{:s}') and failed on
    # non-ASCII Python 2 byte strings.
    if not isinstance(image_path, str):
        if isinstance(image_path, bytes):
            image_path = image_path.decode('utf-8')  # Python 3 bytes
        else:
            image_path = image_path.encode('utf-8')  # Python 2 unicode
    image_name = image_path.split('/')[-1]
    image_class = image_path.split('/')[-2]

    f.write('{:s}\n'.format(image_class + '/' + image_name))
    f.write('{:d}\n'.format(nms_out.shape[0]))
    for dt in nms_out:
        xmin, ymin, xmax, ymax, score = dt
        # Boxes are written as (x, y, width, height) with inclusive pixel
        # extents, hence the "+ 1".
        f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, (
            xmax - xmin + 1), (ymax - ymin + 1), score))
    # splitext instead of [:-4] so extensions of any length are stripped.
    print("image infer result saved {}".format(
        os.path.splitext(image_name)[0]))
def get_round(x, loc):
    """Truncate (not round) x to at most `loc` decimal places.

    Args:
        x: Number to truncate.
        loc: Maximum number of digits to keep after the decimal point.

    Returns:
        `x` unchanged when it already has no more than `loc` decimal
        digits (including integers, inf and nan); otherwise a float whose
        extra decimal digits have been cut off.
    """
    str_x = str(x)
    if 'e' in str_x or 'E' in str_x:
        # Exponent notation (e.g. '1.5e-05') cannot be sliced on '.';
        # expand it to plain positional notation first.
        from decimal import Decimal
        str_x = format(Decimal(str_x), 'f')
    if '.' not in str_x:
        # Previously fell through and returned None for such inputs.
        return x
    str_before, str_after = str_x.split('.', 1)
    # Compare against `loc` itself; the old hard-coded ">= 3" was only
    # right for loc == 2.
    if len(str_after) <= loc:
        return x
    return float(str_before + '.' + str_after[:loc])
if args.resize_h and args.resize_w:
image_shape = [3, args.resize_h, args.resize_w]
else:
image_shape = data.shape[1:]
def bbox_vote(det):
    """Merge overlapping detections by score-weighted box voting.

    Detections are processed in descending score order. The current
    top-scoring box and every box whose IoU with it is >= 0.3 form a
    group. A group of two or more boxes is replaced by one box whose
    coordinates are the score-weighted mean of the group and whose score
    is the group's maximum.

    NOTE(review): a group containing a single box is discarded unless it
    is the last remaining detection. This is the behaviour of the
    original code and is kept as is - confirm it is intended.

    Args:
        det: Array of shape (N, 5) with rows (xmin, ymin, xmax, ymax, score).

    Returns:
        Array of shape (M, 5) with at most 750 rows. For empty input a
        single placeholder row [10, 10, 20, 20, 0.002] is returned.
    """
    order = det[:, 4].ravel().argsort()[::-1]
    det = det[order, :]
    if det.shape[0] == 0:
        return np.array([[10, 10, 20, 20, 0.002]])

    # Collect results in a list instead of relying on a bare `except:` to
    # detect that `dets` had not been assigned yet.
    voted = []
    while det.shape[0] > 0:
        # IoU of the top-scoring box against every remaining box.
        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
        xx1 = np.maximum(det[0, 0], det[:, 0])
        yy1 = np.maximum(det[0, 1], det[:, 1])
        xx2 = np.minimum(det[0, 2], det[:, 2])
        yy2 = np.minimum(det[0, 3], det[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        with np.errstate(divide='ignore', invalid='ignore'):
            o = inter / (area[0] + area[:] - inter)

        # Boxes to merge with the head box. The head is always included:
        # for a degenerate box (zero or negative area) its self-IoU is
        # NaN or 0, which previously left `det` unchanged and made this
        # loop spin forever.
        merge_mask = o >= 0.3
        merge_mask[0] = True
        merge_index = np.where(merge_mask)[0]
        det_accu = det[merge_index, :]
        det = np.delete(det, merge_index, 0)

        if merge_index.shape[0] <= 1:
            if det.shape[0] == 0:
                voted.append(det_accu)
            continue

        # Score-weighted average of the coordinates; keep the best score.
        det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
        max_score = np.max(det_accu[:, 4])
        det_accu_sum = np.zeros((1, 5))
        det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4],
                                      axis=0) / np.sum(det_accu[:, -1:])
        det_accu_sum[:, 4] = max_score
        voted.append(det_accu_sum)

    # np.vstack replaces the deprecated alias np.row_stack.
    dets = np.vstack(voted)
    return dets[0:750, :]
network = PyramidBox(
image_shape,
num_classes,
sub_network=args.use_pyramidbox,
is_infer=True)
infer_program, nmsed_out = network.infer()
fetches = [nmsed_out]
def detect_face(image, image_shape, raw_image, shrink):
    """Run the PyramidBox network once on `image` at scale `shrink`.

    Reads the module-level `args` (use_gpu, use_pyramidbox, model_dir,
    resize_h, resize_w).

    Args:
        image: PIL image to run detection on.
        image_shape: [channels, height, width] of `image`.
        raw_image: The original PIL image; its size is used to scale the
            boxes when args.resize_h / args.resize_w are set.
        shrink: Scale factor applied to `image` before inference.

    Returns:
        Array with one row per detection: (xmin, ymin, xmax, ymax, score).
    """
    num_classes = 2
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    if shrink != 1:
        image = image.resize((int(image_shape[2] * shrink),
                              int(image_shape[1] * shrink)), Image.ANTIALIAS)
        image_shape = [
            image_shape[0], int(image_shape[1] * shrink),
            int(image_shape[2] * shrink)
        ]
    print("image_shape: {}".format(image_shape))

    img = np.array(image)
    # HWC to CHW
    if len(img.shape) == 3:
        img = np.swapaxes(img, 1, 2)
        img = np.swapaxes(img, 1, 0)
    # RGB to BGR
    img = img[[2, 1, 0], :, :]
    img = img.astype('float32')
    # Subtract the per-channel mean, then scale.
    img -= np.array(
        [104., 117., 123.])[:, np.newaxis, np.newaxis].astype('float32')
    img = img * 0.007843
    # Add a leading batch dimension.
    img = [img]
    img = np.array(img)

    # A fresh scope and program pair is built per call because the
    # network is constructed for this call's `image_shape`.
    # NOTE(review): the nesting of the guards below was reconstructed from
    # a flattened listing; everything that touches the program or the
    # loaded parameters is kept inside them.
    scope = fluid.core.Scope()
    model_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.scope_guard(scope):
        with fluid.unique_name.guard():
            with fluid.program_guard(model_program, startup_program):
                network = PyramidBox(
                    image_shape,
                    num_classes,
                    sub_network=args.use_pyramidbox,
                    is_infer=True)
                infer_program, nmsed_out = network.infer()
                fetches = [nmsed_out]
                feeder = fluid.DataFeeder(
                    place=place, feed_list=network.feeds())
                fluid.io.load_persistables(
                    exe, args.model_dir, main_program=model_program)
                detection, = exe.run(infer_program,
                                     feed=feeder.feed([img]),
                                     fetch_list=fetches,
                                     return_numpy=False)
                detection = np.array(detection)

    # Column 1 of `detection` is the score and columns 2..5 are
    # xmin, ymin, xmax, ymax; they are multiplied by an image size below.
    det_conf = detection[:, 1]
    if args.resize_h != 0 and args.resize_w != 0:
        det_xmin = raw_image.size[0] * detection[:, 2]
        det_ymin = raw_image.size[1] * detection[:, 3]
        det_xmax = raw_image.size[0] * detection[:, 4]
        det_ymax = raw_image.size[1] * detection[:, 5]
    else:
        # `image_shape` was already multiplied by `shrink` above, so
        # dividing by it maps the boxes back to the unshrunk image.
        det_xmin = image_shape[2] * detection[:, 2] / shrink
        det_ymin = image_shape[1] * detection[:, 3] / shrink
        det_xmax = image_shape[2] * detection[:, 4] / shrink
        det_ymax = image_shape[1] * detection[:, 5] / shrink

    # layout: xmin, ymin, xmax, ymax, score
    det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf))
    keep_index = np.where(det[:, 4] >= 0)[0]
    det = det[keep_index, :]
    return det
def flip_test(image, image_shape, raw_image, shrink):
    """Detect faces on the horizontally mirrored image.

    The image is flipped left-to-right, passed to `detect_face`, and the
    x coordinates of the resulting boxes are mirrored back using the
    width of `raw_image`.

    Returns:
        Array with the same shape as the `detect_face` output, rows being
        (xmin, ymin, xmax, ymax, score).
    """
    mirrored = image.transpose(Image.FLIP_LEFT_RIGHT)
    flipped_det = detect_face(mirrored, image_shape, raw_image, shrink)
    raw_width = raw_image.size[0]

    restored = np.zeros(flipped_det.shape)
    # Mirroring swaps the roles of xmin and xmax.
    restored[:, 0] = raw_width - flipped_det[:, 2]
    restored[:, 2] = raw_width - flipped_det[:, 0]
    # y coordinates and score are unaffected by a horizontal flip.
    for col in (1, 3, 4):
        restored[:, col] = flipped_det[:, col]
    return restored
def multi_scale_test(image, image_shape, raw_image, max_im_shrink):
    """Detect faces on shrunken and enlarged versions of the image.

    Returns:
        (det_s, det_b): detections from the shrunken pass (only boxes
        whose longer side exceeds 30 are kept) and from the enlarged
        pass(es), each with rows (xmin, ymin, xmax, ymax, score).
    """

    def _sides(boxes):
        # Inclusive width and height of every box.
        return (boxes[:, 2] - boxes[:, 0] + 1, boxes[:, 3] - boxes[:, 1] + 1)

    # Shrunken pass: only large faces are kept.
    if max_im_shrink >= 0.75:
        st = 0.5
    else:
        st = 0.5 * max_im_shrink
    det_s = detect_face(image, image_shape, raw_image, st)
    widths, heights = _sides(det_s)
    det_s = det_s[np.where(np.maximum(widths, heights) > 30)[0], :]

    # Enlarged pass at the first enlargement factor.
    if max_im_shrink > 1:
        bt = min(2, max_im_shrink)
    else:
        bt = (st + max_im_shrink) / 2
    enlarged = [detect_face(image, image_shape, raw_image, bt)]

    # When there is room, keep doubling the factor for small faces and
    # finish with a pass at max_im_shrink itself.
    if max_im_shrink > 2:
        bt *= 2
        while bt < max_im_shrink:
            enlarged.append(detect_face(image, image_shape, raw_image, bt))
            bt *= 2
        enlarged.append(
            detect_face(image, image_shape, raw_image, max_im_shrink))

    if len(enlarged) == 1:
        det_b = enlarged[0]
    else:
        det_b = np.row_stack(tuple(enlarged))

    widths, heights = _sides(det_b)
    if bt > 1:
        # Enlarged images are only trusted for small faces.
        keep = np.where(np.minimum(widths, heights) < 100)[0]
    else:
        keep = np.where(np.maximum(widths, heights) > 30)[0]
    det_b = det_b[keep, :]
    return det_s, det_b
def get_im_shrink(image_shape):
    """Compute the base and the maximum scale factor for an image.

    Args:
        image_shape: [channels, height, width].

    Returns:
        (shrink, max_im_shrink): `max_im_shrink` is the largest scale
        allowed by the two pixel-count limits below, reduced by a safety
        margin; `shrink` is `max_im_shrink` capped at 1.
    """
    pixels = image_shape[1] * image_shape[2]
    # NOTE(review): both limits look like memory / tensor-size budgets
    # (0x7fffffff is the int32 maximum) - confirm the origin of 577,
    # 678 and 1024.
    max_im_shrink_v1 = (0x7fffffff / 577.0 / pixels)**0.5
    max_im_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / pixels)**0.5
    max_im_shrink = get_round(min(max_im_shrink_v1, max_im_shrink_v2), 2) - 0.3

    # Extra margin that grows with the scale. The first matching lower
    # bound wins; everything from 5 upwards shares the same 0.5 margin
    # (the original had two identical branches for 5..6 and >= 6).
    for lower_bound, margin in ((5, 0.5), (4, 0.4), (3, 0.3), (2, 0.2),
                                (1.5, 0.1)):
        if max_im_shrink >= lower_bound:
            max_im_shrink = max_im_shrink - margin
            break

    # print() with one pre-formatted argument behaves the same on
    # Python 2 and 3 and reproduces the old statement's output exactly.
    print('max_im_shrink =  {}'.format(max_im_shrink))
    shrink = max_im_shrink if max_im_shrink < 1 else 1
    print("shrink =  {}".format(shrink))
    return shrink, max_im_shrink
def infer(args, batch_size, data_args):
    """Run multi-scale face detection on every test image and save results.

    For each image the detections of the base pass, the flipped pass and
    the multi-scale passes are stacked, merged with `bbox_vote`, and
    written to ./infer_results/<image_class>/<image_name without ext>.txt.

    Args:
        args: Parsed command-line arguments (model_dir, resize_h,
            resize_w are read here).
        batch_size: Batch size of the test reader.
        data_args: Settings object forwarded to `reader.test`.

    Raises:
        ValueError: If args.model_dir does not exist.
    """
    if not os.path.exists(args.model_dir):
        raise ValueError("The model path [%s] does not exist." %
                         (args.model_dir))

    infer_reader = paddle.batch(
        reader.test(data_args, file_list), batch_size=batch_size)

    for batch in infer_reader():
        # Process every sample of the batch; previously only batch[0]
        # was used, silently dropping the rest when batch_size > 1.
        for sample in batch:
            image = sample[0]
            image_path = sample[1]
            raw_image = Image.open(image_path)

            if args.resize_h != 0 and args.resize_w != 0:
                image_shape = [3, args.resize_h, args.resize_w]
            else:
                image_shape = [3, image.size[1], image.size[0]]

            shrink, max_im_shrink = get_im_shrink(image_shape)
            det0 = detect_face(image, image_shape, raw_image, shrink)
            det1 = flip_test(image, image_shape, raw_image, shrink)
            [det2, det3] = multi_scale_test(image, image_shape, raw_image,
                                            max_im_shrink)
            det = np.row_stack((det0, det1, det2, det3))
            dets = bbox_vote(det)

            # Build the output location with the interpreter's native str
            # type; `.encode('utf-8')` produced bytes on Python 3 and
            # failed on non-ASCII Python 2 byte strings.
            native_path = image_path
            if not isinstance(native_path, str):
                if isinstance(native_path, bytes):
                    native_path = native_path.decode('utf-8')
                else:
                    native_path = native_path.encode('utf-8')
            image_name = native_path.split('/')[-1]
            image_class = native_path.split('/')[-2]

            result_dir = './infer_results/' + image_class
            if not os.path.exists(result_dir):
                os.makedirs(result_dir)
            result_file = (result_dir + '/' +
                           os.path.splitext(image_name)[0] + '.txt')
            # The context manager closes the file after every image; the
            # handle used to be leaked.
            with open(result_file, 'w') as f:
                write_to_txt(image_path, f, dets)
            #draw_bounding_box_on_image(image_path, dets, args.confs_threshold)
    print("Done")
if __name__ == '__main__': if __name__ == '__main__':
...@@ -104,4 +322,4 @@ if __name__ == '__main__': ...@@ -104,4 +322,4 @@ if __name__ == '__main__':
apply_distort=False, apply_distort=False,
apply_expand=False, apply_expand=False,
ap_version='11point') ap_version='11point')
infer(args, data_args=data_args) infer(args, batch_size=1, data_args=data_args)
...@@ -39,7 +39,11 @@ def conv_block(input, groups, filters, ksizes, strides=None, with_pool=True): ...@@ -39,7 +39,11 @@ def conv_block(input, groups, filters, ksizes, strides=None, with_pool=True):
act='relu') act='relu')
if with_pool: if with_pool:
pool = fluid.layers.pool2d( pool = fluid.layers.pool2d(
input=conv, pool_size=2, pool_type='max', pool_stride=2) input=conv,
pool_size=2,
pool_type='max',
pool_stride=2,
ceil_mode=True)
return conv, pool return conv, pool
else: else:
return conv return conv
...@@ -148,6 +152,8 @@ class PyramidBox(object): ...@@ -148,6 +152,8 @@ class PyramidBox(object):
b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.)) b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))
conv2 = fluid.layers.conv2d( conv2 = fluid.layers.conv2d(
up_to, ch, 1, act='relu', bias_attr=b_attr) up_to, ch, 1, act='relu', bias_attr=b_attr)
if self.is_infer:
upsampling = fluid.layers.crop(upsampling, shape=conv2)
# eltwise mul # eltwise mul
conv_fuse = upsampling * conv2 conv_fuse = upsampling * conv2
return conv_fuse return conv_fuse
...@@ -393,8 +399,11 @@ class PyramidBox(object): ...@@ -393,8 +399,11 @@ class PyramidBox(object):
total_loss = face_loss + head_loss total_loss = face_loss + head_loss
return face_loss, head_loss, total_loss return face_loss, head_loss, total_loss
def infer(self): def infer(self, main_program=None):
test_program = fluid.default_main_program().clone(for_test=True) if main_program is None:
test_program = fluid.default_main_program().clone(for_test=True)
else:
test_program = main_program.clone(for_test=True)
with fluid.program_guard(test_program): with fluid.program_guard(test_program):
face_nmsed_out = fluid.layers.detection_output( face_nmsed_out = fluid.layers.detection_output(
self.face_mbox_loc, self.face_mbox_loc,
......
...@@ -238,34 +238,41 @@ def pyramidbox(settings, file_list, mode, shuffle): ...@@ -238,34 +238,41 @@ def pyramidbox(settings, file_list, mode, shuffle):
im_width, im_height = im.size im_width, im_height = im.size
# layout: label | xmin | ymin | xmax | ymax # layout: label | xmin | ymin | xmax | ymax
bbox_labels = [] if mode == 'train':
for index_box in range(len(dict_input_txt[index_image])): bbox_labels = []
if index_box >= 2: for index_box in range(len(dict_input_txt[index_image])):
bbox_sample = [] if index_box >= 2:
temp_info_box = dict_input_txt[index_image][ bbox_sample = []
index_box].split(' ') temp_info_box = dict_input_txt[index_image][
xmin = float(temp_info_box[0]) index_box].split(' ')
ymin = float(temp_info_box[1]) xmin = float(temp_info_box[0])
w = float(temp_info_box[2]) ymin = float(temp_info_box[1])
h = float(temp_info_box[3]) w = float(temp_info_box[2])
xmax = xmin + w h = float(temp_info_box[3])
ymax = ymin + h xmax = xmin + w
ymax = ymin + h
bbox_sample.append(1)
bbox_sample.append(float(xmin) / im_width) bbox_sample.append(1)
bbox_sample.append(float(ymin) / im_height) bbox_sample.append(float(xmin) / im_width)
bbox_sample.append(float(xmax) / im_width) bbox_sample.append(float(ymin) / im_height)
bbox_sample.append(float(ymax) / im_height) bbox_sample.append(float(xmax) / im_width)
bbox_labels.append(bbox_sample) bbox_sample.append(float(ymax) / im_height)
bbox_labels.append(bbox_sample)
im, sample_labels = preprocess(im, bbox_labels, mode, settings)
sample_labels = np.array(sample_labels) im, sample_labels = preprocess(im, bbox_labels, mode, settings)
if len(sample_labels) == 0: continue sample_labels = np.array(sample_labels)
im = im.astype('float32') if len(sample_labels) == 0: continue
boxes = sample_labels[:, 1:5] im = im.astype('float32')
lbls = [1] * len(boxes) boxes = sample_labels[:, 1:5]
difficults = [1] * len(boxes) lbls = [1] * len(boxes)
yield im, boxes, expand_bboxes(boxes), lbls, difficults difficults = [1] * len(boxes)
yield im, boxes, expand_bboxes(boxes), lbls, difficults
if mode == 'test':
if settings.resize_w and settings.resize_h:
im = im.resize((settings.resize_w, settings.resize_h),
Image.ANTIALIAS)
yield im, image_path
return reader return reader
...@@ -274,6 +281,10 @@ def train(settings, file_list, shuffle=True): ...@@ -274,6 +281,10 @@ def train(settings, file_list, shuffle=True):
return pyramidbox(settings, file_list, 'train', shuffle) return pyramidbox(settings, file_list, 'train', shuffle)
def test(settings, file_list):
    """Create the `pyramidbox` reader in 'test' mode without shuffling."""
    mode, shuffle = 'test', False
    return pyramidbox(settings, file_list, mode, shuffle)
def infer(settings, image_path): def infer(settings, image_path):
def batch_reader(): def batch_reader():
img = Image.open(image_path) img = Image.open(image_path)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册