未验证 提交 09452a90 编写于 作者: Q qingqing01 提交者: GitHub

Data anchor sampling. (#998)

* Add the data-anchor-sampling augmentation described in the PyramidBox paper.
上级 e15da197
......@@ -100,6 +100,76 @@ def generate_sample(sampler, image_width, image_height):
return sampled_bbox
def data_anchor_sampling(sampler, bbox_labels, image_width, image_height,
                         scale_array, resize_width, resize_height):
    """Draw one square sample box anchored on a random ground-truth box.

    A ground-truth box is picked at random, a target scale is drawn from
    ``scale_array``, and the sample side length (in pixels) is chosen so
    that the picked box would have roughly that scale after the sample is
    resized to ``resize_width``.

    Args:
        sampler: Unused here; kept so the signature matches the other
            sampling helpers.
        bbox_labels: Sequence of boxes; elements 0..3 of each entry are read
            as normalized ``xmin, ymin, xmax, ymax``.
        image_width: Image width in pixels.
        image_height: Image height in pixels.
        scale_array: Indexable sequence of anchor scales (at least two
            entries; entry 1 is used when no scale interval matches).
        resize_width: Width the sample will later be resized to.
        resize_height: Unused in the computation.

    Returns:
        ``bbox(xmin, ymin, xmax, ymax)`` in normalized image coordinates.
        The box is not clipped and may extend outside ``[0, 1]``.
        Returns ``None`` when ``bbox_labels`` is empty.
    """
    num_gt = len(bbox_labels)
    if num_gt == 0:
        # No ground-truth box to anchor the sample on.
        return None

    # np.random.randint range: [low, high)
    rand_idx = np.random.randint(0, num_gt)
    picked = bbox_labels[rand_idx]
    norm_xmin = picked[0]
    norm_ymin = picked[1]
    norm_xmax = picked[2]
    norm_ymax = picked[3]

    # Ground-truth box in pixel units.
    xmin = norm_xmin * image_width
    ymin = norm_ymin * image_height
    wid = image_width * (norm_xmax - norm_xmin)
    hei = image_height * (norm_ymax - norm_ymin)

    # Find the scale interval (lower, upper) whose squared bounds bracket
    # the box area; 0 means "no interval matched".
    area = wid * hei
    range_size = 0
    for scale_ind in range(len(scale_array) - 1):
        if area > scale_array[scale_ind] ** 2 and \
                area < scale_array[scale_ind + 1] ** 2:
            range_size = scale_ind + 1

    if range_size == 0:
        rand_idx_size = range_size + 1
    else:
        # np.random.randint range: [low, high); only valid for high > 0,
        # which is why the zero case is handled separately above.
        rng_rand_size = np.random.randint(0, range_size)
        rand_idx_size = rng_rand_size % range_size

    scale_choose = random.uniform(scale_array[rand_idx_size] / 2.0,
                                  2.0 * scale_array[rand_idx_size])
    sample_bbox_size = wid * resize_width / scale_choose

    if sample_bbox_size < max(image_height, image_width):
        # Sample fits inside the longer image side: place it so that it
        # overlaps the picked box along each axis.
        if wid <= sample_bbox_size:
            w_off_orig = random.uniform(xmin + wid - sample_bbox_size, xmin)
        else:
            w_off_orig = random.uniform(xmin, xmin + wid - sample_bbox_size)

        if hei <= sample_bbox_size:
            h_off_orig = random.uniform(ymin + hei - sample_bbox_size, ymin)
        else:
            h_off_orig = random.uniform(ymin, ymin + hei - sample_bbox_size)
    else:
        # Sample is at least as large as the image: offsets are <= 0.
        w_off_orig = random.uniform(image_width - sample_bbox_size, 0.0)
        h_off_orig = random.uniform(image_height - sample_bbox_size, 0.0)

    w_off_orig = math.floor(w_off_orig)
    h_off_orig = math.floor(h_off_orig)

    # Figure out top left coordinates.
    w_off = float(w_off_orig / image_width)
    h_off = float(h_off_orig / image_height)

    sampled_bbox = bbox(w_off, h_off,
                        w_off + float(sample_bbox_size / image_width),
                        h_off + float(sample_bbox_size / image_height))
    return sampled_bbox
def jaccard_overlap(sample_bbox, object_bbox):
if sample_bbox.xmin >= object_bbox.xmax or \
sample_bbox.xmax <= object_bbox.xmin or \
......@@ -161,8 +231,6 @@ def satisfy_sample_constraint(sampler, sample_bbox, bbox_labels):
def generate_batch_samples(batch_sampler, bbox_labels, image_width,
sampled_bbox = []
index = []
c = 0
for sampler in batch_sampler:
found = 0
for i in range(sampler.max_trial):
......@@ -172,8 +240,24 @@ def generate_batch_samples(batch_sampler, bbox_labels, image_width,
if satisfy_sample_constraint(sampler, sample_bbox, bbox_labels):
found = found + 1
c = c + 1
return sampled_bbox
def generate_batch_random_samples(batch_sampler, bbox_labels, image_width,
                                  image_height, scale_array, resize_width,
                                  resize_height):
    """Collect sample boxes produced by data-anchor sampling.

    For every sampler, up to ``sampler.max_trial`` boxes are drawn with
    ``data_anchor_sampling``; each box accepted by
    ``satisfy_sample_constraint`` is kept, until ``sampler.max_sample``
    boxes have been kept for that sampler.

    Args:
        batch_sampler: Iterable of sampler objects exposing ``max_trial``
            and ``max_sample``.
        bbox_labels: Ground-truth boxes, forwarded to the helpers.
        image_width: Image width in pixels.
        image_height: Image height in pixels.
        scale_array: Anchor scales, forwarded to ``data_anchor_sampling``.
        resize_width: Target width, forwarded to ``data_anchor_sampling``.
        resize_height: Target height, forwarded to ``data_anchor_sampling``.

    Returns:
        List of accepted sample boxes (possibly empty).
    """
    sampled_bbox = []
    for sampler in batch_sampler:
        found = 0
        for _ in range(sampler.max_trial):
            if found >= sampler.max_sample:
                break
            sample_bbox = data_anchor_sampling(
                sampler, bbox_labels, image_width, image_height, scale_array,
                resize_width, resize_height)
            if sample_bbox is None:
                # The sampler produced no box (e.g. nothing to anchor on);
                # further trials with the same inputs cannot succeed.
                break
            if satisfy_sample_constraint(sampler, sample_bbox, bbox_labels):
                sampled_bbox.append(sample_bbox)
                found = found + 1
    return sampled_bbox
......@@ -243,42 +327,79 @@ def crop_image(img, bbox_labels, sample_bbox, image_width, image_height):
xmax = int(sample_bbox.xmax * image_width)
ymin = int(sample_bbox.ymin * image_height)
ymax = int(sample_bbox.ymax * image_height)
sample_img = img[ymin:ymax, xmin:xmax]
sample_labels = transform_labels(bbox_labels, sample_bbox)
return sample_img, sample_labels
def crop_image_sampling(img, bbox_labels, sample_bbox, image_width,
                        image_height, resize_width, resize_height):
    """Crop ``sample_bbox`` out of ``img`` with zero padding, then resize.

    Unlike a plain crop, the sample box is not clipped to the image: the
    part of the box that overlaps the image is copied onto a zero-filled
    canvas of the box's size, so regions outside the image stay black.

    Args:
        img: Image array indexed as ``[row, col, channel]`` with three
            channels.
        bbox_labels: Ground-truth boxes, forwarded to ``transform_labels``.
        sample_bbox: Object with normalized ``xmin, ymin, xmax, ymax``
            attributes; values may lie outside ``[0, 1]``.
        image_width: Image width in pixels.
        image_height: Image height in pixels.
        resize_width: Output width passed to ``cv2.resize``.
        resize_height: Output height passed to ``cv2.resize``.

    Returns:
        Tuple ``(sample_img, sample_labels)``: the resized float canvas and
        the labels returned by ``transform_labels``.
    """
    # no clipping here
    xmin = int(sample_bbox.xmin * image_width)
    xmax = int(sample_bbox.xmax * image_width)
    ymin = int(sample_bbox.ymin * image_height)
    ymax = int(sample_bbox.ymax * image_height)

    w_off = xmin
    h_off = ymin
    width = xmax - xmin
    height = ymax - ymin

    # Overlap between the sample box and the image, in image pixels.
    # Everything stays an int: NumPy rejects float slice bounds.
    # The "- 1" on the far edge is kept from the original computation.
    cross_xmin = max(0, w_off)
    cross_ymin = max(0, h_off)
    cross_xmax = min(w_off + width - 1, int(image_width))
    cross_ymax = min(h_off + height - 1, int(image_height))
    # Clamp at zero so a box lying fully outside the image yields an empty
    # copy instead of a negative (wrap-around) slice bound.
    cross_width = max(0, cross_xmax - cross_xmin)
    cross_height = max(0, cross_ymax - cross_ymin)

    # Where the overlap lands inside the canvas.
    roi_xmin = 0 if w_off >= 0 else abs(w_off)
    roi_ymin = 0 if h_off >= 0 else abs(h_off)

    # Rows are y and columns are x, so the canvas is (height, width, 3).
    sample_img = np.zeros((height, width, 3))
    sample_img[roi_ymin:roi_ymin + cross_height,
               roi_xmin:roi_xmin + cross_width] = \
        img[cross_ymin:cross_ymin + cross_height,
            cross_xmin:cross_xmin + cross_width]
    sample_img = cv2.resize(
        sample_img, (resize_width, resize_height), interpolation=cv2.INTER_AREA)

    sample_labels = transform_labels(bbox_labels, sample_bbox)
    return sample_img, sample_labels
def random_brightness(img, settings):
    """Randomly change the brightness of ``img``.

    With probability ``settings.brightness_prob`` the image is passed through
    ``ImageEnhance.Brightness`` with a factor drawn uniformly from
    ``1 +/- settings.brightness_delta``; otherwise it is returned unchanged.

    Args:
        img: Image accepted by ``ImageEnhance.Brightness`` (presumably a PIL
            image; confirm against the file's imports).
        settings: Object exposing ``brightness_prob`` and
            ``brightness_delta``.

    Returns:
        The (possibly enhanced) image.
    """
    prob = random.uniform(0, 1)
    # Use the public attribute names; the distortion is applied at most once.
    if prob < settings.brightness_prob:
        delta = random.uniform(-settings.brightness_delta,
                               settings.brightness_delta) + 1
        img = ImageEnhance.Brightness(img).enhance(delta)
    return img
def random_contrast(img, settings):
    """Randomly change the contrast of ``img``.

    With probability ``settings.contrast_prob`` the image is passed through
    ``ImageEnhance.Contrast`` with a factor drawn uniformly from
    ``1 +/- settings.contrast_delta``; otherwise it is returned unchanged.

    Args:
        img: Image accepted by ``ImageEnhance.Contrast`` (presumably a PIL
            image; confirm against the file's imports).
        settings: Object exposing ``contrast_prob`` and ``contrast_delta``.

    Returns:
        The (possibly enhanced) image.
    """
    prob = random.uniform(0, 1)
    # Use the public attribute names; the distortion is applied at most once.
    if prob < settings.contrast_prob:
        delta = random.uniform(-settings.contrast_delta,
                               settings.contrast_delta) + 1
        img = ImageEnhance.Contrast(img).enhance(delta)
    return img
def random_saturation(img, settings):
    """Randomly change the colour saturation of ``img``.

    With probability ``settings.saturation_prob`` the image is passed through
    ``ImageEnhance.Color`` with a factor drawn uniformly from
    ``1 +/- settings.saturation_delta``; otherwise it is returned unchanged.

    Args:
        img: Image accepted by ``ImageEnhance.Color`` (presumably a PIL
            image; confirm against the file's imports).
        settings: Object exposing ``saturation_prob`` and
            ``saturation_delta``.

    Returns:
        The (possibly enhanced) image.
    """
    prob = random.uniform(0, 1)
    # Use the public attribute names; the distortion is applied at most once.
    if prob < settings.saturation_prob:
        delta = random.uniform(-settings.saturation_delta,
                               settings.saturation_delta) + 1
        img = ImageEnhance.Color(img).enhance(delta)
    return img
def random_hue(img, settings):
prob = random.uniform(0, 1)
if prob < settings._hue_prob:
delta = random.uniform(-settings._hue_delta, settings._hue_delta)
if prob < settings.hue_prob:
delta = random.uniform(-settings.hue_delta, settings.hue_delta)
img_hsv = np.array(img.convert('HSV'))
img_hsv[:, :, 0] = img_hsv[:, :, 0] + delta
img = Image.fromarray(img_hsv, mode='HSV').convert('RGB')
......@@ -303,9 +424,9 @@ def distort_image(img, settings):
def expand_image(img, bbox_labels, img_width, img_height, settings):
prob = random.uniform(0, 1)
if prob < settings._expand_prob:
if settings._expand_max_ratio - 1 >= 0.01:
expand_ratio = random.uniform(1, settings._expand_max_ratio)
if prob < settings.expand_prob:
if settings.expand_max_ratio - 1 >= 0.01:
expand_ratio = random.uniform(1, settings.expand_max_ratio)
height = int(img_height * expand_ratio)
width = int(img_width * expand_ratio)
h_off = math.floor(random.uniform(0, height - img_height))
......@@ -314,7 +435,7 @@ def expand_image(img, bbox_labels, img_width, img_height, settings):
(width - w_off) / img_width,
(height - h_off) / img_height)
expand_img = np.ones((height, width, 3))
expand_img = np.uint8(expand_img * np.squeeze(settings._img_mean))
expand_img = np.uint8(expand_img * np.squeeze(settings.img_mean))
expand_img = Image.fromarray(expand_img)
expand_img.paste(img, (int(w_off), int(h_off)))
bbox_labels = transform_labels(bbox_labels, expand_bbox)
......@@ -22,6 +22,7 @@ import xml.etree.ElementTree
import os
import time
import copy
import random
class Settings(object):
......@@ -36,112 +37,93 @@ class Settings(object):
self._dataset = dataset
self._ap_version = ap_version
self._toy = toy
self._data_dir = data_dir
self._apply_distort = apply_distort
self._apply_expand = apply_expand
self._resize_height = resize_h
self._resize_width = resize_w
self._img_mean = np.array(mean_value)[:, np.newaxis, np.newaxis].astype(
self.dataset = dataset
self.ap_version = ap_version
self.toy = toy
self.data_dir = data_dir
self.apply_distort = apply_distort
self.apply_expand = apply_expand
self.resize_height = resize_h
self.resize_width = resize_w
self.img_mean = np.array(mean_value)[:, np.newaxis, np.newaxis].astype(
self._expand_prob = 0.5
self._expand_max_ratio = 4
self._hue_prob = 0.5
self._hue_delta = 18
self._contrast_prob = 0.5
self._contrast_delta = 0.5
self._saturation_prob = 0.5
self._saturation_delta = 0.5
self._brightness_prob = 0.5
self.expand_prob = 0.5
self.expand_max_ratio = 4
self.hue_prob = 0.5
self.hue_delta = 18
self.contrast_prob = 0.5
self.contrast_delta = 0.5
self.saturation_prob = 0.5
self.saturation_delta = 0.5
self.brightness_prob = 0.5
# _brightness_delta is the normalized value by 256
# self._brightness_delta = 32
self._brightness_delta = 0.125
def dataset(self):
return self._dataset
def ap_version(self):
return self._ap_version
def toy(self):
return self._toy
def apply_expand(self):
return self._apply_expand
def apply_distort(self):
return self._apply_distort
def data_dir(self):
return self._data_dir
def data_dir(self, data_dir):
self._data_dir = data_dir
def label_list(self):
return self._label_list
def resize_h(self):
return self._resize_height
def resize_w(self):
return self._resize_width
def img_mean(self):
return self._img_mean
self.brightness_delta = 0.125
self.scale = 0.007843 # 1 / 127.5
self.data_anchor_sampling_prob = 0.5
def preprocess(img, bbox_labels, mode, settings):
img_width, img_height = img.size
sampled_labels = bbox_labels
if mode == 'train':
if settings._apply_distort:
if settings.apply_distort:
img = image_util.distort_image(img, settings)
if settings._apply_expand:
if settings.apply_expand:
img, bbox_labels, img_width, img_height = image_util.expand_image(
img, bbox_labels, img_width, img_height, settings)
# sampling
batch_sampler = []
# hard-code here
image_util.sampler(1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0,
image_util.sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0,
image_util.sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0,
image_util.sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0,
image_util.sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0,
sampled_bbox = image_util.generate_batch_samples(
batch_sampler, bbox_labels, img_width, img_height)
img = np.array(img)
if len(sampled_bbox) > 0:
idx = int(random.uniform(0, len(sampled_bbox)))
img, sampled_labels = image_util.crop_image(
img, bbox_labels, sampled_bbox[idx], img_width, img_height)
img = Image.fromarray(img)
img = img.resize((settings.resize_w, settings.resize_h), Image.ANTIALIAS)
prob = random.uniform(0., 1.)
if prob > settings.data_anchor_sampling_prob:
scale_array = np.array([16, 32, 64, 128, 256, 512])
image_util.sampler(1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2,
0.0, True))
sampled_bbox = image_util.generate_batch_random_samples(
batch_sampler, bbox_labels, img_width, img_height, scale_array,
settings.resize_width, settings.resize_height)
img = np.array(img)
if len(sampled_bbox) > 0:
idx = int(random.uniform(0, len(sampled_bbox)))
img, sampled_labels = image_util.crop_image_sampling(
img, bbox_labels, sampled_bbox[idx], img_width, img_height,
resize_width, resize_heigh)
img = Image.fromarray(img)
# hard-code here
image_util.sampler(1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0,
0.0, True))
image_util.sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0,
0.0, True))
image_util.sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0,
0.0, True))
image_util.sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0,
0.0, True))
image_util.sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0,
0.0, True))
sampled_bbox = image_util.generate_batch_samples(
batch_sampler, bbox_labels, img_width, img_height)
img = np.array(img)
if len(sampled_bbox) > 0:
idx = int(random.uniform(0, len(sampled_bbox)))
img, sampled_labels = image_util.crop_image(
img, bbox_labels, sampled_bbox[idx], img_width, img_height)
img = Image.fromarray(img)
img = img.resize((settings.resize_width, settings.resize_height),
img = np.array(img)
if mode == 'train':
......@@ -160,7 +142,7 @@ def preprocess(img, bbox_labels, mode, settings):
img = img[[2, 1, 0], :, :]
img = img.astype('float32')
img -= settings.img_mean
img = img * 0.007843
img = img * settings.scale
return img, sampled_labels
......@@ -180,7 +162,6 @@ def put_txt_in_dict(input_txt):
if '--' not in tmp_line_txt:
if len(tmp_line_txt) > 6:
# tmp_line_txt = tmp_line_txt[:-2]
split_str = tmp_line_txt.split(' ')
x1_min = float(split_str[0])
y1_min = float(split_str[1])
......@@ -288,8 +269,8 @@ def infer(settings, image_path):
if img.mode == 'L':
img = im.convert('RGB')
im_width, im_height = img.size
if settings.resize_w and settings.resize_h:
img = img.resize((settings.resize_w, settings.resize_h),
if settings.resize_width and settings.resize_height:
img = img.resize((settings.resize_width, settings.resize_height),
img = np.array(img)
# HWC to CHW
......@@ -300,9 +281,7 @@ def infer(settings, image_path):
img = img[[2, 1, 0], :, :]
img = img.astype('float32')
img -= settings.img_mean
img = img * 0.007843
img = [img]
img = np.array(img)
return img
img = img * settings.scale
return np.array([img])
return batch_reader
......@@ -15,34 +15,41 @@ parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
# Command-line options. Each option is registered exactly once: registering
# the same option name twice on one parser is normally rejected by argparse
# (presumably add_arg forwards to parser.add_argument -- confirm).
add_arg('parallel',         bool,  True,            "parallel")
add_arg('learning_rate',    float, 0.001,           "Learning rate.")
add_arg('batch_size',       int,   12,              "Minibatch size.")
add_arg('num_passes',       int,   120,             "Epoch number.")
add_arg('use_gpu',          bool,  True,            "Whether use GPU.")
add_arg('use_pyramidbox',   bool,  True,            "Whether use PyramidBox model.")
# 'dataset' is kept so existing command lines that pass it keep working.
add_arg('dataset',          str,   'WIDERFACE',     "coco2014, coco2017, and pascalvoc.")
add_arg('model_save_dir',   str,   'output',        "The path to save model.")
add_arg('pretrained_model', str,   './pretrained/', "The init model path.")
add_arg('resize_h',         int,   640,             "The resized image height.")
add_arg('resize_w',         int,   640,             "The resized image width.")
#yapf: enable
def train(args, data_args, learning_rate, batch_size, pretrained_model,
num_passes, optimizer_method):
def train(args, config, train_file_list, optimizer_method):
learning_rate = args.learning_rate
batch_size = args.batch_size
num_passes = args.num_passes
height = args.resize_h
width = args.resize_w
use_gpu = args.use_gpu
use_pyramidbox = args.use_pyramidbox
model_save_dir = args.model_save_dir
pretrained_model = args.pretrained_model
num_classes = 2
image_shape = [3, height, width]
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))
image_shape = [3, data_args.resize_h, data_args.resize_w]
fetches = []
network = PyramidBox(image_shape, num_classes,
if args.use_pyramidbox:
if use_pyramidbox:
face_loss, head_loss, loss = network.train()
fetches = [face_loss, head_loss]
......@@ -70,9 +77,9 @@ def train(args, data_args, learning_rate, batch_size, pretrained_model,
# fluid.memory_optimize(fluid.default_main_program())
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
......@@ -80,7 +87,7 @@ def train(args, data_args, learning_rate, batch_size, pretrained_model,
if pretrained_model:
if pretrained_model.isdigit():
start_pass = int(pretrained_model) + 1
pretrained_model = os.path.join(args.model_save_dir, pretrained_model)
pretrained_model = os.path.join(model_save_dir, pretrained_model)
print("Resume from %s " %(pretrained_model))
if not os.path.exists(pretrained_model):
......@@ -92,10 +99,10 @@ def train(args, data_args, learning_rate, batch_size, pretrained_model,
if args.parallel:
train_exe = fluid.ParallelExecutor(
use_cuda=args.use_gpu, loss_name=loss.name)
use_cuda=use_gpu, loss_name=loss.name)
train_reader = paddle.batch(
reader.train(data_args, train_file_list), batch_size=batch_size)
reader.train(config, train_file_list), batch_size=batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=network.feeds())
def save_model(postfix):
......@@ -143,22 +150,12 @@ if __name__ == '__main__':
data_dir = 'data/WIDERFACE/WIDER_train/images/'
train_file_list = 'label/train_gt_widerface.res'
val_file_list = 'label/val_gt_widerface.res'
model_save_dir = args.model_save_dir
data_args = reader.Settings(
config = reader.Settings(
mean_value=[104., 117., 123],
train(args, config, train_file_list, optimizer_method="momentum")
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册