Commit 258ae207 authored by Bai Yifan, committed by qingqing01

Fix object detection reader (#1219)

* Accelerate reader in object_detection.
Parent 82414c41
"""
This code is based on https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py
"""
import time
import numpy as np
import threading
import multiprocessing
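# The standard-library queue module was named Queue in Python 2.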
try:
import queue
except ImportError:
import Queue as queue
class GeneratorEnqueuer(object):
"""
Builds a queue out of a data generator.
Args:
generator: a generator function which endlessly yields data
use_multiprocessing (bool): use multiprocessing if True,
otherwise use threading.
wait_time (float): time to sleep in-between calls to `put()`.
        random_seed (int): Initial seed for workers,
            will be incremented by one for each worker.
"""
def __init__(self,
generator,
use_multiprocessing=False,
wait_time=0.05,
random_seed=None):
self.wait_time = wait_time
self._generator = generator
self._use_multiprocessing = use_multiprocessing
self._threads = []
self._stop_event = None
self.queue = None
self._manager = None
self.seed = random_seed
def start(self, workers=1, max_queue_size=10):
"""
Start worker threads which add data from the generator into the queue.
Args:
workers (int): number of worker threads
max_queue_size (int): queue size
(when full, threads could block on `put()`)
"""
def data_generator_task():
"""
Data generator task.
"""
def task():
if (self.queue is not None and
self.queue.qsize() < max_queue_size):
generator_output = next(self._generator)
                    self.queue.put(generator_output)
else:
time.sleep(self.wait_time)
if not self._use_multiprocessing:
while not self._stop_event.is_set():
with self.genlock:
try:
task()
except Exception:
self._stop_event.set()
break
else:
while not self._stop_event.is_set():
try:
task()
except Exception:
self._stop_event.set()
break
try:
if self._use_multiprocessing:
self._manager = multiprocessing.Manager()
self.queue = self._manager.Queue(maxsize=max_queue_size)
self._stop_event = multiprocessing.Event()
else:
self.genlock = threading.Lock()
self.queue = queue.Queue()
self._stop_event = threading.Event()
for _ in range(workers):
if self._use_multiprocessing:
                    # Reset the random seed, otherwise all child
                    # processes share the same seed
np.random.seed(self.seed)
thread = multiprocessing.Process(target=data_generator_task)
thread.daemon = True
if self.seed is not None:
self.seed += 1
else:
thread = threading.Thread(target=data_generator_task)
self._threads.append(thread)
thread.start()
except:
self.stop()
raise
def is_running(self):
"""
Returns:
            bool: Whether the worker threads are running.
"""
return self._stop_event is not None and not self._stop_event.is_set()
def stop(self, timeout=None):
"""
        Stops running threads and waits for them to exit, if necessary.
Should be called by the same thread which called `start()`.
Args:
timeout(int|None): maximum time to wait on `thread.join()`.
"""
if self.is_running():
self._stop_event.set()
for thread in self._threads:
if self._use_multiprocessing:
if thread.is_alive():
thread.terminate()
else:
thread.join(timeout)
if self._manager:
self._manager.shutdown()
self._threads = []
self._stop_event = None
self.queue = None
def get(self):
"""
        Creates a generator to extract data from the queue.
        Skips the data if it is `None`.
        Yields:
            tuple of data in the queue.
"""
while self.is_running():
if not self.queue.empty():
inputs = self.queue.get()
if inputs is not None:
yield inputs
else:
time.sleep(self.wait_time)
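# Minimal usage sketch (illustrative only; `toy_batches` stands for any
# endless generator -- reader.py in this commit drives the class the same
# way):
#
#     enqueuer = GeneratorEnqueuer(toy_batches(), use_multiprocessing=False)
#     enqueuer.start(workers=2, max_queue_size=8)
#     for i, batch in enumerate(enqueuer.get()):
#         ...                       # consume one item
#         if i >= 100:
#             break
#     enqueuer.stop()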
......@@ -3,6 +3,7 @@ import time
import numpy as np
import argparse
import functools
import math
import paddle
import paddle.fluid as fluid
......@@ -29,65 +30,75 @@ add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will
# yapf: enable
def eval(args, data_args, test_list, batch_size, model_dir=None):
def build_program(main_prog, startup_prog, args, data_args):
image_shape = [3, data_args.resize_h, data_args.resize_w]
if 'coco' in data_args.dataset:
num_classes = 91
elif 'pascalvoc' in data_args.dataset:
num_classes = 21
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
gt_box = fluid.layers.data(
name='gt_box', shape=[4], dtype='float32', lod_level=1)
gt_label = fluid.layers.data(
name='gt_label', shape=[1], dtype='int32', lod_level=1)
difficult = fluid.layers.data(
name='gt_difficult', shape=[1], dtype='int32', lod_level=1)
with fluid.program_guard(main_prog, startup_prog):
py_reader = fluid.layers.py_reader(
capacity=64,
shapes=[[-1] + image_shape, [-1, 4], [-1, 1], [-1, 1]],
lod_levels=[0, 1, 1, 1],
dtypes=["float32", "float32", "int32", "int32"],
use_double_buffer=True)
with fluid.unique_name.guard():
image, gt_box, gt_label, difficult = fluid.layers.read_file(
py_reader)
locs, confs, box, box_var = mobile_net(num_classes, image,
image_shape)
nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=args.nms_threshold)
with fluid.program_guard(main_prog):
map = fluid.evaluator.DetectionMAP(
nmsed_out,
gt_label,
gt_box,
difficult,
num_classes,
overlap_threshold=0.5,
evaluate_difficult=False,
ap_version=args.ap_version)
return py_reader, map
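# Note: fluid.unique_name.guard() keeps parameter names identical across
# independently built programs, so persistables saved from the training
# program load cleanly into this eval program.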
locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=args.nms_threshold)
loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var)
loss = fluid.layers.reduce_sum(loss)
def eval(args, data_args, test_list, batch_size, model_dir=None):
startup_prog = fluid.Program()
test_prog = fluid.Program()
test_py_reader, map_eval = build_program(
main_prog=test_prog,
startup_prog=startup_prog,
args=args,
data_args=data_args)
test_prog = test_prog.clone(for_test=True)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
# yapf: disable
if model_dir:
def if_exist(var):
return os.path.exists(os.path.join(model_dir, var.name))
fluid.io.load_vars(exe, model_dir, predicate=if_exist)
fluid.io.load_vars(exe, model_dir, main_program=test_prog, predicate=if_exist)
# yapf: enable
test_reader = paddle.batch(
reader.test(data_args, test_list), batch_size=batch_size)
feeder = fluid.DataFeeder(
place=place, feed_list=[image, gt_box, gt_label, difficult])
test_reader = reader.test(data_args, test_list, batch_size=batch_size)
test_py_reader.decorate_paddle_reader(test_reader)
def test():
# switch network to test mode (i.e. batch norm test mode)
test_program = fluid.default_main_program().clone(for_test=True)
with fluid.program_guard(test_program):
map_eval = fluid.evaluator.DetectionMAP(
nmsed_out,
gt_label,
gt_box,
difficult,
num_classes,
overlap_threshold=0.5,
evaluate_difficult=False,
ap_version=args.ap_version)
_, accum_map = map_eval.get_map_var()
map_eval.reset(exe)
for batch_id, data in enumerate(test_reader()):
test_map, = exe.run(test_program,
feed=feeder.feed(data),
fetch_list=[accum_map])
if batch_id % 20 == 0:
_, accum_map = map_eval.get_map_var()
map_eval.reset(exe)
test_py_reader.start()
try:
batch_id = 0
while True:
test_map, = exe.run(test_prog, fetch_list=[accum_map])
if batch_id % 10 == 0:
print("Batch {0}, map {1}".format(batch_id, test_map))
print("Test model {0}, map {1}".format(model_dir, test_map))
test()
batch_id += 1
except fluid.core.EOFException:
test_py_reader.reset()
print("Test model {0}, map {1}".format(model_dir, test_map))
if __name__ == '__main__':
......@@ -117,8 +128,7 @@ if __name__ == '__main__':
mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R],
apply_distort=False,
apply_expand=False,
ap_version=args.ap_version,
toy=0)
ap_version=args.ap_version)
eval(
args,
data_args=data_args,
......
......@@ -127,8 +127,7 @@ if __name__ == '__main__':
mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R],
apply_distort=False,
apply_expand=False,
ap_version='',
toy=0)
ap_version='')
infer(
args,
data_args=data_args,
......
......@@ -22,6 +22,7 @@ import os
import time
import copy
import six
from data_util import GeneratorEnqueuer
class Settings(object):
......@@ -34,11 +35,9 @@ class Settings(object):
mean_value=[127.5, 127.5, 127.5],
apply_distort=True,
apply_expand=True,
ap_version='11point',
toy=0):
ap_version='11point'):
self._dataset = dataset
self._ap_version = ap_version
self._toy = toy
self._data_dir = data_dir
if 'pascalvoc' in dataset:
self._label_list = []
......@@ -71,10 +70,6 @@ class Settings(object):
def ap_version(self):
return self._ap_version
@property
def toy(self):
return self._toy
@property
def apply_distort(self):
        return self._apply_distort
......@@ -167,7 +162,7 @@ def preprocess(img, bbox_labels, mode, settings):
return img, sampled_labels
def coco(settings, file_list, mode, shuffle):
def coco(settings, file_list, mode, batch_size, shuffle):
# cocoapi
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
......@@ -175,16 +170,12 @@ def coco(settings, file_list, mode, shuffle):
coco = COCO(file_list)
image_ids = coco.getImgIds()
images = coco.loadImgs(image_ids)
category_ids = coco.getCatIds()
category_names = [item['name'] for item in coco.loadCats(category_ids)]
if not settings.toy == 0:
images = images[:settings.toy] if len(images) > settings.toy else images
print("{} on {} with {} images".format(mode, settings.dataset, len(images)))
def reader():
if mode == 'train' and shuffle:
np.random.shuffle(images)
batch_out = []
for image in images:
image_name = image['file_name']
image_path = os.path.join(settings.data_dir, image_name)
......@@ -203,7 +194,6 @@ def coco(settings, file_list, mode, shuffle):
bbox_sample = []
# start from 1, leave 0 to background
bbox_sample.append(float(ann['category_id']))
#float(category_ids.index(ann['category_id'])) + 1)
bbox = ann['bbox']
xmin, ymin, w, h = bbox
xmax = xmin + w
......@@ -222,24 +212,32 @@ def coco(settings, file_list, mode, shuffle):
lbls = sample_labels[:, 0].astype('int32')
iscrowd = sample_labels[:, -1].astype('int32')
if 'cocoMAP' in settings.ap_version:
yield im, boxes, lbls, iscrowd, \
[im_id, im_width, im_height]
batch_out.append((im, boxes, lbls, iscrowd,
[im_id, im_width, im_height]))
else:
yield im, boxes, lbls, iscrowd
batch_out.append((im, boxes, lbls, iscrowd))
if len(batch_out) == batch_size:
yield batch_out
batch_out = []
if mode == 'test' and len(batch_out) > 1:
yield batch_out
batch_out = []
return reader
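# Note: when settings.ap_version contains 'cocoMAP', each sample tuple
# also carries [im_id, im_width, im_height] so predictions can be mapped
# back to the original COCO images during evaluation.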
def pascalvoc(settings, file_list, mode, shuffle):
def pascalvoc(settings, file_list, mode, batch_size, shuffle):
flist = open(file_list)
images = [line.strip() for line in flist]
if not settings.toy == 0:
images = images[:settings.toy] if len(images) > settings.toy else images
print("{} on {} with {} images".format(mode, settings.dataset, len(images)))
def reader():
if mode == 'train' and shuffle:
np.random.shuffle(images)
batch_out = []
cnt = 0
for image in images:
image_path, label_path = image.split()
image_path = os.path.join(settings.data_dir, image_path)
......@@ -273,37 +271,69 @@ def pascalvoc(settings, file_list, mode, shuffle):
boxes = sample_labels[:, 1:5]
lbls = sample_labels[:, 0].astype('int32')
difficults = sample_labels[:, -1].astype('int32')
yield im, boxes, lbls, difficults
batch_out.append((im, boxes, lbls, difficults))
if len(batch_out) == batch_size:
yield batch_out
cnt += len(batch_out)
batch_out = []
if mode == 'test' and len(batch_out) > 1:
yield batch_out
cnt += len(batch_out)
batch_out = []
return reader
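# Note: both coco() and pascalvoc() now yield one whole batch (a list of
# per-image tuples) per iteration rather than single samples, so their
# readers feed py_reader directly without paddle.batch().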
def train(settings, file_list, shuffle=True):
def train(settings,
file_list,
batch_size,
shuffle=True,
use_multiprocessing=True,
num_workers=8,
max_queue=24):
file_list = os.path.join(settings.data_dir, file_list)
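    # GeneratorEnqueuer workers repeatedly call next() on one shared
    # generator, so wrap the epoch-based reader to restart endlessly; the
    # training loop bounds each epoch with iters_per_epoc instead.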
def infinite_reader(gen):
while True:
for data in gen():
yield data
if 'coco' in settings.dataset:
train_settings = copy.copy(settings)
if '2014' in file_list:
sub_dir = "train2014"
elif '2017' in file_list:
sub_dir = "train2017"
train_settings.data_dir = os.path.join(settings.data_dir, sub_dir)
return coco(train_settings, file_list, 'train', shuffle)
generator = coco(settings, file_list, "train", batch_size, shuffle)
else:
return pascalvoc(settings, file_list, 'train', shuffle)
generator = pascalvoc(settings, file_list, "train", batch_size, shuffle)
    def reader():
        enqueuer = None
        try:
            enqueuer = GeneratorEnqueuer(
infinite_reader(generator),
use_multiprocessing=use_multiprocessing)
enqueuer.start(max_queue_size=max_queue, workers=num_workers)
generator_output = None
while True:
while enqueuer.is_running():
if not enqueuer.queue.empty():
generator_output = enqueuer.queue.get()
break
else:
time.sleep(0.02)
yield generator_output
generator_output = None
finally:
if enqueuer is not None:
enqueuer.stop()
return reader
def test(settings, file_list):
def test(settings, file_list, batch_size):
file_list = os.path.join(settings.data_dir, file_list)
if 'coco' in settings.dataset:
test_settings = copy.copy(settings)
if '2014' in file_list:
sub_dir = "val2014"
elif '2017' in file_list:
sub_dir = "val2017"
test_settings.data_dir = os.path.join(settings.data_dir, sub_dir)
return coco(test_settings, file_list, 'test', False)
return coco(settings, file_list, 'test', batch_size, False)
else:
return pascalvoc(settings, file_list, 'test', False)
return pascalvoc(settings, file_list, 'test', batch_size, False)
def infer(settings, image_path):
......
......@@ -4,6 +4,7 @@ import numpy as np
import argparse
import functools
import shutil
import math
import paddle
import paddle.fluid as fluid
......@@ -16,233 +17,290 @@ add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('learning_rate', float, 0.001, "Learning rate.")
add_arg('batch_size', int, 64, "Minibatch size.")
add_arg('num_passes', int, 120, "Epoch number.")
add_arg('epoc_num', int, 120, "Epoch number.")
add_arg('use_gpu', bool, True, "Whether to use GPU.")
add_arg('parallel', bool, True, "Parallel.")
add_arg('dataset', str, 'pascalvoc', "coco2014, coco2017, and pascalvoc.")
add_arg('model_save_dir', str, 'model', "The path to save model.")
add_arg('pretrained_model', str, 'pretrained/ssd_mobilenet_v1_coco/', "The init model path.")
add_arg('apply_distort', bool, True, "Whether apply distort.")
add_arg('apply_expand', bool, True, "Whether apply expand.")
add_arg('nms_threshold', float, 0.45, "NMS threshold.")
add_arg('ap_version', str, '11point', "integral, 11point.")
add_arg('resize_h', int, 300, "The resized image height.")
add_arg('resize_w', int, 300, "The resized image width.")
add_arg('mean_value_B', float, 127.5, "Mean value for B channel which will be subtracted.") #123.68
add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will be subtracted.") #116.78
add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94
add_arg('is_toy', int, 0, "Toy for quick debug, 0 means using all data, while n means using only n sample.")
add_arg('ap_version', str, '11point', "Integral, 11point.")
add_arg('image_shape', str, '3,300,300', "Input image shape.")
add_arg('mean_BGR', str, '127.5,127.5,127.5', "Mean value for B,G,R channel which will be subtracted.")
add_arg('data_dir', str, 'data/pascalvoc', "data directory")
add_arg('enable_ce', bool, False, "Whether to use CE to evaluate the model.")
# yapf: enable
train_parameters = {
"pascalvoc": {
"train_images": 19200,
"image_shape": [3, 300, 300],
"class_num": 21,
"batch_size": 64,
"lr": 0.001,
"lr_epochs": [40, 60, 80, 100],
"lr_decay": [1, 0.5, 0.25, 0.1, 0.01]
},
"coco2014": {
"train_images": 82783,
"image_shape": [3, 300, 300],
"class_num": 91,
"batch_size": 64,
"lr": 0.001,
"lr_epochs": [12, 19],
"lr_decay": [1, 0.5, 0.25]
},
"coco2017": {
"train_images": 118287,
"image_shape": [3, 300, 300],
"class_num": 91,
"batch_size": 64,
"lr": 0.001,
"lr_epochs": [12, 19],
"lr_decay": [1, 0.5, 0.25]
}
}
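# The image_shape, batch_size, lr and epoc_num entries above are
# overridden (or added) from the command-line arguments in __main__ below.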
def optimizer_setting(train_params):
batch_size = train_params["batch_size"]
    iters = train_params["train_images"] // batch_size
    lr = train_params["lr"]
    boundaries = [i * iters for i in train_params["lr_epochs"]]
    values = [i * lr for i in train_params["lr_decay"]]
optimizer = fluid.optimizer.RMSProp(
learning_rate=fluid.layers.piecewise_decay(boundaries, values),
regularization=fluid.regularizer.L2Decay(0.00005), )
return optimizer
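# Worked example with the pascalvoc entry above: 19200 images at batch
# size 64 give iters = 300 per epoch, so
#     boundaries = [300 * 40, 300 * 60, 300 * 80, 300 * 100]
#                = [12000, 18000, 24000, 30000]
#     values     = [0.001, 0.0005, 0.00025, 0.0001, 0.00001]
# i.e. the learning rate steps down after epochs 40, 60, 80 and 100.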
def build_program(main_prog, startup_prog, train_params, is_train):
image_shape = train_params['image_shape']
class_num = train_params['class_num']
with fluid.program_guard(main_prog, startup_prog):
py_reader = fluid.layers.py_reader(
capacity=64,
shapes=[[-1] + image_shape, [-1, 4], [-1, 1], [-1, 1]],
lod_levels=[0, 1, 1, 1],
dtypes=["float32", "float32", "int32", "int32"],
use_double_buffer=True)
with fluid.unique_name.guard():
image, gt_box, gt_label, difficult = fluid.layers.read_file(py_reader)
locs, confs, box, box_var = mobile_net(class_num, image, image_shape)
if is_train:
loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box,
box_var)
loss = fluid.layers.reduce_sum(loss)
optimizer = optimizer_setting(train_params)
optimizer.minimize(loss)
else:
nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=0.45)
with fluid.program_guard(main_prog):
loss = fluid.evaluator.DetectionMAP(
nmsed_out,
gt_label,
gt_box,
difficult,
class_num,
overlap_threshold=0.5,
evaluate_difficult=False,
ap_version=args.ap_version)
return py_reader, loss
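# Note: when is_train is False, the second value returned is the
# DetectionMAP evaluator (bound to `loss` above), not a loss tensor.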
def train(args,
train_file_list,
val_file_list,
data_args,
learning_rate,
batch_size,
num_passes,
model_save_dir,
pretrained_model=None):
if args.enable_ce:
fluid.framework.default_startup_program().random_seed = 111
image_shape = [3, data_args.resize_h, data_args.resize_w]
if 'coco' in data_args.dataset:
num_classes = 91
elif 'pascalvoc' in data_args.dataset:
num_classes = 21
train_params,
train_file_list,
val_file_list):
model_save_dir = args.model_save_dir
pretrained_model = args.pretrained_model
epoc_num = args.epoc_num
use_gpu = args.use_gpu
parallel = args.parallel
enable_ce = args.enable_ce
is_shuffle = True
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))
batch_size = train_params['batch_size']
batch_size_per_device = batch_size // devices_num
iters_per_epoc = train_params["train_images"] // batch_size
num_workers = 8
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
gt_box = fluid.layers.data(
name='gt_box', shape=[4], dtype='float32', lod_level=1)
gt_label = fluid.layers.data(
name='gt_label', shape=[1], dtype='int32', lod_level=1)
difficult = fluid.layers.data(
name='gt_difficult', shape=[1], dtype='int32', lod_level=1)
locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=args.nms_threshold)
loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box,
box_var)
loss = fluid.layers.reduce_sum(loss)
test_program = fluid.default_main_program().clone(for_test=True)
with fluid.program_guard(test_program):
map_eval = fluid.evaluator.DetectionMAP(
nmsed_out,
gt_label,
gt_box,
difficult,
num_classes,
overlap_threshold=0.5,
evaluate_difficult=False,
ap_version=args.ap_version)
if 'coco' in data_args.dataset:
# learning rate decay in 12, 19 pass, respectively
if '2014' in train_file_list:
epocs = 82783 // batch_size
boundaries = [epocs * 12, epocs * 19]
elif '2017' in train_file_list:
epocs = 118287 // batch_size
boundaries = [epocs * 12, epocs * 19]
values = [
learning_rate, learning_rate * 0.5, learning_rate * 0.25
]
elif 'pascalvoc' in data_args.dataset:
epocs = 19200 // batch_size
boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
values = [
learning_rate, learning_rate * 0.5, learning_rate * 0.25,
learning_rate * 0.1, learning_rate * 0.01
]
optimizer = fluid.optimizer.RMSProp(
learning_rate=fluid.layers.piecewise_decay(boundaries, values),
regularization=fluid.regularizer.L2Decay(0.00005), )
startup_prog = fluid.Program()
train_prog = fluid.Program()
test_prog = fluid.Program()
if enable_ce:
import random
random.seed(0)
np.random.seed(0)
is_shuffle = False
startup_prog.random_seed = 111
train_prog.random_seed = 111
test_prog.random_seed = 111
num_workers = 1
optimizer.minimize(loss)
train_py_reader, loss = build_program(
main_prog=train_prog,
startup_prog=startup_prog,
train_params=train_params,
is_train=True)
test_py_reader, map_eval = build_program(
main_prog=test_prog,
startup_prog=startup_prog,
train_params=train_params,
is_train=False)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
test_prog = test_prog.clone(for_test=True)
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
exe.run(startup_prog)
if pretrained_model:
def if_exist(var):
return os.path.exists(os.path.join(pretrained_model, var.name))
fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
fluid.io.load_vars(exe, pretrained_model, main_program=train_prog,
predicate=if_exist)
if args.parallel:
train_exe = fluid.ParallelExecutor(
use_cuda=args.use_gpu, loss_name=loss.name)
if parallel:
train_exe = fluid.ParallelExecutor(main_program=train_prog,
use_cuda=use_gpu, loss_name=loss.name)
if not args.enable_ce:
train_reader = paddle.batch(
reader.train(data_args, train_file_list), batch_size=batch_size)
else:
import random
random.seed(0)
np.random.seed(0)
train_reader = paddle.batch(
reader.train(data_args, train_file_list, False), batch_size=batch_size)
test_reader = paddle.batch(
reader.test(data_args, val_file_list), batch_size=batch_size)
feeder = fluid.DataFeeder(
place=place, feed_list=[image, gt_box, gt_label, difficult])
def save_model(postfix):
train_reader = reader.train(data_args,
train_file_list,
batch_size_per_device,
shuffle=is_shuffle,
use_multiprocessing=True,
num_workers=num_workers,
max_queue=24)
test_reader = reader.test(data_args, val_file_list, batch_size)
train_py_reader.decorate_paddle_reader(train_reader)
test_py_reader.decorate_paddle_reader(test_reader)
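    # Attaching the batched readers to the py_readers lets them fill the
    # feeding queues asynchronously, replacing the old synchronous
    # DataFeeder.feed() path.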
def save_model(postfix, main_prog):
model_path = os.path.join(model_save_dir, postfix)
if os.path.isdir(model_path):
shutil.rmtree(model_path)
print('save models to %s' % (model_path))
fluid.io.save_persistables(exe, model_path)
fluid.io.save_persistables(exe, model_path, main_program=main_prog)
best_map = 0.
def test(pass_id, best_map):
def test(epoc_id, best_map):
_, accum_map = map_eval.get_map_var()
map_eval.reset(exe)
every_pass_map=[]
for batch_id, data in enumerate(test_reader()):
test_map, = exe.run(test_program,
feed=feeder.feed(data),
fetch_list=[accum_map])
if batch_id % 20 == 0:
every_pass_map.append(test_map)
print("Batch {0}, map {1}".format(batch_id, test_map))
mean_map = np.mean(every_pass_map)
        every_epoc_map = []
test_py_reader.start()
try:
batch_id = 0
while True:
test_map, = exe.run(test_prog, fetch_list=[accum_map])
if batch_id % 10 == 0:
every_epoc_map.append(test_map)
print("Batch {0}, map {1}".format(batch_id, test_map))
batch_id += 1
except fluid.core.EOFException:
test_py_reader.reset()
mean_map = np.mean(every_epoc_map)
print("Epoc {0}, test map {1}".format(epoc_id, test_map))
if test_map[0] > best_map:
best_map = test_map[0]
save_model('best_model')
print("Pass {0}, test map {1}".format(pass_id, test_map))
save_model('best_model', test_prog)
return best_map, mean_map
for pass_id in range(num_passes):
batch_begin = time.time()
start_time = time.time()
prev_start_time = start_time
every_pass_loss = []
for batch_id, data in enumerate(train_reader()):
prev_start_time = start_time
train_py_reader.start()
total_time = 0.0
try:
for epoc_id in range(epoc_num):
epoch_idx = epoc_id + 1
start_time = time.time()
if len(data) < (devices_num * 2):
print("There are too few data to train on all devices.")
continue
if args.parallel:
loss_v, = train_exe.run(fetch_list=[loss.name],
feed=feeder.feed(data))
else:
loss_v, = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[loss])
loss_v = np.mean(np.array(loss_v))
every_pass_loss.append(loss_v)
if batch_id % 20 == 0:
print("Pass {0}, batch {1}, loss {2}, time {3}".format(
pass_id, batch_id, loss_v, start_time - prev_start_time))
end_time = time.time()
best_map, mean_map = test(pass_id, best_map)
if args.enable_ce and pass_id == args.num_passes - 1:
total_time = end_time - start_time
train_avg_loss = np.mean(every_pass_loss)
if devices_num == 1:
print("kpis train_cost %s" % train_avg_loss)
print("kpis test_acc %s" % mean_map)
print("kpis train_speed %s" % (epocs / total_time))
else:
print("kpis train_cost_card%s %s" %
(devices_num, train_avg_loss))
print("kpis test_acc_card%s %s" %
(devices_num, mean_map))
print("kpis train_speed_card%s %f" %
(devices_num, epocs / total_time))
prev_start_time = start_time
every_epoc_loss = []
for batch_id in range(iters_per_epoc):
prev_start_time = start_time
start_time = time.time()
if parallel:
loss_v, = train_exe.run(fetch_list=[loss.name])
else:
loss_v, = exe.run(train_prog, fetch_list=[loss])
loss_v = np.mean(np.array(loss_v))
every_epoc_loss.append(loss_v)
if batch_id % 20 == 0:
print("Epoc {0}, batch {1}, loss {2}, time {3}".format(
epoc_id, batch_id, loss_v, start_time - prev_start_time))
end_time = time.time()
total_time += end_time - start_time
best_map, mean_map = test(epoc_id, best_map)
print("Best test map {0}".format(best_map))
if epoc_id % 10 == 0 or epoc_id == epoc_num - 1:
save_model(str(epoc_id), train_prog)
if enable_ce and epoc_id == epoc_num - 1:
train_avg_loss = np.mean(every_epoc_loss)
if devices_num == 1:
print("kpis train_cost %s" % train_avg_loss)
print("kpis test_acc %s" % mean_map)
print("kpis train_speed %s" % (total_time / epoch_idx))
else:
print("kpis train_cost_card%s %s" %
(devices_num, train_avg_loss))
print("kpis test_acc_card%s %s" %
(devices_num, mean_map))
print("kpis train_speed_card%s %f" %
(devices_num, total_time / epoch_idx))
except fluid.core.EOFException:
train_py_reader.reset()
except StopIteration:
train_py_reader.reset()
train_py_reader.reset()
if pass_id % 10 == 0 or pass_id == num_passes - 1:
save_model(str(pass_id))
print("Best test map {0}".format(best_map))
if __name__ == '__main__':
args = parser.parse_args()
print_arguments(args)
data_dir = args.data_dir
dataset = args.dataset
assert dataset in ['pascalvoc', 'coco2014', 'coco2017']
# for pascalvoc
label_file = 'label_list'
model_save_dir = args.model_save_dir
train_file_list = 'trainval.txt'
val_file_list = 'test.txt'
if 'coco' in args.dataset:
data_dir = 'data/coco'
if '2014' in args.dataset:
train_file_list = 'annotations/instances_train2014.json'
val_file_list = 'annotations/instances_val2014.json'
elif '2017' in args.dataset:
train_file_list = 'annotations/instances_train2017.json'
val_file_list = 'annotations/instances_val2017.json'
if dataset == 'coco2014':
train_file_list = 'annotations/instances_train2014.json'
val_file_list = 'annotations/instances_val2014.json'
elif dataset == 'coco2017':
train_file_list = 'annotations/instances_train2017.json'
val_file_list = 'annotations/instances_val2017.json'
mean_BGR = [float(m) for m in args.mean_BGR.split(",")]
image_shape = [int(m) for m in args.image_shape.split(",")]
train_parameters[dataset]['image_shape'] = image_shape
train_parameters[dataset]['batch_size'] = args.batch_size
train_parameters[dataset]['lr'] = args.learning_rate
train_parameters[dataset]['epoc_num'] = args.epoc_num
data_args = reader.Settings(
dataset=args.dataset,
data_dir=data_dir,
label_file=label_file,
resize_h=args.resize_h,
resize_w=args.resize_w,
mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R],
apply_distort=args.apply_distort,
apply_expand=args.apply_expand,
ap_version = args.ap_version,
toy=args.is_toy)
train(
args,
train_file_list=train_file_list,
val_file_list=val_file_list,
data_args=data_args,
learning_rate=args.learning_rate,
batch_size=args.batch_size,
num_passes=args.num_passes,
model_save_dir=model_save_dir,
pretrained_model=args.pretrained_model)
resize_h=image_shape[1],
resize_w=image_shape[2],
mean_value=mean_BGR,
apply_distort=True,
apply_expand=True,
        ap_version=args.ap_version)
train(args,
data_args,
train_parameters[dataset],
train_file_list=train_file_list,
val_file_list=val_file_list)