未验证 提交 6423cab6 编写于 作者: C chengjuntao 提交者: GitHub

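Update RRPN to the new Paddle API (#4296)

Update the RRPN eval, infer, train and model-builder code to the new API: inputs are declared with `fluid.data`, `fluid.io.DataLoader.from_generator` replaces `py_reader` and the eval/infer `DataFeeder`, weights are loaded with `fluid.load`, `fluid.layers.case` replaces the `Switch` block, and the `use_pyreader` command-line flag is removed.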
上级 042737db
...@@ -36,7 +36,6 @@ def eval(): ...@@ -36,7 +36,6 @@ def eval():
place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size]
class_nums = cfg.class_num class_nums = cfg.class_num
model = model_builder.RRPN( model = model_builder.RRPN(
add_conv_body_func=resnet.ResNet(), add_conv_body_func=resnet.ResNet(),
...@@ -48,19 +47,14 @@ def eval(): ...@@ -48,19 +47,14 @@ def eval():
infer_prog = fluid.Program() infer_prog = fluid.Program()
with fluid.program_guard(infer_prog, startup_prog): with fluid.program_guard(infer_prog, startup_prog):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
model.build_model(image_shape) model.build_model()
pred_boxes = model.eval_bbox_out() pred_boxes = model.eval_bbox_out()
infer_prog = infer_prog.clone(True) infer_prog = infer_prog.clone(True)
exe.run(startup_prog) exe.run(startup_prog)
fluid.load(infer_prog, cfg.pretrained_model, exe)
# yapf: disable
def if_exist(var):
return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
if cfg.pretrained_model:
checkpoint.load_params(exe, infer_prog, cfg.pretrained_model)
# yapf: enable
test_reader = reader.test(1) test_reader = reader.test(1)
feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) data_loader = model.data_loader
data_loader.set_sample_list_generator(test_reader, places=place)
fetch_list = [pred_boxes] fetch_list = [pred_boxes]
res_list = [] res_list = []
...@@ -68,11 +62,10 @@ def eval(): ...@@ -68,11 +62,10 @@ def eval():
'bbox', 'gt_box', 'gt_class', 'is_crowed', 'im_info', 'im_id', 'bbox', 'gt_box', 'gt_class', 'is_crowed', 'im_info', 'im_id',
'is_difficult' 'is_difficult'
] ]
for i, data in enumerate(test_reader()): for i, data in enumerate(data_loader()):
im_info = [data[0][1]]
result = exe.run(infer_prog, result = exe.run(infer_prog,
fetch_list=[v.name for v in fetch_list], fetch_list=[v.name for v in fetch_list],
feed=feeder.feed(data), feed=data,
return_numpy=False) return_numpy=False)
pred_boxes_v = result[0] pred_boxes_v = result[0]
nmsed_out = pred_boxes_v nmsed_out = pred_boxes_v
......
...@@ -31,11 +31,11 @@ logger = logging.getLogger(__name__) ...@@ -31,11 +31,11 @@ logger = logging.getLogger(__name__)
def get_key_dict(out, data, key): def get_key_dict(out, data, key):
res = {} res = {}
for i in range(len(key)): for name in key:
if i == 0: if name == 'bbox':
res[key[i]] = out res[name] = np.array(out)
else: else:
res[key[i]] = data[i] res[name] = np.array(data[name])
return res return res
...@@ -167,7 +167,7 @@ def calculate_ap(rec, prec): ...@@ -167,7 +167,7 @@ def calculate_ap(rec, prec):
def icdar_map(result, class_name, ovthresh): def icdar_map(result, class_name, ovthresh):
im_ids = [] im_ids = []
for res in result: for res in result:
im_ids.append(res['im_id']) im_ids.append(res['im_id'][0][0])
recs = {} recs = {}
for i, im_id in enumerate(im_ids): for i, im_id in enumerate(im_ids):
...@@ -185,11 +185,11 @@ def icdar_map(result, class_name, ovthresh): ...@@ -185,11 +185,11 @@ def icdar_map(result, class_name, ovthresh):
confidence = [] confidence = []
bbox = [] bbox = []
for res in result: for res in result:
im_info = res['im_info'] im_info = res['im_info'][0]
pred_boxes = res['bbox'] pred_boxes = res['bbox']
for box in pred_boxes: for box in pred_boxes:
if box[0] == class_name: if box[0] == class_name:
image_ids.append(res['im_id']) image_ids.append(res['im_id'][0][0])
confidence.append(box[1]) confidence.append(box[1])
clipd_box = clip_box(box[2:].reshape(-1, 8), im_info) clipd_box = clip_box(box[2:].reshape(-1, 8), im_info)
bbox.append(clipd_box[0]) bbox.append(clipd_box[0])
...@@ -286,7 +286,7 @@ def icdar_box_eval(result, thresh): ...@@ -286,7 +286,7 @@ def icdar_box_eval(result, thresh):
num_global_care_gt = 0 num_global_care_gt = 0
num_global_care_det = 0 num_global_care_det = 0
for res in result: for res in result:
im_info = res['im_info'] im_info = res['im_info'][0]
h = im_info[1] h = im_info[1]
w = im_info[2] w = im_info[2]
gt_boxes = res['gt_box'] gt_boxes = res['gt_box']
......
...@@ -32,7 +32,6 @@ from utility import print_arguments, parse_args, check_gpu ...@@ -32,7 +32,6 @@ from utility import print_arguments, parse_args, check_gpu
def infer(): def infer():
place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size]
class_nums = cfg.class_num class_nums = cfg.class_num
model = model_builder.RRPN( model = model_builder.RRPN(
add_conv_body_func=resnet.ResNet(), add_conv_body_func=resnet.ResNet(),
...@@ -43,31 +42,25 @@ def infer(): ...@@ -43,31 +42,25 @@ def infer():
infer_prog = fluid.Program() infer_prog = fluid.Program()
with fluid.program_guard(infer_prog, startup_prog): with fluid.program_guard(infer_prog, startup_prog):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
model.build_model(image_shape) model.build_model()
pred_boxes = model.eval_bbox_out() pred_boxes = model.eval_bbox_out()
infer_prog = infer_prog.clone(True) infer_prog = infer_prog.clone(True)
exe.run(startup_prog) exe.run(startup_prog)
fluid.load(infer_prog, cfg.pretrained_model, exe)
# yapf: disable
def if_exist(var):
return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
if cfg.pretrained_model:
checkpoint.load_params(exe, infer_prog, cfg.pretrained_model)
# yapf: enable
infer_reader = reader.infer(cfg.image_path) infer_reader = reader.infer(cfg.image_path)
feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) data_loader = model.data_loader
data_loader.set_sample_list_generator(infer_reader, places=place)
fetch_list = [pred_boxes] fetch_list = [pred_boxes]
imgs = os.listdir(cfg.image_path) imgs = os.listdir(cfg.image_path)
imgs.sort() imgs.sort()
for i, data in enumerate(infer_reader()): for i, data in enumerate(data_loader()):
result = exe.run(infer_prog, result = exe.run(infer_prog,
fetch_list=[v.name for v in fetch_list], fetch_list=[v.name for v in fetch_list],
feed=feeder.feed(data), feed=data,
return_numpy=False) return_numpy=False)
nmsed_out = result[0] nmsed_out = result[0]
im_info = data[0][1] im_info = np.array(data[0]['im_info'])[0]
im_scale = im_info[2] im_scale = im_info[2]
outs = np.array(nmsed_out) outs = np.array(nmsed_out)
draw_bounding_box_on_image(cfg.image_path, imgs[i], outs, im_scale, draw_bounding_box_on_image(cfg.image_path, imgs[i], outs, im_scale,
......
...@@ -35,8 +35,8 @@ class RRPN(object): ...@@ -35,8 +35,8 @@ class RRPN(object):
self.use_pyreader = use_pyreader self.use_pyreader = use_pyreader
self.use_random = use_random self.use_random = use_random
def build_model(self, image_shape): def build_model(self):
self.build_input(image_shape) self.build_input()
body_conv = self.add_conv_body_func(self.image) body_conv = self.add_conv_body_func(self.image)
# RPN # RPN
self.rpn_heads(body_conv) self.rpn_heads(body_conv)
...@@ -61,56 +61,42 @@ class RRPN(object): ...@@ -61,56 +61,42 @@ class RRPN(object):
def eval_bbox_out(self): def eval_bbox_out(self):
return self.pred_result return self.pred_result
def build_input(self, image_shape): def build_input(self):
if self.use_pyreader: self.image = fluid.data(
in_shapes = [[-1] + image_shape, [-1, 5], [-1, 1], [-1, 1], name='image', shape=[None, 3, None, None], dtype='float32')
[-1, 3], [-1, 1]] if self.mode == 'train':
lod_levels = [0, 1, 1, 1, 0, 0] self.gt_box = fluid.data(
dtypes = [ name='gt_box', shape=[None, 5], dtype='float32', lod_level=1)
'float32', 'float32', 'int32', 'int32', 'float32', 'int64' else:
self.gt_box = fluid.data(
name='gt_box', shape=[None, 8], dtype='float32', lod_level=1)
self.gt_label = fluid.data(
name='gt_class', shape=[None, 1], dtype='int32', lod_level=1)
self.is_crowd = fluid.data(
name='is_crowed', shape=[None, 1], dtype='int32', lod_level=1)
self.im_info = fluid.data(
name='im_info', shape=[None, 3], dtype='float32')
self.im_id = fluid.data(name='im_id', shape=[None, 1], dtype='int64')
self.difficult = fluid.data(
name='is_difficult', shape=[None, -1], dtype='float32', lod_level=1)
if self.mode == 'train':
feed_data = [
self.image, self.gt_box, self.gt_label, self.is_crowd,
self.im_info, self.im_id
] ]
self.py_reader = fluid.layers.py_reader( elif self.mode == 'infer':
capacity=64, feed_data = [self.image, self.im_info]
shapes=in_shapes,
lod_levels=lod_levels,
dtypes=dtypes,
use_double_buffer=True)
ins = fluid.layers.read_file(self.py_reader)
self.image = ins[0]
self.gt_box = ins[1]
self.gt_label = ins[2]
self.is_crowd = ins[3]
self.im_info = ins[4]
self.im_id = ins[5]
else: else:
self.image = fluid.layers.data( feed_data = [
name='image', shape=image_shape, dtype='float32')
self.gt_box = fluid.layers.data(
name='gt_box', shape=[4], dtype='float32', lod_level=1)
self.gt_label = fluid.layers.data(
name='gt_label', shape=[1], dtype='int32', lod_level=1)
self.is_crowd = fluid.layers.data(
name='is_crowd', shape=[1], dtype='int32', lod_level=1)
self.im_info = fluid.layers.data(
name='im_info', shape=[3], dtype='float32')
self.im_id = fluid.layers.data(
name='im_id', shape=[1], dtype='int64')
self.difficult = fluid.layers.data(
name='difficult', shape=[1], dtype='float32', lod_level=1)
def feeds(self):
if self.mode == 'infer':
return [self.image, self.im_info]
if self.mode == 'val':
return [
self.image, self.gt_box, self.gt_label, self.is_crowd, self.image, self.gt_box, self.gt_label, self.is_crowd,
self.im_info, self.im_id, self.difficult self.im_info, self.im_id, self.difficult
] ]
return [ if self.mode == 'train':
self.image, self.gt_box, self.gt_label, self.is_crowd, self.im_info, self.data_loader = fluid.io.DataLoader.from_generator(
self.im_id feed_list=feed_data, capacity=64, iterable=False)
] else:
self.data_loader = fluid.io.DataLoader.from_generator(
feed_list=feed_data, capacity=64, iterable=True)
def eval_bbox(self): def eval_bbox(self):
self.im_scale = fluid.layers.slice( self.im_scale = fluid.layers.slice(
...@@ -151,23 +137,37 @@ class RRPN(object): ...@@ -151,23 +137,37 @@ class RRPN(object):
dimension = fluid.layers.fill_constant( dimension = fluid.layers.fill_constant(
shape=[1, 1], value=2, dtype='int32') shape=[1, 1], value=2, dtype='int32')
cond = fluid.layers.less_than(dimension, res_dimension) cond = fluid.layers.less_than(dimension, res_dimension)
res = fluid.layers.create_global_var(
shape=[1, 10], value=0.0, dtype='float32', persistable=False) def case1():
with fluid.layers.control_flow.Switch() as switch: res = fluid.layers.create_global_var(
with switch.case(cond): shape=[1, 10],
coordinate = fluid.layers.fill_constant( value=0.0,
shape=[9], value=0.0, dtype='float32') dtype='float32',
pred_class = fluid.layers.fill_constant( persistable=False)
shape=[1], value=i + 1, dtype='float32') coordinate = fluid.layers.fill_constant(
add_class = fluid.layers.concat( shape=[9], value=0.0, dtype='float32')
[pred_class, coordinate], axis=0) pred_class = fluid.layers.fill_constant(
normal_result = fluid.layers.elementwise_add(pred_result, shape=[1], value=i + 1, dtype='float32')
add_class) add_class = fluid.layers.concat(
fluid.layers.assign(normal_result, res) [pred_class, coordinate], axis=0)
with switch.default(): normal_result = fluid.layers.elementwise_add(pred_result,
normal_result = fluid.layers.fill_constant( add_class)
shape=[1, 10], value=-1.0, dtype='float32') fluid.layers.assign(normal_result, res)
fluid.layers.assign(normal_result, res) return res
def case2():
res = fluid.layers.create_global_var(
shape=[1, 10],
value=0.0,
dtype='float32',
persistable=False)
normal_result = fluid.layers.fill_constant(
shape=[1, 10], value=-1.0, dtype='float32')
fluid.layers.assign(normal_result, res)
return res
res = fluid.layers.case(
pred_fn_pairs=[(cond, case1)], default=case2)
results.append(res) results.append(res)
if len(results) == 1: if len(results) == 1:
self.pred_result = results[0] self.pred_result = results[0]
......
...@@ -56,7 +56,7 @@ def get_device_num(): ...@@ -56,7 +56,7 @@ def get_device_num():
def train(): def train():
learning_rate = cfg.learning_rate learning_rate = cfg.learning_rate
image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size] #image_shape = [-1, 3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]
devices_num = get_device_num() devices_num = get_device_num()
total_batch_size = devices_num * cfg.TRAIN.im_per_batch total_batch_size = devices_num * cfg.TRAIN.im_per_batch
...@@ -71,7 +71,7 @@ def train(): ...@@ -71,7 +71,7 @@ def train():
add_roi_box_head_func=resnet.ResNetC5(), add_roi_box_head_func=resnet.ResNetC5(),
use_pyreader=cfg.use_pyreader, use_pyreader=cfg.use_pyreader,
use_random=use_random) use_random=use_random)
model.build_model(image_shape) model.build_model()
losses, keys, rpn_rois = model.loss() losses, keys, rpn_rois = model.loss()
loss = losses[0] loss = losses[0]
fetch_list = losses fetch_list = losses
...@@ -132,16 +132,16 @@ def train(): ...@@ -132,16 +132,16 @@ def train():
if num_trainers > 1: if num_trainers > 1:
train_reader = fluid.contrib.reader.distributed_batch_reader( train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader) train_reader)
py_reader = model.py_reader data_loader = model.data_loader
py_reader.decorate_paddle_reader(train_reader) data_loader.set_sample_list_generator(train_reader, places=place)
else: else:
if num_trainers > 1: shuffle = False if num_trainers > 1: shuffle = False
train_reader = reader.train( train_reader = reader.train(
batch_size=total_batch_size, shuffle=shuffle) batch_size=total_batch_size, shuffle=shuffle)
feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
def train_loop_pyreader(): def train_loop():
py_reader.start() data_loader.start()
train_stats = TrainingStats(cfg.log_window, keys) train_stats = TrainingStats(cfg.log_window, keys)
try: try:
start_time = time.time() start_time = time.time()
...@@ -173,48 +173,9 @@ def train(): ...@@ -173,48 +173,9 @@ def train():
total_time = end_time - start_time total_time = end_time - start_time
last_loss = np.array(outs[0]).mean() last_loss = np.array(outs[0]).mean()
except (StopIteration, fluid.core.EOFException): except (StopIteration, fluid.core.EOFException):
py_reader.reset() data_loader.reset()
def train_loop():
start_time = time.time()
prev_start_time = start_time
start = start_time
train_stats = TrainingStats(cfg.log_window, keys)
for iter_id, data in enumerate(train_reader()):
prev_start_time = start_time
start_time = time.time()
if data[0][1].shape[0] == 0:
continue
outs = exe.run(compiled_train_prog,
fetch_list=[v.name for v in fetch_list],
feed=feeder.feed(data))
stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
train_stats.update(stats)
logs = train_stats.log()
if iter_id % 10 == 0:
strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
now_time(), iter_id,
np.mean(outs[-1]), logs, start_time - prev_start_time)
print(strs)
sys.stdout.flush()
if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0 and iter_id != 0:
save_name = "{}".format(iter_id + 1)
checkpoint.save(exe, train_prog,
os.path.join(cfg.model_save_dir, save_name))
if (iter_id + 1) == cfg.max_iter:
checkpoint.save(exe, train_prog,
os.path.join(cfg.model_save_dir, "model_final"))
break
end_time = time.time()
total_time = end_time - start_time
last_loss = np.array(outs[0]).mean()
if cfg.use_pyreader: train_loop()
train_loop_pyreader()
else:
train_loop()
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -133,7 +133,6 @@ def parse_args(): ...@@ -133,7 +133,6 @@ def parse_args():
add_arg('dataset', str, 'icdar2015', "icdar2015, icdar2017.") add_arg('dataset', str, 'icdar2015', "icdar2015, icdar2017.")
add_arg('class_num', int, 2, "Class number.") add_arg('class_num', int, 2, "Class number.")
add_arg('data_dir', str, 'dataset/icdar2015', "The data root path.") add_arg('data_dir', str, 'dataset/icdar2015', "The data root path.")
add_arg('use_pyreader', bool, False, "Use pyreader.")
add_arg('use_profile', bool, False, "Whether use profiler.") add_arg('use_profile', bool, False, "Whether use profiler.")
add_arg('padding_minibatch',bool, False, add_arg('padding_minibatch',bool, False,
"If False, only resize image and not pad, image shape is different between" "If False, only resize image and not pad, image shape is different between"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册