diff --git a/PaddleCV/rrpn/eval.py b/PaddleCV/rrpn/eval.py index bf7732071967cab8766b9512c6007efb8e23db8a..5f0d94b86671442ef2380db9d3bf97bea85a60de 100755 --- a/PaddleCV/rrpn/eval.py +++ b/PaddleCV/rrpn/eval.py @@ -36,7 +36,6 @@ def eval(): place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) - image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size] class_nums = cfg.class_num model = model_builder.RRPN( add_conv_body_func=resnet.ResNet(), @@ -48,19 +47,14 @@ def eval(): infer_prog = fluid.Program() with fluid.program_guard(infer_prog, startup_prog): with fluid.unique_name.guard(): - model.build_model(image_shape) + model.build_model() pred_boxes = model.eval_bbox_out() infer_prog = infer_prog.clone(True) exe.run(startup_prog) - - # yapf: disable - def if_exist(var): - return os.path.exists(os.path.join(cfg.pretrained_model, var.name)) - if cfg.pretrained_model: - checkpoint.load_params(exe, infer_prog, cfg.pretrained_model) - # yapf: enable + fluid.load(infer_prog, cfg.pretrained_model, exe) test_reader = reader.test(1) - feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) + data_loader = model.data_loader + data_loader.set_sample_list_generator(test_reader, places=place) fetch_list = [pred_boxes] res_list = [] @@ -68,11 +62,10 @@ def eval(): 'bbox', 'gt_box', 'gt_class', 'is_crowed', 'im_info', 'im_id', 'is_difficult' ] - for i, data in enumerate(test_reader()): - im_info = [data[0][1]] + for i, data in enumerate(data_loader()): result = exe.run(infer_prog, fetch_list=[v.name for v in fetch_list], - feed=feeder.feed(data), + feed=data, return_numpy=False) pred_boxes_v = result[0] nmsed_out = pred_boxes_v diff --git a/PaddleCV/rrpn/eval_helper.py b/PaddleCV/rrpn/eval_helper.py index c9e66e67cbb740785cc8b1509006a750d7b0158f..9dbbac36396606b9afcd2c4a7d280cef85b036e4 100755 --- a/PaddleCV/rrpn/eval_helper.py +++ b/PaddleCV/rrpn/eval_helper.py @@ -31,11 +31,11 @@ logger = logging.getLogger(__name__) def get_key_dict(out, data, key): res = {} - for i in range(len(key)): - if i == 0: - res[key[i]] = out + for name in key: + if name == 'bbox': + res[name] = np.array(out) else: - res[key[i]] = data[i] + res[name] = np.array(data[name]) return res @@ -167,7 +167,7 @@ def calculate_ap(rec, prec): def icdar_map(result, class_name, ovthresh): im_ids = [] for res in result: - im_ids.append(res['im_id']) + im_ids.append(res['im_id'][0][0]) recs = {} for i, im_id in enumerate(im_ids): @@ -185,11 +185,11 @@ def icdar_map(result, class_name, ovthresh): confidence = [] bbox = [] for res in result: - im_info = res['im_info'] + im_info = res['im_info'][0] pred_boxes = res['bbox'] for box in pred_boxes: if box[0] == class_name: - image_ids.append(res['im_id']) + image_ids.append(res['im_id'][0][0]) confidence.append(box[1]) clipd_box = clip_box(box[2:].reshape(-1, 8), im_info) bbox.append(clipd_box[0]) @@ -286,7 +286,7 @@ def icdar_box_eval(result, thresh): num_global_care_gt = 0 num_global_care_det = 0 for res in result: - im_info = res['im_info'] + im_info = res['im_info'][0] h = im_info[1] w = im_info[2] gt_boxes = res['gt_box'] diff --git a/PaddleCV/rrpn/infer.py b/PaddleCV/rrpn/infer.py index 3af9d21c2e2da456a5f719225c327633d97f6eb1..566afaacb410de4ceb3ace05ed00bc30e28f99c2 100755 --- a/PaddleCV/rrpn/infer.py +++ b/PaddleCV/rrpn/infer.py @@ -32,7 +32,6 @@ from utility import print_arguments, parse_args, check_gpu def infer(): place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) - image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size] class_nums = cfg.class_num model = model_builder.RRPN( add_conv_body_func=resnet.ResNet(), @@ -43,31 +42,25 @@ def infer(): infer_prog = fluid.Program() with fluid.program_guard(infer_prog, startup_prog): with fluid.unique_name.guard(): - model.build_model(image_shape) + model.build_model() pred_boxes = model.eval_bbox_out() infer_prog = infer_prog.clone(True) exe.run(startup_prog) - - # yapf: disable - def if_exist(var): - return os.path.exists(os.path.join(cfg.pretrained_model, var.name)) - if cfg.pretrained_model: - checkpoint.load_params(exe, infer_prog, cfg.pretrained_model) - # yapf: enable + fluid.load(infer_prog, cfg.pretrained_model, exe) infer_reader = reader.infer(cfg.image_path) - feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) - + data_loader = model.data_loader + data_loader.set_sample_list_generator(infer_reader, places=place) fetch_list = [pred_boxes] imgs = os.listdir(cfg.image_path) imgs.sort() - for i, data in enumerate(infer_reader()): + for i, data in enumerate(data_loader()): result = exe.run(infer_prog, fetch_list=[v.name for v in fetch_list], - feed=feeder.feed(data), + feed=data, return_numpy=False) nmsed_out = result[0] - im_info = data[0][1] + im_info = np.array(data[0]['im_info'])[0] im_scale = im_info[2] outs = np.array(nmsed_out) draw_bounding_box_on_image(cfg.image_path, imgs[i], outs, im_scale, diff --git a/PaddleCV/rrpn/models/model_builder.py b/PaddleCV/rrpn/models/model_builder.py index 1f976faca76732e0f3e31b485bd482602fbb8c4c..e37e940e6cd2d575956cae34ba2a4ad4d0356cde 100755 --- a/PaddleCV/rrpn/models/model_builder.py +++ b/PaddleCV/rrpn/models/model_builder.py @@ -35,8 +35,8 @@ class RRPN(object): self.use_pyreader = use_pyreader self.use_random = use_random - def build_model(self, image_shape): - self.build_input(image_shape) + def build_model(self): + self.build_input() body_conv = self.add_conv_body_func(self.image) # RPN self.rpn_heads(body_conv) @@ -61,56 +61,42 @@ class RRPN(object): def eval_bbox_out(self): return self.pred_result - def build_input(self, image_shape): - if self.use_pyreader: - in_shapes = [[-1] + image_shape, [-1, 5], [-1, 1], [-1, 1], - [-1, 3], [-1, 1]] - lod_levels = [0, 1, 1, 1, 0, 0] - dtypes = [ - 'float32', 'float32', 'int32', 'int32', 'float32', 'int64' + def build_input(self): + self.image = fluid.data( + name='image', shape=[None, 3, None, None], dtype='float32') + if self.mode == 'train': + self.gt_box = fluid.data( + name='gt_box', shape=[None, 5], dtype='float32', lod_level=1) + else: + self.gt_box = fluid.data( + name='gt_box', shape=[None, 8], dtype='float32', lod_level=1) + self.gt_label = fluid.data( + name='gt_class', shape=[None, 1], dtype='int32', lod_level=1) + self.is_crowd = fluid.data( + name='is_crowed', shape=[None, 1], dtype='int32', lod_level=1) + self.im_info = fluid.data( + name='im_info', shape=[None, 3], dtype='float32') + self.im_id = fluid.data(name='im_id', shape=[None, 1], dtype='int64') + self.difficult = fluid.data( + name='is_difficult', shape=[None, -1], dtype='float32', lod_level=1) + if self.mode == 'train': + feed_data = [ + self.image, self.gt_box, self.gt_label, self.is_crowd, + self.im_info, self.im_id ] - self.py_reader = fluid.layers.py_reader( - capacity=64, - shapes=in_shapes, - lod_levels=lod_levels, - dtypes=dtypes, - use_double_buffer=True) - ins = fluid.layers.read_file(self.py_reader) - self.image = ins[0] - self.gt_box = ins[1] - self.gt_label = ins[2] - self.is_crowd = ins[3] - self.im_info = ins[4] - self.im_id = ins[5] + elif self.mode == 'infer': + feed_data = [self.image, self.im_info] else: - self.image = fluid.layers.data( - name='image', shape=image_shape, dtype='float32') - self.gt_box = fluid.layers.data( - name='gt_box', shape=[4], dtype='float32', lod_level=1) - self.gt_label = fluid.layers.data( - name='gt_label', shape=[1], dtype='int32', lod_level=1) - self.is_crowd = fluid.layers.data( - name='is_crowd', shape=[1], dtype='int32', lod_level=1) - self.im_info = fluid.layers.data( - name='im_info', shape=[3], dtype='float32') - self.im_id = fluid.layers.data( - name='im_id', shape=[1], dtype='int64') - - self.difficult = fluid.layers.data( - name='difficult', shape=[1], dtype='float32', lod_level=1) - - def feeds(self): - if self.mode == 'infer': - return [self.image, self.im_info] - if self.mode == 'val': - return [ + feed_data = [ self.image, self.gt_box, self.gt_label, self.is_crowd, self.im_info, self.im_id, self.difficult ] - return [ - self.image, self.gt_box, self.gt_label, self.is_crowd, self.im_info, - self.im_id - ] + if self.mode == 'train': + self.data_loader = fluid.io.DataLoader.from_generator( + feed_list=feed_data, capacity=64, iterable=False) + else: + self.data_loader = fluid.io.DataLoader.from_generator( + feed_list=feed_data, capacity=64, iterable=True) def eval_bbox(self): self.im_scale = fluid.layers.slice( @@ -151,23 +137,37 @@ class RRPN(object): dimension = fluid.layers.fill_constant( shape=[1, 1], value=2, dtype='int32') cond = fluid.layers.less_than(dimension, res_dimension) - res = fluid.layers.create_global_var( - shape=[1, 10], value=0.0, dtype='float32', persistable=False) - with fluid.layers.control_flow.Switch() as switch: - with switch.case(cond): - coordinate = fluid.layers.fill_constant( - shape=[9], value=0.0, dtype='float32') - pred_class = fluid.layers.fill_constant( - shape=[1], value=i + 1, dtype='float32') - add_class = fluid.layers.concat( - [pred_class, coordinate], axis=0) - normal_result = fluid.layers.elementwise_add(pred_result, - add_class) - fluid.layers.assign(normal_result, res) - with switch.default(): - normal_result = fluid.layers.fill_constant( - shape=[1, 10], value=-1.0, dtype='float32') - fluid.layers.assign(normal_result, res) + + def case1(): + res = fluid.layers.create_global_var( + shape=[1, 10], + value=0.0, + dtype='float32', + persistable=False) + coordinate = fluid.layers.fill_constant( + shape=[9], value=0.0, dtype='float32') + pred_class = fluid.layers.fill_constant( + shape=[1], value=i + 1, dtype='float32') + add_class = fluid.layers.concat( + [pred_class, coordinate], axis=0) + normal_result = fluid.layers.elementwise_add(pred_result, + add_class) + fluid.layers.assign(normal_result, res) + return res + + def case2(): + res = fluid.layers.create_global_var( + shape=[1, 10], + value=0.0, + dtype='float32', + persistable=False) + normal_result = fluid.layers.fill_constant( + shape=[1, 10], value=-1.0, dtype='float32') + fluid.layers.assign(normal_result, res) + return res + + res = fluid.layers.case( + pred_fn_pairs=[(cond, case1)], default=case2) results.append(res) if len(results) == 1: self.pred_result = results[0] diff --git a/PaddleCV/rrpn/train.py b/PaddleCV/rrpn/train.py index 11dafa990d074ab8035feaaf3d966a5a883d6ac5..5f451929f72aad7c7112df8e0d25ee946c3cef1f 100755 --- a/PaddleCV/rrpn/train.py +++ b/PaddleCV/rrpn/train.py @@ -56,7 +56,7 @@ def get_device_num(): def train(): learning_rate = cfg.learning_rate - image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size] + #image_shape = [-1, 3, cfg.TRAIN.max_size, cfg.TRAIN.max_size] devices_num = get_device_num() total_batch_size = devices_num * cfg.TRAIN.im_per_batch @@ -71,7 +71,7 @@ def train(): add_roi_box_head_func=resnet.ResNetC5(), use_pyreader=cfg.use_pyreader, use_random=use_random) - model.build_model(image_shape) + model.build_model() losses, keys, rpn_rois = model.loss() loss = losses[0] fetch_list = losses @@ -132,16 +132,16 @@ def train(): if num_trainers > 1: train_reader = fluid.contrib.reader.distributed_batch_reader( train_reader) - py_reader = model.py_reader - py_reader.decorate_paddle_reader(train_reader) + data_loader = model.data_loader + data_loader.set_sample_list_generator(train_reader, places=place) else: if num_trainers > 1: shuffle = False train_reader = reader.train( batch_size=total_batch_size, shuffle=shuffle) feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) - def train_loop_pyreader(): - py_reader.start() + def train_loop(): + data_loader.start() train_stats = TrainingStats(cfg.log_window, keys) try: start_time = time.time() @@ -173,48 +173,9 @@ def train(): total_time = end_time - start_time last_loss = np.array(outs[0]).mean() except (StopIteration, fluid.core.EOFException): - py_reader.reset() - - def train_loop(): - start_time = time.time() - prev_start_time = start_time - start = start_time - train_stats = TrainingStats(cfg.log_window, keys) - for iter_id, data in enumerate(train_reader()): - prev_start_time = start_time - start_time = time.time() - if data[0][1].shape[0] == 0: - continue - - outs = exe.run(compiled_train_prog, - fetch_list=[v.name for v in fetch_list], - feed=feeder.feed(data)) - stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])} - train_stats.update(stats) - logs = train_stats.log() - if iter_id % 10 == 0: - strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format( - now_time(), iter_id, - np.mean(outs[-1]), logs, start_time - prev_start_time) - print(strs) - sys.stdout.flush() - if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0 and iter_id != 0: - save_name = "{}".format(iter_id + 1) - checkpoint.save(exe, train_prog, - os.path.join(cfg.model_save_dir, save_name)) - if (iter_id + 1) == cfg.max_iter: - checkpoint.save(exe, train_prog, - os.path.join(cfg.model_save_dir, "model_final")) - break - - end_time = time.time() - total_time = end_time - start_time - last_loss = np.array(outs[0]).mean() + data_loader.reset() - if cfg.use_pyreader: - train_loop_pyreader() - else: - train_loop() + train_loop() if __name__ == '__main__': diff --git a/PaddleCV/rrpn/utility.py b/PaddleCV/rrpn/utility.py index d737d3e78146f7ca2cacaa2edc443b4ae654b3fd..226f6e37304ef771d3a9733ffeccb47fbd767f71 100755 --- a/PaddleCV/rrpn/utility.py +++ b/PaddleCV/rrpn/utility.py @@ -133,7 +133,6 @@ def parse_args(): add_arg('dataset', str, 'icdar2015', "icdar2015, icdar2017.") add_arg('class_num', int, 2, "Class number.") add_arg('data_dir', str, 'dataset/icdar2015', "The data root path.") - add_arg('use_pyreader', bool, False, "Use pyreader.") add_arg('use_profile', bool, False, "Whether use profiler.") add_arg('padding_minibatch',bool, False, "If False, only resize image and not pad, image shape is different between"