From 1450b76907ad1184d096a8ff91c9c77fe334b699 Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Fri, 14 Sep 2018 09:38:41 -0500 Subject: [PATCH] Add pyreader, refine code and add profiling code. (#1250) * Add pyreader, refine code and add profiling code. * Update parameter initializer. --- fluid/faster_rcnn/models/__init__.py | 0 fluid/faster_rcnn/models/model_builder.py | 259 ++++++++++++++++++++++ fluid/faster_rcnn/models/resnet.py | 153 +++++++++++++ fluid/faster_rcnn/profile.py | 187 ++++++++++++++++ fluid/faster_rcnn/reader.py | 33 +-- fluid/faster_rcnn/train.py | 188 +++++++--------- 6 files changed, 684 insertions(+), 136 deletions(-) create mode 100644 fluid/faster_rcnn/models/__init__.py create mode 100644 fluid/faster_rcnn/models/model_builder.py create mode 100644 fluid/faster_rcnn/models/resnet.py create mode 100644 fluid/faster_rcnn/profile.py diff --git a/fluid/faster_rcnn/models/__init__.py b/fluid/faster_rcnn/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/fluid/faster_rcnn/models/model_builder.py b/fluid/faster_rcnn/models/model_builder.py new file mode 100644 index 00000000..970c746c --- /dev/null +++ b/fluid/faster_rcnn/models/model_builder.py @@ -0,0 +1,259 @@ +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Constant +from paddle.fluid.initializer import Normal +from paddle.fluid.regularizer import L2Decay + + +class FasterRCNN(object): + def __init__(self, + cfg=None, + add_conv_body_func=None, + add_roi_box_head_func=None, + is_train=True, + use_pyreader=True, + use_random=True): + self.add_conv_body_func = add_conv_body_func + self.add_roi_box_head_func = add_roi_box_head_func + self.cfg = cfg + self.is_train = is_train + self.use_pyreader = use_pyreader + self.use_random = use_random + #self.py_reader = None + + def build_model(self, image_shape): + self.build_input(image_shape) + body_conv = self.add_conv_body_func(self.image) + # RPN + self.rpn_heads(body_conv) + # Fast RCNN + self.fast_rcnn_heads(body_conv) + + def loss(self): + # Fast RCNN loss + loss_cls, loss_bbox = self.fast_rcnn_loss() + # RPN loss + rpn_cls_loss, rpn_reg_loss = self.rpn_loss() + return loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss, + + def build_input(self, image_shape): + if self.use_pyreader: + self.py_reader = fluid.layers.py_reader( + capacity=64, + shapes=[[-1] + image_shape, [-1, 4], [-1, 1], [-1, 1], [-1, 3]], + lod_levels=[0, 1, 1, 1, 0], + dtypes=["float32", "float32", "int32", "int32", "float32"], + use_double_buffer=True) + self.image, self.gt_box, self.gt_label, self.is_crowd, \ + self.im_info = fluid.layers.read_file(self.py_reader) + else: + self.image = fluid.layers.data( + name='image', shape=image_shape, dtype='float32') + self.gt_box = fluid.layers.data( + name='gt_box', shape=[4], dtype='float32', lod_level=1) + self.gt_label = fluid.layers.data( + name='gt_label', shape=[1], dtype='int32', lod_level=1) + self.is_crowd = fluid.layers.data( + name='is_crowd', + shape=[-1], + dtype='int32', + lod_level=1, + append_batch_size=False) + self.im_info = fluid.layers.data( + name='im_info', shape=[3], dtype='float32') + + def feeds(self): + return [ + self.image, self.gt_box, self.gt_label, self.is_crowd, self.im_info + ] + + def rpn_heads(self, rpn_input): + # RPN hidden representation + dim_out = rpn_input.shape[1] + rpn_conv = fluid.layers.conv2d( + input=rpn_input, + num_filters=dim_out, + filter_size=3, + stride=1, + padding=1, + act='relu', + name='conv_rpn', + param_attr=ParamAttr( + name="conv_rpn_w", initializer=Normal( + loc=0., scale=0.01)), + bias_attr=ParamAttr( + name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) + self.anchor, self.var = fluid.layers.anchor_generator( + input=rpn_conv, + anchor_sizes=self.cfg.anchor_sizes, + aspect_ratios=self.cfg.aspect_ratios, + variance=self.cfg.variance, + stride=[16.0, 16.0]) + num_anchor = self.anchor.shape[2] + # Proposal classification scores + self.rpn_cls_score = fluid.layers.conv2d( + rpn_conv, + num_filters=num_anchor, + filter_size=1, + stride=1, + padding=0, + act=None, + name='rpn_cls_score', + param_attr=ParamAttr( + name="rpn_cls_logits_w", initializer=Normal( + loc=0., scale=0.01)), + bias_attr=ParamAttr( + name="rpn_cls_logits_b", + learning_rate=2., + regularizer=L2Decay(0.))) + # Proposal bbox regression deltas + self.rpn_bbox_pred = fluid.layers.conv2d( + rpn_conv, + num_filters=4 * num_anchor, + filter_size=1, + stride=1, + padding=0, + act=None, + name='rpn_bbox_pred', + param_attr=ParamAttr( + name="rpn_bbox_pred_w", initializer=Normal( + loc=0., scale=0.01)), + bias_attr=ParamAttr( + name="rpn_bbox_pred_b", + learning_rate=2., + regularizer=L2Decay(0.))) + + rpn_cls_score_prob = fluid.layers.sigmoid( + self.rpn_cls_score, name='rpn_cls_score_prob') + + rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals( + scores=rpn_cls_score_prob, + bbox_deltas=self.rpn_bbox_pred, + im_info=self.im_info, + anchors=self.anchor, + variances=self.var, + pre_nms_top_n=12000, + post_nms_top_n=2000, + nms_thresh=0.7, + min_size=0.0, + eta=1.0) + + if self.is_train: + outs = fluid.layers.generate_proposal_labels( + rpn_rois=rpn_rois, + gt_classes=self.gt_label, + is_crowd=self.is_crowd, + gt_boxes=self.gt_box, + im_info=self.im_info, + batch_size_per_im=512, + fg_fraction=0.25, + fg_thresh=0.5, + bg_thresh_hi=0.5, + bg_thresh_lo=0.0, + bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], + class_nums=self.cfg.class_num, + use_random=self.use_random) + + self.rois = outs[0] + self.labels_int32 = outs[1] + self.bbox_targets = outs[2] + self.bbox_inside_weights = outs[3] + self.bbox_outside_weights = outs[4] + + def fast_rcnn_heads(self, roi_input): + pool = fluid.layers.roi_pool( + input=roi_input, + rois=self.rois, + pooled_height=14, + pooled_width=14, + spatial_scale=0.0625) + rcnn_out = self.add_roi_box_head_func(pool) + self.cls_score = fluid.layers.fc(input=rcnn_out, + size=self.cfg.class_num, + act=None, + name='cls_score', + param_attr=ParamAttr( + name='cls_score_w', + initializer=Normal( + loc=0.0, scale=0.001)), + bias_attr=ParamAttr( + name='cls_score_b', + learning_rate=2., + regularizer=L2Decay(0.))) + self.bbox_pred = fluid.layers.fc(input=rcnn_out, + size=4 * self.cfg.class_num, + act=None, + name='bbox_pred', + param_attr=ParamAttr( + name='bbox_pred_w', + initializer=Normal( + loc=0.0, scale=0.01)), + bias_attr=ParamAttr( + name='bbox_pred_b', + learning_rate=2., + regularizer=L2Decay(0.))) + + def fast_rcnn_loss(self): + labels_int64 = fluid.layers.cast(x=self.labels_int32, dtype='int64') + labels_int64.stop_gradient = True + #loss_cls = fluid.layers.softmax_with_cross_entropy( + # logits=cls_score, + # label=labels_int64 + # ) + softmax = fluid.layers.softmax(self.cls_score, use_cudnn=False) + loss_cls = fluid.layers.cross_entropy(softmax, labels_int64) + loss_cls = fluid.layers.reduce_mean(loss_cls) + loss_bbox = fluid.layers.smooth_l1( + x=self.bbox_pred, + y=self.bbox_targets, + inside_weight=self.bbox_inside_weights, + outside_weight=self.bbox_outside_weights, + sigma=1.0) + loss_bbox = fluid.layers.reduce_mean(loss_bbox) + return loss_cls, loss_bbox + + def rpn_loss(self): + rpn_cls_score_reshape = fluid.layers.transpose( + self.rpn_cls_score, perm=[0, 2, 3, 1]) + rpn_bbox_pred_reshape = fluid.layers.transpose( + self.rpn_bbox_pred, perm=[0, 2, 3, 1]) + + anchor_reshape = fluid.layers.reshape(self.anchor, shape=(-1, 4)) + var_reshape = fluid.layers.reshape(self.var, shape=(-1, 4)) + + rpn_cls_score_reshape = fluid.layers.reshape( + x=rpn_cls_score_reshape, shape=(0, -1, 1)) + rpn_bbox_pred_reshape = fluid.layers.reshape( + x=rpn_bbox_pred_reshape, shape=(0, -1, 4)) + + score_pred, loc_pred, score_tgt, loc_tgt = \ + fluid.layers.rpn_target_assign( + bbox_pred=rpn_bbox_pred_reshape, + cls_logits=rpn_cls_score_reshape, + anchor_box=anchor_reshape, + anchor_var=var_reshape, + gt_boxes=self.gt_box, + is_crowd=self.is_crowd, + im_info=self.im_info, + rpn_batch_size_per_im=256, + rpn_straddle_thresh=0.0, + rpn_fg_fraction=0.5, + rpn_positive_overlap=0.7, + rpn_negative_overlap=0.3, + use_random=self.use_random) + score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') + rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( + x=score_pred, label=score_tgt) + rpn_cls_loss = fluid.layers.reduce_mean( + rpn_cls_loss, name='loss_rpn_cls') + + rpn_reg_loss = fluid.layers.smooth_l1(x=loc_pred, y=loc_tgt, sigma=3.0) + rpn_reg_loss = fluid.layers.reduce_sum( + rpn_reg_loss, name='loss_rpn_bbox') + score_shape = fluid.layers.shape(score_tgt) + score_shape = fluid.layers.cast(x=score_shape, dtype='float32') + norm = fluid.layers.reduce_prod(score_shape) + norm.stop_gradient = True + rpn_reg_loss = rpn_reg_loss / norm + + return rpn_cls_loss, rpn_reg_loss diff --git a/fluid/faster_rcnn/models/resnet.py b/fluid/faster_rcnn/models/resnet.py new file mode 100644 index 00000000..394c4890 --- /dev/null +++ b/fluid/faster_rcnn/models/resnet.py @@ -0,0 +1,153 @@ +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.initializer import Constant +from paddle.fluid.regularizer import L2Decay + + +def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + act='relu', + name=None): + conv1 = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=ParamAttr(name=name + "_biases"), + name=name + '.conv2d.output.1') + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + + return fluid.layers.batch_norm( + input=conv1, + act=act, + name=bn_name + '.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + is_test=True) + + +def conv_affine_layer(input, + ch_out, + filter_size, + stride, + padding, + act='relu', + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=ch_out, + filter_size=filter_size, + stride=stride, + padding=padding, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + name=name + '.conv2d.output.1') + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + + scale = fluid.layers.create_parameter( + shape=[conv.shape[1]], + dtype=conv.dtype, + attr=ParamAttr( + name=bn_name + '_scale', learning_rate=0.), + default_initializer=Constant(1.)) + scale.stop_gradient = True + bias = fluid.layers.create_parameter( + shape=[conv.shape[1]], + dtype=conv.dtype, + attr=ParamAttr( + bn_name + '_offset', learning_rate=0.), + default_initializer=Constant(0.)) + bias.stop_gradient = True + + elt_mul = fluid.layers.elementwise_mul(x=conv, y=scale, axis=1) + out = fluid.layers.elementwise_add(x=elt_mul, y=bias, axis=1) + if act == 'relu': + out = fluid.layers.relu(x=out) + return out + + +def shortcut(input, ch_out, stride, name): + ch_in = input.shape[1] # if args.data_format == 'NCHW' else input.shape[-1] + if ch_in != ch_out: + return conv_affine_layer(input, ch_out, 1, stride, 0, None, name=name) + else: + return input + + +def basicblock(input, ch_out, stride, name): + short = shortcut(input, ch_out, stride, name=name) + conv1 = conv_affine_layer(input, ch_out, 3, stride, 1, name=name) + conv2 = conv_affine_layer(conv1, ch_out, 3, 1, 1, act=None, name=name) + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu', name=name) + + +def bottleneck(input, ch_out, stride, name): + short = shortcut(input, ch_out * 4, stride, name=name + "_branch1") + conv1 = conv_affine_layer( + input, ch_out, 1, stride, 0, name=name + "_branch2a") + conv2 = conv_affine_layer(conv1, ch_out, 3, 1, 1, name=name + "_branch2b") + conv3 = conv_affine_layer( + conv2, ch_out * 4, 1, 1, 0, act=None, name=name + "_branch2c") + return fluid.layers.elementwise_add( + x=short, y=conv3, act='relu', name=name + ".add.output.5") + + +def layer_warp(block_func, input, ch_out, count, stride, name): + res_out = block_func(input, ch_out, stride, name=name + "a") + for i in range(1, count): + res_out = block_func(res_out, ch_out, 1, name=name + chr(ord("a") + i)) + return res_out + + +ResNet_cfg = { + 18: ([2, 2, 2, 1], basicblock), + 34: ([3, 4, 6, 3], basicblock), + 50: ([3, 4, 6, 3], bottleneck), + 101: ([3, 4, 23, 3], bottleneck), + 152: ([3, 8, 36, 3], bottleneck) +} + + +def add_ResNet50_conv4_body(body_input, freeze_at=2): + stages, block_func = ResNet_cfg[50] + stages = stages[0:3] + conv1 = conv_affine_layer( + body_input, ch_out=64, filter_size=7, stride=2, padding=3, name="conv1") + pool1 = fluid.layers.pool2d( + input=conv1, + pool_type='max', + pool_size=3, + pool_stride=2, + pool_padding=1) + res2 = layer_warp(block_func, pool1, 64, stages[0], 1, name="res2") + if freeze_at == 2: + res2.stop_gradient = True + res3 = layer_warp(block_func, res2, 128, stages[1], 2, name="res3") + if freeze_at == 3: + res3.stop_gradient = True + res4 = layer_warp(block_func, res3, 256, stages[2], 2, name="res4") + if freeze_at == 4: + res4.stop_gradient = True + return res4 + + +def add_ResNet_roi_conv5_head(head_input): + res5 = layer_warp(bottleneck, head_input, 512, 3, 2, name="res5") + res5_pool = fluid.layers.pool2d( + res5, pool_type='avg', pool_size=7, name='res5_pool') + return res5_pool diff --git a/fluid/faster_rcnn/profile.py b/fluid/faster_rcnn/profile.py new file mode 100644 index 00000000..38810ab9 --- /dev/null +++ b/fluid/faster_rcnn/profile.py @@ -0,0 +1,187 @@ +import os +import time +import numpy as np +import argparse +import functools +import shutil +import cPickle +from utility import add_arguments, print_arguments + +import paddle +import paddle.fluid as fluid +import reader +import paddle.fluid.profiler as profiler + +import models.model_builder as model_builder +import models.resnet as resnet +from learning_rate import exponential_with_warmup_decay + +parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable +# ENV +add_arg('parallel', bool, True, "Minibatch size.") +add_arg('use_gpu', bool, True, "Whether use GPU.") +add_arg('model_save_dir', str, 'model', "The path to save model.") +add_arg('pretrained_model', str, 'imagenet_resnet50_fusebn', "The init model path.") +add_arg('dataset', str, 'coco2017', "coco2014, coco2017, and pascalvoc.") +add_arg('data_dir', str, 'data/COCO17', "data directory") +add_arg('skip_reader', bool, False, "Whether to skip data reader.") +add_arg('use_profile', bool, False, "Whether to use profiler tool.") +add_arg('class_num', int, 81, "Class number.") +add_arg('use_pyreader', bool, False, "Class number.") +# SOLVER +add_arg('learning_rate', float, 0.01, "Learning rate.") +add_arg('num_iteration', int, 10, "Epoch number.") +# RPN +add_arg('anchor_sizes', int, [32,64,128,256,512], "The size of anchors.") +add_arg('aspect_ratios', float, [0.5,1.0,2.0], "The ratio of anchors.") +add_arg('variance', float, [1.,1.,1.,1.], "The variance of anchors.") +add_arg('rpn_stride', float, 16., "Stride of the feature map that RPN is attached.") +# FAST RCNN +# TRAIN TEST +add_arg('batch_size', int, 1, "Minibatch size.") +add_arg('max_size', int, 1333, "The max resized image size.") +add_arg('scales', int, [800], "The resized image height.") +add_arg('batch_size_per_im',int, 512, "fast rcnn head batch size") +add_arg('mean_value', float, [102.9801, 115.9465, 122.7717], "pixel mean") +add_arg('debug', bool, False, "Debug mode") +#yapf: enable + +def train(cfg): + batch_size = cfg.batch_size + learning_rate = cfg.learning_rate + image_shape = [3, cfg.max_size, cfg.max_size] + num_iterations = cfg.num_iteration + + if cfg.debug: + fluid.default_startup_program().random_seed = 1000 + fluid.default_main_program().random_seed = 1000 + import random + random.seed(0) + np.random.seed(0) + + devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" + devices_num = len(devices.split(",")) + + model = model_builder.FasterRCNN( + cfg=cfg, + add_conv_body_func=resnet.add_ResNet50_conv4_body, + add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head, + use_pyreader=cfg.use_pyreader, + use_random=False) + model.build_model(image_shape) + loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss = model.loss() + loss_cls.persistable=True + loss_bbox.persistable=True + rpn_cls_loss.persistable=True + rpn_reg_loss.persistable=True + loss = loss_cls + loss_bbox + rpn_cls_loss + rpn_reg_loss + + boundaries = [120000, 160000] + values = [learning_rate, learning_rate*0.1, learning_rate*0.01] + + optimizer = fluid.optimizer.Momentum( + learning_rate=exponential_with_warmup_decay(learning_rate=learning_rate, + boundaries=boundaries, + values=values, + warmup_iter=500, + warmup_factor=1.0/3.0), + regularization=fluid.regularizer.L2Decay(0.0001), + momentum=0.9) + optimizer.minimize(loss) + + fluid.memory_optimize(fluid.default_main_program()) + + place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + if cfg.pretrained_model: + def if_exist(var): + return os.path.exists(os.path.join(cfg.pretrained_model, var.name)) + fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist) + + if cfg.parallel: + train_exe = fluid.ParallelExecutor( + use_cuda=bool(cfg.use_gpu), loss_name=loss.name) + + + if cfg.use_pyreader: + train_reader = reader.train(cfg, batch_size=1, shuffle=not cfg.debug) + py_reader = model.py_reader + py_reader.decorate_paddle_reader(train_reader) + else: + train_reader = reader.train(cfg, batch_size=cfg.batch_size, shuffle=not cfg.debug) + feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) + + fetch_list = [loss, loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss] + + def run(iterations): + reader_time = [] + run_time = [] + total_images = 0 + + for batch_id in range(iterations): + start_time = time.time() + data = train_reader().next() + end_time = time.time() + reader_time.append(end_time - start_time) + start_time = time.time() + losses = train_exe.run(fetch_list=[v.name for v in fetch_list], + feed=feeder.feed(data)) + end_time = time.time() + run_time.append(end_time - start_time) + total_images += data[0][0].shape[0] + + lr = np.array(fluid.global_scope().find_var('learning_rate').get_tensor()) + print("Batch {:d}, lr {:.6f}, loss {:.6f} ".format( + batch_id, lr[0], losses[0][0])) + return reader_time, run_time, total_images + + + def run_pyreader(iterations): + reader_time = [0] + run_time = [] + total_images = 0 + + py_reader.start() + try: + for batch_id in range(iterations): + start_time = time.time() + losses = train_exe.run(fetch_list=[v.name for v in fetch_list]) + end_time = time.time() + run_time.append(end_time - start_time) + total_images += devices_num + lr = np.array(fluid.global_scope().find_var('learning_rate').get_tensor()) + print("Batch {:d}, lr {:.6f}, loss {:.6f} ".format( + batch_id, lr[0], losses[0][0])) + except fluid.core.EOFException: + py_reader.reset() + + return reader_time, run_time, total_images + + run_func = run if not cfg.use_pyreader else run_pyreader + + # warm-up + run_func(2) + # profiling + start = time.time() + if cfg.use_profile: + with profiler.profiler('GPU', 'total', '/tmp/profile_file'): + reader_time, run_time, total_images = run(num_iterations) + else: + reader_time, run_time, total_images = run_func(num_iterations) + + end = time.time() + total_time = end - start + print("Total time: {0}, reader time: {1} s, run time: {2} s, images/s: {3}".format( + total_time, np.sum(reader_time), np.sum(run_time), total_images / total_time)) + + +if __name__ == '__main__': + args = parser.parse_args() + print_arguments(args) + + data_args = reader.Settings(args) + train(data_args) diff --git a/fluid/faster_rcnn/reader.py b/fluid/faster_rcnn/reader.py index bcaf216d..256a058f 100644 --- a/fluid/faster_rcnn/reader.py +++ b/fluid/faster_rcnn/reader.py @@ -51,7 +51,7 @@ class Settings(object): np.newaxis, np.newaxis, :].astype('float32') -def coco(settings, mode, shuffle): +def coco(settings, mode, batch_size=None, shuffle=False): if mode == 'train': settings.train_file_list = os.path.join(settings.data_dir, settings.train_file_list) @@ -70,8 +70,7 @@ def coco(settings, mode, shuffle): def reader(): if mode == "train" and shuffle: random.shuffle(roidbs) - im_out, gt_boxes_out, gt_classes_out, is_crowd_out, im_info_out = [],[],[],[],[] - lod = [0] + batch_out = [] for roidb in roidbs: im, im_scales = data_utils.get_image_blob(roidb, settings) im_height = np.round(roidb['height'] * im_scales) @@ -83,28 +82,18 @@ def coco(settings, mode, shuffle): is_crowd = roidb['is_crowd'].astype('int32') if gt_boxes.shape[0] == 0: continue - im_out.append(im) - gt_boxes_out.extend(gt_boxes) - gt_classes_out.extend(gt_classes) - is_crowd_out.extend(is_crowd) - im_info_out.append(im_info) - lod.append(lod[-1] + gt_boxes.shape[0]) - if len(im_out) == settings.batch_size: - im_out = np.array(im_out).astype('float32') - gt_boxes_out = np.array(gt_boxes_out).astype('float32') - gt_classes_out = np.array(gt_classes_out).astype('int32') - is_crowd_out = np.array(is_crowd_out).astype('int32') - im_info_out = np.array(im_info_out).astype('float32') - yield im_out, gt_boxes_out, gt_classes_out, is_crowd_out, im_info_out, lod - im_out, gt_boxes_out, gt_classes_out, is_crowd_out, im_info_out = [],[],[],[],[] - lod = [0] + + batch_out.append((im, gt_boxes, gt_classes, is_crowd, im_info)) + if len(batch_out) == batch_size: + yield batch_out + batch_out = [] return reader -def train(settings, shuffle=True): - return coco(settings, 'train', shuffle) +def train(settings, batch_size, shuffle=True): + return coco(settings, 'train', batch_size, shuffle) -def test(settings): - return coco(settings, 'test', False) +def test(settings, batch_size): + return coco(settings, 'test', batch_size, shuffle=False) diff --git a/fluid/faster_rcnn/train.py b/fluid/faster_rcnn/train.py index 90d5493c..9dd53bc0 100644 --- a/fluid/faster_rcnn/train.py +++ b/fluid/faster_rcnn/train.py @@ -10,7 +10,8 @@ from utility import add_arguments, print_arguments import paddle import paddle.fluid as fluid import reader -from fasterrcnn_model import FasterRcnn, RPNloss +import models.model_builder as model_builder +import models.resnet as resnet from learning_rate import exponential_with_warmup_decay parser = argparse.ArgumentParser(description=__doc__) @@ -19,16 +20,19 @@ add_arg = functools.partial(add_arguments, argparser=parser) # ENV add_arg('parallel', bool, True, "Minibatch size.") add_arg('use_gpu', bool, True, "Whether use GPU.") -add_arg('model_save_dir', str, 'model', "The path to save model.") +add_arg('model_save_dir', str, 'output', "The path to save model.") add_arg('pretrained_model', str, 'imagenet_resnet50_fusebn', "The init model path.") add_arg('dataset', str, 'coco2017', "coco2014, coco2017, and pascalvoc.") add_arg('data_dir', str, 'data/COCO17', "data directory") +add_arg('class_num', int, 81, "Class number.") +add_arg('use_pyreader', bool, True, "Class number.") # SOLVER add_arg('learning_rate', float, 0.01, "Learning rate.") add_arg('num_passes', int, 20, "Epoch number.") # RPN add_arg('anchor_sizes', int, [32,64,128,256,512], "The size of anchors.") add_arg('aspect_ratios', float, [0.5,1.0,2.0], "The ratio of anchors.") +add_arg('variance', float, [1.,1.,1.,1.], "The variance of anchors.") add_arg('rpn_stride', float, 16., "Stride of the feature map that RPN is attached.") # FAST RCNN # TRAIN TEST @@ -40,83 +44,35 @@ add_arg('mean_value', float, [102.9801, 115.9465, 122.7717], "pixel mean" add_arg('debug', bool, False, "Debug mode") #yapf: enable -def train(args): - num_passes = args.num_passes - batch_size = args.batch_size - learning_rate = args.learning_rate - image_shape = [3, args.max_size, args.max_size] +def train(cfg): + num_passes = cfg.num_passes + batch_size = cfg.batch_size + learning_rate = cfg.learning_rate + image_shape = [3, cfg.max_size, cfg.max_size] - - if args.debug: + if cfg.debug: fluid.default_startup_program().random_seed = 1000 fluid.default_main_program().random_seed = 1000 import random random.seed(0) np.random.seed(0) - devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" devices_num = len(devices.split(",")) - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') - gt_box = fluid.layers.data( - name='gt_box', shape=[4], dtype='float32', lod_level=1) - gt_label = fluid.layers.data( - name='gt_label', shape=[1], dtype='int32', lod_level=1) - is_crowd = fluid.layers.data( - name='is_crowd', shape = [-1], dtype='int32', lod_level=1, append_batch_size=False) - im_info = fluid.layers.data( - name='im_info', shape=[3], dtype='float32') - - - rpn_cls_score, rpn_bbox_pred, anchor, var, cls_score, bbox_pred,\ - bbox_targets, bbox_inside_weights, bbox_outside_weights, rois, \ - labels_int32 = FasterRcnn( - input=image, - depth=50, - anchor_sizes=[32,64,128,256,512], - variance=[1.,1.,1.,1.], - aspect_ratios=[0.5,1.0,2.0], - gt_box=gt_box, - is_crowd=is_crowd, - gt_label=gt_label, - im_info=im_info, - class_nums=args.class_nums, - use_random=False if args.debug else True - ) - - cls_loss, reg_loss = RPNloss(rpn_cls_score, rpn_bbox_pred, anchor, var, \ - gt_box, is_crowd, im_info, use_random=False if args.debug else True) - cls_loss.persistable=True - reg_loss.persistable=True - rpn_loss = cls_loss + reg_loss - rpn_loss.persistable=True - - labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') - labels_int64.stop_gradient = True - #loss_cls = fluid.layers.softmax_with_cross_entropy( - # logits=cls_score, - # label=labels_int64 - # ) - softmax = fluid.layers.softmax(cls_score, use_cudnn=False) - loss_cls = fluid.layers.cross_entropy(softmax, labels_int64) - loss_cls = fluid.layers.reduce_mean(loss_cls) - loss_cls.persistable=True - loss_bbox = fluid.layers.smooth_l1(x=bbox_pred, - y=bbox_targets, - inside_weight=bbox_inside_weights, - outside_weight=bbox_outside_weights, - sigma=1.0) - loss_bbox = fluid.layers.reduce_mean(loss_bbox) - loss_bbox.persistable=True - + model = model_builder.FasterRCNN( + cfg=cfg, + add_conv_body_func=resnet.add_ResNet50_conv4_body, + add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head, + use_pyreader=cfg.use_pyreader, + use_random=False) + model.build_model(image_shape) + loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss = model.loss() loss_cls.persistable=True loss_bbox.persistable=True - detection_loss = loss_cls + loss_bbox - detection_loss.persistable=True - - loss = rpn_loss + detection_loss - loss.persistable=True + rpn_cls_loss.persistable=True + rpn_reg_loss.persistable=True + loss = loss_cls + loss_bbox + rpn_cls_loss + rpn_reg_loss boundaries = [120000, 160000] values = [learning_rate, learning_rate*0.1, learning_rate*0.01] @@ -133,80 +89,84 @@ def train(args): fluid.memory_optimize(fluid.default_main_program()) - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() + place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - if args.pretrained_model: + if cfg.pretrained_model: def if_exist(var): - return os.path.exists(os.path.join(args.pretrained_model, var.name)) - fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist) + return os.path.exists(os.path.join(cfg.pretrained_model, var.name)) + fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist) - if args.parallel: + if cfg.parallel: train_exe = fluid.ParallelExecutor( - use_cuda=bool(args.use_gpu), loss_name=loss.name) + use_cuda=bool(cfg.use_gpu), loss_name=loss.name) + + + if cfg.use_pyreader: + train_reader = reader.train(cfg, batch_size=1, shuffle=not cfg.debug) + py_reader = model.py_reader + py_reader.decorate_paddle_reader(train_reader) + else: + train_reader = reader.train(cfg, batch_size=cfg.batch_size, shuffle=not cfg.debug) + feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) - train_reader = reader.train(args) def save_model(postfix): - model_path = os.path.join(args.model_save_dir, postfix) + model_path = os.path.join(cfg.model_save_dir, postfix) if os.path.isdir(model_path): shutil.rmtree(model_path) fluid.io.save_persistables(exe, model_path) - fetch_list = [loss, cls_loss, reg_loss, loss_cls, loss_bbox] + fetch_list = [loss, rpn_cls_loss, rpn_reg_loss, loss_cls, loss_bbox] - def tensor(data, place, lod=None): - t = fluid.core.LoDTensor() - t.set(data, place) - if lod: - t.set_lod(lod) - return t + def train_step_pyreader(epoc_id): + py_reader.start() + try: + start_time = time.time() + prev_start_time = start_time + every_pass_loss = [] + batch_id = 0 + while True: + prev_start_time = start_time + start_time = time.time() + losses = train_exe.run(fetch_list=[v.name for v in fetch_list]) + every_pass_loss.append(np.mean(np.array(losses[0]))) + lr = np.array(fluid.global_scope().find_var('learning_rate').get_tensor()) + print("Epoc {:d}, batch {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format( + epoc_id, batch_id, lr[0], losses[0][0], start_time - prev_start_time)) + batch_id += 1 + #print('cls_loss ', losses[1][0], ' reg_loss ', losses[2][0], ' loss_cls ', losses[3][0], ' loss_bbox ', losses[4][0]) + except fluid.core.EOFException: + py_reader.reset() + return np.mean(every_pass_loss) - total_time = 0.0 - for epoc_id in range(num_passes): + def train_step(epoc_id): start_time = time.time() prev_start_time = start_time every_pass_loss = [] - iter = 0 - pass_duration = 0.0 for batch_id, data in enumerate(train_reader()): prev_start_time = start_time start_time = time.time() - - image, gt_box, gt_label, is_crowd, im_info, lod = data - image_t = tensor(image, place) - gt_box_t = tensor(gt_box, place, [lod]) - gt_label_t = tensor(gt_label, place, [lod]) - is_crowd_t = tensor(is_crowd, place, [lod]) - im_info_t = tensor(im_info, place) - - feeding = {} - feeding['image'] = image_t - feeding['gt_box'] = gt_box_t - feeding['gt_label'] = gt_label_t - feeding['is_crowd'] = is_crowd_t - feeding['im_info'] = im_info_t - - if args.parallel: - losses = train_exe.run(fetch_list=[v.name for v in fetch_list], - feed=feeding) - else: - losses = exe.run(fluid.default_main_program(), - feed=feeding, - fetch_list=fetch_list) + losses = train_exe.run(fetch_list=[v.name for v in fetch_list], + feed=feeder.feed(data)) loss_v = np.mean(np.array(losses[0])) every_pass_loss.append(loss_v) lr = np.array(fluid.global_scope().find_var('learning_rate').get_tensor()) - if batch_id % 1 == 0: - print("Epoc {:d}, batch {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format( - epoc_id, batch_id, lr[0], losses[0][0], start_time - prev_start_time)) - #print('cls_loss ', losses[1][0], ' reg_loss ', losses[2][0], ' loss_cls ', losses[3][0], ' loss_bbox ', losses[4][0]) + print("Epoc {:d}, batch {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format( + epoc_id, batch_id, lr[0], losses[0][0], start_time - prev_start_time)) + #print('cls_loss ', losses[1][0], ' reg_loss ', losses[2][0], ' loss_cls ', losses[3][0], ' loss_bbox ', losses[4][0]) + return np.mean(every_pass_loss) - if epoc_id % 10 == 0 or epoc_id == num_passes - 1: - save_model(str(epoc_id)) + + for epoc_id in range(num_passes): + if cfg.use_pyreader: + train_step_pyreader(epoc_id) + else: + train_step(epoc_id) + save_model(str(epoc_id)) if __name__ == '__main__': args = parser.parse_args() -- GitLab