diff --git a/configs/cascade_rcnn_r50_1x.yml b/configs/cascade_rcnn_r50_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..e6e1f7d6b6a8eab3d98020390e368505164c3a52 --- /dev/null +++ b/configs/cascade_rcnn_r50_1x.yml @@ -0,0 +1,125 @@ +architecture: CascadeRCNN +use_gpu: true +max_iters: 180000 +log_smooth_window: 50 +save_dir: output +snapshot_iter: 10000 +pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams +metric: COCO +weights: output/cascade_rcnn_r50_1x/model_final +num_classes: 81 +num_stages: 3 +open_debug: False + +# Model Achitecture +CascadeRCNN: + # model anchor info flow + anchor: AnchorRPN + proposal: Proposal + mask: Mask + # model feat info flow + backbone: ResNet + rpn_head: RPNHead + bbox_head: BBoxHead + mask_head: MaskHead + +ResNet: + norm_type: 'affine' + depth: 50 + freeze_at: 'res2' + +RPNHead: + rpn_feat: + name: RPNFeat + feat_in: 1024 + feat_out: 1024 + anchor_per_position: 15 + +BBoxHead: + bbox_feat: + name: BBoxFeat + feat_in: 1024 + feat_out: 512 + roi_extractor: + resolution: 14 + sampling_ratio: 0 + spatial_scale: 0.0625 + extractor_type: 'RoIAlign' + +MaskHead: + mask_feat: + name: MaskFeat + feat_in: 2048 + feat_out: 256 + feat_in: 256 + resolution: 14 + +AnchorRPN: + anchor_generator: + name: AnchorGeneratorRPN + anchor_sizes: [32, 64, 128, 256, 512] + aspect_ratios: [0.5, 1.0, 2.0] + stride: [16.0, 16.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_target_generator: + name: AnchorTargetGeneratorRPN + batch_size_per_im: 256 + fg_fraction: 0.5 + negative_overlap: 0.3 + positive_overlap: 0.7 + straddle_thresh: 0.0 + +Proposal: + proposal_generator: + name: ProposalGenerator + min_size: 0.0 + nms_thresh: 0.7 + train_pre_nms_top_n: 2000 + train_post_nms_top_n: 2000 + infer_pre_nms_top_n: 2000 + infer_post_nms_top_n: 2000 + return_rois_num: True + proposal_target_generator: + name: ProposalTargetGenerator + batch_size_per_im: 512 + bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],[0.05, 0.05, 0.1, 0.1],[0.333333, 0.333333, 0.666666, 0.666666]] + bg_thresh_hi: [0.5, 0.6, 0.7] + bg_thresh_lo: [0.0, 0.0, 0.0] + fg_thresh: [0.5, 0.6, 0.7] + fg_fraction: 0.25 + bbox_post_process: # used in infer + name: BBoxPostProcess + # decode -> clip -> nms + decode_clip_nms: + name: DecodeClipNms + keep_top_k: 100 + score_threshold: 0.05 + nms_threshold: 0.5 + +Mask: + mask_target_generator: + name: MaskTargetGenerator + resolution: 14 + mask_post_process: + name: MaskPostProcess + +# Train +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 160000] + - !LinearWarmup + start_factor: 0.3333333333333333 + steps: 500 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +_READER_: 'mask_reader.yml' diff --git a/configs/faster_rcnn_r50_1x.yml b/configs/faster_rcnn_r50_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..d36b45abd24481990c61951028a85a3f232df628 --- /dev/null +++ b/configs/faster_rcnn_r50_1x.yml @@ -0,0 +1,107 @@ +architecture: FasterRCNN +use_gpu: true +max_iters: 180000 +log_smooth_window: 50 +save_dir: output +snapshot_iter: 10000 +pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams +metric: COCO +weights: output/faster_rcnn_r50_1x/model_final +num_classes: 81 +open_debug: False + +# Model Achitecture +FasterRCNN: + # model anchor info flow + anchor: AnchorRPN + proposal: Proposal + # model feat info flow + backbone: ResNet + rpn_head: RPNHead + bbox_head: BBoxHead + +ResNet: + depth: 50 + norm_type: 'affine' + freeze_at: 'res2' + +RPNHead: + rpn_feat: + name: RPNFeat + feat_in: 1024 + feat_out: 1024 + anchor_per_position: 15 + +BBoxHead: + bbox_feat: + name: BBoxFeat + roi_extractor: + name: RoIExtractor + resolution: 14 + sampling_ratio: 0 + spatial_scale: 0.0625 + extractor_type: 'RoIAlign' + feat_out: 512 + +AnchorRPN: + anchor_generator: + name: AnchorGeneratorRPN + anchor_sizes: [32, 64, 128, 256, 512] + aspect_ratios: [0.5, 1.0, 2.0] + stride: [16.0, 16.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_target_generator: + name: AnchorTargetGeneratorRPN + batch_size_per_im: 256 + fg_fraction: 0.5 + negative_overlap: 0.3 + positive_overlap: 0.7 + straddle_thresh: 0.0 + +Proposal: + proposal_generator: + name: ProposalGenerator + min_size: 0.0 + nms_thresh: 0.7 + train_pre_nms_top_n: 12000 + train_post_nms_top_n: 2000 + infer_pre_nms_top_n: 12000 # used in infer + infer_post_nms_top_n: 2000 # used in infer + return_rois_num: True + proposal_target_generator: + name: ProposalTargetGenerator + batch_size_per_im: 512 + bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] + bg_thresh_hi: [0.5,] + bg_thresh_lo: [0.0,] + fg_thresh: [0.5,] + fg_fraction: 0.25 + bbox_post_process: # used in infer + name: BBoxPostProcess + # decode -> clip -> nms + decode_clip_nms: + name: DecodeClipNms + keep_top_k: 100 + score_threshold: 0.05 + nms_threshold: 0.5 + +# Train +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 160000] + - !LinearWarmup + start_factor: 0.3333333333333333 + steps: 500 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +_READER_: 'faster_reader.yml' diff --git a/configs/faster_reader.yml b/configs/faster_reader.yml new file mode 100644 index 0000000000000000000000000000000000000000..e31610685534455283e9d9e6ea1edb1384ec2b78 --- /dev/null +++ b/configs/faster_reader.yml @@ -0,0 +1,95 @@ +TrainReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd'] + dataset: + !COCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: dataset/coco + sample_transforms: + - !DecodeImage + to_rgb: True + - !RandomFlipImage + prob: 0.5 + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: [0.485,0.456,0.406] + std: [0.229, 0.224,0.225] + - !ResizeImage + target_size: 800 + max_size: 1333 + interp: 1 + use_cv2: true + - !Permute + to_bgr: false + channel_first: true + batch_transforms: + - !PadBatch + pad_to_stride: 0 + use_padded_im_info: False + pad_gt: true + batch_size: 1 + shuffle: true + worker_num: 2 + use_process: false + +EvalReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'im_shape'] + dataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: dataset/coco + sample_transforms: + - !DecodeImage + to_rgb: true + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: [0.485,0.456,0.406] + std: [0.229, 0.224,0.225] + - !ResizeImage + interp: 1 + max_size: 1333 + target_size: 800 + use_cv2: true + - !Permute + channel_first: true + to_bgr: false + batch_transforms: + - !PadBatch + pad_to_stride: 32 + use_padded_im_info: false + pad_gt: True + batch_size: 2 + shuffle: false + drop_empty: false + worker_num: 2 + +TestReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'im_shape'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + sample_transforms: + - !DecodeImage + to_rgb: true + with_mixup: false + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: [0.485,0.456,0.406] + std: [0.229, 0.224,0.225] + - !ResizeImage + interp: 1 + max_size: 1333 + target_size: 800 + use_cv2: true + - !Permute + channel_first: true + to_bgr: false + batch_size: 1 + shuffle: false diff --git a/configs/mask_rcnn_r50_1x.yml b/configs/mask_rcnn_r50_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..7f089140aeda88218669ab10f3b5dddc67be24d1 --- /dev/null +++ b/configs/mask_rcnn_r50_1x.yml @@ -0,0 +1,127 @@ +architecture: MaskRCNN +use_gpu: true +max_iters: 180000 +log_smooth_window: 50 +save_dir: output +snapshot_iter: 10000 +pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams +metric: COCO +weights: output/mask_rcnn_r50_1x/model_final +num_classes: 81 +open_debug: False + +# Model Achitecture +MaskRCNN: + # model anchor info flow + anchor: AnchorRPN + proposal: Proposal + mask: Mask + # model feat info flow + backbone: ResNet + rpn_head: RPNHead + bbox_head: BBoxHead + mask_head: MaskHead + +ResNet: + norm_type: 'affine' + depth: 50 + freeze_at: 'res2' + +RPNHead: + rpn_feat: + name: RPNFeat + feat_in: 1024 + feat_out: 1024 + anchor_per_position: 15 + +BBoxHead: + bbox_feat: + name: BBoxFeat + roi_extractor: + name: RoIExtractor + resolution: 14 + sampling_ratio: 0 + spatial_scale: 0.0625 + extractor_type: 'RoIAlign' + feat_in: 1024 + feat_out: 512 + +MaskHead: + mask_feat: + name: MaskFeat + feat_in: 2048 + feat_out: 256 + mask_stages: 1 + feat_in: 256 + resolution: 14 + mask_stages: 1 + +AnchorRPN: + anchor_generator: + name: AnchorGeneratorRPN + anchor_sizes: [32, 64, 128, 256, 512] + aspect_ratios: [0.5, 1.0, 2.0] + stride: [16.0, 16.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_target_generator: + name: AnchorTargetGeneratorRPN + batch_size_per_im: 256 + fg_fraction: 0.5 + negative_overlap: 0.3 + positive_overlap: 0.7 + straddle_thresh: 0.0 + +Proposal: + proposal_generator: + name: ProposalGenerator + min_size: 0.0 + nms_thresh: 0.7 + train_pre_nms_top_n: 12000 + train_post_nms_top_n: 2000 + infer_pre_nms_top_n: 12000 + infer_post_nms_top_n: 2000 + return_rois_num: True + proposal_target_generator: + name: ProposalTargetGenerator + batch_size_per_im: 512 + bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] + bg_thresh_hi: [0.5,] + bg_thresh_lo: [0.0,] + fg_thresh: [0.5,] + fg_fraction: 0.25 + bbox_post_process: # used in infer + name: BBoxPostProcess + # decode -> clip -> nms + decode_clip_nms: + name: DecodeClipNms + keep_top_k: 100 + score_threshold: 0.05 + nms_threshold: 0.5 + +Mask: + mask_target_generator: + name: MaskTargetGenerator + resolution: 14 + mask_post_process: + name: MaskPostProcess + +# Train +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 160000] + - !LinearWarmup + start_factor: 0.3333333333333333 + steps: 500 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +_READER_: 'mask_reader.yml' diff --git a/configs/mask_reader.yml b/configs/mask_reader.yml new file mode 100644 index 0000000000000000000000000000000000000000..5280abac3d10006c2b0eced7ed539c815a29fb7c --- /dev/null +++ b/configs/mask_reader.yml @@ -0,0 +1,101 @@ +TrainReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_mask'] + dataset: + !COCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: dataset/coco + sample_transforms: + - !DecodeImage + to_rgb: true + - !RandomFlipImage + prob: 0.5 + is_mask_flip: true + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: [0.485,0.456,0.406] + std: [0.229, 0.224,0.225] + - !ResizeImage + target_size: 512 + max_size: 512 + interp: 1 + use_cv2: true + - !Permute + to_bgr: false + channel_first: true + batch_transforms: + - !PadBatch + pad_to_stride: 32 + use_padded_im_info: false + pad_gt: True + batch_size: 1 + shuffle: true + worker_num: 2 + drop_last: false + use_process: false + +EvalReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'im_shape'] + # for voc + #fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult'] + dataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: dataset/coco + sample_transforms: + - !DecodeImage + to_rgb: true + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: [0.485,0.456,0.406] + std: [0.229, 0.224,0.225] + - !ResizeImage + interp: 1 + max_size: 1333 + target_size: 800 + use_cv2: true + - !Permute + channel_first: true + to_bgr: false + batch_transforms: + - !PadBatch + pad_to_stride: 32 + use_padded_im_info: false + pad_gt: True + batch_size: 1 + shuffle: false + drop_last: false + drop_empty: false + worker_num: 2 + +TestReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'im_shape'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + sample_transforms: + - !DecodeImage + to_rgb: true + with_mixup: false + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: [0.485,0.456,0.406] + std: [0.229, 0.224,0.225] + - !ResizeImage + interp: 1 + max_size: 1333 + target_size: 800 + use_cv2: true + - !Permute + channel_first: true + to_bgr: false + batch_size: 1 + shuffle: false + drop_last: false diff --git a/configs/yolov3_darknet.yml b/configs/yolov3_darknet.yml new file mode 100644 index 0000000000000000000000000000000000000000..7a1215def9fabffef2c2a4de6552636abddf27bd --- /dev/null +++ b/configs/yolov3_darknet.yml @@ -0,0 +1,75 @@ +architecture: YOLOv3 +use_gpu: true +max_iters: 500000 +log_smooth_window: 20 +save_dir: output +snapshot_iter: 10000 +metric: COCO +pretrain_weights: https://paddlemodels.bj.bcebos.com/yolo/darknet53.pdparams +weights: output/yolov3_darknet/model_final +num_classes: 80 +use_fine_grained_loss: false +open_debug: False + +YOLOv3: + anchor: AnchorYOLO + backbone: DarkNet + yolo_head: YOLOv3Head + +DarkNet: + depth: 53 + +YOLOv3Head: + yolo_feat: + name: YOLOFeat + feat_in_list: [1024, 768, 384] + anchor_per_position: 3 + +AnchorYOLO: + anchor_generator: + name: AnchorGeneratorYOLO + anchors: [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + anchor_target_generator: + name: AnchorTargetGeneratorYOLO + ignore_thresh: 0.7 + downsample_ratio: 32 + label_smooth: true + anchor_post_process: + name: BBoxPostProcessYOLO + # decode -> clip + yolo_box: + name: YOLOBox + conf_thresh: 0.005 + downsample_ratio: 32 + clip_bbox: True + nms: + name: MultiClassNMS + keep_top_k: 100 + score_threshold: 0.01 + nms_threshold: 0.45 + nms_top_k: 1000 + normalized: false + background_label: -1 + +LearningRate: + base_lr: 0.001 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 400000 + - 450000 + - !LinearWarmup + start_factor: 0. + steps: 4000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +_READER_: 'yolov3_reader.yml' diff --git a/configs/yolov3_reader.yml b/configs/yolov3_reader.yml new file mode 100644 index 0000000000000000000000000000000000000000..2a8463f1e6c2cb598ea4a55c6289f5b04b290d4a --- /dev/null +++ b/configs/yolov3_reader.yml @@ -0,0 +1,111 @@ +TrainReader: + inputs_def: + fields: ['image', 'gt_bbox', 'gt_class', 'gt_score'] + num_max_boxes: 50 + dataset: + !COCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: dataset/coco + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + with_mixup: True + - !MixupImage + alpha: 1.5 + beta: 1.5 + - !ColorDistort {} + - !RandomExpand + fill_value: [123.675, 116.28, 103.53] + - !RandomCrop {} + - !RandomFlipImage + is_normalized: false + - !NormalizeBox {} + - !PadBox + num_max_boxes: 50 + - !BboxXYXY2XYWH {} + batch_transforms: + - !RandomShape + sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608] + random_inter: True + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: True + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + # Gt2YoloTarget is only used when use_fine_grained_loss set as true, + # this operator will be deleted automatically if use_fine_grained_loss + # is set as false + - !Gt2YoloTarget + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + downsample_ratios: [32, 16, 8] + batch_size: 8 + shuffle: true + mixup_epoch: 250 + drop_last: true + worker_num: 8 + bufsize: 16 + use_process: true + + +EvalReader: + inputs_def: + fields: ['image', 'im_size', 'im_id'] + num_max_boxes: 50 + dataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: dataset/coco + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + - !ResizeImage + target_size: 608 + interp: 2 + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: True + is_channel_first: false + - !PadBox + num_max_boxes: 50 + - !Permute + to_bgr: false + channel_first: True + batch_size: 8 + drop_empty: false + worker_num: 8 + bufsize: 16 + +TestReader: + inputs_def: + image_shape: [3, 608, 608] + fields: ['image', 'im_size', 'im_id'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + - !ResizeImage + target_size: 608 + interp: 2 + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: True + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 1 diff --git a/ppdet/core/workspace.py b/ppdet/core/workspace.py index cbcdcefc450cd679466208b2fd9fc32701b38731..b151b15aae706e6033e0618f8b03dc1690948d23 100644 --- a/ppdet/core/workspace.py +++ b/ppdet/core/workspace.py @@ -199,13 +199,13 @@ def create(cls_or_name, **kwargs): config.update(kwargs) config.validate() cls = getattr(config.pymodule, name) - kwargs = {} kwargs.update(global_config[name]) # parse `shared` annoation of registered modules if getattr(config, 'shared', None): for k in config.shared: + target_key = config[k] shared_conf = config.schema[k].default assert isinstance(shared_conf, SharedConfig) @@ -225,9 +225,22 @@ def create(cls_or_name, **kwargs): # optional dependency if target_key is None: continue - # also accept dictionaries and serialized objects + if isinstance(target_key, dict) or hasattr(target_key, '__dict__'): - continue + if 'name' not in target_key.keys(): + continue + inject_name = str(target_key['name']) + if inject_name not in global_config: + raise ValueError( + "Missing injection name {} and check it's name in cfg file". + format(k)) + target = global_config[inject_name] + for i, v in target_key.items(): + if i == 'name': + continue + target[i] = v + if isinstance(target, SchemaDict): + kwargs[k] = create(inject_name) elif isinstance(target_key, str): if target_key not in global_config: raise ValueError("Missing injection config:", target_key) @@ -235,10 +248,10 @@ def create(cls_or_name, **kwargs): if isinstance(target, SchemaDict): kwargs[k] = create(target_key) elif hasattr(target, '__dict__'): # serialized object - kwargs[k] = target + kwargs[k] = new_dict else: raise ValueError("Unsupported injection type:", target_key) # prevent modification of global config values of reference types # (e.g., list, dict) from within the created module instances - kwargs = copy.deepcopy(kwargs) + #kwargs = copy.deepcopy(kwargs) return cls(**kwargs) diff --git a/ppdet/data/transform/batch_operators.py b/ppdet/data/transform/batch_operators.py index 8da0b1e3573f0c0f571211fb66997334bae92cdf..068614ab5ea32bc27ec7bb5900208332fbbcd608 100644 --- a/ppdet/data/transform/batch_operators.py +++ b/ppdet/data/transform/batch_operators.py @@ -47,10 +47,11 @@ class PadBatch(BaseOperator): height and width is divisible by `pad_to_stride`. """ - def __init__(self, pad_to_stride=0, use_padded_im_info=True): + def __init__(self, pad_to_stride=0, use_padded_im_info=True, pad_gt=False): super(PadBatch, self).__init__() self.pad_to_stride = pad_to_stride self.use_padded_im_info = use_padded_im_info + self.pad_gt = pad_gt def __call__(self, samples, context=None): """ @@ -60,9 +61,9 @@ class PadBatch(BaseOperator): coarsest_stride = self.pad_to_stride if coarsest_stride == 0: return samples + max_shape = np.array([data['image'].shape for data in samples]).max( axis=0) - if coarsest_stride > 0: max_shape[1] = int( np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride) @@ -79,6 +80,52 @@ class PadBatch(BaseOperator): data['image'] = padding_im if self.use_padded_im_info: data['im_info'][:2] = max_shape[1:3] + + if self.pad_gt: + gt_num = [] + if data['gt_poly'] is not None and len(data['gt_poly']) > 0: + pad_mask = True + else: + pad_mask = False + + if pad_mask: + poly_num = [] + poly_part_num = [] + point_num = [] + for data in samples: + gt_num.append(data['gt_bbox'].shape[0]) + if pad_mask: + poly_num.append(len(data['gt_poly'])) + for poly in data['gt_poly']: + poly_part_num.append(int(len(poly))) + for p_p in poly: + point_num.append(int(len(p_p) / 2)) + gt_num_max = max(gt_num) + gt_box_data = np.zeros([gt_num_max, 4]) + gt_class_data = np.zeros([gt_num_max]) + is_crowd_data = np.ones([gt_num_max]) + + if pad_mask: + poly_num_max = max(poly_num) + poly_part_num_max = max(poly_part_num) + point_num_max = max(point_num) + gt_masks_data = -np.ones( + [poly_num_max, poly_part_num_max, point_num_max, 2]) + + for i, data in enumerate(samples): + gt_num = data['gt_bbox'].shape[0] + gt_box_data[0:gt_num, :] = data['gt_bbox'] + gt_class_data[0:gt_num] = np.squeeze(data['gt_class']) + is_crowd_data[0:gt_num] = np.squeeze(data['is_crowd']) + if pad_mask: + for j, poly in enumerate(data['gt_poly']): + for k, p_p in enumerate(poly): + pp_np = np.array(p_p).reshape(-1, 2) + gt_masks_data[j, k, :pp_np.shape[0], :] = pp_np + data['gt_poly'] = gt_masks_data + data['gt_bbox'] = gt_box_data + data['gt_class'] = gt_class_data + data['is_crowd_data'] = is_crowd_data return samples diff --git a/ppdet/optimizer.py b/ppdet/optimizer.py index b3506e55af0f175f35d4ad77b74b0530e4722c25..2016cda13384385aeec88b667d7543090d66f24a 100644 --- a/ppdet/optimizer.py +++ b/ppdet/optimizer.py @@ -43,8 +43,7 @@ class PiecewiseDecay(object): milestones (list): steps at which to decay learning rate """ - def __init__(self, gamma=[0.1, 0.1], milestones=[60000, 80000], - values=None): + def __init__(self, gamma=[0.1, 0.01], milestones=[60000, 80000]): super(PiecewiseDecay, self).__init__() if type(gamma) is not list: self.gamma = [] @@ -53,126 +52,16 @@ class PiecewiseDecay(object): else: self.gamma = gamma self.milestones = milestones - self.values = values - def __call__(self, base_lr=None, learning_rate=None): - if self.values is not None: - return fluid.layers.piecewise_decay(self.milestones, self.values) - assert base_lr is not None, "either base LR or values should be provided" - values = [base_lr] - for g in self.gamma: - new_lr = base_lr * g - values.append(new_lr) - return fluid.layers.piecewise_decay(self.milestones, values) + def __call__(self, base_lr=None, boundary=None, value=None): + if boundary is not None: + boundary.extend(self.milestones) + if value is not None: + for i in self.gamma: + value.append(base_lr * i) -@serializable -class PolynomialDecay(object): - """ - Applies polynomial decay to the initial learning rate. - Args: - max_iter (int): The learning rate decay steps. - end_lr (float): End learning rate. - power (float): Polynomial attenuation coefficient - """ - - def __init__(self, max_iter=180000, end_lr=0.0001, power=1.0): - super(PolynomialDecay).__init__() - self.max_iter = max_iter - self.end_lr = end_lr - self.power = power - - def __call__(self, base_lr=None, learning_rate=None): - assert base_lr is not None, "either base LR or values should be provided" - lr = fluid.layers.polynomial_decay(base_lr, self.max_iter, self.end_lr, - self.power) - return lr - - -@serializable -class ExponentialDecay(object): - """ - Applies exponential decay to the learning rate. - Args: - max_iter (int): The learning rate decay steps. - decay_rate (float): The learning rate decay rate. - """ - - def __init__(self, max_iter, decay_rate): - super(ExponentialDecay).__init__() - self.max_iter = max_iter - self.decay_rate = decay_rate - - def __call__(self, base_lr=None, learning_rate=None): - assert base_lr is not None, "either base LR or values should be provided" - lr = fluid.layers.exponential_decay(base_lr, self.max_iter, - self.decay_rate) - return lr - - -@serializable -class CosineDecay(object): - """ - Cosine learning rate decay - - Args: - max_iters (float): max iterations for the training process. - if you commbine cosine decay with warmup, it is recommended that - the max_iter is much larger than the warmup iter - """ - - def __init__(self, max_iters=180000): - self.max_iters = max_iters - - def __call__(self, base_lr=None, learning_rate=None): - assert base_lr is not None, "either base LR or values should be provided" - lr = fluid.layers.cosine_decay(base_lr, 1, self.max_iters) - return lr - - -@serializable -class CosineDecayWithSkip(object): - """ - Cosine decay, with explicit support for warm up - - Args: - total_steps (int): total steps over which to apply the decay - skip_steps (int): skip some steps at the beginning, e.g., warm up - """ - - def __init__(self, total_steps, skip_steps=None): - super(CosineDecayWithSkip, self).__init__() - assert (not skip_steps or skip_steps > 0), \ - "skip steps must be greater than zero" - assert total_steps > 0, "total step must be greater than zero" - assert (not skip_steps or skip_steps < total_steps), \ - "skip steps must be smaller than total steps" - self.total_steps = total_steps - self.skip_steps = skip_steps - - def __call__(self, base_lr=None, learning_rate=None): - steps = _decay_step_counter() - total = self.total_steps - if self.skip_steps is not None: - total -= self.skip_steps - - lr = fluid.layers.tensor.create_global_var( - shape=[1], - value=base_lr, - dtype='float32', - persistable=True, - name="learning_rate") - - def decay(): - cos_lr = base_lr * .5 * (cos(steps * (math.pi / total)) + 1) - fluid.layers.tensor.assign(input=cos_lr, output=lr) - - if self.skip_steps is None: - decay() - else: - skipped = steps >= self.skip_steps - fluid.layers.cond(skipped, decay) - return lr + return fluid.dygraph.PiecewiseDecay(boundary, value, begin=0, step=1) @serializable @@ -190,14 +79,17 @@ class LinearWarmup(object): self.steps = steps self.start_factor = start_factor - def __call__(self, base_lr, learning_rate): - start_lr = base_lr * self.start_factor - - return fluid.layers.linear_lr_warmup( - learning_rate=learning_rate, - warmup_steps=self.steps, - start_lr=start_lr, - end_lr=base_lr) + def __call__(self, base_lr): + boundary = [] + value = [] + for i in range(self.steps): + alpha = i / self.steps + factor = self.start_factor * (1 - alpha) + alpha + lr = base_lr * factor + value.append(lr) + if i > 0: + boundary.append(i) + return boundary, value @register @@ -219,10 +111,12 @@ class LearningRate(object): self.schedulers = schedulers def __call__(self): - lr = None - for sched in self.schedulers: - lr = sched(self.base_lr, lr) - return lr + # TODO: split warmup & decay + # warmup + boundary, value = self.schedulers[1](self.base_lr) + # decay + decay_lr = self.schedulers[0](self.base_lr, boundary, value) + return decay_lr @register @@ -246,21 +140,24 @@ class OptimizerBuilder(): self.regularizer = regularizer self.optimizer = optimizer - def __call__(self, learning_rate): + def __call__(self, learning_rate, params=None): if self.clip_grad_by_norm is not None: fluid.clip.set_gradient_clip( clip=fluid.clip.GradientClipByGlobalNorm( clip_norm=self.clip_grad_by_norm)) + if self.regularizer: reg_type = self.regularizer['type'] + 'Decay' reg_factor = self.regularizer['factor'] regularization = getattr(regularizer, reg_type)(reg_factor) else: regularization = None + optim_args = self.optimizer.copy() optim_type = optim_args['type'] del optim_args['type'] op = getattr(optimizer, optim_type) return op(learning_rate=learning_rate, + parameter_list=params, regularization=regularization, **optim_args) diff --git a/ppdet/utils/checkpoint.py b/ppdet/utils/checkpoint.py index 42fe8194d33450c9f8983ff6e541f14baf7ec392..3ee6c328a1024b109d6cc77b46ac28cd52082538 100644 --- a/ppdet/utils/checkpoint.py +++ b/ppdet/utils/checkpoint.py @@ -1,303 +1,87 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals -import errno import os -import shutil -import tempfile import time -import numpy as np import re +import numpy as np import paddle.fluid as fluid - from .download import get_weights_path -import logging -logger = logging.getLogger(__name__) - -__all__ = [ - 'load_checkpoint', - 'load_and_fusebn', - 'load_params', - 'save', -] - - -def is_url(path): - """ - Whether path is URL. - Args: - path (string): URL string or not. - """ - return path.startswith('http://') or path.startswith('https://') - -def _get_weight_path(path): - env = os.environ - if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env: - trainer_id = int(env['PADDLE_TRAINER_ID']) - num_trainers = int(env['PADDLE_TRAINERS_NUM']) - if num_trainers <= 1: - path = get_weights_path(path) +def get_ckpt_path(path): + if path.startswith('http://') or path.startswith('https://'): + env = os.environ + if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env: + trainer_id = int(env['PADDLE_TRAINER_ID']) + num_trainers = int(env['PADDLE_TRAINERS_NUM']) + if num_trainers <= 1: + path = get_weights_path(path) + else: + from ppdet.utils.download import map_path, WEIGHTS_HOME + weight_path = map_path(path, WEIGHTS_HOME) + lock_path = weight_path + '.lock' + if not os.path.exists(weight_path): + try: + os.makedirs(os.path.dirname(weight_path)) + except OSError as e: + if e.errno != errno.EEXIST: + raise + with open(lock_path, 'w'): # touch + os.utime(lock_path, None) + if trainer_id == 0: + get_weights_path(path) + os.remove(lock_path) + else: + while os.path.exists(lock_path): + time.sleep(1) + path = weight_path else: - from ppdet.utils.download import map_path, WEIGHTS_HOME - weight_path = map_path(path, WEIGHTS_HOME) - lock_path = weight_path + '.lock' - if not os.path.exists(weight_path): - try: - os.makedirs(os.path.dirname(weight_path)) - except OSError as e: - if e.errno != errno.EEXIST: - raise - with open(lock_path, 'w'): # touch - os.utime(lock_path, None) - if trainer_id == 0: - get_weights_path(path) - os.remove(lock_path) - else: - while os.path.exists(lock_path): - time.sleep(1) - path = weight_path - else: - path = get_weights_path(path) - return path - - -def _load_state(path): - if os.path.exists(path + '.pdopt'): - # XXX another hack to ignore the optimizer state - tmp = tempfile.mkdtemp() - dst = os.path.join(tmp, os.path.basename(os.path.normpath(path))) - shutil.copy(path + '.pdparams', dst + '.pdparams') - state = fluid.io.load_program_state(dst) - shutil.rmtree(tmp) - else: - state = fluid.io.load_program_state(path) - return state - + path = get_weights_path(path) -def _strip_postfix(path): - path, ext = os.path.splitext(path) - assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \ - "Unknown postfix {} from weights".format(ext) return path -def load_params(exe, prog, path, ignore_params=[]): - """ - Load model from the given path. - Args: - exe (fluid.Executor): The fluid.Executor object. - prog (fluid.Program): load weight to which Program object. - path (string): URL string or loca model path. - ignore_params (list): ignore variable to load when finetuning. - It can be specified by finetune_exclude_pretrained_params - and the usage can refer to docs/advanced_tutorials/TRANSFER_LEARNING.md - """ - - if is_url(path): - path = _get_weight_path(path) - - path = _strip_postfix(path) - if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')): - raise ValueError("Model pretrain path {} does not " - "exists.".format(path)) - - logger.debug('Loading parameters from {}...'.format(path)) - - ignore_set = set() - state = _load_state(path) - - # ignore the parameter which mismatch the shape - # between the model and pretrain weight. - all_var_shape = {} - for block in prog.blocks: - for param in block.all_parameters(): - all_var_shape[param.name] = param.shape - ignore_set.update([ - name for name, shape in all_var_shape.items() - if name in state and shape != state[name].shape - ]) - - if ignore_params: - all_var_names = [var.name for var in prog.list_vars()] - ignore_list = filter( - lambda var: any([re.match(name, var) for name in ignore_params]), - all_var_names) - ignore_set.update(list(ignore_list)) - - if len(ignore_set) > 0: - for k in ignore_set: - if k in state: - logger.warning('variable {} not used'.format(k)) - del state[k] - fluid.io.set_program_state(prog, state) - - -def load_checkpoint(exe, prog, path): - """ - Load model from the given path. - Args: - exe (fluid.Executor): The fluid.Executor object. - prog (fluid.Program): load weight to which Program object. - path (string): URL string or loca model path. - """ - if is_url(path): - path = _get_weight_path(path) - - path = _strip_postfix(path) - if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')): - raise ValueError("Model pretrain path {} does not " - "exists.".format(path)) - fluid.load(prog, path, executor=exe) - - -def global_step(scope=None): - """ - Load global step in scope. - Args: - scope (fluid.Scope): load global step from which scope. If None, - from default global_scope(). - - Returns: - global step: int. - """ - if scope is None: - scope = fluid.global_scope() - v = scope.find_var('@LR_DECAY_COUNTER@') - step = np.array(v.get_tensor())[0] if v else 0 - return step - - -def save(exe, prog, path): - """ - Load model from the given path. - Args: - exe (fluid.Executor): The fluid.Executor object. - prog (fluid.Program): save weight from which Program object. - path (string): the path to save model. - """ - if os.path.isdir(path): - shutil.rmtree(path) - logger.info('Save model to {}.'.format(path)) - fluid.save(prog, path) - - -def load_and_fusebn(exe, prog, path): - """ - Fuse params of batch norm to scale and bias. - - Args: - exe (fluid.Executor): The fluid.Executor object. - prog (fluid.Program): save weight from which Program object. - path (string): the path to save model. - """ - logger.debug('Load model and fuse batch norm if have from {}...'.format( - path)) - - if is_url(path): - path = _get_weight_path(path) - - if not os.path.exists(path): - raise ValueError("Model path {} does not exists.".format(path)) - - # Since the program uses affine-channel, there is no running mean and var - # in the program, here append running mean and var. - # NOTE, the params of batch norm should be like: - # x_scale - # x_offset - # x_mean - # x_variance - # x is any prefix - mean_variances = set() - bn_vars = [] - state = _load_state(path) - - def check_mean_and_bias(prefix): - m = prefix + 'mean' - v = prefix + 'variance' - return v in state and m in state - - has_mean_bias = True - - with fluid.program_guard(prog, fluid.Program()): - for block in prog.blocks: - ops = list(block.ops) - if not has_mean_bias: - break - for op in ops: - if op.type == 'affine_channel': - # remove 'scale' as prefix - scale_name = op.input('Scale')[0] # _scale - bias_name = op.input('Bias')[0] # _offset - prefix = scale_name[:-5] - mean_name = prefix + 'mean' - variance_name = prefix + 'variance' - if not check_mean_and_bias(prefix): - has_mean_bias = False - break - - bias = block.var(bias_name) - - mean_vb = block.create_var( - name=mean_name, - type=bias.type, - shape=bias.shape, - dtype=bias.dtype) - variance_vb = block.create_var( - name=variance_name, - type=bias.type, - shape=bias.shape, - dtype=bias.dtype) - - mean_variances.add(mean_vb) - mean_variances.add(variance_vb) - - bn_vars.append( - [scale_name, bias_name, mean_name, variance_name]) - - if not has_mean_bias: - fluid.io.set_program_state(prog, state) - logger.warning( - "There is no paramters of batch norm in model {}. " - "Skip to fuse batch norm. And load paramters done.".format(path)) - return - - fluid.load(prog, path, exe) - eps = 1e-5 - for names in bn_vars: - scale_name, bias_name, mean_name, var_name = names - - scale = fluid.global_scope().find_var(scale_name).get_tensor() - bias = fluid.global_scope().find_var(bias_name).get_tensor() - mean = fluid.global_scope().find_var(mean_name).get_tensor() - var = fluid.global_scope().find_var(var_name).get_tensor() - - scale_arr = np.array(scale) - bias_arr = np.array(bias) - mean_arr = np.array(mean) - var_arr = np.array(var) - - bn_std = np.sqrt(np.add(var_arr, eps)) - new_scale = np.float32(np.divide(scale_arr, bn_std)) - new_bias = bias_arr - mean_arr * new_scale - - # fuse to scale and bias in affine_channel - scale.set(new_scale, exe.place) - bias.set(new_bias, exe.place) +def load_dygraph_ckpt(model, + optimizer, + pretrain_ckpt=None, + ckpt=None, + ckpt_type='pretrain', + exclude_params=[], + open_debug=False): + + if ckpt_type == 'pretrain': + ckpt = pretrain_ckpt + ckpt = get_ckpt_path(ckpt) + if ckpt is not None and os.path.exists(ckpt): + param_state_dict, optim_state_dict = fluid.load_dygraph(ckpt) + if open_debug: + print("Loading Weights: ", param_state_dict.keys()) + + if len(exclude_params) != 0: + for k in exclude_params: + param_state_dict.pop(k, None) + + if ckpt_type == 'pretrain': + model.backbone.set_dict(param_state_dict) + elif ckpt_type == 'finetune': + model.set_dict(param_state_dict, use_structured_name=True) + else: + model.set_dict(param_state_dict) + + if ckpt_type == 'resume': + if optim_state_dict is None: + print("Can't Resume Last Training's Optimizer State!!!") + else: + optimizer.set_dict(optim_state_dict) + return model + + +def save_dygraph_ckpt(model, optimizer, save_dir): + if not os.path.exists(save_dir): + os.makedirs(save_dir) + fluid.dygraph.save_dygraph(model.state_dict(), save_dir) + fluid.dygraph.save_dygraph(optimizer.state_dict(), save_dir) + print("Save checkpoint:", save_dir) diff --git a/ppdet/utils/data_structure.py b/ppdet/utils/data_structure.py index 05d845c64a2978231d1b07e5fcbbbe85f7ded567..a600af32b5d810885c817b8a11f1c6aad7c2f262 100644 --- a/ppdet/utils/data_structure.py +++ b/ppdet/utils/data_structure.py @@ -35,7 +35,7 @@ class BufferDict(dict): def debug(self, dshape=True, dvalue=True, dtype=False): if self['open_debug']: - if self['debug_names'] is None: + if 'debug_names' not in self.keys(): ditems = self.keys() else: ditems = self['debug_names'] diff --git a/ppdet/utils/eval_utils.py b/ppdet/utils/eval_utils.py index 8ba53838dfa97db7d096b94bcc4652c776544235..b06accad194482477ba3085ae65721492d658694 100644 --- a/ppdet/utils/eval_utils.py +++ b/ppdet/utils/eval_utils.py @@ -1,242 +1,7 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - from __future__ import absolute_import from __future__ import division from __future__ import print_function -import logging -import numpy as np -import os -import time - -import paddle.fluid as fluid - -from .voc_eval import bbox_eval as voc_bbox_eval -from .post_process import mstest_box_post_process, mstest_mask_post_process, box_flip - -__all__ = ['parse_fetches', 'eval_run', 'eval_results', 'json_eval_results'] - -logger = logging.getLogger(__name__) - - -def parse_fetches(fetches, prog=None, extra_keys=None): - """ - Parse fetch variable infos from model fetches, - values for fetch_list and keys for stat - """ - keys, values = [], [] - cls = [] - for k, v in fetches.items(): - if hasattr(v, 'name'): - keys.append(k) - #v.persistable = True - values.append(v.name) - else: - cls.append(v) - - if prog is not None and extra_keys is not None: - for k in extra_keys: - try: - v = fluid.framework._get_var(k, prog) - keys.append(k) - values.append(v.name) - except Exception: - pass - - return keys, values, cls - - -def length2lod(length_lod): - offset_lod = [0] - for i in length_lod: - offset_lod.append(offset_lod[-1] + i) - return [offset_lod] - - -def get_sub_feed(input, place): - new_dict = {} - res_feed = {} - key_name = ['bbox', 'im_info', 'im_id', 'im_shape', 'bbox_flip'] - for k in key_name: - if k in input.keys(): - new_dict[k] = input[k] - for k in input.keys(): - if 'image' in k: - new_dict[k] = input[k] - for k, v in new_dict.items(): - data_t = fluid.LoDTensor() - data_t.set(v[0], place) - if 'bbox' in k: - lod = length2lod(v[1][0]) - data_t.set_lod(lod) - res_feed[k] = data_t - return res_feed - - -def clean_res(result, keep_name_list): - clean_result = {} - for k in result.keys(): - if k in keep_name_list: - clean_result[k] = result[k] - result.clear() - return clean_result - - -def eval_run(exe, - compile_program, - loader, - keys, - values, - cls, - cfg=None, - sub_prog=None, - sub_keys=None, - sub_values=None, - resolution=None): - """ - Run evaluation program, return program outputs. - """ - iter_id = 0 - results = [] - if len(cls) != 0: - values = [] - for i in range(len(cls)): - _, accum_map = cls[i].get_map_var() - cls[i].reset(exe) - values.append(accum_map) - - images_num = 0 - start_time = time.time() - has_bbox = 'bbox' in keys - - try: - loader.start() - while True: - outs = exe.run(compile_program, - fetch_list=values, - return_numpy=False) - res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(keys, outs) - } - multi_scale_test = getattr(cfg, 'MultiScaleTEST', None) - mask_multi_scale_test = multi_scale_test and 'Mask' in cfg.architecture - - if multi_scale_test: - post_res = mstest_box_post_process(res, multi_scale_test, - cfg.num_classes) - res.update(post_res) - if mask_multi_scale_test: - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - sub_feed = get_sub_feed(res, place) - sub_prog_outs = exe.run(sub_prog, - feed=sub_feed, - fetch_list=sub_values, - return_numpy=False) - sub_prog_res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(sub_keys, sub_prog_outs) - } - post_res = mstest_mask_post_process(sub_prog_res, cfg) - res.update(post_res) - if multi_scale_test: - res = clean_res( - res, ['im_info', 'bbox', 'im_id', 'im_shape', 'mask']) - if 'mask' in res: - from ppdet.utils.post_process import mask_encode - res['mask'] = mask_encode(res, resolution) - post_config = getattr(cfg, 'PostProcess', None) - if 'Corner' in cfg.architecture and post_config is not None: - from ppdet.utils.post_process import corner_post_process - corner_post_process(res, post_config, cfg.num_classes) - results.append(res) - if iter_id % 100 == 0: - logger.info('Test iter {}'.format(iter_id)) - iter_id += 1 - if len(res['bbox'][1]) == 0: - has_bbox = False - images_num += len(res['bbox'][1][0]) if has_bbox else 1 - except (StopIteration, fluid.core.EOFException): - loader.reset() - logger.info('Test finish iter {}'.format(iter_id)) - - end_time = time.time() - fps = images_num / (end_time - start_time) - if has_bbox: - logger.info('Total number of images: {}, inference time: {} fps.'. - format(images_num, fps)) - else: - logger.info('Total iteration: {}, inference time: {} batch/s.'.format( - images_num, fps)) - - return results - - -def eval_results(results, - metric, - num_classes, - resolution=None, - is_bbox_normalized=False, - output_directory=None, - map_type='11point', - dataset=None, - save_only=False): - """Evaluation for evaluation program results""" - box_ap_stats = [] - if metric == 'COCO': - from ppdet.utils.coco_eval import proposal_eval, bbox_eval, mask_eval - anno_file = dataset.get_anno() - with_background = dataset.with_background - if 'proposal' in results[0]: - output = 'proposal.json' - if output_directory: - output = os.path.join(output_directory, 'proposal.json') - proposal_eval(results, anno_file, output) - if 'bbox' in results[0]: - output = 'bbox.json' - if output_directory: - output = os.path.join(output_directory, 'bbox.json') - - box_ap_stats = bbox_eval( - results, - anno_file, - output, - with_background, - is_bbox_normalized=is_bbox_normalized, - save_only=save_only) - - if 'mask' in results[0]: - output = 'mask.json' - if output_directory: - output = os.path.join(output_directory, 'mask.json') - mask_eval( - results, anno_file, output, resolution, save_only=save_only) - else: - if 'accum_map' in results[-1]: - res = np.mean(results[-1]['accum_map'][0]) - logger.info('mAP: {:.2f}'.format(res * 100.)) - box_ap_stats.append(res * 100.) - elif 'bbox' in results[0]: - box_ap = voc_bbox_eval( - results, - num_classes, - is_bbox_normalized=is_bbox_normalized, - map_type=map_type) - box_ap_stats.append(box_ap) - return box_ap_stats - def json_eval_results(metric, json_directory=None, dataset=None): """ @@ -259,3 +24,51 @@ def json_eval_results(metric, json_directory=None, dataset=None): cocoapi_eval(v_json, coco_eval_style[i], anno_file=anno_file) else: logger.info("{} not exists!".format(v_json)) + + +def coco_eval_results(outs_res=None, + include_mask=False, + batch_size=1, + dataset=None): + print("start evaluate bbox using coco api") + import io + import six + import json + from pycocotools.coco import COCO + from pycocotools.cocoeval import COCOeval + from ppdet.py_op.post_process import get_det_res, get_seg_res + anno_file = os.path.join(dataset.dataset_dir, dataset.anno_path) + cocoGt = COCO(anno_file) + catid = {i + 1: v for i, v in enumerate(cocoGt.getCatIds())} + + if outs_res is not None and len(outs_res) > 0: + det_res = [] + for outs in outs_res: + det_res += get_det_res(outs['bbox_nums'], outs['bbox'], + outs['im_id'], outs['im_shape'], catid, + batch_size) + + with io.open("bbox_eval.json", 'w') as outfile: + encode_func = unicode if six.PY2 else str + outfile.write(encode_func(json.dumps(det_res))) + + cocoDt = cocoGt.loadRes("bbox_eval.json") + cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + if outs_res is not None and len(outs_res) > 0 and include_mask: + seg_res = [] + for outs in outs_res: + seg_res += get_seg_res(outs['bbox_nums'], outs['mask'], + outs['im_id'], catid, batch_size) + + with io.open("mask_eval.json", 'w') as outfile: + encode_func = unicode if six.PY2 else str + outfile.write(encode_func(json.dumps(seg_res))) + + cocoSg = cocoGt.loadRes("mask_eval.json") + cocoEval = COCOeval(cocoGt, cocoSg, 'bbox') + cocoEval.evaluate() + cocoEval.accumulate() diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tools/eval.py b/tools/eval.py new file mode 100755 index 0000000000000000000000000000000000000000..bec9b8fc0fc2043689e05886ca940cf33ad3f11f --- /dev/null +++ b/tools/eval.py @@ -0,0 +1,93 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import os +import time +# ignore numba warning +import warnings +warnings.filterwarnings('ignore') +import random +import numpy as np +import paddle.fluid as fluid +from ppdet.core.workspace import load_config, merge_config, create +from ppdet.utils.check import check_gpu, check_version, check_config +from ppdet.utils.cli import ArgsParser +from ppdet.utils.eval_utils import coco_eval_results +from ppdet.data.reader import create_reader + + +def parse_args(): + parser = ArgsParser() + parser.add_argument( + "--output_eval", + default=None, + type=str, + help="Evaluation directory, default is current directory.") + + parser.add_argument( + '--json_eval', action='store_true', default=False, help='') + + parser.add_argument( + '--use_gpu', action='store_true', default=False, help='') + + args = parser.parse_args() + return args + + +def run(FLAGS, cfg): + + # Model + main_arch = cfg.architecture + model = create(cfg.architecture, mode='infer', open_debug=cfg.open_debug) + + # Init Model + if os.path.isfile(cfg.weights): + param_state_dict, opti_state_dict = fluid.load_dygraph(cfg.weights) + model.set_dict(param_state_dict) + + # Data Reader + if FLAGS.use_gpu: + devices_num = 1 + else: + devices_num = int(os.environ.get('CPU_NUM', 1)) + eval_reader = create_reader(cfg.EvalReader, devices_num=devices_num) + + # Run Eval + outs_res = [] + for iter_id, data in enumerate(eval_reader()): + start_time = time.time() + + # forward + model.eval() + outs = model(data, cfg['EvalReader']['inputs_def']['fields']) + outs_res.append(outs) + + # log + cost_time = time.time() - start_time + print("Eval iter: {}, time: {}".format(iter_id, cost_time)) + + # Metric + coco_eval_results( + outs_res, + include_mask=True if 'MaskHed' in cfg else False, + dataset=cfg['EvalReader']['dataset']) + + +def main(): + FLAGS = parse_args() + + cfg = load_config(FLAGS.config) + merge_config(FLAGS.opt) + check_config(cfg) + check_gpu(cfg.use_gpu) + check_version() + + place = fluid.CUDAPlace(fluid.dygraph.parallel.Env() + .dev_id) if cfg.use_gpu else fluid.CPUPlace() + + with fluid.dygraph.guard(place): + run(FLAGS, cfg) + + +if __name__ == '__main__': + main() diff --git a/tools/train.py b/tools/train.py new file mode 100755 index 0000000000000000000000000000000000000000..3c1865ede657b92a19ffa6968c2ce6dbfb5e9b98 --- /dev/null +++ b/tools/train.py @@ -0,0 +1,198 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import os +import time +# ignore numba warning +import warnings +warnings.filterwarnings('ignore') +import random +import numpy as np +import paddle.fluid as fluid +from ppdet.core.workspace import load_config, merge_config, create +from ppdet.data.reader import create_reader +from ppdet.utils.check import check_gpu, check_version, check_config +from ppdet.utils.cli import ArgsParser +from ppdet.utils.checkpoint import load_dygraph_ckpt, save_dygraph_ckpt + + +def parse_args(): + parser = ArgsParser() + parser.add_argument( + "-ckpt_type", + default='pretrain', + type=str, + help="Loading Checkpoints only support 'pretrain', 'finetune', 'resume'." + ) + + parser.add_argument( + "--fp16", + action='store_true', + default=False, + help="Enable mixed precision training.") + parser.add_argument( + "--loss_scale", + default=8., + type=float, + help="Mixed precision training loss scale.") + parser.add_argument( + "--eval", + action='store_true', + default=False, + help="Whether to perform evaluation in train") + parser.add_argument( + "--output_eval", + default=None, + type=str, + help="Evaluation directory, default is current directory.") + parser.add_argument( + "--use_tb", + type=bool, + default=False, + help="whether to record the data to Tensorboard.") + parser.add_argument( + '--tb_log_dir', + type=str, + default="tb_log_dir/scalar", + help='Tensorboard logging directory for scalar.') + parser.add_argument( + "--enable_ce", + type=bool, + default=False, + help="If set True, enable continuous evaluation job." + "This flag is only used for internal test.") + parser.add_argument( + "--use_gpu", action='store_true', default=False, help="data parallel") + parser.add_argument( + "--use_parallel", + action='store_true', + default=False, + help="data parallel") + + parser.add_argument( + '--is_profiler', + type=int, + default=0, + help='The switch of profiler tools. (used for benchmark)') + + args = parser.parse_args() + return args + + +def run(FLAGS, cfg): + env = os.environ + FLAGS.dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env + if FLAGS.dist: + trainer_id = int(env['PADDLE_TRAINER_ID']) + local_seed = (99 + trainer_id) + random.seed(local_seed) + np.random.seed(local_seed) + + if FLAGS.enable_ce or cfg.open_debug: + fluid.default_startup_program().random_seed = 1000 + fluid.default_main_program().random_seed = 1000 + random.seed(0) + np.random.seed(0) + + if FLAGS.use_parallel: + strategy = fluid.dygraph.parallel.prepare_context() + parallel_log = "Note: use parallel " + + # Model + main_arch = cfg.architecture + model = create(cfg.architecture, mode='train', open_debug=cfg.open_debug) + + # Parallel Model + if FLAGS.use_parallel: + #strategy = fluid.dygraph.parallel.prepare_context() + model = fluid.dygraph.parallel.DataParallel(model, strategy) + parallel_log += "with data parallel!" + print(parallel_log) + + # Optimizer + lr = create('LearningRate')() + optimizer = create('OptimizerBuilder')(lr, model.parameters()) + + # Init Model & Optimzer + model = load_dygraph_ckpt( + model, + optimizer, + cfg.pretrain_weights, + cfg.weights, + FLAGS.ckpt_type, + open_debug=cfg.open_debug) + + # Data Reader + start_iter = 0 + if cfg.use_gpu: + devices_num = fluid.core.get_cuda_device_count( + ) if FLAGS.use_parallel else 1 + else: + devices_num = int(os.environ.get('CPU_NUM', 1)) + + train_reader = create_reader( + cfg.TrainReader, (cfg.max_iters - start_iter) * devices_num, + cfg, + devices_num=devices_num) + + # Run Train + for iter_id, data in enumerate(train_reader()): + start_time = time.time() + + # Model Forward + model.train() + outputs = model(data, cfg['TrainReader']['inputs_def']['fields']) + + # Model Backward + loss = outputs['loss'] + if FLAGS.use_parallel: + loss = model.scale_loss(loss) + loss.backward() + model.apply_collective_grads() + else: + loss.backward() + optimizer.minimize(loss) + model.clear_gradients() + + # Log state + cost_time = time.time() - start_time + # TODO: check this method + curr_lr = optimizer.current_step_lr() + log_info = "iter: {}, time: {:.4f}, lr: {:.6f}".format( + iter_id, cost_time, curr_lr) + for k, v in outputs.items(): + log_info += ", {}: {:.6f}".format(k, v.numpy()[0]) + print(log_info) + + # Debug + if cfg.open_debug and iter_id > 10: + break + + # Save Stage + if iter_id > 0 and iter_id % int(cfg.snapshot_iter) == 0: + cfg_name = os.path.basename(FLAGS.config).split('.')[0] + save_name = str( + iter_id) if iter_id != cfg.max_iters - 1 else "model_final" + save_dir = os.path.join(cfg.save_dir, cfg_name, save_name) + save_dygraph_ckpt(model, optimizer, save_dir) + + +def main(): + FLAGS = parse_args() + + cfg = load_config(FLAGS.config) + merge_config(FLAGS.opt) + check_config(cfg) + check_gpu(cfg.use_gpu) + check_version() + + place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \ + if FLAGS.use_parallel else fluid.CUDAPlace(0) \ + if cfg.use_gpu else fluid.CPUPlace() + + with fluid.dygraph.guard(place): + run(FLAGS, cfg) + + +if __name__ == "__main__": + main()