diff --git a/configs/_base_/datasets/coco.yml b/configs/_base_/datasets/coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..6f8af073925f10f94c64cad83b7876f17553a226 --- /dev/null +++ b/configs/_base_/datasets/coco.yml @@ -0,0 +1,18 @@ +metric: COCO +num_classes: 80 + +TrainDataset: + !COCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: dataset/coco + +EvalDataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: dataset/coco + +TestDataset: + !ImageFolder + anno_path: annotations/instances_val2017.json diff --git a/configs/mask_rcnn_r50_fpn_1x.yml b/configs/_base_/models/mask_rcnn_r50_fpn.yml similarity index 84% rename from configs/mask_rcnn_r50_fpn_1x.yml rename to configs/_base_/models/mask_rcnn_r50_fpn.yml index 26bb1df9ddacd506acc9c359caf8d96c74e92ee6..35001d6ec84793fe096f3def784782cfde3f4eaa 100644 --- a/configs/mask_rcnn_r50_fpn_1x.yml +++ b/configs/_base_/models/mask_rcnn_r50_fpn.yml @@ -1,13 +1,6 @@ architecture: MaskRCNN -use_gpu: true -max_iters: 180000 -log_iter: 20 -save_dir: output -snapshot_iter: 10000 pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO weights: output/mask_rcnn_r50_fpn_1x/model_final -num_classes: 81 load_static_weights: True # Model Achitecture @@ -126,25 +119,3 @@ MaskHead: MaskPostProcess: mask_resolution: 28 - - -# Train -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -_READER_: 'mask_reader.yml' diff --git a/configs/yolov3_darknet.yml b/configs/_base_/models/yolov3_darknet53.yml similarity index 68% rename from configs/yolov3_darknet.yml rename to configs/_base_/models/yolov3_darknet53.yml index a7a3e7bc24082421128bd5c281827999c91784e5..376bb618b6bf65c230bbfe36767b4246e3267ad3 100644 --- a/configs/yolov3_darknet.yml +++ b/configs/_base_/models/yolov3_darknet53.yml @@ -1,13 +1,6 @@ architecture: YOLOv3 -use_gpu: true -max_iters: 500000 -log_iter: 20 -save_dir: output -snapshot_iter: 50000 -metric: COCO pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar weights: output/yolov3_darknet/model_final -num_classes: 80 use_fine_grained_loss: false load_static_weights: True @@ -48,25 +41,3 @@ BBoxPostProcess: nms_top_k: 1000 normalized: false background_label: -1 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 400000 - - 450000 - - !LinearWarmup - start_factor: 0. 
- steps: 4000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -_READER_: 'yolov3_reader.yml' diff --git a/configs/_base_/optimizers/rcnn_1x.yml b/configs/_base_/optimizers/rcnn_1x.yml new file mode 100644 index 0000000000000000000000000000000000000000..40d6ee63e53543d049305bcba4325280e1b5822d --- /dev/null +++ b/configs/_base_/optimizers/rcnn_1x.yml @@ -0,0 +1,19 @@ +max_iters: 180000 + +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 160000] + - !LinearWarmup + start_factor: 0.3333333333333333 + steps: 500 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 diff --git a/configs/_base_/optimizers/yolov3_270e.yml b/configs/_base_/optimizers/yolov3_270e.yml new file mode 100644 index 0000000000000000000000000000000000000000..2a49a0f0cd9c47c39914c8d107308de8f53a984c --- /dev/null +++ b/configs/_base_/optimizers/yolov3_270e.yml @@ -0,0 +1,21 @@ +max_iters: 500000 + +LearningRate: + base_lr: 0.001 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 400000 + - 450000 + - !LinearWarmup + start_factor: 0. + steps: 4000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 diff --git a/configs/faster_reader.yml b/configs/_base_/readers/faster_reader.yml similarity index 82% rename from configs/faster_reader.yml rename to configs/_base_/readers/faster_reader.yml index e31610685534455283e9d9e6ea1edb1384ec2b78..0a5b1b53d0e5b7f317dcac0e028d93ddd6371ec3 100644 --- a/configs/faster_reader.yml +++ b/configs/_base_/readers/faster_reader.yml @@ -1,11 +1,6 @@ TrainReader: inputs_def: fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd'] - dataset: - !COCODataSet - image_dir: train2017 - anno_path: annotations/instances_train2017.json - dataset_dir: dataset/coco sample_transforms: - !DecodeImage to_rgb: True @@ -37,11 +32,6 @@ TrainReader: EvalReader: inputs_def: fields: ['image', 'im_info', 'im_id', 'im_shape'] - dataset: - !COCODataSet - image_dir: val2017 - anno_path: annotations/instances_val2017.json - dataset_dir: dataset/coco sample_transforms: - !DecodeImage to_rgb: true @@ -71,9 +61,6 @@ EvalReader: TestReader: inputs_def: fields: ['image', 'im_info', 'im_id', 'im_shape'] - dataset: - !ImageFolder - anno_path: annotations/instances_val2017.json sample_transforms: - !DecodeImage to_rgb: true diff --git a/configs/mask_reader.yml b/configs/_base_/readers/mask_reader.yml similarity index 83% rename from configs/mask_reader.yml rename to configs/_base_/readers/mask_reader.yml index c5c486965ed09690b715406a7ff0f061c022787d..c690a34f453019244700fa322059cc464e2225d0 100644 --- a/configs/mask_reader.yml +++ b/configs/_base_/readers/mask_reader.yml @@ -1,11 +1,6 @@ TrainReader: inputs_def: fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_mask'] - dataset: - !COCODataSet - image_dir: train2017 - anno_path: annotations/instances_train2017.json - dataset_dir: dataset/coco sample_transforms: - !DecodeImage to_rgb: true @@ -39,11 +34,6 @@ TrainReader: EvalReader: inputs_def: fields: ['image', 'im_info', 'im_id', 'im_shape'] - dataset: - !COCODataSet - image_dir: val2017 - anno_path: annotations/instances_val2017.json - dataset_dir: dataset/coco sample_transforms: - !DecodeImage to_rgb: true @@ -74,9 +64,6 @@ EvalReader: TestReader: inputs_def: fields: ['image', 'im_info', 'im_id', 'im_shape'] - dataset: - !ImageFolder - anno_path: 
annotations/instances_val2017.json sample_transforms: - !DecodeImage to_rgb: true diff --git a/configs/yolov3_reader.yml b/configs/_base_/readers/yolov3_reader.yml similarity index 83% rename from configs/yolov3_reader.yml rename to configs/_base_/readers/yolov3_reader.yml index 5e11364b0ad831e68513f60ea68ca511e1018a8b..09ef366be893ffd45dc517d4c98825e679eb81e9 100644 --- a/configs/yolov3_reader.yml +++ b/configs/_base_/readers/yolov3_reader.yml @@ -2,12 +2,6 @@ TrainReader: inputs_def: fields: ['image', 'gt_bbox', 'gt_class', 'gt_score'] num_max_boxes: 50 - dataset: - !COCODataSet - image_dir: train2017 - anno_path: annotations/instances_train2017.json - dataset_dir: dataset/coco - with_background: false sample_transforms: - !DecodeImage to_rgb: True @@ -59,12 +53,6 @@ EvalReader: inputs_def: fields: ['image', 'im_size', 'im_id'] num_max_boxes: 50 - dataset: - !COCODataSet - image_dir: val2017 - anno_path: annotations/instances_val2017.json - dataset_dir: dataset/coco - with_background: false sample_transforms: - !DecodeImage to_rgb: True @@ -90,10 +78,6 @@ TestReader: inputs_def: image_shape: [3, 608, 608] fields: ['image', 'im_size', 'im_id'] - dataset: - !ImageFolder - anno_path: annotations/instances_val2017.json - with_background: false sample_transforms: - !DecodeImage to_rgb: True diff --git a/configs/_base_/runtime.yml b/configs/_base_/runtime.yml new file mode 100644 index 0000000000000000000000000000000000000000..ac0a9960aaf9283fd8176c3ac30ad6bf1f0f252b --- /dev/null +++ b/configs/_base_/runtime.yml @@ -0,0 +1,4 @@ +use_gpu: true +log_iter: 50 +save_dir: output +snapshot_iter: 10000 diff --git a/configs/cascade_rcnn_r50_1x.yml b/configs/cascade_rcnn_r50_1x.yml deleted file mode 100644 index fd7219a5350eb8cdc420318822e1fe6f72f29a77..0000000000000000000000000000000000000000 --- a/configs/cascade_rcnn_r50_1x.yml +++ /dev/null @@ -1,125 +0,0 @@ -architecture: CascadeRCNN -use_gpu: true -max_iters: 180000 -log_iter: 50 -save_dir: output -snapshot_iter: 10000 -pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams -metric: COCO -weights: output/cascade_rcnn_r50_1x/model_final -num_classes: 81 -num_stages: 3 -open_debug: False - -# Model Achitecture -CascadeRCNN: - # model anchor info flow - anchor: AnchorRPN - proposal: Proposal - mask: Mask - # model feat info flow - backbone: ResNet - rpn_head: RPNHead - bbox_head: BBoxHead - mask_head: MaskHead - -ResNet: - norm_type: 'affine' - depth: 50 - freeze_at: 'res2' - -RPNHead: - rpn_feat: - name: RPNFeat - feat_in: 1024 - feat_out: 1024 - anchor_per_position: 15 - -BBoxHead: - bbox_feat: - name: BBoxFeat - feat_in: 1024 - feat_out: 512 - roi_extractor: - resolution: 14 - sampling_ratio: 0 - spatial_scale: 0.0625 - extractor_type: 'RoIAlign' - -MaskHead: - mask_feat: - name: MaskFeat - feat_in: 2048 - feat_out: 256 - feat_in: 256 - resolution: 14 - -AnchorRPN: - anchor_generator: - name: AnchorGeneratorRPN - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_target_generator: - name: AnchorTargetGeneratorRPN - batch_size_per_im: 256 - fg_fraction: 0.5 - negative_overlap: 0.3 - positive_overlap: 0.7 - straddle_thresh: 0.0 - -Proposal: - proposal_generator: - name: ProposalGenerator - min_size: 0.0 - nms_thresh: 0.7 - train_pre_nms_top_n: 2000 - train_post_nms_top_n: 2000 - infer_pre_nms_top_n: 2000 - infer_post_nms_top_n: 2000 - return_rois_num: True - proposal_target_generator: - name: ProposalTargetGenerator - 
batch_size_per_im: 512 - bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],[0.05, 0.05, 0.1, 0.1],[0.333333, 0.333333, 0.666666, 0.666666]] - bg_thresh_hi: [0.5, 0.6, 0.7] - bg_thresh_lo: [0.0, 0.0, 0.0] - fg_thresh: [0.5, 0.6, 0.7] - fg_fraction: 0.25 - bbox_post_process: # used in infer - name: BBoxPostProcess - # decode -> clip -> nms - decode_clip_nms: - name: DecodeClipNms - keep_top_k: 100 - score_threshold: 0.05 - nms_threshold: 0.5 - -Mask: - mask_target_generator: - name: MaskTargetGenerator - resolution: 14 - mask_post_process: - name: MaskPostProcess - -# Train -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -_READER_: 'mask_reader.yml' diff --git a/configs/faster_rcnn_r50_1x.yml b/configs/faster_rcnn_r50_1x.yml deleted file mode 100644 index 307a6999e4ab743b6006262dc35f02e19e6984a1..0000000000000000000000000000000000000000 --- a/configs/faster_rcnn_r50_1x.yml +++ /dev/null @@ -1,107 +0,0 @@ -architecture: FasterRCNN -use_gpu: true -max_iters: 180000 -log_iter: 50 -save_dir: output -snapshot_iter: 10000 -pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams -metric: COCO -weights: output/faster_rcnn_r50_1x/model_final -num_classes: 81 -open_debug: False - -# Model Achitecture -FasterRCNN: - # model anchor info flow - anchor: AnchorRPN - proposal: Proposal - # model feat info flow - backbone: ResNet - rpn_head: RPNHead - bbox_head: BBoxHead - -ResNet: - depth: 50 - norm_type: 'affine' - freeze_at: 'res2' - -RPNHead: - rpn_feat: - name: RPNFeat - feat_in: 1024 - feat_out: 1024 - anchor_per_position: 15 - -BBoxHead: - bbox_feat: - name: BBoxFeat - roi_extractor: - name: RoIExtractor - resolution: 14 - sampling_ratio: 0 - spatial_scale: 0.0625 - extractor_type: 'RoIAlign' - feat_out: 512 - -AnchorRPN: - anchor_generator: - name: AnchorGeneratorRPN - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_target_generator: - name: AnchorTargetGeneratorRPN - batch_size_per_im: 256 - fg_fraction: 0.5 - negative_overlap: 0.3 - positive_overlap: 0.7 - straddle_thresh: 0.0 - -Proposal: - proposal_generator: - name: ProposalGenerator - min_size: 0.0 - nms_thresh: 0.7 - train_pre_nms_top_n: 12000 - train_post_nms_top_n: 2000 - infer_pre_nms_top_n: 12000 # used in infer - infer_post_nms_top_n: 2000 # used in infer - return_rois_num: True - proposal_target_generator: - name: ProposalTargetGenerator - batch_size_per_im: 512 - bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] - bg_thresh_hi: [0.5,] - bg_thresh_lo: [0.0,] - fg_thresh: [0.5,] - fg_fraction: 0.25 - bbox_post_process: # used in infer - name: BBoxPostProcess - # decode -> clip -> nms - decode_clip_nms: - name: DecodeClipNms - keep_top_k: 100 - score_threshold: 0.05 - nms_threshold: 0.5 - -# Train -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -_READER_: 'faster_reader.yml' diff --git a/configs/mask_rcnn_r50_1x.yml b/configs/mask_rcnn_r50_1x.yml deleted file mode 100644 index 467cf73b99a971dba2d3c535ebf5062d5a2adbc0..0000000000000000000000000000000000000000 --- 
a/configs/mask_rcnn_r50_1x.yml +++ /dev/null @@ -1,127 +0,0 @@ -architecture: MaskRCNN -use_gpu: true -max_iters: 180000 -log_iter: 50 -save_dir: output -snapshot_iter: 10000 -pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams -metric: COCO -weights: output/mask_rcnn_r50_1x/model_final -num_classes: 81 -open_debug: False - -# Model Achitecture -MaskRCNN: - # model anchor info flow - anchor: AnchorRPN - proposal: Proposal - mask: Mask - # model feat info flow - backbone: ResNet - rpn_head: RPNHead - bbox_head: BBoxHead - mask_head: MaskHead - -ResNet: - norm_type: 'affine' - depth: 50 - freeze_at: 'res2' - -RPNHead: - rpn_feat: - name: RPNFeat - feat_in: 1024 - feat_out: 1024 - anchor_per_position: 15 - -BBoxHead: - bbox_feat: - name: BBoxFeat - roi_extractor: - name: RoIExtractor - resolution: 14 - sampling_ratio: 0 - spatial_scale: 0.0625 - extractor_type: 'RoIAlign' - feat_in: 1024 - feat_out: 512 - -MaskHead: - mask_feat: - name: MaskFeat - feat_in: 2048 - feat_out: 256 - mask_stages: 1 - feat_in: 256 - resolution: 14 - mask_stages: 1 - -AnchorRPN: - anchor_generator: - name: AnchorGeneratorRPN - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_target_generator: - name: AnchorTargetGeneratorRPN - batch_size_per_im: 256 - fg_fraction: 0.5 - negative_overlap: 0.3 - positive_overlap: 0.7 - straddle_thresh: 0.0 - -Proposal: - proposal_generator: - name: ProposalGenerator - min_size: 0.0 - nms_thresh: 0.7 - train_pre_nms_top_n: 12000 - train_post_nms_top_n: 2000 - infer_pre_nms_top_n: 12000 - infer_post_nms_top_n: 2000 - return_rois_num: True - proposal_target_generator: - name: ProposalTargetGenerator - batch_size_per_im: 512 - bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],] - bg_thresh_hi: [0.5,] - bg_thresh_lo: [0.0,] - fg_thresh: [0.5,] - fg_fraction: 0.25 - bbox_post_process: # used in infer - name: BBoxPostProcess - # decode -> clip -> nms - decode_clip_nms: - name: DecodeClipNms - keep_top_k: 100 - score_threshold: 0.05 - nms_threshold: 0.5 - -Mask: - mask_target_generator: - name: MaskTargetGenerator - resolution: 14 - mask_post_process: - name: MaskPostProcess - -# Train -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -_READER_: 'mask_reader.yml' diff --git a/configs/mask_rcnn_r50_fpn_1x_coco.yml b/configs/mask_rcnn_r50_fpn_1x_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..c99ec39932145c81f9e58720a18ef5351e307801 --- /dev/null +++ b/configs/mask_rcnn_r50_fpn_1x_coco.yml @@ -0,0 +1,7 @@ +_BASE_: [ + './_base_/models/mask_rcnn_r50_fpn.yml', + './_base_/optimizers/rcnn_1x.yml', + './_base_/datasets/coco.yml', + './_base_/readers/mask_reader.yml', + './_base_/runtime.yml', +] diff --git a/configs/yolov3_darknet53_270e_coco.yml b/configs/yolov3_darknet53_270e_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..b31e7e8af3be01e2e1ebc353e688e856ef595d7c --- /dev/null +++ b/configs/yolov3_darknet53_270e_coco.yml @@ -0,0 +1,7 @@ +_BASE_: [ + './_base_/models/yolov3_darknet53.yml', + './_base_/optimizers/yolov3_270e.yml', + './_base_/datasets/coco.yml', + './_base_/readers/yolov3_reader.yml', + './_base_/runtime.yml', +] diff --git a/ppdet/core/workspace.py 
b/ppdet/core/workspace.py index 666ca53ad69024439439ddf5e958c61fef44ae02..f8dc10dca0dc29cf0afa9eb1e5d0964b4ab70dc6 100644 --- a/ppdet/core/workspace.py +++ b/ppdet/core/workspace.py @@ -66,7 +66,44 @@ class AttrDict(dict): global_config = AttrDict() -READER_KEY = '_READER_' +BASE_KEY = '_BASE_' + + +# parse and load _BASE_ recursively +def _load_config_with_base(file_path): + with open(file_path) as f: + file_cfg = yaml.load(f, Loader=yaml.Loader) + + # NOTE: cfgs outside have higher priority than cfgs in _BASE_ + if BASE_KEY in file_cfg: + all_base_cfg = AttrDict() + base_ymls = list(file_cfg[BASE_KEY]) + for base_yml in base_ymls: + if base_yml.startswith("~"): + base_yml = os.path.expanduser(base_yml) + if not base_yml.startswith('/'): + base_yml = os.path.join(os.path.dirname(file_path), base_yml) + + with open(base_yml) as f: + base_cfg = _load_config_with_base(base_yml) + all_base_cfg = merge_config(base_cfg, all_base_cfg) + + del file_cfg[BASE_KEY] + return merge_config(file_cfg, all_base_cfg) + + return file_cfg + + +WITHOUT_BACKGROUND_ARCHS = ['YOLOv3'] + + +def _parse_with_background(): + arch = global_config.architecture + with_background = arch not in WITHOUT_BACKGROUND_ARCHS + global_config['with_background'] = with_background + global_config['TrainReader']['with_background'] = with_background + global_config['EvalReader']['with_background'] = with_background + global_config['TestReader']['with_background'] = with_background def load_config(file_path): @@ -81,22 +118,13 @@ def load_config(file_path): _, ext = os.path.splitext(file_path) assert ext in ['.yml', '.yaml'], "only support yaml files for now" - cfg = AttrDict() - with open(file_path) as f: - cfg = merge_config(yaml.load(f, Loader=yaml.Loader), cfg) - - if READER_KEY in cfg: - reader_cfg = cfg[READER_KEY] - if reader_cfg.startswith("~"): - reader_cfg = os.path.expanduser(reader_cfg) - if not reader_cfg.startswith('/'): - reader_cfg = os.path.join(os.path.dirname(file_path), reader_cfg) + # load config from file and merge into global config + cfg = _load_config_with_base(file_path) + merge_config(cfg) - with open(reader_cfg) as f: - merge_config(yaml.load(f, Loader=yaml.Loader)) - del cfg[READER_KEY] + # parse config from merged config + _parse_with_background() - merge_config(cfg) return global_config diff --git a/ppdet/data/reader.py b/ppdet/data/reader.py index ae670c27a1a165deaec1e8ec17816d3d2cd16757..282da1cb22dbed24f3c32716a03e0d50b9edcce0 100644 --- a/ppdet/data/reader.py +++ b/ppdet/data/reader.py @@ -158,6 +158,8 @@ class Reader(object): """ Args: dataset (DataSet): DataSet object + with_background (bool): whether load background as a class. if True, + total class number will be class number of dataset + 1. default True. sample_transforms (list of BaseOperator): a list of sample transforms operators. 
batch_transforms (list of BaseOperator): a list of batch transforms @@ -188,6 +190,7 @@ class Reader(object): def __init__(self, dataset=None, + with_background=True, sample_transforms=None, batch_transforms=None, batch_size=None, @@ -206,7 +209,7 @@ class Reader(object): inputs_def=None, devices_num=1): self._dataset = dataset - self._roidbs = self._dataset.get_roidb() + self._roidbs = self._dataset.get_roidb(with_background) self._fields = copy.deepcopy(inputs_def[ 'fields']) if inputs_def else None @@ -416,7 +419,7 @@ class Reader(object): self._parallel.stop() -def create_reader(cfg, max_iter=0, global_cfg=None, devices_num=1): +def create_reader(dataset, cfg, max_iter=0, global_cfg=None, devices_num=1): """ Return iterable data reader. @@ -432,7 +435,8 @@ def create_reader(cfg, max_iter=0, global_cfg=None, devices_num=1): 'use_fine_grained_loss', False) cfg['num_classes'] = getattr(global_cfg, 'num_classes', 80) cfg['devices_num'] = devices_num - reader = Reader(**cfg)() + + reader = Reader(dataset=dataset, **cfg)() def _reader(): n = 0 diff --git a/ppdet/data/source/coco.py b/ppdet/data/source/coco.py index cb823f25ed49722596aa177d8be88ea09da9c9f9..81e0630f2ba25db15622f6912acb6fdc6eaeeeb2 100644 --- a/ppdet/data/source/coco.py +++ b/ppdet/data/source/coco.py @@ -33,25 +33,20 @@ class COCODataSet(DataSet): image_dir (str): directory for images. anno_path (str): json file path. sample_num (int): number of samples to load, -1 means all. - with_background (bool): whether load background as a class. - if True, total class number will be 81. default True. """ def __init__(self, image_dir=None, anno_path=None, dataset_dir=None, - sample_num=-1, - with_background=True): + sample_num=-1): super(COCODataSet, self).__init__( image_dir=image_dir, anno_path=anno_path, dataset_dir=dataset_dir, - sample_num=sample_num, - with_background=with_background) + sample_num=sample_num) self.anno_path = anno_path self.sample_num = sample_num - self.with_background = with_background # `roidbs` is list of dict whose structure is: # { # 'im_file': im_fname, # image file name @@ -69,7 +64,7 @@ class COCODataSet(DataSet): self.cname2cid = None self.load_image_only = False - def load_roidb_and_cname2cid(self): + def load_roidb_and_cname2cid(self, with_background=True): anno_path = os.path.join(self.dataset_dir, self.anno_path) image_dir = os.path.join(self.dataset_dir, self.image_dir) @@ -85,7 +80,7 @@ class COCODataSet(DataSet): # when with_background = True, mapping category to classid, like: # background:0, first_class:1, second_class:2, ... 
catid2clsid = dict({ - catid: i + int(self.with_background) + catid: i + int(with_background) for i, catid in enumerate(cat_ids) }) cname2cid = dict({ diff --git a/ppdet/data/source/dataset.py b/ppdet/data/source/dataset.py index 6964b144ffef38573ca3465fb1f298fd7b7cebc3..eecd14f73bbf1810733e16db74692f2022b3b882 100644 --- a/ppdet/data/source/dataset.py +++ b/ppdet/data/source/dataset.py @@ -40,7 +40,6 @@ class DataSet(object): image_dir=None, anno_path=None, sample_num=-1, - with_background=True, use_default_label=None, **kwargs): super(DataSet, self).__init__() @@ -48,7 +47,6 @@ class DataSet(object): self.image_dir = image_dir if image_dir is not None else '' self.dataset_dir = dataset_dir if dataset_dir is not None else '' self.sample_num = sample_num - self.with_background = with_background self.use_default_label = use_default_label self.cname2cid = None @@ -59,13 +57,13 @@ class DataSet(object): raise NotImplementedError('%s.load_roidb_and_cname2cid not available' % (self.__class__.__name__)) - def get_roidb(self): + def get_roidb(self, with_background=True): if not self.roidbs: data_dir = get_dataset_path(self.dataset_dir, self.anno_path, self.image_dir) if data_dir: self.dataset_dir = data_dir - self.load_roidb_and_cname2cid() + self.load_roidb_and_cname2cid(with_background) return self.roidbs @@ -116,12 +114,10 @@ class ImageFolder(DataSet): image_dir=None, anno_path=None, sample_num=-1, - with_background=True, use_default_label=None, **kwargs): super(ImageFolder, self).__init__(dataset_dir, image_dir, anno_path, - sample_num, with_background, - use_default_label) + sample_num, use_default_label) self.roidbs = None self._imid2path = {} diff --git a/ppdet/data/source/voc.py b/ppdet/data/source/voc.py index 560ed17ea24028963c51ecddedcf4ac095a2e9f8..498ca5d16426cc0e67c8119aeff69c63e89b26b3 100644 --- a/ppdet/data/source/voc.py +++ b/ppdet/data/source/voc.py @@ -40,8 +40,6 @@ class VOCDataSet(DataSet): sample_num (int): number of samples to load, -1 means all. use_default_label (bool): whether use the default mapping of label to integer index. Default True. - with_background (bool): whether load background as a class, - default True. label_list (str): if use_default_label is False, will load mapping between category and class index. 
""" @@ -52,14 +50,12 @@ class VOCDataSet(DataSet): anno_path=None, sample_num=-1, use_default_label=True, - with_background=True, label_list='label_list.txt'): super(VOCDataSet, self).__init__( image_dir=image_dir, anno_path=anno_path, sample_num=sample_num, - dataset_dir=dataset_dir, - with_background=with_background) + dataset_dir=dataset_dir) # roidbs is list of dict whose structure is: # { # 'im_file': im_fname, # image file name @@ -78,7 +74,7 @@ class VOCDataSet(DataSet): self.use_default_label = use_default_label self.label_list = label_list - def load_roidb_and_cname2cid(self): + def load_roidb_and_cname2cid(self, with_background=True): anno_path = os.path.join(self.dataset_dir, self.anno_path) image_dir = os.path.join(self.dataset_dir, self.image_dir) @@ -96,12 +92,12 @@ class VOCDataSet(DataSet): raise ValueError("label_list {} does not exists".format( label_path)) with open(label_path, 'r') as fr: - label_id = int(self.with_background) + label_id = int(with_background) for line in fr.readlines(): cname2cid[line.strip()] = label_id label_id += 1 else: - cname2cid = pascalvoc_label(self.with_background) + cname2cid = pascalvoc_label(with_background) with open(anno_path, 'r') as fr: while True: diff --git a/ppdet/data/source/widerface.py b/ppdet/data/source/widerface.py index 7aab1533705ce3e853eb6c9cc1c20dbd9d8e064c..f866d8d0fc6cb41b5b7ac7226245a0fe6fefe0c0 100644 --- a/ppdet/data/source/widerface.py +++ b/ppdet/data/source/widerface.py @@ -32,8 +32,6 @@ class WIDERFaceDataSet(DataSet): image_dir (str): directory for images. anno_path (str): root directory for voc annotation data sample_num (int): number of samples to load, -1 means all - with_background (bool): whether load background as a class. - if True, total class number will be 2. default True. """ def __init__(self, @@ -41,22 +39,19 @@ class WIDERFaceDataSet(DataSet): image_dir=None, anno_path=None, sample_num=-1, - with_background=True, with_lmk=False): super(WIDERFaceDataSet, self).__init__( image_dir=image_dir, anno_path=anno_path, sample_num=sample_num, - dataset_dir=dataset_dir, - with_background=with_background) + dataset_dir=dataset_dir) self.anno_path = anno_path self.sample_num = sample_num - self.with_background = with_background self.roidbs = None self.cname2cid = None self.with_lmk = with_lmk - def load_roidb_and_cname2cid(self): + def load_roidb_and_cname2cid(self, with_background=True): anno_path = os.path.join(self.dataset_dir, self.anno_path) image_dir = os.path.join(self.dataset_dir, self.image_dir) @@ -65,7 +60,7 @@ class WIDERFaceDataSet(DataSet): records = [] ct = 0 file_lists = self._load_file_list(txt_file) - cname2cid = widerface_label(self.with_background) + cname2cid = widerface_label(with_background) for item in file_lists: im_fname = item[0] diff --git a/ppdet/modeling/__init__.py b/ppdet/modeling/__init__.py index 8819fdf04ca2dc16fd2303c7226f0ba4cc69b558..154bb3bdf78a477d9dc00d0c71c656274d3a4aeb 100644 --- a/ppdet/modeling/__init__.py +++ b/ppdet/modeling/__init__.py @@ -4,6 +4,7 @@ from . import mask from . import backbone from . import neck from . import head +from . import loss from . import architecture from . 
import post_process @@ -13,5 +14,6 @@ from .mask import * from .backbone import * from .neck import * from .head import * +from .loss import * from .architecture import * from .post_process import * diff --git a/ppdet/modeling/architecture/cascade_rcnn.py b/ppdet/modeling/architecture/cascade_rcnn.py index ccc1ef4abda26a5bf61f92055038269eb4ee19aa..2f76616f5c0ae709eb5201676c72fcf3ddb4f773 100644 --- a/ppdet/modeling/architecture/cascade_rcnn.py +++ b/ppdet/modeling/architecture/cascade_rcnn.py @@ -87,11 +87,11 @@ class CascadeRCNN(BaseArch): mask_out = self.mask.post_process(self.gbd) self.gbd.update(mask_out) - def loss(self, ): + def get_loss(self, ): outs = {} losses = [] - rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(self.gbd) + rpn_cls_loss, rpn_reg_loss = self.rpn_head.get_loss(self.gbd) outs['loss_rpn_cls'] = rpn_cls_loss outs['loss_rpn_reg'] = rpn_reg_loss losses.extend([rpn_cls_loss, rpn_reg_loss]) @@ -100,7 +100,7 @@ class CascadeRCNN(BaseArch): bbox_reg_loss_list = [] for i in range(self.num_stages): self.gbd.update_v('stage', i) - bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(self.gbd) + bbox_cls_loss, bbox_reg_loss = self.bbox_head.get_loss(self.gbd) bbox_cls_loss_list.append(bbox_cls_loss) bbox_reg_loss_list.append(bbox_reg_loss) outs['loss_bbox_cls_' + str(i)] = bbox_cls_loss @@ -108,7 +108,7 @@ class CascadeRCNN(BaseArch): losses.extend(bbox_cls_loss_list) losses.extend(bbox_reg_loss_list) - mask_loss = self.mask_head.loss(self.gbd) + mask_loss = self.mask_head.get_loss(self.gbd) outs['mask_loss'] = mask_loss losses.append(mask_loss) @@ -116,7 +116,7 @@ class CascadeRCNN(BaseArch): outs['loss'] = loss return outs - def infer(self, ): + def get_pred(self, ): outs = { 'bbox': self.gbd['predicted_bbox'].numpy(), 'bbox_nums': self.gbd['predicted_bbox_nums'].numpy(), diff --git a/ppdet/modeling/architecture/faster_rcnn.py b/ppdet/modeling/architecture/faster_rcnn.py index 50faec0213379fd23434b1a68c73cc2e9a7a76dc..47e4c7acfa89d76054431f25a15943de7b13a318 100644 --- a/ppdet/modeling/architecture/faster_rcnn.py +++ b/ppdet/modeling/architecture/faster_rcnn.py @@ -55,9 +55,9 @@ class FasterRCNN(BaseArch): bbox_out = self.proposal.post_process(self.gbd) self.gbd.update(bbox_out) - def loss(self, ): - rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(self.gbd) - bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(self.gbd) + def get_loss(self, ): + rpn_cls_loss, rpn_reg_loss = self.rpn_head.get_loss(self.gbd) + bbox_cls_loss, bbox_reg_loss = self.bbox_head.get_loss(self.gbd) losses = [rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss] loss = fluid.layers.sum(losses) out = { @@ -69,7 +69,7 @@ class FasterRCNN(BaseArch): } return out - def infer(self, ): + def get_pred(self, ): outs = { "bbox": self.gbd['predicted_bbox'].numpy(), "bbox_nums": self.gbd['predicted_bbox_nums'].numpy(), diff --git a/ppdet/modeling/architecture/mask_rcnn.py b/ppdet/modeling/architecture/mask_rcnn.py index 615e88d0d8407fdf8e287a8e353ddc8bbe69b358..00c13ec02c2d468fda093990f813d63c7deb8491 100644 --- a/ppdet/modeling/architecture/mask_rcnn.py +++ b/ppdet/modeling/architecture/mask_rcnn.py @@ -95,30 +95,30 @@ class MaskRCNN(BaseArch): self.bboxes, bbox_feat, rois_has_mask_int32, spatial_scale) - def loss(self, ): + def get_loss(self, ): loss = {} # RPN loss rpn_loss_inputs = self.anchor.generate_loss_inputs( self.inputs, self.rpn_head_out, self.anchor_out) - loss_rpn = self.rpn_head.loss(rpn_loss_inputs) + loss_rpn = self.rpn_head.get_loss(rpn_loss_inputs) loss.update(loss_rpn) # BBox loss 
bbox_targets = self.proposal.get_targets() - loss_bbox = self.bbox_head.loss(self.bbox_head_out, bbox_targets) + loss_bbox = self.bbox_head.get_loss(self.bbox_head_out, bbox_targets) loss.update(loss_bbox) # Mask loss mask_targets = self.mask.get_targets() - loss_mask = self.mask_head.loss(self.mask_head_out, mask_targets) + loss_mask = self.mask_head.get_loss(self.mask_head_out, mask_targets) loss.update(loss_mask) total_loss = fluid.layers.sums(list(loss.values())) loss.update({'loss': total_loss}) return loss - def infer(self, ): + def get_pred(self, ): mask = self.mask_post_process(self.bboxes, self.mask_head_out, self.inputs['im_info']) bbox, bbox_num = self.bboxes diff --git a/ppdet/modeling/architecture/meta_arch.py b/ppdet/modeling/architecture/meta_arch.py index 1a4f4f2591465a3d3920ef886f9bb06a76cd5322..b3b4842d7d4aa3d54efe0746f6e65e1bd629b474 100644 --- a/ppdet/modeling/architecture/meta_arch.py +++ b/ppdet/modeling/architecture/meta_arch.py @@ -22,9 +22,9 @@ class BaseArch(nn.Layer): self.model_arch() if mode == 'train': - out = self.loss() + out = self.get_loss() elif mode == 'infer': - out = self.infer() + out = self.get_pred() else: raise "Now, only support train or infer mode!" return out @@ -45,8 +45,8 @@ class BaseArch(nn.Layer): def model_arch(self): raise NotImplementedError("Should implement model_arch method!") - def loss(self, ): - raise NotImplementedError("Should implement loss method!") + def get_loss(self, ): + raise NotImplementedError("Should implement get_loss method!") - def infer(self, ): - raise NotImplementedError("Should implement infer method!") + def get_pred(self, ): + raise NotImplementedError("Should implement get_pred method!") diff --git a/ppdet/modeling/architecture/yolo.py b/ppdet/modeling/architecture/yolo.py index e09dd4f668ae8c3d0df9f51306a687aed10457cc..1d21d17041809bdd1ee7324ba1c86ee74d696be0 100644 --- a/ppdet/modeling/architecture/yolo.py +++ b/ppdet/modeling/architecture/yolo.py @@ -39,11 +39,11 @@ class YOLOv3(BaseArch): # YOLO Head self.yolo_head_outs = self.yolo_head(body_feats) - def loss(self, ): - yolo_loss = self.yolo_head.loss(self.inputs, self.yolo_head_outs) - return yolo_loss + def get_loss(self, ): + loss = self.yolo_head.get_loss(self.inputs, self.yolo_head_outs) + return loss - def infer(self, ): + def get_pred(self, ): bbox, bbox_num = self.post_process(self.yolo_head_outs, self.yolo_head.mask_anchors, self.inputs['im_size']) diff --git a/ppdet/modeling/head/bbox_head.py b/ppdet/modeling/head/bbox_head.py index 7678039e869877557b5bc3dc95ff7207daadda1c..3dd52e5f14dcbcba255076d991caf4380f73979d 100644 --- a/ppdet/modeling/head/bbox_head.py +++ b/ppdet/modeling/head/bbox_head.py @@ -164,7 +164,7 @@ class BBoxHead(Layer): loss_bbox_reg = fluid.layers.reduce_mean(loss_bbox_reg) return loss_bbox_cls, loss_bbox_reg - def loss(self, bbox_head_out, targets): + def get_loss(self, bbox_head_out, targets): loss_bbox = {} for lvl, (bboxhead, target) in enumerate(zip(bbox_head_out, targets)): score, delta = bboxhead diff --git a/ppdet/modeling/head/mask_head.py b/ppdet/modeling/head/mask_head.py index 47f202a1b99d9da73962ed13ebbe157eccfd738e..363ad0a34c9883773b4a279da4fd6d26e68c37c0 100644 --- a/ppdet/modeling/head/mask_head.py +++ b/ppdet/modeling/head/mask_head.py @@ -178,7 +178,7 @@ class MaskHead(Layer): spatial_scale, stage) return mask_head_out - def loss(self, mask_head_out, mask_target): + def get_loss(self, mask_head_out, mask_target): mask_logits = fluid.layers.flatten(mask_head_out) mask_label = fluid.layers.cast(x=mask_target, 
dtype='float32') mask_label.stop_gradient = True diff --git a/ppdet/modeling/head/rpn_head.py b/ppdet/modeling/head/rpn_head.py index 0f83ffc9e927f2d7e39ab79f576c85c5ea444ca0..5fa4e4ee3c2eac273f24da947abad4192bf87899 100644 --- a/ppdet/modeling/head/rpn_head.py +++ b/ppdet/modeling/head/rpn_head.py @@ -85,7 +85,7 @@ class RPNHead(Layer): rpn_head_out.append((rrs, rrd)) return rpn_feats, rpn_head_out - def loss(self, loss_inputs): + def get_loss(self, loss_inputs): # cls loss score_tgt = fluid.layers.cast( x=loss_inputs['rpn_score_target'], dtype='float32') diff --git a/ppdet/modeling/head/yolo_head.py b/ppdet/modeling/head/yolo_head.py index aa4bf3a17332f1719fa3aceadc4e8050dfcd3766..2fa370b62c8edc66a002d8e5c933655042bfdd10 100644 --- a/ppdet/modeling/head/yolo_head.py +++ b/ppdet/modeling/head/yolo_head.py @@ -67,5 +67,5 @@ class YOLOv3Head(nn.Layer): yolo_outputs.append(yolo_output) return yolo_outputs - def loss(self, inputs, head_outputs): - return self.loss(inputs, head_outputs, anchors, anchor_masks) + def get_loss(self, inputs, head_outputs): + return self.loss(inputs, head_outputs, self.anchors, self.anchor_masks) diff --git a/ppdet/modeling/loss/yolo_loss.py b/ppdet/modeling/loss/yolo_loss.py index 8fcc121aa9ca35f7902c9809253688867a3e840c..bfef53a1dd0a3787200e22d34170d58f62afafd3 100644 --- a/ppdet/modeling/loss/yolo_loss.py +++ b/ppdet/modeling/loss/yolo_loss.py @@ -21,6 +21,8 @@ from paddle.fluid.regularizer import L2Decay from ppdet.core.workspace import register from ..backbone.darknet import ConvBNLayer +__all__ = ['YOLOv3Loss'] + @register class YOLOv3Loss(nn.Layer): @@ -33,6 +35,7 @@ class YOLOv3Loss(nn.Layer): downsample=32, use_fine_grained_loss=False): super(YOLOv3Loss, self).__init__() + self.num_classes = num_classes self.ignore_thresh = ignore_thresh self.label_smooth = label_smooth self.downsample = downsample diff --git a/ppdet/utils/check.py b/ppdet/utils/check.py index 3ebbe889d4b4b1e1e452a5bc038f364e95464204..c3e019595734f3fef1bd7e706f0e57d5edc0e69a 100644 --- a/ppdet/utils/check.py +++ b/ppdet/utils/check.py @@ -92,20 +92,11 @@ def check_config(cfg): if 'log_iter' not in cfg: cfg.log_iter = 20 - train_dataset = cfg['TrainReader']['dataset'] - eval_dataset = cfg['EvalReader']['dataset'] - test_dataset = cfg['TestReader']['dataset'] - assert train_dataset.with_background == eval_dataset.with_background, \ - "'with_background' of TrainReader is not equal to EvalReader." - assert train_dataset.with_background == test_dataset.with_background, \ - "'with_background' of TrainReader is not equal to TestReader." - - actual_num_classes = int(cfg.num_classes) - int( - train_dataset.with_background) logger.debug("The 'num_classes'(number of classes) you set is {}, " \ "and 'with_background' in 'dataset' sets {}.\n" \ "So please note the actual number of categories is {}." 
- .format(cfg.num_classes, train_dataset.with_background, - actual_num_classes)) + .format(cfg.num_classes, cfg.with_background, + cfg.num_classes + 1)) + cfg.num_classes = cfg.num_classes + int(cfg.with_background) return cfg diff --git a/tools/eval.py b/tools/eval.py index 9a5534bfac832d3d93cc1367f9d3bea6bd6a69a0..520032e3dbf94e1318d1a917d635fb40dd36d656 100755 --- a/tools/eval.py +++ b/tools/eval.py @@ -59,7 +59,8 @@ def run(FLAGS, cfg): devices_num = 1 else: devices_num = int(os.environ.get('CPU_NUM', 1)) - eval_reader = create_reader(cfg.EvalReader, devices_num=devices_num) + eval_reader = create_reader( + cfg.EvalDataset, cfg.EvalReader, devices_num=devices_num) # Run Eval outs_res = [] diff --git a/tools/infer.py b/tools/infer.py index c16f0d5c70dbc2e1fa9092f2214b7fdb04d40731..8868235b258e663bfcab7736701cab05db1a699a 100755 --- a/tools/infer.py +++ b/tools/infer.py @@ -144,7 +144,7 @@ def run(FLAGS, cfg): model = load_dygraph_ckpt(model, ckpt=cfg.weights) # Data Reader - test_reader = create_reader(cfg.TestReader) + test_reader = create_reader(cfg.TestDataset, cfg.TestReader) # Run Infer for iter_id, data in enumerate(test_reader()): diff --git a/tools/train.py b/tools/train.py index c37d45f20ecc037ac4bf71200a361041799b1894..5eb1b4e878a831c4b220b5703d02c8267f8e7665 100755 --- a/tools/train.py +++ b/tools/train.py @@ -132,6 +132,7 @@ def run(FLAGS, cfg): devices_num = int(os.environ.get('CPU_NUM', 1)) train_reader = create_reader( + cfg.TrainDataset, cfg.TrainReader, (cfg.max_iters - start_iter), cfg, devices_num=devices_num)