From 37f7e6ee4ed50b0b37f4432fe21fa4447c15ee82 Mon Sep 17 00:00:00 2001
From: sunxl1988 <935448535@qq.com>
Date: Wed, 19 Aug 2020 06:27:13 +0000
Subject: [PATCH] test=dygraph reconstruct config and reader

---
 .../cascade_mask_rcnn.yml}                |  34 --
 .../faster_rcnn.yml}                      |  33 --
 .../mask_rcnn.yml}                        |  61 +-
 .../mask_rcnn_fpn.yml}                    |  41 +-
 .../yolov3.yml}                           |  35 --
 configs/base/cascade_rcnn_r50_1x.yml      |  18 +
 configs/base/faster_rcnn_r50_1x.yml       |  17 +
 configs/base/mask_rcnn_r50_1x.yml         |  16 +
 configs/base/mask_rcnn_r50_fpn_1x.yml     |  16 +
 configs/base/yolov3_darknet.yml           |  18 +
 configs/example/mask_rcnn_r50_1x.yml      |  16 +
 configs/example/mask_rcnn_r50_fpn_1x.yml  |  13 +
 configs/faster_reader.yml                 |  95 ----
 configs/mask_reader.yml                   |  99 ----
 configs/optimize/rcnn.yml                 |  20 +
 configs/optimize/yolo.yml                 |  20 +
 configs/reader/faster_rcnn.yml            |  52 ++
 configs/reader/mask_rcnn.yml              |  59 ++
 configs/reader/yolo.yml                   |  74 +++
 configs/yolov3_reader.yml                 | 111 ----
 ppdet/data/__init__.py                    |  19 +-
 ppdet/data/loader.py                      | 235 ++++++++
 ppdet/data/parallel_map.py                | 311 ----------
 ppdet/data/reader.py                      | 449 ---------------
 ppdet/data/sampler.py                     | 183 ++++++
 ppdet/data/shared_queue/__init__.py       |  25 -
 ppdet/data/shared_queue/queue.py          | 106 ----
 ppdet/data/shared_queue/sharedmemory.py   | 532 ------------------
 ppdet/data/source/__init__.py             |  24 +-
 ppdet/data/source/coco.py                 |  89 +--
 ppdet/data/source/dataset.py              | 148 +----
 ppdet/data/source/voc.py                  | 216 -------
 ppdet/data/source/widerface.py            | 170 ------
 ppdet/data/transform/batch_operators.py   |  34 +-
 ppdet/data/transform/operators.py         | 284 +++++-----
 ppdet/modeling/architecture/mask_rcnn.py  |   2 +-
 ppdet/modeling/architecture/meta_arch.py  |  22 +-
 ppdet/modeling/head/mask_head.py          |   2 +-
 ppdet/modeling/mask.py                    |   2 +-
 ppdet/optimizer.py                        |  90 ++-
 ppdet/py_op/bbox.py                       |   2 +-
 ppdet/py_op/mask.py                       |   2 +-
 ppdet/py_op/target.py                     |   6 +-
 tools/eval.py                             |  23 +-
 tools/train.py                            | 114 ++--
 45 files changed, 1108 insertions(+), 2830 deletions(-)
 rename configs/{cascade_rcnn_r50_1x.yml => architechture/cascade_mask_rcnn.yml} (75%)
 rename configs/{faster_rcnn_r50_1x.yml => architechture/faster_rcnn.yml} (72%)
 rename configs/{mask_rcnn_r50_1x.yml => architechture/mask_rcnn.yml} (66%)
 rename configs/{mask_rcnn_r50_fpn_1x.yml => architechture/mask_rcnn_fpn.yml} (75%)
 rename configs/{yolov3_darknet.yml => architechture/yolov3.yml} (58%)
 create mode 100644 configs/base/cascade_rcnn_r50_1x.yml
 create mode 100644 configs/base/faster_rcnn_r50_1x.yml
 create mode 100644 configs/base/mask_rcnn_r50_1x.yml
 create mode 100644 configs/base/mask_rcnn_r50_fpn_1x.yml
 create mode 100644 configs/base/yolov3_darknet.yml
 create mode 100644 configs/example/mask_rcnn_r50_1x.yml
 create mode 100644 configs/example/mask_rcnn_r50_fpn_1x.yml
 delete mode 100644 configs/faster_reader.yml
 delete mode 100644 configs/mask_reader.yml
 create mode 100644 configs/optimize/rcnn.yml
 create mode 100644 configs/optimize/yolo.yml
 create mode 100644 configs/reader/faster_rcnn.yml
 create mode 100644 configs/reader/mask_rcnn.yml
 create mode 100644 configs/reader/yolo.yml
 delete mode 100644 configs/yolov3_reader.yml
 create mode 100644 ppdet/data/loader.py
 delete mode 100644 ppdet/data/parallel_map.py
 delete mode 100644 ppdet/data/reader.py
 create mode 100644 ppdet/data/sampler.py
 delete mode 100644 ppdet/data/shared_queue/__init__.py
 delete mode 100644 ppdet/data/shared_queue/queue.py
 delete mode 100644 ppdet/data/shared_queue/sharedmemory.py
 delete mode 100644 ppdet/data/source/voc.py
 delete mode 100644 ppdet/data/source/widerface.py

diff --git 
a/configs/cascade_rcnn_r50_1x.yml b/configs/architechture/cascade_mask_rcnn.yml similarity index 75% rename from configs/cascade_rcnn_r50_1x.yml rename to configs/architechture/cascade_mask_rcnn.yml index e6e1f7d6b..e6cd4d3d1 100644 --- a/configs/cascade_rcnn_r50_1x.yml +++ b/configs/architechture/cascade_mask_rcnn.yml @@ -1,16 +1,3 @@ -architecture: CascadeRCNN -use_gpu: true -max_iters: 180000 -log_smooth_window: 50 -save_dir: output -snapshot_iter: 10000 -pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams -metric: COCO -weights: output/cascade_rcnn_r50_1x/model_final -num_classes: 81 -num_stages: 3 -open_debug: False - # Model Achitecture CascadeRCNN: # model anchor info flow @@ -102,24 +89,3 @@ Mask: resolution: 14 mask_post_process: name: MaskPostProcess - -# Train -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -_READER_: 'mask_reader.yml' diff --git a/configs/faster_rcnn_r50_1x.yml b/configs/architechture/faster_rcnn.yml similarity index 72% rename from configs/faster_rcnn_r50_1x.yml rename to configs/architechture/faster_rcnn.yml index d36b45abd..4aa508277 100644 --- a/configs/faster_rcnn_r50_1x.yml +++ b/configs/architechture/faster_rcnn.yml @@ -1,15 +1,3 @@ -architecture: FasterRCNN -use_gpu: true -max_iters: 180000 -log_smooth_window: 50 -save_dir: output -snapshot_iter: 10000 -pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams -metric: COCO -weights: output/faster_rcnn_r50_1x/model_final -num_classes: 81 -open_debug: False - # Model Achitecture FasterRCNN: # model anchor info flow @@ -84,24 +72,3 @@ Proposal: keep_top_k: 100 score_threshold: 0.05 nms_threshold: 0.5 - -# Train -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -_READER_: 'faster_reader.yml' diff --git a/configs/mask_rcnn_r50_1x.yml b/configs/architechture/mask_rcnn.yml similarity index 66% rename from configs/mask_rcnn_r50_1x.yml rename to configs/architechture/mask_rcnn.yml index 7f089140a..1a2dd9f2f 100644 --- a/configs/mask_rcnn_r50_1x.yml +++ b/configs/architechture/mask_rcnn.yml @@ -1,15 +1,3 @@ -architecture: MaskRCNN -use_gpu: true -max_iters: 180000 -log_smooth_window: 50 -save_dir: output -snapshot_iter: 10000 -pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams -metric: COCO -weights: output/mask_rcnn_r50_1x/model_final -num_classes: 81 -open_debug: False - # Model Achitecture MaskRCNN: # model anchor info flow @@ -23,9 +11,12 @@ MaskRCNN: mask_head: MaskHead ResNet: - norm_type: 'affine' + # index 0 stands for res2 depth: 50 - freeze_at: 'res2' + norm_type: bn + freeze_at: 0 + return_idx: [2] + num_stages: 3 RPNHead: rpn_feat: @@ -41,20 +32,23 @@ BBoxHead: name: RoIExtractor resolution: 14 sampling_ratio: 0 - spatial_scale: 0.0625 - extractor_type: 'RoIAlign' - feat_in: 1024 - feat_out: 512 + start_level: 0 + end_level: 0 + head_feat: + name: Res5Feat + feat_in: 1024 + feat_out: 512 + with_pool: true + in_feat: 2048 MaskHead: mask_feat: name: MaskFeat + num_convs: 0 feat_in: 2048 feat_out: 256 - 
mask_stages: 1 + share_bbox_feat: true feat_in: 256 - resolution: 14 - mask_stages: 1 AnchorRPN: anchor_generator: @@ -80,7 +74,6 @@ Proposal: train_post_nms_top_n: 2000 infer_pre_nms_top_n: 12000 infer_post_nms_top_n: 2000 - return_rois_num: True proposal_target_generator: name: ProposalTargetGenerator batch_size_per_im: 512 @@ -101,27 +94,7 @@ Proposal: Mask: mask_target_generator: name: MaskTargetGenerator - resolution: 14 + mask_resolution: 14 mask_post_process: name: MaskPostProcess - -# Train -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -_READER_: 'mask_reader.yml' + mask_resolution: 14 diff --git a/configs/mask_rcnn_r50_fpn_1x.yml b/configs/architechture/mask_rcnn_fpn.yml similarity index 75% rename from configs/mask_rcnn_r50_fpn_1x.yml rename to configs/architechture/mask_rcnn_fpn.yml index a1c90e3c0..7b9540546 100644 --- a/configs/mask_rcnn_r50_fpn_1x.yml +++ b/configs/architechture/mask_rcnn_fpn.yml @@ -1,15 +1,3 @@ -architecture: MaskRCNN -use_gpu: true -max_iters: 180000 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 10000 -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO -weights: output/mask_rcnn_r50_fpn_1x/model_final -num_classes: 81 -load_static_weights: True - # Model Achitecture MaskRCNN: # model anchor info flow @@ -38,7 +26,6 @@ FPN: max_level: 4 spatial_scale: [0.25, 0.125, 0.0625, 0.03125] - RPNHead: rpn_feat: name: RPNFeat @@ -55,10 +42,10 @@ BBoxHead: resolution: 7 sampling_ratio: 2 head_feat: - name: TwoFCHead + name: TwoFCFeat in_dim: 256 mlp_dim: 1024 - in_feat: 1024 + #in_feat: 1024 MaskHead: mask_feat: @@ -78,7 +65,7 @@ AnchorRPN: name: AnchorGeneratorRPN aspect_ratios: [0.5, 1.0, 2.0] anchor_start_size: 32 - stride: [4., 4.] + stride: [4.0, 4.0] anchor_target_generator: name: AnchorTargetGeneratorRPN batch_size_per_im: 256 @@ -120,25 +107,3 @@ Mask: mask_post_process: name: MaskPostProcess mask_resolution: 28 - - -# Train -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -_READER_: 'mask_reader.yml' diff --git a/configs/yolov3_darknet.yml b/configs/architechture/yolov3.yml similarity index 58% rename from configs/yolov3_darknet.yml rename to configs/architechture/yolov3.yml index 7a1215def..108893fe6 100644 --- a/configs/yolov3_darknet.yml +++ b/configs/architechture/yolov3.yml @@ -1,16 +1,3 @@ -architecture: YOLOv3 -use_gpu: true -max_iters: 500000 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 10000 -metric: COCO -pretrain_weights: https://paddlemodels.bj.bcebos.com/yolo/darknet53.pdparams -weights: output/yolov3_darknet/model_final -num_classes: 80 -use_fine_grained_loss: false -open_debug: False - YOLOv3: anchor: AnchorYOLO backbone: DarkNet @@ -51,25 +38,3 @@ AnchorYOLO: nms_top_k: 1000 normalized: false background_label: -1 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 400000 - - 450000 - - !LinearWarmup - start_factor: 0. 
- steps: 4000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -_READER_: 'yolov3_reader.yml' diff --git a/configs/base/cascade_rcnn_r50_1x.yml b/configs/base/cascade_rcnn_r50_1x.yml new file mode 100644 index 000000000..dc863cd8b --- /dev/null +++ b/configs/base/cascade_rcnn_r50_1x.yml @@ -0,0 +1,18 @@ +architecture: CascadeRCNN +num_stages: 3 +pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams +weights: output/cascade_rcnn_r50_1x/model_final +use_gpu: true +epoch: 24 +worker_num: 0 +use_prefetch: False +log_smooth_window: 20 +save_dir: output +metric: COCO +num_classes: 81 +open_debug: False + + +_READER_: '../reader/mask_rcnn.yml' +_ARCHITECHTURE_: '../architechture/cascade_mask_rcnn.yml' +_OPTIMIZE_: '../optimize/rcnn.yml' diff --git a/configs/base/faster_rcnn_r50_1x.yml b/configs/base/faster_rcnn_r50_1x.yml new file mode 100644 index 000000000..716905a72 --- /dev/null +++ b/configs/base/faster_rcnn_r50_1x.yml @@ -0,0 +1,17 @@ +architecture: FasterRCNN +pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams +weights: output/faster_rcnn_r50_1x/model_final +use_gpu: true +worker_num: 0 +use_prefetch: False +epoch: 24 +log_smooth_window: 20 +save_dir: output +metric: COCO +num_classes: 81 +open_debug: False + + +_READER_: '../reader/faster_rcnn.yml' +_ARCHITECHTURE_: '../architechture/faster_rcnn.yml' +_OPTIMIZE_: '../optimize/rcnn.yml' diff --git a/configs/base/mask_rcnn_r50_1x.yml b/configs/base/mask_rcnn_r50_1x.yml new file mode 100644 index 000000000..3a1aee939 --- /dev/null +++ b/configs/base/mask_rcnn_r50_1x.yml @@ -0,0 +1,16 @@ +architecture: MaskRCNN +pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams +weights: output/mask_rcnn_r50_1x/model_final +use_gpu: true +epoch: 24 +use_prefetch: False +worker_num: 0 +log_smooth_window: 20 +save_dir: output +metric: COCO +num_classes: 81 +load_static_weights: true + +_READER_: '../reader/mask_rcnn.yml' +_ARCHITECHTURE_: '../architechture/mask_rcnn.yml' +_OPTIMIZE_: '../optimize/rcnn.yml' diff --git a/configs/base/mask_rcnn_r50_fpn_1x.yml b/configs/base/mask_rcnn_r50_fpn_1x.yml new file mode 100644 index 000000000..beba68b70 --- /dev/null +++ b/configs/base/mask_rcnn_r50_fpn_1x.yml @@ -0,0 +1,16 @@ +architecture: MaskRCNN +pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams +weights: output/mask_rcnn_r50_1x/model_final +use_gpu: true +epoch: 24 +use_prefetch: False +worker_num: 0 +log_smooth_window: 20 +save_dir: output +metric: COCO +num_classes: 81 +load_static_weights: true + +_READER_: '../reader/mask_rcnn.yml' +_ARCHITECHTURE_: '../architechture/mask_rcnn_fpn.yml' +_OPTIMIZE_: '../optimize/rcnn.yml' diff --git a/configs/base/yolov3_darknet.yml b/configs/base/yolov3_darknet.yml new file mode 100644 index 000000000..2473a641e --- /dev/null +++ b/configs/base/yolov3_darknet.yml @@ -0,0 +1,18 @@ +architecture: YOLOv3 +pretrain_weights: https://paddlemodels.bj.bcebos.com/yolo/darknet53.pdparams +weights: output/yolov3_darknet/model_final +use_gpu: true +worker_num: 0 +use_prefetch: False +epoch: 300 +log_smooth_window: 20 +save_dir: output +metric: COCO +num_classes: 80 +use_fine_grained_loss: false +open_debug: False + + +_READER_: '../reader/yolo.yml' +_ARCHITECHTURE_: '../architechture/yolov3.yml' +_OPTIMIZE_: '../optimize/yolo.yml' diff --git a/configs/example/mask_rcnn_r50_1x.yml 
b/configs/example/mask_rcnn_r50_1x.yml new file mode 100644 index 000000000..341c927fa --- /dev/null +++ b/configs/example/mask_rcnn_r50_1x.yml @@ -0,0 +1,16 @@ +_BASE_: "configs/base/mask_rcnn_r50_1x.yml" + +use_gpu: true +worker_num: 0 +epoch: 24 +log_smooth_window: 20 +save_dir: output +metric: COCO +num_classes: 81 + +TrainReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly'] + dataset: + name: COCODataset + dataset_dir: /home/ai/dataset/COCO17 diff --git a/configs/example/mask_rcnn_r50_fpn_1x.yml b/configs/example/mask_rcnn_r50_fpn_1x.yml new file mode 100644 index 000000000..1339aec8c --- /dev/null +++ b/configs/example/mask_rcnn_r50_fpn_1x.yml @@ -0,0 +1,13 @@ +_BASE_: "configs/base/mask_rcnn_r50_fpn_1x.yml" + +use_gpu: true +worker_num: 0 +epoch: 24 +log_smooth_window: 20 +save_dir: output +metric: COCO +num_classes: 81 +weights: output/mask_r50_fpn_1x/model_final.pdparams +EvalReader: + dataset: + dataset_dir: /home/ai/dataset/COCO17 diff --git a/configs/faster_reader.yml b/configs/faster_reader.yml deleted file mode 100644 index e31610685..000000000 --- a/configs/faster_reader.yml +++ /dev/null @@ -1,95 +0,0 @@ -TrainReader: - inputs_def: - fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd'] - dataset: - !COCODataSet - image_dir: train2017 - anno_path: annotations/instances_train2017.json - dataset_dir: dataset/coco - sample_transforms: - - !DecodeImage - to_rgb: True - - !RandomFlipImage - prob: 0.5 - - !NormalizeImage - is_channel_first: false - is_scale: true - mean: [0.485,0.456,0.406] - std: [0.229, 0.224,0.225] - - !ResizeImage - target_size: 800 - max_size: 1333 - interp: 1 - use_cv2: true - - !Permute - to_bgr: false - channel_first: true - batch_transforms: - - !PadBatch - pad_to_stride: 0 - use_padded_im_info: False - pad_gt: true - batch_size: 1 - shuffle: true - worker_num: 2 - use_process: false - -EvalReader: - inputs_def: - fields: ['image', 'im_info', 'im_id', 'im_shape'] - dataset: - !COCODataSet - image_dir: val2017 - anno_path: annotations/instances_val2017.json - dataset_dir: dataset/coco - sample_transforms: - - !DecodeImage - to_rgb: true - - !NormalizeImage - is_channel_first: false - is_scale: true - mean: [0.485,0.456,0.406] - std: [0.229, 0.224,0.225] - - !ResizeImage - interp: 1 - max_size: 1333 - target_size: 800 - use_cv2: true - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !PadBatch - pad_to_stride: 32 - use_padded_im_info: false - pad_gt: True - batch_size: 2 - shuffle: false - drop_empty: false - worker_num: 2 - -TestReader: - inputs_def: - fields: ['image', 'im_info', 'im_id', 'im_shape'] - dataset: - !ImageFolder - anno_path: annotations/instances_val2017.json - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeImage - is_channel_first: false - is_scale: true - mean: [0.485,0.456,0.406] - std: [0.229, 0.224,0.225] - - !ResizeImage - interp: 1 - max_size: 1333 - target_size: 800 - use_cv2: true - - !Permute - channel_first: true - to_bgr: false - batch_size: 1 - shuffle: false diff --git a/configs/mask_reader.yml b/configs/mask_reader.yml deleted file mode 100644 index c5c486965..000000000 --- a/configs/mask_reader.yml +++ /dev/null @@ -1,99 +0,0 @@ -TrainReader: - inputs_def: - fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_mask'] - dataset: - !COCODataSet - image_dir: train2017 - anno_path: annotations/instances_train2017.json - dataset_dir: dataset/coco - sample_transforms: - - 
!DecodeImage - to_rgb: true - - !RandomFlipImage - prob: 0.5 - is_mask_flip: true - - !NormalizeImage - is_channel_first: false - is_scale: true - mean: [0.485,0.456,0.406] - std: [0.229, 0.224,0.225] - - !ResizeImage - target_size: 800 - max_size: 1333 - interp: 1 - use_cv2: true - - !Permute - to_bgr: false - channel_first: true - batch_transforms: - - !PadBatch - pad_to_stride: 32 - use_padded_im_info: false - pad_gt: True - batch_size: 1 - shuffle: true - worker_num: 2 - drop_last: false - use_process: false - -EvalReader: - inputs_def: - fields: ['image', 'im_info', 'im_id', 'im_shape'] - dataset: - !COCODataSet - image_dir: val2017 - anno_path: annotations/instances_val2017.json - dataset_dir: dataset/coco - sample_transforms: - - !DecodeImage - to_rgb: true - - !NormalizeImage - is_channel_first: false - is_scale: true - mean: [0.485,0.456,0.406] - std: [0.229, 0.224,0.225] - - !ResizeImage - interp: 1 - max_size: 1333 - target_size: 800 - use_cv2: true - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !PadBatch - pad_to_stride: 32 - use_padded_im_info: false - pad_gt: True - batch_size: 1 - shuffle: false - drop_last: false - drop_empty: false - worker_num: 2 - -TestReader: - inputs_def: - fields: ['image', 'im_info', 'im_id', 'im_shape'] - dataset: - !ImageFolder - anno_path: annotations/instances_val2017.json - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeImage - is_channel_first: false - is_scale: true - mean: [0.485,0.456,0.406] - std: [0.229, 0.224,0.225] - - !ResizeImage - interp: 1 - max_size: 1333 - target_size: 800 - use_cv2: true - - !Permute - channel_first: true - to_bgr: false - batch_size: 1 - shuffle: false - drop_last: false diff --git a/configs/optimize/rcnn.yml b/configs/optimize/rcnn.yml new file mode 100644 index 000000000..afec45906 --- /dev/null +++ b/configs/optimize/rcnn.yml @@ -0,0 +1,20 @@ +Optimize: + learning_rate: + name: BaseLR + base_lr: 0.01 + decay: + name: PiecewiseDecay + gamma: 0.1 + milestones: [16, 22] + warmup: + name: LinearWarmup + start_factor: 0.3333333333333333 + steps: 500 + + optimizer: + name: Momentum + momentum: 0.9 + + regularizer: + name: L2 + factor: 0.0001 diff --git a/configs/optimize/yolo.yml b/configs/optimize/yolo.yml new file mode 100644 index 000000000..a7bdccfed --- /dev/null +++ b/configs/optimize/yolo.yml @@ -0,0 +1,20 @@ +Optimize: + learning_rate: + name: BaseLR + base_lr: 0.01 + decay: + name: PiecewiseDecay + gamma: 0.1 + milestones: [200, 250] + warmup: + name: LinearWarmup + start_factor: 0. 
+ steps: 4000 + + optimizer: + name: Momentum + momentum: 0.9 + + regularizer: + name: L2 + factor: 0.0005 diff --git a/configs/reader/faster_rcnn.yml b/configs/reader/faster_rcnn.yml new file mode 100644 index 000000000..4407940da --- /dev/null +++ b/configs/reader/faster_rcnn.yml @@ -0,0 +1,52 @@ +TrainReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd'] + dataset: + name: COCODataset + dataset_dir: /home/ai/dataset/COCO17/ + image_dir: train2017 + anno_path: annotations/instances_train2017.json + sample_transforms: + - DecodeImage: {to_rgb: true} + - RandomFlipImage: {prob: 0.5} + - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} + - Permute: {to_bgr: false, channel_first: true} + batch_transforms: + - PadBatch: {pad_to_stride: 0, use_padded_im_info: false, pad_gt: True} + batch_size: 1 + shuffle: true + +EvalReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'im_shape'] + dataset: + name: COCODataset + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: /home/ai/dataset/COCO17 + sample_transforms: + - DecodeImage: {to_rgb: true} + - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - ResizeImage: {interp: 1, max_size: 1333, target_size: 800, use_cv2: true} + - Permute: {channel_first: true, to_bgr: false} + batch_transforms: + - PadBatch: {pad_to_stride: 0, use_padded_im_info: false, pad_gt: True} + batch_size: 2 + shuffle: false + drop_empty: false + +TestReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'im_shape'] + dataset: + name: ImageFolder + anno_path: annotations/instances_val2017.json + sample_transforms: + - DecodeImage: {to_rgb: true, with_mixup: false} + - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - ResizeImage: {interp: 1, max_size: 1333, target_size: 800, use_cv2: true} + - Permute: {channel_first: true, to_bgr: false} + batch_size: 1 + shuffle: false + drop_last: false diff --git a/configs/reader/mask_rcnn.yml b/configs/reader/mask_rcnn.yml new file mode 100644 index 000000000..690e2e54c --- /dev/null +++ b/configs/reader/mask_rcnn.yml @@ -0,0 +1,59 @@ +worker_num: 0 +use_prefetch: False + +TrainReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly'] + dataset: + name: COCODataset + dataset_dir: dataset/coco + image_dir: train2017 + anno_path: annotations/instances_train2017.json + sample_transforms: + - DecodeImage: {to_rgb: true} + - RandomFlipImage: {prob: 0.5, is_mask_flip: true} + - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} + - Permute: {to_bgr: false, channel_first: true} + batch_transforms: + - PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: True} + batch_size: 1 + shuffle: true + drop_last: false + + +EvalReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'im_shape'] + dataset: + name: COCODataset + dataset_dir: dataset/coco + image_dir: val2017 + anno_path: annotations/instances_val2017.json + sample_transforms: + - DecodeImage: {to_rgb: true} + - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - 
ResizeImage: {interp: 1, max_size: 1333, target_size: 800, use_cv2: true} + - Permute: {channel_first: true, to_bgr: false} + batch_transforms: + - PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: True} + batch_size: 1 + shuffle: false + drop_last: false + drop_empty: false + + +TestReader: + inputs_def: + fields: ['image', 'im_info', 'im_id', 'im_shape'] + dataset: + name: ImageFolder + anno_path: annotations/instances_val2017.json + sample_transforms: + - DecodeImage: {to_rgb: true, with_mixup: false} + - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - ResizeImage: {interp: 1, max_size: 1333, target_size: 800, use_cv2: true} + - Permute: {channel_first: true, to_bgr: false} + batch_size: 1 + shuffle: false + drop_last: false diff --git a/configs/reader/yolo.yml b/configs/reader/yolo.yml new file mode 100644 index 000000000..cc0bd95a8 --- /dev/null +++ b/configs/reader/yolo.yml @@ -0,0 +1,74 @@ +worker_num: 0 +use_prefetch: False + +TrainReader: + inputs_def: + fields: ['image', 'gt_bbox', 'gt_class', 'gt_score'] + num_max_boxes: 50 + dataset: + name: COCODataset + dataset_dir: dataset/coco + image_dir: train2017 + anno_path: annotations/instances_train2017.json + with_background: false + sample_transforms: + - DecodeImage: {to_rgb: True, with_mixup: True} + - MixupImage: {alpha: 1.5, beta: 1.5} + - ColorDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomCrop: {} + - RandomFlipImage: {is_normalized: false} + - NormalizeBox: {} + - PadBox: {num_max_boxes: 50} + - BboxXYXY2XYWH: {} + batch_transforms: + - RandomShape: {sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_inter: True} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True, is_channel_first: false} + - Permute: {to_bgr: false, channel_first: True} + # Gt2YoloTarget is only used when use_fine_grained_loss set as true, + # this operator will be deleted automatically if use_fine_grained_loss + # is set as false + - Gt2YoloTarget: { + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]], + downsample_ratios: [32, 16, 8]} + batch_size: 8 + shuffle: true + drop_last: true + + +EvalReader: + inputs_def: + fields: ['image', 'im_size', 'im_id'] + num_max_boxes: 50 + dataset: + name: COCODataset + dataset_dir: dataset/coco + image_dir: val2017 + anno_path: annotations/instances_val2017.json + with_background: false + sample_transforms: + - DecodeImage: {to_rgb: True} + - ResizeImage: {target_size: 608, interp: 2} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True, is_channel_first: false} + - PadBox: {num_max_boxes: 50} + - Permute: {to_bgr: false, channel_first: True} + batch_size: 8 + drop_empty: false + +TestReader: + inputs_def: + image_shape: [3, 608, 608] + fields: ['image', 'im_size', 'im_id'] + dataset: + name: ImageFolder + anno_path: annotations/instances_val2017.json + with_background: false + sample_transforms: + - DecodeImage: {to_rgb: True} + - ResizeImage: {target_size: 608, interp: 2} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True, is_channel_first: false} + - Permute: {to_bgr: false, channel_first: True} + batch_size: 1 diff --git a/configs/yolov3_reader.yml b/configs/yolov3_reader.yml deleted file mode 100644 index 2a8463f1e..000000000 --- 
a/configs/yolov3_reader.yml +++ /dev/null @@ -1,111 +0,0 @@ -TrainReader: - inputs_def: - fields: ['image', 'gt_bbox', 'gt_class', 'gt_score'] - num_max_boxes: 50 - dataset: - !COCODataSet - image_dir: train2017 - anno_path: annotations/instances_train2017.json - dataset_dir: dataset/coco - with_background: false - sample_transforms: - - !DecodeImage - to_rgb: True - with_mixup: True - - !MixupImage - alpha: 1.5 - beta: 1.5 - - !ColorDistort {} - - !RandomExpand - fill_value: [123.675, 116.28, 103.53] - - !RandomCrop {} - - !RandomFlipImage - is_normalized: false - - !NormalizeBox {} - - !PadBox - num_max_boxes: 50 - - !BboxXYXY2XYWH {} - batch_transforms: - - !RandomShape - sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608] - random_inter: True - - !NormalizeImage - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - is_scale: True - is_channel_first: false - - !Permute - to_bgr: false - channel_first: True - # Gt2YoloTarget is only used when use_fine_grained_loss set as true, - # this operator will be deleted automatically if use_fine_grained_loss - # is set as false - - !Gt2YoloTarget - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors: [[10, 13], [16, 30], [33, 23], - [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - downsample_ratios: [32, 16, 8] - batch_size: 8 - shuffle: true - mixup_epoch: 250 - drop_last: true - worker_num: 8 - bufsize: 16 - use_process: true - - -EvalReader: - inputs_def: - fields: ['image', 'im_size', 'im_id'] - num_max_boxes: 50 - dataset: - !COCODataSet - image_dir: val2017 - anno_path: annotations/instances_val2017.json - dataset_dir: dataset/coco - with_background: false - sample_transforms: - - !DecodeImage - to_rgb: True - - !ResizeImage - target_size: 608 - interp: 2 - - !NormalizeImage - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - is_scale: True - is_channel_first: false - - !PadBox - num_max_boxes: 50 - - !Permute - to_bgr: false - channel_first: True - batch_size: 8 - drop_empty: false - worker_num: 8 - bufsize: 16 - -TestReader: - inputs_def: - image_shape: [3, 608, 608] - fields: ['image', 'im_size', 'im_id'] - dataset: - !ImageFolder - anno_path: annotations/instances_val2017.json - with_background: false - sample_transforms: - - !DecodeImage - to_rgb: True - - !ResizeImage - target_size: 608 - interp: 2 - - !NormalizeImage - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - is_scale: True - is_channel_first: false - - !Permute - to_bgr: false - channel_first: True - batch_size: 1 diff --git a/ppdet/data/__init__.py b/ppdet/data/__init__.py index 1a6576e78..0ebe96551 100644 --- a/ppdet/data/__init__.py +++ b/ppdet/data/__init__.py @@ -1,19 +1,4 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import - -from .reader import * from .source import * from .transform import * +from .sampler import * +from .loader import * diff --git a/ppdet/data/loader.py b/ppdet/data/loader.py new file mode 100644 index 000000000..ab74a479d --- /dev/null +++ b/ppdet/data/loader.py @@ -0,0 +1,235 @@ +import copy +import traceback +import logging +import threading +import sys +if sys.version_info >= (3, 0): + import queue as Queue +else: + import Queue +import numpy as np +from paddle.io import DataLoader +from ppdet.core.workspace import register, serializable, create +from .sampler import DistributedBatchSampler +from .transform import operators +from .transform import batch_operators + +logger = logging.getLogger(__name__) + + +class Compose(object): + def __init__(self, transforms, fields=None, from_=operators, + num_classes=81): + self.transforms = transforms + self.transforms_cls = [] + for t in self.transforms: + for k, v in t.items(): + print(k, v) + op_cls = getattr(from_, k) + self.transforms_cls.append(op_cls(**v)) + if hasattr(op_cls, 'num_classes'): + op_cls.num_classes = num_classes + + self.fields = fields + + def __call__(self, data): + if self.fields is not None: + data_new = [] + for item in data: + data_new.append(dict(zip(self.fields, item))) + data = data_new + + for f in self.transforms_cls: + try: + data = f(data) + except Exception as e: + stack_info = traceback.format_exc() + logger.warn("fail to map op [{}] with error: {} and stack:\n{}". + format(f, e, str(stack_info))) + raise e + + if self.fields is not None: + data_new = [] + for item in data: + batch = [] + for k in self.fields: + batch.append(item[k]) + data_new.append(batch) + batch_size = len(data_new) + data_new = list(zip(*data_new)) + if batch_size > 1: + data = [ + np.array(item).astype(item[0].dtype) for item in data_new + ] + else: + data = data_new + + return data + + +class Prefetcher(threading.Thread): + def __init__(self, iterator, prefetch_num=1): + threading.Thread.__init__(self) + self.queue = Queue.Queue(prefetch_num) + self.iterator = iterator + self.daemon = True + self.start() + + def run(self): + for item in self.iterator: + self.queue.put(item) + self.queue.put(None) + + def next(self): + next_item = self.queue.get() + if next_item is None: + raise StopIteration + return next_item + + # Python 3 compatibility + def __next__(self): + return self.next() + + def __iter__(self): + return self + + +class DataLoaderPrefetch(DataLoader): + def __init__(self, + dataset, + batch_sampler, + collate_fn, + num_workers, + places, + return_list, + prefetch_num=1): + super(DataLoaderPrefetch, self).__init__( + dataset=dataset, + batch_sampler=batch_sampler, + collate_fn=collate_fn, + num_workers=num_workers, + places=places, + return_list=return_list) + self.prefetch_num = prefetch_num + + def __iter__(self): + return Prefetcher(super().__iter__(), self.prefetch_num) + + +class BaseDataLoader(object): + __share__ = ['num_classes'] + __inject__ = ['dataset'] + + def __init__(self, + inputs_def=None, + dataset=None, + sample_transforms=None, + batch_transforms=None, + batch_size=1, + shuffle=False, + drop_last=False, + drop_empty=True, + num_classes=81): + # dataset + self._dataset = dataset #create(dataset['name']) + self._dataset.parse_dataset() + # out fields + self._fields = copy.deepcopy(inputs_def[ + 'fields']) if inputs_def else None + # sample transform + self._sample_transforms = Compose( + sample_transforms, num_classes=num_classes) + # get data + 
self._dataset.set_out(self._sample_transforms, self._fields) + + # batch transfrom + if batch_transforms: + self._batch_transforms = Compose(batch_transforms, self._fields, + batch_operators, num_classes) + + # batch sampler + self._batch_sampler = DistributedBatchSampler( + self._dataset, + batch_size=batch_size, + shuffle=shuffle, + drop_last=drop_last) + + self.batch_size = batch_size + + def __call__(self, + worker_num, + device, + return_list=False, + use_prefetch=False, + prefetch_num=None): + if use_prefetch: + loader = DataLoaderPrefetch( + dataset=self._dataset, + batch_sampler=self._batch_sampler, + collate_fn=self._batch_transforms, + num_workers=worker_num, + places=device, + return_list=return_list, + prefetch_num=prefetch_num + if prefetch_num is not None else self.batch_size) + else: + loader = DataLoader( + dataset=self._dataset, + batch_sampler=self._batch_sampler, + collate_fn=self._batch_transforms, + num_workers=worker_num, + places=device, + return_list=return_list) + + return loader, len(self._batch_sampler) + + +@register +class TrainReader(BaseDataLoader): + def __init__(self, + inputs_def=None, + dataset=None, + sample_transforms=None, + batch_transforms=None, + batch_size=1, + shuffle=False, + drop_last=False, + drop_empty=True, + num_classes=81): + super(TrainReader, self).__init__( + inputs_def, dataset, sample_transforms, batch_transforms, + batch_size, shuffle, drop_last, drop_empty, num_classes) + + +@register +class EvalReader(BaseDataLoader): + def __init__(self, + inputs_def=None, + dataset=None, + sample_transforms=None, + batch_transforms=None, + batch_size=1, + shuffle=False, + drop_last=False, + drop_empty=True, + num_classes=81): + super(EvalReader, self).__init__(inputs_def, dataset, sample_transforms, + batch_transforms, batch_size, shuffle, + drop_last, drop_empty, num_classes) + + +@register +class TestReader(BaseDataLoader): + def __init__(self, + inputs_def=None, + dataset=None, + sample_transforms=None, + batch_transforms=None, + batch_size=1, + shuffle=False, + drop_last=False, + drop_empty=True, + num_classes=81): + super(TestReader, self).__init__(inputs_def, dataset, sample_transforms, + batch_transforms, batch_size, shuffle, + drop_last, drop_empty, num_classes) diff --git a/ppdet/data/parallel_map.py b/ppdet/data/parallel_map.py deleted file mode 100644 index 789fda1f2..000000000 --- a/ppdet/data/parallel_map.py +++ /dev/null @@ -1,311 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# function: -# transform samples in 'source' using 'worker' - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import six -if six.PY3: - from queue import Empty -else: - from Queue import Empty - -import uuid -import logging -import signal -import threading -import traceback - -logger = logging.getLogger(__name__) - -main_pid = os.getpid() -worker_set = set() - - -class EndSignal(object): - """ signal used to notify worker to exit - """ - - def __init__(self, id, errno=0, errmsg=''): - self.id = id - self.errno = errno - self.errmsg = errmsg - - -class ParallelMap(object): - """ - Transform samples to mapped samples which is similar to - 'basic.MappedDataset', but multiple workers (threads or processes) - will be used - - Notes: - this class is not thread-safe - """ - - def __init__(self, - source, - worker, - worker_num, - bufsize=100, - use_process=False, - memsize='3G'): - self._worker_num = worker_num - self._bufsize = bufsize - self._use_process = use_process - if self._use_process and sys.platform == "win32": - logger.debug("Use multi-thread reader instead of " - "multi-process reader on Windows.") - self._use_process = False - if self._use_process and type(memsize) is str: - assert memsize[-1].lower() in ['g', 'm'], \ - "invalid param for memsize[%s], should be " \ - "ended with 'G' or 'g' or 'M' or 'm'" % (memsize) - power = 3 if memsize[-1].lower() == 'g' else 2 - self._memsize = int(memsize[:-1]) * (1024**power) - self._started = False - self._source = source - self._worker = worker - self._exit = False - self._setup() - self._souce_drained = False - - def __iter__(self): - return self - - def __next__(self): - return self.next() - - def _setup(self): - """setup input/output queues and workers """ - use_process = self._use_process - - bufsize = self._bufsize - if use_process: - from .shared_queue import SharedQueue as Queue - from multiprocessing import Process as Worker - from multiprocessing import Event - memsize = self._memsize - self._inq = Queue(bufsize, memsize=memsize) - self._outq = Queue(bufsize, memsize=memsize) - else: - if six.PY3: - from queue import Queue - else: - from Queue import Queue - from threading import Thread as Worker - from threading import Event - self._inq = Queue(bufsize) - self._outq = Queue(bufsize) - - consumer_num = self._worker_num - id = str(uuid.uuid4())[-3:] - self._producer = threading.Thread( - target=self._produce, - args=('producer-' + id, self._source, self._inq)) - self._producer.daemon = True - - self._consumers = [] - self._consumer_endsig = {} - global worker_set - for i in range(consumer_num): - consumer_id = 'consumer-' + id + '-' + str(i) - p = Worker( - target=self._consume, - args=(consumer_id, self._inq, self._outq, self._worker)) - self._consumers.append(p) - p.daemon = True - setattr(p, 'id', consumer_id) - if use_process: - worker_set.add(p) - - self._epoch = -1 - self._feeding_ev = Event() - self._produced = 0 # produced sample in self._produce - self._consumed = 0 # consumed sample in self.next - - def _produce(self, id, source, inq): - """Fetch data from source and feed it to 'inq' queue""" - endsig = EndSignal(id) - while True: - self._feeding_ev.wait() - if self._exit: - break - try: - s = source.next() - inq.put(s) - self._produced += 1 - except StopIteration: - self._souce_drained = True - self._feeding_ev.clear() - self._feeding_ev.wait() - except Exception as e: - endsig.errno = -1 - endsig.errmsg = "producer[{}] failed with 
error: {}" \ - .format(id, str(e)) - inq.put(endsig) - break - - def _consume(self, id, inq, outq, worker): - """Fetch data from 'inq', process it and put result to 'outq'""" - if self._use_process: - # handle SIGTERM signal to exit to prevent print stack frame - signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit()) - - endsig = EndSignal(id) - while True: - sample = inq.get() - if isinstance(sample, EndSignal): - endsig.errno = sample.errno - endsig.errmsg = "consumer[{}] exits for reason[{}]" \ - .format(id, sample.errmsg) - outq.put(endsig) - break - - try: - result = worker(sample) - outq.put(result) - except Exception as e: - endsig.errno = -2 - endsig.errmsg = "consumer[{}] failed to map with error:[{}]" \ - .format(id, str(e)) - outq.put(endsig) - break - - def drained(self): - assert self._epoch >= 0, "first epoch has not started yet" - return self._source.drained() and self._produced == self._consumed - - def stop(self): - """ notify to exit - """ - self._exit = True - self._feeding_ev.set() - for _ in range(len(self._consumers)): - self._inq.put(EndSignal(0, "notify consumers to exit")) - - def _consumer_healthy(self): - abnormal_num = 0 - for w in self._consumers: - if not w.is_alive() and w.id not in self._consumer_endsig: - abnormal_num += 1 - if self._use_process: - errmsg = "consumer[{}] exit abnormally with exitcode[{}]" \ - .format(w.pid, w.exitcode) - else: - errmsg = "consumer[{}] exit abnormally".format(w.ident) - - logger.warn(errmsg) - - if abnormal_num > 0: - logger.warn("{} consumers have exited abnormally!!!" \ - .format(abnormal_num)) - - return abnormal_num == 0 - - def next(self): - """ get next transformed sample - """ - if self._epoch < 0: - self.reset() - - if self.drained(): - raise StopIteration() - - while not self._exit: - try: - sample = self._outq.get(timeout=3) - except Empty as e: - if not self._consumer_healthy(): - raise StopIteration() - else: - continue - - if isinstance(sample, EndSignal): - self._consumer_endsig[sample.id] = sample - logger.warn("recv endsignal from outq with errmsg[{}]" \ - .format(sample.errmsg)) - - if len(self._consumer_endsig.keys()) < len(self._consumers): - self._inq.put(sample) - else: - self._exit = True - raise StopIteration("all consumers exited, no more samples") - else: - self._consumed += 1 - return sample - - raise StopIteration() - - def reset(self): - """ reset for a new epoch of samples - """ - assert not self._exit, "cannot reset for already stopped dataset" - - if self._epoch < 0: - self._epoch = 0 - for w in self._consumers: - w.start() - self._producer.start() - else: - assert self._consumer_healthy(), "cannot start another pass of data" \ - " for some consumers exited abnormally before!!!" - - if not self.drained(): - logger.warn("reset before epoch[{}] finishes".format( - self._epoch)) - self._produced = self._produced - self._consumed - else: - self._produced = 0 - - self._epoch += 1 - - assert len(self._consumer_endsig.keys()) == 0, "some consumers already exited," \ - + " cannot start another epoch" - - self._source.reset() - self._souce_drained = False - self._consumed = 0 - self._feeding_ev.set() - - -# FIXME: fix me if you have better impliment -# handle terminate reader process, do not print stack frame -signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit()) - - -# FIXME(dkp): KeyboardInterrupt should be handled inside ParallelMap -# and do such as: 1. exit workers 2. close queues 3. 
release shared -# memory, HACK KeyboardInterrupt with global signal.SIGINT handler -# here, should be refined later -def _term_workers(sig_num, frame): - global worker_set, main_pid - # only do subporcess killing in main process - if os.getpid() != main_pid: - return - - logger.info("KeyboardInterrupt: main proc {} exit, kill subprocess {}" \ - .format(os.getpid(), [w.pid for w in worker_set])) - for w in worker_set: - if w.pid is not None: - os.kill(w.pid, signal.SIGINT) - sys.exit() - - -signal.signal(signal.SIGINT, _term_workers) diff --git a/ppdet/data/reader.py b/ppdet/data/reader.py deleted file mode 100644 index 7d808b589..000000000 --- a/ppdet/data/reader.py +++ /dev/null @@ -1,449 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import copy -import functools -import collections -import traceback -import numpy as np -import logging - -from ppdet.core.workspace import register, serializable - -from .parallel_map import ParallelMap -from .transform.batch_operators import Gt2YoloTarget - -__all__ = ['Reader', 'create_reader'] - -logger = logging.getLogger(__name__) - - -class Compose(object): - def __init__(self, transforms, ctx=None): - self.transforms = transforms - self.ctx = ctx - - def __call__(self, data): - ctx = self.ctx if self.ctx else {} - for f in self.transforms: - try: - data = f(data, ctx) - except Exception as e: - stack_info = traceback.format_exc() - logger.warn("fail to map op [{}] with error: {} and stack:\n{}". 
- format(f, e, str(stack_info))) - raise e - return data - - -def _calc_img_weights(roidbs): - """ calculate the probabilities of each sample - """ - imgs_cls = [] - num_per_cls = {} - img_weights = [] - for i, roidb in enumerate(roidbs): - img_cls = set([k for cls in roidbs[i]['gt_class'] for k in cls]) - imgs_cls.append(img_cls) - for c in img_cls: - if c not in num_per_cls: - num_per_cls[c] = 1 - else: - num_per_cls[c] += 1 - - for i in range(len(roidbs)): - weights = 0 - for c in imgs_cls[i]: - weights += 1 / num_per_cls[c] - img_weights.append(weights) - # probabilities sum to 1 - img_weights = img_weights / np.sum(img_weights) - return img_weights - - -def _has_empty(item): - def empty(x): - if isinstance(x, np.ndarray) and x.size == 0: - return True - elif isinstance(x, collections.Sequence) and len(x) == 0: - return True - else: - return False - - if isinstance(item, collections.Sequence) and len(item) == 0: - return True - if item is None: - return True - if empty(item): - return True - return False - - -def _segm(samples): - assert 'gt_poly' in samples - segms = samples['gt_poly'] - if 'is_crowd' in samples: - is_crowd = samples['is_crowd'] - if len(segms) != 0: - assert len(segms) == is_crowd.shape[0] - - gt_masks = [] - valid = True - for i in range(len(segms)): - segm = segms[i] - gt_segm = [] - if 'is_crowd' in samples and is_crowd[i]: - gt_segm.append([[0, 0]]) - else: - for poly in segm: - if len(poly) == 0: - valid = False - break - gt_segm.append(np.array(poly).reshape(-1, 2)) - if (not valid) or len(gt_segm) == 0: - break - gt_masks.append(gt_segm) - return gt_masks - - -def batch_arrange(batch_samples, fields): - def im_shape(samples, dim=3): - # hard code - assert 'h' in samples - assert 'w' in samples - if dim == 3: # RCNN, .. - return np.array((samples['h'], samples['w'], 1), dtype=np.float32) - else: # YOLOv3, .. - return np.array((samples['h'], samples['w']), dtype=np.int32) - - arrange_batch = [] - for samples in batch_samples: - one_ins = () - for i, field in enumerate(fields): - if field == 'gt_mask': - one_ins += (_segm(samples), ) - elif field == 'im_shape': - one_ins += (im_shape(samples), ) - elif field == 'im_size': - one_ins += (im_shape(samples, 2), ) - else: - if field == 'is_difficult': - field = 'difficult' - assert field in samples, '{} not in samples'.format(field) - one_ins += (samples[field], ) - arrange_batch.append(one_ins) - return arrange_batch - - -@register -@serializable -class Reader(object): - """ - Args: - dataset (DataSet): DataSet object - sample_transforms (list of BaseOperator): a list of sample transforms - operators. - batch_transforms (list of BaseOperator): a list of batch transforms - operators. - batch_size (int): batch size. - shuffle (bool): whether shuffle dataset or not. Default False. - drop_last (bool): whether drop last batch or not. Default False. - drop_empty (bool): whether drop sample when it's gt is empty or not. - Default True. - mixup_epoch (int): mixup epoc number. Default is -1, meaning - not use mixup. - cutmix_epoch (int): cutmix epoc number. Default is -1, meaning - not use cutmix. - class_aware_sampling (bool): whether use class-aware sampling or not. - Default False. - worker_num (int): number of working threads/processes. - Default -1, meaning not use multi-threads/multi-processes. - use_process (bool): whether use multi-processes or not. - It only works when worker_num > 1. Default False. 
- bufsize (int): buffer size for multi-threads/multi-processes, - please note, one instance in buffer is one batch data. - memsize (str): size of shared memory used in result queue when - use_process is true. Default 3G. - inputs_def (dict): network input definition use to get input fields, - which is used to determine the order of returned data. - devices_num (int): number of devices. - """ - - def __init__(self, - dataset=None, - sample_transforms=None, - batch_transforms=None, - batch_size=None, - shuffle=False, - drop_last=False, - drop_empty=True, - mixup_epoch=-1, - cutmix_epoch=-1, - class_aware_sampling=False, - worker_num=-1, - use_process=False, - use_fine_grained_loss=False, - num_classes=80, - bufsize=-1, - memsize='3G', - inputs_def=None, - devices_num=1): - self._dataset = dataset - self._roidbs = self._dataset.get_roidb() - self._fields = copy.deepcopy(inputs_def[ - 'fields']) if inputs_def else None - - # transform - self._sample_transforms = Compose(sample_transforms, - {'fields': self._fields}) - self._batch_transforms = None - - if use_fine_grained_loss: - for bt in batch_transforms: - if isinstance(bt, Gt2YoloTarget): - bt.num_classes = num_classes - elif batch_transforms: - batch_transforms = [ - bt for bt in batch_transforms - if not isinstance(bt, Gt2YoloTarget) - ] - - if batch_transforms: - self._batch_transforms = Compose(batch_transforms, - {'fields': self._fields}) - - # data - if inputs_def and inputs_def.get('multi_scale', False): - from ppdet.modeling.architectures.input_helper import multiscale_def - im_shape = inputs_def[ - 'image_shape'] if 'image_shape' in inputs_def else [ - 3, None, None - ] - _, ms_fields = multiscale_def(im_shape, inputs_def['num_scales'], - inputs_def['use_flip']) - self._fields += ms_fields - self._batch_size = batch_size - self._shuffle = shuffle - self._drop_last = drop_last - self._drop_empty = drop_empty - - # sampling - self._mixup_epoch = mixup_epoch - self._cutmix_epoch = cutmix_epoch - self._class_aware_sampling = class_aware_sampling - - self._load_img = False - self._sample_num = len(self._roidbs) - - if self._class_aware_sampling: - self.img_weights = _calc_img_weights(self._roidbs) - self._indexes = None - - self._pos = -1 - self._epoch = -1 - - self._curr_iter = 0 - - # multi-process - self._worker_num = worker_num - self._parallel = None - if self._worker_num > -1: - task = functools.partial(self.worker, self._drop_empty) - bufsize = devices_num * 2 if bufsize == -1 else bufsize - self._parallel = ParallelMap(self, task, worker_num, bufsize, - use_process, memsize) - - def __call__(self): - if self._worker_num > -1: - return self._parallel - else: - return self - - def __iter__(self): - return self - - def reset(self): - """implementation of Dataset.reset - """ - if self._epoch < 0: - self._epoch = 0 - else: - self._epoch += 1 - - self.indexes = [i for i in range(self.size())] - if self._class_aware_sampling: - self.indexes = np.random.choice( - self._sample_num, - self._sample_num, - replace=True, - p=self.img_weights) - - if self._shuffle: - trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0)) - np.random.seed(self._epoch + trainer_id) - np.random.shuffle(self.indexes) - - if self._mixup_epoch > 0 and len(self.indexes) < 2: - logger.debug("Disable mixup for dataset samples " - "less than 2 samples") - self._mixup_epoch = -1 - if self._cutmix_epoch > 0 and len(self.indexes) < 2: - logger.info("Disable cutmix for dataset samples " - "less than 2 samples") - self._cutmix_epoch = -1 - - self._pos = 0 - - def 
__next__(self): - return self.next() - - def next(self): - if self._epoch < 0: - self.reset() - if self.drained(): - raise StopIteration - batch = self._load_batch() - self._curr_iter += 1 - if self._drop_last and len(batch) < self._batch_size: - raise StopIteration - if self._worker_num > -1: - return batch - else: - return self.worker(self._drop_empty, batch) - - def _load_batch(self): - batch = [] - bs = 0 - while bs != self._batch_size: - if self._pos >= self.size(): - break - pos = self.indexes[self._pos] - sample = copy.deepcopy(self._roidbs[pos]) - sample["curr_iter"] = self._curr_iter - self._pos += 1 - - if self._drop_empty and self._fields and 'gt_mask' in self._fields: - if _has_empty(_segm(sample)): - #logger.warn('gt_mask is empty or not valid in {}'.format( - # sample['im_file'])) - continue - if self._drop_empty and self._fields and 'gt_bbox' in self._fields: - if _has_empty(sample['gt_bbox']): - #logger.warn('gt_bbox {} is empty or not valid in {}, ' - # 'drop this sample'.format( - # sample['im_file'], sample['gt_bbox'])) - continue - - if self._load_img: - sample['image'] = self._load_image(sample['im_file']) - - if self._epoch < self._mixup_epoch: - num = len(self.indexes) - mix_idx = np.random.randint(1, num) - mix_idx = self.indexes[(mix_idx + self._pos - 1) % num] - sample['mixup'] = copy.deepcopy(self._roidbs[mix_idx]) - sample['mixup']["curr_iter"] = self._curr_iter - if self._load_img: - sample['mixup']['image'] = self._load_image(sample['mixup'][ - 'im_file']) - if self._epoch < self._cutmix_epoch: - num = len(self.indexes) - mix_idx = np.random.randint(1, num) - sample['cutmix'] = copy.deepcopy(self._roidbs[mix_idx]) - sample['cutmix']["curr_iter"] = self._curr_iter - if self._load_img: - sample['cutmix']['image'] = self._load_image(sample[ - 'cutmix']['im_file']) - - batch.append(sample) - bs += 1 - return batch - - def worker(self, drop_empty=True, batch_samples=None): - """ - sample transform and batch transform. - """ - batch = [] - for sample in batch_samples: - sample = self._sample_transforms(sample) - if drop_empty and 'gt_bbox' in sample: - if _has_empty(sample['gt_bbox']): - #logger.warn('gt_bbox {} is empty or not valid in {}, ' - # 'drop this sample'.format( - # sample['im_file'], sample['gt_bbox'])) - continue - batch.append(sample) - if len(batch) > 0 and self._batch_transforms: - batch = self._batch_transforms(batch) - if len(batch) > 0 and self._fields: - batch = batch_arrange(batch, self._fields) - return batch - - def _load_image(self, filename): - with open(filename, 'rb') as f: - return f.read() - - def size(self): - """ implementation of Dataset.size - """ - return self._sample_num - - def drained(self): - """ implementation of Dataset.drained - """ - assert self._epoch >= 0, 'The first epoch has not begin!' - return self._pos >= self.size() - - def stop(self): - if self._parallel: - self._parallel.stop() - - -def create_reader(cfg, max_iter=0, global_cfg=None, devices_num=1): - """ - Return iterable data reader. - - Args: - max_iter (int): number of iterations. 
- """ - if not isinstance(cfg, dict): - raise TypeError("The config should be a dict when creating reader.") - - # synchornize use_fine_grained_loss/num_classes from global_cfg to reader cfg - if global_cfg: - cfg['use_fine_grained_loss'] = getattr(global_cfg, - 'use_fine_grained_loss', False) - cfg['num_classes'] = getattr(global_cfg, 'num_classes', 80) - cfg['devices_num'] = devices_num - reader = Reader(**cfg)() - - def _reader(): - n = 0 - while True: - for _batch in reader: - if len(_batch) > 0: - yield _batch - n += 1 - if max_iter > 0 and n == max_iter: - return - reader.reset() - if max_iter <= 0: - return - - return _reader diff --git a/ppdet/data/sampler.py b/ppdet/data/sampler.py new file mode 100644 index 000000000..161b7546b --- /dev/null +++ b/ppdet/data/sampler.py @@ -0,0 +1,183 @@ +import os +import sys +import six +import time +import math +import socket +import contextlib +import numpy as np + +from paddle import fluid +from paddle.io import BatchSampler +from paddle.fluid.layers import collective +from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy + +_parallel_context_initialized = False + + +class DistributedBatchSampler(BatchSampler): + def __init__(self, dataset, batch_size, shuffle=False, drop_last=False): + self.dataset = dataset + + assert isinstance(batch_size, int) and batch_size > 0, \ + "batch_size should be a positive integer" + self.batch_size = batch_size + assert isinstance(shuffle, bool), \ + "shuffle should be a boolean value" + self.shuffle = shuffle + assert isinstance(drop_last, bool), \ + "drop_last should be a boolean number" + + self.drop_last = drop_last + self.nranks = ParallelEnv().nranks + self.local_rank = ParallelEnv().local_rank + self.epoch = 0 + self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.nranks)) + self.total_size = self.num_samples * self.nranks + + def __iter__(self): + num_samples = len(self.dataset) + indices = np.arange(num_samples).tolist() + indices += indices[:(self.total_size - len(indices))] + assert len(indices) == self.total_size + if self.shuffle: + np.random.RandomState(self.epoch).shuffle(indices) + self.epoch += 1 + + # subsample + def _get_indices_by_batch_size(indices): + subsampled_indices = [] + last_batch_size = self.total_size % (self.batch_size * self.nranks) + assert last_batch_size % self.nranks == 0 + last_local_batch_size = last_batch_size // self.nranks + + for i in range(self.local_rank * self.batch_size, + len(indices) - last_batch_size, + self.batch_size * self.nranks): + subsampled_indices.extend(indices[i:i + self.batch_size]) + + indices = indices[len(indices) - last_batch_size:] + subsampled_indices.extend(indices[ + self.local_rank * last_local_batch_size:( + self.local_rank + 1) * last_local_batch_size]) + return subsampled_indices + + if self.nranks > 1: + indices = _get_indices_by_batch_size(indices) + + assert len(indices) == self.num_samples + _sample_iter = iter(indices) + + batch_indices = [] + for idx in _sample_iter: + batch_indices.append(idx) + if len(batch_indices) == self.batch_size: + yield batch_indices + batch_indices = [] + if not self.drop_last and len(batch_indices) > 0: + yield batch_indices + + def __len__(self): + num_samples = self.num_samples + num_samples += int(not self.drop_last) * (self.batch_size - 1) + return num_samples // self.batch_size + + def set_epoch(self, epoch): + self.epoch = epoch + + +def _all_gather(x, nranks, ring_id=0, use_calc_stream=True): + return collective._c_allgather( + x, nranks, ring_id=ring_id, 
use_calc_stream=use_calc_stream) + + +def wait_server_ready(endpoints): + assert not isinstance(endpoints, six.string_types) + while True: + all_ok = True + not_ready_endpoints = [] + for ep in endpoints: + ip_port = ep.split(":") + with contextlib.closing( + socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + sock.settimeout(2) + result = sock.connect_ex((ip_port[0], int(ip_port[1]))) + if result != 0: + all_ok = False + not_ready_endpoints.append(ep) + if not all_ok: + time.sleep(3) + else: + break + + +def init_communicator(program, rank, nranks, wait_port, current_endpoint, + endpoints): + if nranks < 2: + return + other_endpoints = endpoints[:] + other_endpoints.remove(current_endpoint) + if rank == 0 and wait_port: + wait_server_ready(other_endpoints) + block = program.global_block() + nccl_id_var = block.create_var( + name=fluid.unique_name.generate('nccl_id'), + persistable=True, + type=fluid.core.VarDesc.VarType.RAW) + + block.append_op( + type='c_gen_nccl_id', + inputs={}, + outputs={'Out': nccl_id_var}, + attrs={ + 'rank': rank, + 'endpoint': current_endpoint, + 'other_endpoints': other_endpoints + }) + + block.append_op( + type='c_comm_init', + inputs={'X': nccl_id_var}, + outputs={}, + attrs={ + 'nranks': nranks, + 'rank': rank, + 'ring_id': 0, + }) + + +def prepare_distributed_context(place=None): + if place is None: + place = fluid.CUDAPlace(ParallelEnv().dev_id) if ParallelEnv().nranks > 1 \ + else fluid.CUDAPlace(0) + + strategy = ParallelStrategy() + strategy.nranks = ParallelEnv().nranks + strategy.local_rank = ParallelEnv().local_rank + strategy.trainer_endpoints = ParallelEnv().trainer_endpoints + strategy.current_endpoint = ParallelEnv().current_endpoint + + if strategy.nranks < 2: + return + + global _parallel_context_initialized + + if not _parallel_context_initialized and isinstance(place, fluid.CUDAPlace): + + def _init_context(): + communicator_prog = fluid.Program() + init_communicator(communicator_prog, strategy.local_rank, + strategy.nranks, True, strategy.current_endpoint, + strategy.trainer_endpoints) + exe = fluid.Executor(place) + exe.run(communicator_prog) + + fluid.disable_dygraph() + _init_context() + fluid.enable_dygraph(place) + + else: + assert ("Only support CUDAPlace for now.") + + _parallel_context_initialized = True + return strategy diff --git a/ppdet/data/shared_queue/__init__.py b/ppdet/data/shared_queue/__init__.py deleted file mode 100644 index f118eb76a..000000000 --- a/ppdet/data/shared_queue/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
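The new ppdet/data/sampler.py introduced above is built to drive paddle.io.DataLoader in dygraph mode, with DistributedBatchSampler slicing the index space per rank. Below is a minimal usage sketch, not part of this patch: ToyDataset, the batch size, and the two-epoch loop are illustrative placeholders standing in for a real DetDataset and training loop.

    # Hypothetical sketch: pairing the new DistributedBatchSampler with
    # paddle.io.DataLoader on a single card (nranks == 1 falls out naturally).
    import numpy as np
    from paddle import fluid
    from paddle.io import Dataset, DataLoader
    from ppdet.data.sampler import DistributedBatchSampler

    class ToyDataset(Dataset):
        # stand-in for a DetDataset subclass; returns (image, label) numpy pairs
        def __len__(self):
            return 16

        def __getitem__(self, idx):
            return np.zeros((3, 32, 32), dtype='float32'), np.array([idx], dtype='int64')

    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
    fluid.enable_dygraph(place)

    dataset = ToyDataset()
    # each rank draws a disjoint, padded slice of the indices; set_epoch keeps
    # shuffling consistent across ranks from epoch to epoch
    sampler = DistributedBatchSampler(dataset, batch_size=4, shuffle=True, drop_last=False)
    loader = DataLoader(dataset, batch_sampler=sampler, places=place, return_list=True)

    for epoch in range(2):
        sampler.set_epoch(epoch)
        for images, labels in loader:
            pass  # forward/backward would go here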
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -__all__ = ['SharedBuffer', 'SharedMemoryMgr', 'SharedQueue'] - -from .sharedmemory import SharedBuffer -from .sharedmemory import SharedMemoryMgr -from .sharedmemory import SharedMemoryError -from .queue import SharedQueue diff --git a/ppdet/data/shared_queue/queue.py b/ppdet/data/shared_queue/queue.py deleted file mode 100644 index 8f0ba8ab4..000000000 --- a/ppdet/data/shared_queue/queue.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import sys -import six -if six.PY3: - import pickle - from io import BytesIO as StringIO - from queue import Empty -else: - import cPickle as pickle - from cStringIO import StringIO - from Queue import Empty - -import logging -import traceback -import multiprocessing as mp -from multiprocessing.queues import Queue -from .sharedmemory import SharedMemoryMgr - -logger = logging.getLogger(__name__) - - -class SharedQueueError(ValueError): - """ SharedQueueError - """ - pass - - -class SharedQueue(Queue): - """ a Queue based on shared memory to communicate data between Process, - and it's interface is compatible with 'multiprocessing.queues.Queue' - """ - - def __init__(self, maxsize=0, mem_mgr=None, memsize=None, pagesize=None): - """ init - """ - if six.PY3: - super(SharedQueue, self).__init__(maxsize, ctx=mp.get_context()) - else: - super(SharedQueue, self).__init__(maxsize) - - if mem_mgr is not None: - self._shared_mem = mem_mgr - else: - self._shared_mem = SharedMemoryMgr( - capacity=memsize, pagesize=pagesize) - - def put(self, obj, **kwargs): - """ put an object to this queue - """ - obj = pickle.dumps(obj, -1) - buff = None - try: - buff = self._shared_mem.malloc(len(obj)) - buff.put(obj) - super(SharedQueue, self).put(buff, **kwargs) - except Exception as e: - stack_info = traceback.format_exc() - err_msg = 'failed to put a element to SharedQueue '\ - 'with stack info[%s]' % (stack_info) - logger.warn(err_msg) - - if buff is not None: - buff.free() - raise e - - def get(self, **kwargs): - """ get an object from this queue - """ - buff = None - try: - buff = super(SharedQueue, self).get(**kwargs) - data = buff.get() - return pickle.load(StringIO(data)) - except Empty as e: - raise e - except Exception as e: - stack_info = traceback.format_exc() - err_msg = 'failed to get element from SharedQueue '\ - 'with stack info[%s]' % (stack_info) - logger.warn(err_msg) - raise e - finally: - if buff is not None: - buff.free() - - def release(self): - self._shared_mem.release() - self._shared_mem = None diff --git a/ppdet/data/shared_queue/sharedmemory.py b/ppdet/data/shared_queue/sharedmemory.py deleted file mode 100644 index 8b1d3ab40..000000000 --- 
a/ppdet/data/shared_queue/sharedmemory.py +++ /dev/null @@ -1,532 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# utils for memory management which is allocated on sharedmemory, -# note that these structures may not be thread-safe - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import os -import time -import math -import struct -import sys -import six - -if six.PY3: - import pickle -else: - import cPickle as pickle - -import json -import uuid -import random -import numpy as np -import weakref -import logging -from multiprocessing import Lock -from multiprocessing import RawArray - -logger = logging.getLogger(__name__) - - -class SharedMemoryError(ValueError): - """ SharedMemoryError - """ - pass - - -class SharedBufferError(SharedMemoryError): - """ SharedBufferError - """ - pass - - -class MemoryFullError(SharedMemoryError): - """ MemoryFullError - """ - - def __init__(self, errmsg=''): - super(MemoryFullError, self).__init__() - self.errmsg = errmsg - - -def memcopy(dst, src, offset=0, length=None): - """ copy data from 'src' to 'dst' in bytes - """ - length = length if length is not None else len(src) - assert type(dst) == np.ndarray, 'invalid type for "dst" in memcopy' - if type(src) is not np.ndarray: - if type(src) is str and six.PY3: - src = src.encode() - src = np.frombuffer(src, dtype='uint8', count=len(src)) - - dst[:] = src[offset:offset + length] - - -class SharedBuffer(object): - """ Buffer allocated from SharedMemoryMgr, and it stores data on shared memory - - note that: - every instance of this should be freed explicitely by calling 'self.free' - """ - - def __init__(self, owner, capacity, pos, size=0, alloc_status=''): - """ Init - - Args: - owner (str): manager to own this buffer - capacity (int): capacity in bytes for this buffer - pos (int): page position in shared memory - size (int): bytes already used - alloc_status (str): debug info about allocator when allocate this - """ - self._owner = owner - self._cap = capacity - self._pos = pos - self._size = size - self._alloc_status = alloc_status - assert self._pos >= 0 and self._cap > 0, \ - "invalid params[%d:%d] to construct SharedBuffer" \ - % (self._pos, self._cap) - - def owner(self): - """ get owner - """ - return SharedMemoryMgr.get_mgr(self._owner) - - def put(self, data, override=False): - """ put data to this buffer - - Args: - data (str): data to be stored in this buffer - - Returns: - None - - Raises: - SharedMemoryError when not enough space in this buffer - """ - assert type(data) in [str, bytes], \ - 'invalid type[%s] for SharedBuffer::put' % (str(type(data))) - if self._size > 0 and not override: - raise SharedBufferError('already has already been setted before') - - if self.capacity() < len(data): - raise SharedBufferError('data[%d] is larger than size of buffer[%s]'\ - % (len(data), str(self))) - - 
self.owner().put_data(self, data) - self._size = len(data) - - def get(self, offset=0, size=None, no_copy=True): - """ get the data stored this buffer - - Args: - offset (int): position for the start point to 'get' - size (int): size to get - - Returns: - data (np.ndarray('uint8')): user's data in numpy - which is passed in by 'put' - None: if no data stored in - """ - offset = offset if offset >= 0 else self._size + offset - if self._size <= 0: - return None - - size = self._size if size is None else size - assert offset + size <= self._cap, 'invalid offset[%d] '\ - 'or size[%d] for capacity[%d]' % (offset, size, self._cap) - return self.owner().get_data(self, offset, size, no_copy=no_copy) - - def size(self): - """ bytes of used memory - """ - return self._size - - def resize(self, size): - """ resize the used memory to 'size', should not be greater than capacity - """ - assert size >= 0 and size <= self._cap, \ - "invalid size[%d] for resize" % (size) - - self._size = size - - def capacity(self): - """ size of allocated memory - """ - return self._cap - - def __str__(self): - """ human readable format - """ - return "SharedBuffer(owner:%s, pos:%d, size:%d, "\ - "capacity:%d, alloc_status:[%s], pid:%d)" \ - % (str(self._owner), self._pos, self._size, \ - self._cap, self._alloc_status, os.getpid()) - - def free(self): - """ free this buffer to it's owner - """ - if self._owner is not None: - self.owner().free(self) - self._owner = None - self._cap = 0 - self._pos = -1 - self._size = 0 - return True - else: - return False - - -class PageAllocator(object): - """ allocator used to malloc and free shared memory which - is split into pages - """ - s_allocator_header = 12 - - def __init__(self, base, total_pages, page_size): - """ init - """ - self._magic_num = 1234321000 + random.randint(100, 999) - self._base = base - self._total_pages = total_pages - self._page_size = page_size - - header_pages = int( - math.ceil((total_pages + self.s_allocator_header) / page_size)) - - self._header_pages = header_pages - self._free_pages = total_pages - header_pages - self._header_size = self._header_pages * page_size - self._reset() - - def _dump_alloc_info(self, fname): - hpages, tpages, pos, used = self.header() - - start = self.s_allocator_header - end = start + self._page_size * hpages - alloc_flags = self._base[start:end].tostring() - info = { - 'magic_num': self._magic_num, - 'header_pages': hpages, - 'total_pages': tpages, - 'pos': pos, - 'used': used - } - info['alloc_flags'] = alloc_flags - fname = fname + '.' 
+ str(uuid.uuid4())[:6] - with open(fname, 'wb') as f: - f.write(pickle.dumps(info, -1)) - logger.warn('dump alloc info to file[%s]' % (fname)) - - def _reset(self): - alloc_page_pos = self._header_pages - used_pages = self._header_pages - header_info = struct.pack( - str('III'), self._magic_num, alloc_page_pos, used_pages) - assert len(header_info) == self.s_allocator_header, \ - 'invalid size of header_info' - - memcopy(self._base[0:self.s_allocator_header], header_info) - self.set_page_status(0, self._header_pages, '1') - self.set_page_status(self._header_pages, self._free_pages, '0') - - def header(self): - """ get header info of this allocator - """ - header_str = self._base[0:self.s_allocator_header].tostring() - magic, pos, used = struct.unpack(str('III'), header_str) - - assert magic == self._magic_num, \ - 'invalid header magic[%d] in shared memory' % (magic) - return self._header_pages, self._total_pages, pos, used - - def empty(self): - """ are all allocatable pages available - """ - header_pages, pages, pos, used = self.header() - return header_pages == used - - def full(self): - """ are all allocatable pages used - """ - header_pages, pages, pos, used = self.header() - return header_pages + used == pages - - def __str__(self): - header_pages, pages, pos, used = self.header() - desc = '{page_info[magic:%d,total:%d,used:%d,header:%d,alloc_pos:%d,pagesize:%d]}' \ - % (self._magic_num, pages, used, header_pages, pos, self._page_size) - return 'PageAllocator:%s' % (desc) - - def set_alloc_info(self, alloc_pos, used_pages): - """ set allocating position to new value - """ - memcopy(self._base[4:12], struct.pack(str('II'), alloc_pos, used_pages)) - - def set_page_status(self, start, page_num, status): - """ set pages from 'start' to 'end' with new same status 'status' - """ - assert status in ['0', '1'], 'invalid status[%s] for page status '\ - 'in allocator[%s]' % (status, str(self)) - start += self.s_allocator_header - end = start + page_num - assert start >= 0 and end <= self._header_size, 'invalid end[%d] of pages '\ - 'in allocator[%s]' % (end, str(self)) - memcopy(self._base[start:end], str(status * page_num)) - - def get_page_status(self, start, page_num, ret_flag=False): - start += self.s_allocator_header - end = start + page_num - assert start >= 0 and end <= self._header_size, 'invalid end[%d] of pages '\ - 'in allocator[%s]' % (end, str(self)) - status = self._base[start:end].tostring().decode() - if ret_flag: - return status - - zero_num = status.count('0') - if zero_num == 0: - return (page_num, 1) - else: - return (zero_num, 0) - - def malloc_page(self, page_num): - header_pages, pages, pos, used = self.header() - end = pos + page_num - if end > pages: - pos = self._header_pages - end = pos + page_num - - start_pos = pos - flags = '' - while True: - flags = self.get_page_status(pos, page_num, ret_flag=True) - - if flags.count('0') == page_num: - break - - # not found enough pages, so shift to next few pages - free_pos = flags.rfind('1') + 1 - pos += free_pos - end = pos + page_num - if end > pages: - pos = self._header_pages - end = pos + page_num - flags = '' - - # not found available pages after scan all pages - if pos <= start_pos and end >= start_pos: - logger.debug('not found available pages after scan all pages') - break - - page_status = (flags.count('0'), 0) - if page_status != (page_num, 0): - free_pages = self._total_pages - used - if free_pages == 0: - err_msg = 'all pages have been used:%s' % (str(self)) - else: - err_msg = 'not found enough pages[avail:%d, 
expect:%d] '\ - 'with total free pages[%d]' % (page_status[0], page_num, free_pages) - err_msg = 'failed to malloc %d pages at pos[%d] for reason[%s] '\ - 'and allocator status[%s]' % (page_num, pos, err_msg, str(self)) - raise MemoryFullError(err_msg) - - self.set_page_status(pos, page_num, '1') - used += page_num - self.set_alloc_info(end, used) - return pos - - def free_page(self, start, page_num): - """ free 'page_num' pages start from 'start' - """ - page_status = self.get_page_status(start, page_num) - assert page_status == (page_num, 1), \ - 'invalid status[%s] when free [%d, %d]' \ - % (str(page_status), start, page_num) - self.set_page_status(start, page_num, '0') - _, _, pos, used = self.header() - used -= page_num - self.set_alloc_info(pos, used) - - -DEFAULT_SHARED_MEMORY_SIZE = 1024 * 1024 * 1024 - - -class SharedMemoryMgr(object): - """ manage a continouse block of memory, provide - 'malloc' to allocate new buffer, and 'free' to free buffer - """ - s_memory_mgrs = weakref.WeakValueDictionary() - s_mgr_num = 0 - s_log_statis = False - - @classmethod - def get_mgr(cls, id): - """ get a SharedMemoryMgr with size of 'capacity' - """ - assert id in cls.s_memory_mgrs, 'invalid id[%s] for memory managers' % ( - id) - return cls.s_memory_mgrs[id] - - def __init__(self, capacity=None, pagesize=None): - """ init - """ - logger.debug('create SharedMemoryMgr') - - pagesize = 64 * 1024 if pagesize is None else pagesize - assert type(pagesize) is int, "invalid type of pagesize[%s]" \ - % (str(pagesize)) - - capacity = DEFAULT_SHARED_MEMORY_SIZE if capacity is None else capacity - assert type(capacity) is int, "invalid type of capacity[%s]" \ - % (str(capacity)) - - assert capacity > 0, '"size of shared memory should be greater than 0' - self._released = False - self._cap = capacity - self._page_size = pagesize - - assert self._cap % self._page_size == 0, \ - "capacity[%d] and pagesize[%d] are not consistent" \ - % (self._cap, self._page_size) - self._total_pages = self._cap // self._page_size - - self._pid = os.getpid() - SharedMemoryMgr.s_mgr_num += 1 - self._id = self._pid * 100 + SharedMemoryMgr.s_mgr_num - SharedMemoryMgr.s_memory_mgrs[self._id] = self - self._locker = Lock() - self._setup() - - def _setup(self): - self._shared_mem = RawArray('c', self._cap) - self._base = np.frombuffer( - self._shared_mem, dtype='uint8', count=self._cap) - self._locker.acquire() - try: - self._allocator = PageAllocator(self._base, self._total_pages, - self._page_size) - finally: - self._locker.release() - - def malloc(self, size, wait=True): - """ malloc a new SharedBuffer - - Args: - size (int): buffer size to be malloc - wait (bool): whether to wait when no enough memory - - Returns: - SharedBuffer - - Raises: - SharedMemoryError when not found available memory - """ - page_num = int(math.ceil(size / self._page_size)) - size = page_num * self._page_size - - start = None - ct = 0 - errmsg = '' - while True: - self._locker.acquire() - try: - start = self._allocator.malloc_page(page_num) - alloc_status = str(self._allocator) - except MemoryFullError as e: - start = None - errmsg = e.errmsg - if not wait: - raise e - finally: - self._locker.release() - - if start is None: - time.sleep(0.1) - if ct % 100 == 0: - logger.warn('not enough space for reason[%s]' % (errmsg)) - - ct += 1 - else: - break - - return SharedBuffer(self._id, size, start, alloc_status=alloc_status) - - def free(self, shared_buf): - """ free a SharedBuffer - - Args: - shared_buf (SharedBuffer): buffer to be freed - - Returns: - None - 
- Raises: - SharedMemoryError when failed to release this buffer - """ - assert shared_buf._owner == self._id, "invalid shared_buf[%s] "\ - "for it's not allocated from me[%s]" % (str(shared_buf), str(self)) - cap = shared_buf.capacity() - start_page = shared_buf._pos - page_num = cap // self._page_size - - #maybe we don't need this lock here - self._locker.acquire() - try: - self._allocator.free_page(start_page, page_num) - finally: - self._locker.release() - - def put_data(self, shared_buf, data): - """ fill 'data' into 'shared_buf' - """ - assert len(data) <= shared_buf.capacity(), 'too large data[%d] '\ - 'for this buffer[%s]' % (len(data), str(shared_buf)) - start = shared_buf._pos * self._page_size - end = start + len(data) - assert start >= 0 and end <= self._cap, "invalid start "\ - "position[%d] when put data to buff:%s" % (start, str(shared_buf)) - self._base[start:end] = np.frombuffer(data, 'uint8', len(data)) - - def get_data(self, shared_buf, offset, size, no_copy=True): - """ extract 'data' from 'shared_buf' in range [offset, offset + size) - """ - start = shared_buf._pos * self._page_size - start += offset - if no_copy: - return self._base[start:start + size] - else: - return self._base[start:start + size].tostring() - - def __str__(self): - return 'SharedMemoryMgr:{id:%d, %s}' % (self._id, str(self._allocator)) - - def __del__(self): - if SharedMemoryMgr.s_log_statis: - logger.info('destroy [%s]' % (self)) - - if not self._released and not self._allocator.empty(): - logger.debug('not empty when delete this SharedMemoryMgr[%s]' % - (self)) - else: - self._released = True - - if self._id in SharedMemoryMgr.s_memory_mgrs: - del SharedMemoryMgr.s_memory_mgrs[self._id] - SharedMemoryMgr.s_mgr_num -= 1 diff --git a/ppdet/data/source/__init__.py b/ppdet/data/source/__init__.py index c5c26a16f..4483b57f7 100644 --- a/ppdet/data/source/__init__.py +++ b/ppdet/data/source/__init__.py @@ -1,21 +1,9 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - +from . import dataset from . import coco -from . import voc -from . import widerface +#from . import voc +#from . import widerface +from .dataset import * from .coco import * -from .voc import * -from .widerface import * +#from .voc import * +#from .widerface import * diff --git a/ppdet/data/source/coco.py b/ppdet/data/source/coco.py index cb823f25e..fe0d02f9d 100644 --- a/ppdet/data/source/coco.py +++ b/ppdet/data/source/coco.py @@ -1,75 +1,28 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - import os import numpy as np - -from .dataset import DataSet +import logging from ppdet.core.workspace import register, serializable +from .dataset import DetDataset -import logging logger = logging.getLogger(__name__) @register @serializable -class COCODataSet(DataSet): - """ - Load COCO records with annotations in json file 'anno_path' - - Args: - dataset_dir (str): root directory for dataset. - image_dir (str): directory for images. - anno_path (str): json file path. - sample_num (int): number of samples to load, -1 means all. - with_background (bool): whether load background as a class. - if True, total class number will be 81. default True. - """ - +class COCODataset(DetDataset): def __init__(self, + dataset_dir=None, image_dir=None, anno_path=None, - dataset_dir=None, - sample_num=-1, - with_background=True): - super(COCODataSet, self).__init__( - image_dir=image_dir, - anno_path=anno_path, - dataset_dir=dataset_dir, - sample_num=sample_num, - with_background=with_background) - self.anno_path = anno_path - self.sample_num = sample_num - self.with_background = with_background - # `roidbs` is list of dict whose structure is: - # { - # 'im_file': im_fname, # image file name - # 'im_id': img_id, # image id - # 'h': im_h, # height of image - # 'w': im_w, # width - # 'is_crowd': is_crowd, - # 'gt_score': gt_score, - # 'gt_class': gt_class, - # 'gt_bbox': gt_bbox, - # 'gt_poly': gt_poly, - # } - self.roidbs = None - # a dict used to map category name to class id - self.cname2cid = None + with_background=True, + sample_num=-1): + super(COCODataset, self).__init__(dataset_dir, image_dir, anno_path, + with_background, sample_num) self.load_image_only = False + self.load_semantic = False + #self.parse_dataset() - def load_roidb_and_cname2cid(self): + def parse_dataset(self): anno_path = os.path.join(self.dataset_dir, self.anno_path) image_dir = os.path.join(self.dataset_dir, self.image_dir) @@ -104,11 +57,11 @@ class COCODataSet(DataSet): im_w = float(img_anno['width']) im_h = float(img_anno['height']) - im_fname = os.path.join(image_dir, - im_fname) if image_dir else im_fname - if not os.path.exists(im_fname): + im_path = os.path.join(image_dir, + im_fname) if image_dir else im_fname + if not os.path.exists(im_path): logger.warn('Illegal image file: {}, and it will be ' - 'ignored'.format(im_fname)) + 'ignored'.format(im_path)) continue if im_w < 0 or im_h < 0: @@ -118,7 +71,7 @@ class COCODataSet(DataSet): continue coco_rec = { - 'im_file': im_fname, + 'im_file': im_path, 'im_id': np.array([img_id]), 'h': im_h, 'w': im_w, @@ -127,6 +80,7 @@ class COCODataSet(DataSet): if not self.load_image_only: ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False) instances = coco.loadAnns(ins_anno_ids) + bboxes = [] for inst in instances: x, y, box_w, box_h = inst['bbox'] @@ -134,7 +88,6 @@ class COCODataSet(DataSet): y1 = max(0, y) x2 = min(im_w - 1, x1 + max(0, box_w - 1)) y2 = min(im_h - 1, y1 + max(0, box_h - 1)) - if inst['area'] > 0 and x2 >= x1 and y2 >= y1: inst['clean_bbox'] = [x1, y1, x2, y2] bboxes.append(inst) @@ -143,7 +96,6 @@ class COCODataSet(DataSet): 'Found an invalid bbox in annotations: im_id: {}, ' 'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format( img_id, float(inst['area']), x1, y1, x2, y2)) - num_bbox = len(bboxes) gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32) @@ -168,9 +120,14 @@ class COCODataSet(DataSet): 'gt_score': gt_score, 'gt_poly': gt_poly, }) + # TODO: 
remove load_semantic + if self.load_semantic: + seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps', + 'train2017', im_fname[:-3] + 'png') + coco_rec.update({'semantic': seg_path}) logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format( - im_fname, img_id, im_h, im_w)) + im_path, img_id, im_h, im_w)) records.append(coco_rec) ct += 1 if self.sample_num > 0 and ct >= self.sample_num: diff --git a/ppdet/data/source/dataset.py b/ppdet/data/source/dataset.py index 6964b144f..9f38a6c87 100644 --- a/ppdet/data/source/dataset.py +++ b/ppdet/data/source/dataset.py @@ -1,140 +1,68 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - import os import numpy as np - +from collections import OrderedDict try: from collections.abc import Sequence except Exception: from collections import Sequence - +from paddle.io import Dataset from ppdet.core.workspace import register, serializable from ppdet.utils.download import get_dataset_path @serializable -class DataSet(object): - """ - Dataset, e.g., coco, pascal voc - - Args: - annotation (str): annotation file path - image_dir (str): directory where image files are stored - shuffle (bool): shuffle samples - """ - +class DetDataset(Dataset): def __init__(self, dataset_dir=None, image_dir=None, anno_path=None, - sample_num=-1, with_background=True, - use_default_label=None, + sample_num=-1, **kwargs): - super(DataSet, self).__init__() + super(DetDataset, self).__init__() + self.dataset_dir = dataset_dir if dataset_dir is not None else '' self.anno_path = anno_path self.image_dir = image_dir if image_dir is not None else '' - self.dataset_dir = dataset_dir if dataset_dir is not None else '' - self.sample_num = sample_num self.with_background = with_background - self.use_default_label = use_default_label - - self.cname2cid = None - self._imid2path = None - - def load_roidb_and_cname2cid(self): - """load dataset""" - raise NotImplementedError('%s.load_roidb_and_cname2cid not available' % - (self.__class__.__name__)) - - def get_roidb(self): - if not self.roidbs: - data_dir = get_dataset_path(self.dataset_dir, self.anno_path, - self.image_dir) - if data_dir: - self.dataset_dir = data_dir - self.load_roidb_and_cname2cid() - - return self.roidbs - - def get_cname2cid(self): - if not self.cname2cid: - self.load_roidb_and_cname2cid() - return self.cname2cid - - def get_anno(self): - if self.anno_path is None: - return - return os.path.join(self.dataset_dir, self.anno_path) - - def get_imid2path(self): - return self._imid2path + self.sample_num = sample_num + def __len__(self, ): + return len(self.roidbs) -def _is_valid_file(f, extensions=('.jpg', '.jpeg', '.png', '.bmp')): - return f.lower().endswith(extensions) + def __getitem__(self, idx): + # data batch + roidb = self.roidbs[idx] + # data augment + roidb = self.transform(roidb) + # data item + out = OrderedDict() + for k in self.fields: + out[k] = roidb[k] + return out.values() + def set_out(self, sample_transform, fields): + self.transform = 
sample_transform + self.fields = fields -def _make_dataset(dir): - dir = os.path.expanduser(dir) - if not os.path.isdir(d): - raise ('{} should be a dir'.format(dir)) - images = [] - for root, _, fnames in sorted(os.walk(dir, followlinks=True)): - for fname in sorted(fnames): - path = os.path.join(root, fname) - if is_valid_file(path): - images.append(path) - return images + def parse_dataset(self): + raise NotImplemented( + "Need to implement parse_dataset method of Dataset") @register @serializable -class ImageFolder(DataSet): - """ - Args: - dataset_dir (str): root directory for dataset. - image_dir(list|str): list of image folders or list of image files - anno_path (str): annotation file path. - samples (int): number of samples to load, -1 means all - """ - +class ImageFolder(DetDataset): def __init__(self, dataset_dir=None, image_dir=None, anno_path=None, - sample_num=-1, with_background=True, - use_default_label=None, + sample_num=-1, **kwargs): super(ImageFolder, self).__init__(dataset_dir, image_dir, anno_path, - sample_num, with_background, - use_default_label) - self.roidbs = None - self._imid2path = {} + with_background, sample_num) - def get_roidb(self): - if not self.roidbs: - self.roidbs = self._load_images() - return self.roidbs - - def set_images(self, images): - self.image_dir = images - self.roidbs = self._load_images() - - def _parse(self): + def parse_dataset(self): image_dir = self.image_dir if not isinstance(image_dir, Sequence): image_dir = [image_dir] @@ -145,20 +73,4 @@ class ImageFolder(DataSet): images.extend(_make_dataset(im_dir)) elif os.path.isfile(im_dir) and _is_valid_file(im_dir): images.append(im_dir) - return images - - def _load_images(self): - images = self._parse() - ct = 0 - records = [] - for image in images: - assert image != '' and os.path.isfile(image), \ - "Image {} not found".format(image) - if self.sample_num > 0 and ct >= self.sample_num: - break - rec = {'im_id': np.array([ct]), 'im_file': image} - self._imid2path[ct] = image - ct += 1 - records.append(rec) - assert len(records) > 0, "No image file found" - return records + self.roidbs = images diff --git a/ppdet/data/source/voc.py b/ppdet/data/source/voc.py deleted file mode 100644 index 560ed17ea..000000000 --- a/ppdet/data/source/voc.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np - -import xml.etree.ElementTree as ET - -from ppdet.core.workspace import register, serializable - -from .dataset import DataSet -import logging -logger = logging.getLogger(__name__) - - -@register -@serializable -class VOCDataSet(DataSet): - """ - Load dataset with PascalVOC format. - - Notes: - `anno_path` must contains xml file and image file path for annotations. - - Args: - dataset_dir (str): root directory for dataset. - image_dir (str): directory for images. - anno_path (str): voc annotation file path. - sample_num (int): number of samples to load, -1 means all. 
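The dataset.py rewrite above reduces a data source to a small paddle.io.Dataset protocol: parse_dataset() fills self.roidbs, set_out() wires in the sample transform and the output field order, and __getitem__ returns one transformed record. A hedged sketch of that protocol follows; TinyDataset, the identity transform, and the record contents are placeholders, not part of the patch.

    # Hypothetical sketch of the new DetDataset protocol.
    import numpy as np
    from ppdet.data.source.dataset import DetDataset

    class TinyDataset(DetDataset):
        def parse_dataset(self):
            # records normally come from an annotation file; these are placeholders
            self.roidbs = [{'im_id': np.array([i]), 'im_file': 'img_%03d.jpg' % i}
                           for i in range(4)]

    ds = TinyDataset()
    ds.parse_dataset()
    ds.set_out(lambda rec: rec, ['im_id', 'im_file'])  # identity transform for illustration
    print(len(ds), list(ds[0]))  # -> 4 [array([0]), 'img_000.jpg']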
- use_default_label (bool): whether use the default mapping of - label to integer index. Default True. - with_background (bool): whether load background as a class, - default True. - label_list (str): if use_default_label is False, will load - mapping between category and class index. - """ - - def __init__(self, - dataset_dir=None, - image_dir=None, - anno_path=None, - sample_num=-1, - use_default_label=True, - with_background=True, - label_list='label_list.txt'): - super(VOCDataSet, self).__init__( - image_dir=image_dir, - anno_path=anno_path, - sample_num=sample_num, - dataset_dir=dataset_dir, - with_background=with_background) - # roidbs is list of dict whose structure is: - # { - # 'im_file': im_fname, # image file name - # 'im_id': im_id, # image id - # 'h': im_h, # height of image - # 'w': im_w, # width - # 'is_crowd': is_crowd, - # 'gt_class': gt_class, - # 'gt_score': gt_score, - # 'gt_bbox': gt_bbox, - # 'difficult': difficult - # } - self.roidbs = None - # 'cname2id' is a dict to map category name to class id - self.cname2cid = None - self.use_default_label = use_default_label - self.label_list = label_list - - def load_roidb_and_cname2cid(self): - anno_path = os.path.join(self.dataset_dir, self.anno_path) - image_dir = os.path.join(self.dataset_dir, self.image_dir) - - # mapping category name to class id - # if with_background is True: - # background:0, first_class:1, second_class:2, ... - # if with_background is False: - # first_class:0, second_class:1, ... - records = [] - ct = 0 - cname2cid = {} - if not self.use_default_label: - label_path = os.path.join(self.dataset_dir, self.label_list) - if not os.path.exists(label_path): - raise ValueError("label_list {} does not exists".format( - label_path)) - with open(label_path, 'r') as fr: - label_id = int(self.with_background) - for line in fr.readlines(): - cname2cid[line.strip()] = label_id - label_id += 1 - else: - cname2cid = pascalvoc_label(self.with_background) - - with open(anno_path, 'r') as fr: - while True: - line = fr.readline() - if not line: - break - img_file, xml_file = [os.path.join(image_dir, x) \ - for x in line.strip().split()[:2]] - if not os.path.exists(img_file): - logger.warn( - 'Illegal image file: {}, and it will be ignored'.format( - img_file)) - continue - if not os.path.isfile(xml_file): - logger.warn('Illegal xml file: {}, and it will be ignored'. 
- format(xml_file)) - continue - tree = ET.parse(xml_file) - if tree.find('id') is None: - im_id = np.array([ct]) - else: - im_id = np.array([int(tree.find('id').text)]) - - objs = tree.findall('object') - im_w = float(tree.find('size').find('width').text) - im_h = float(tree.find('size').find('height').text) - if im_w < 0 or im_h < 0: - logger.warn( - 'Illegal width: {} or height: {} in annotation, ' - 'and {} will be ignored'.format(im_w, im_h, xml_file)) - continue - gt_bbox = [] - gt_class = [] - gt_score = [] - is_crowd = [] - difficult = [] - for i, obj in enumerate(objs): - cname = obj.find('name').text - _difficult = int(obj.find('difficult').text) - x1 = float(obj.find('bndbox').find('xmin').text) - y1 = float(obj.find('bndbox').find('ymin').text) - x2 = float(obj.find('bndbox').find('xmax').text) - y2 = float(obj.find('bndbox').find('ymax').text) - x1 = max(0, x1) - y1 = max(0, y1) - x2 = min(im_w - 1, x2) - y2 = min(im_h - 1, y2) - if x2 > x1 and y2 > y1: - gt_bbox.append([x1, y1, x2, y2]) - gt_class.append([cname2cid[cname]]) - gt_score.append([1.]) - is_crowd.append([0]) - difficult.append([_difficult]) - else: - logger.warn( - 'Found an invalid bbox in annotations: xml_file: {}' - ', x1: {}, y1: {}, x2: {}, y2: {}.'.format( - xml_file, x1, y1, x2, y2)) - gt_bbox = np.array(gt_bbox).astype('float32') - gt_class = np.array(gt_class).astype('int32') - gt_score = np.array(gt_score).astype('float32') - is_crowd = np.array(is_crowd).astype('int32') - difficult = np.array(difficult).astype('int32') - voc_rec = { - 'im_file': img_file, - 'im_id': im_id, - 'h': im_h, - 'w': im_w, - 'is_crowd': is_crowd, - 'gt_class': gt_class, - 'gt_score': gt_score, - 'gt_bbox': gt_bbox, - 'difficult': difficult - } - if len(objs) != 0: - records.append(voc_rec) - - ct += 1 - if self.sample_num > 0 and ct >= self.sample_num: - break - assert len(records) > 0, 'not found any voc record in %s' % ( - self.anno_path) - logger.debug('{} samples in file {}'.format(ct, anno_path)) - self.roidbs, self.cname2cid = records, cname2cid - - -def pascalvoc_label(with_background=True): - labels_map = { - 'aeroplane': 1, - 'bicycle': 2, - 'bird': 3, - 'boat': 4, - 'bottle': 5, - 'bus': 6, - 'car': 7, - 'cat': 8, - 'chair': 9, - 'cow': 10, - 'diningtable': 11, - 'dog': 12, - 'horse': 13, - 'motorbike': 14, - 'person': 15, - 'pottedplant': 16, - 'sheep': 17, - 'sofa': 18, - 'train': 19, - 'tvmonitor': 20 - } - if not with_background: - labels_map = {k: v - 1 for k, v in labels_map.items()} - return labels_map diff --git a/ppdet/data/source/widerface.py b/ppdet/data/source/widerface.py deleted file mode 100644 index 7aab15337..000000000 --- a/ppdet/data/source/widerface.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import numpy as np -import logging -logger = logging.getLogger(__name__) - -from ppdet.core.workspace import register, serializable -from .dataset import DataSet - - -@register -@serializable -class WIDERFaceDataSet(DataSet): - """ - Load WiderFace records with 'anno_path' - - Args: - dataset_dir (str): root directory for dataset. - image_dir (str): directory for images. - anno_path (str): root directory for voc annotation data - sample_num (int): number of samples to load, -1 means all - with_background (bool): whether load background as a class. - if True, total class number will be 2. default True. - """ - - def __init__(self, - dataset_dir=None, - image_dir=None, - anno_path=None, - sample_num=-1, - with_background=True, - with_lmk=False): - super(WIDERFaceDataSet, self).__init__( - image_dir=image_dir, - anno_path=anno_path, - sample_num=sample_num, - dataset_dir=dataset_dir, - with_background=with_background) - self.anno_path = anno_path - self.sample_num = sample_num - self.with_background = with_background - self.roidbs = None - self.cname2cid = None - self.with_lmk = with_lmk - - def load_roidb_and_cname2cid(self): - anno_path = os.path.join(self.dataset_dir, self.anno_path) - image_dir = os.path.join(self.dataset_dir, self.image_dir) - - txt_file = anno_path - - records = [] - ct = 0 - file_lists = self._load_file_list(txt_file) - cname2cid = widerface_label(self.with_background) - - for item in file_lists: - im_fname = item[0] - im_id = np.array([ct]) - gt_bbox = np.zeros((len(item) - 1, 4), dtype=np.float32) - gt_class = np.ones((len(item) - 1, 1), dtype=np.int32) - gt_lmk_labels = np.zeros((len(item) - 1, 10), dtype=np.float32) - lmk_ignore_flag = np.zeros((len(item) - 1, 1), dtype=np.int32) - for index_box in range(len(item)): - if index_box < 1: - continue - gt_bbox[index_box - 1] = item[index_box][0] - if self.with_lmk: - gt_lmk_labels[index_box - 1] = item[index_box][1] - lmk_ignore_flag[index_box - 1] = item[index_box][2] - im_fname = os.path.join(image_dir, - im_fname) if image_dir else im_fname - widerface_rec = { - 'im_file': im_fname, - 'im_id': im_id, - 'gt_bbox': gt_bbox, - 'gt_class': gt_class, - } - if self.with_lmk: - widerface_rec['gt_keypoint'] = gt_lmk_labels - widerface_rec['keypoint_ignore'] = lmk_ignore_flag - - if len(item) != 0: - records.append(widerface_rec) - - ct += 1 - if self.sample_num > 0 and ct >= self.sample_num: - break - assert len(records) > 0, 'not found any widerface in %s' % (anno_path) - logger.debug('{} samples in file {}'.format(ct, anno_path)) - self.roidbs, self.cname2cid = records, cname2cid - - def _load_file_list(self, input_txt): - with open(input_txt, 'r') as f_dir: - lines_input_txt = f_dir.readlines() - - file_dict = {} - num_class = 0 - for i in range(len(lines_input_txt)): - line_txt = lines_input_txt[i].strip('\n\t\r') - if '.jpg' in line_txt: - if i != 0: - num_class += 1 - file_dict[num_class] = [] - file_dict[num_class].append(line_txt) - if '.jpg' not in line_txt: - if len(line_txt) <= 6: - continue - result_boxs = [] - split_str = line_txt.split(' ') - xmin = float(split_str[0]) - ymin = float(split_str[1]) - w = float(split_str[2]) - h = float(split_str[3]) - # Filter out wrong labels - if w < 0 or h < 0: - logger.warn('Illegal box with w: {}, h: {} in ' - 'img: {}, and it will be ignored'.format( - w, h, file_dict[num_class][0])) - continue - xmin = max(0, xmin) - ymin = max(0, ymin) - xmax = xmin + w - ymax = ymin + h - gt_bbox = [xmin, ymin, xmax, ymax] - result_boxs.append(gt_bbox) - if self.with_lmk: 
- assert len(split_str) > 18, 'When `with_lmk=True`, the number' \ - 'of characters per line in the annotation file should' \ - 'exceed 18.' - lmk0_x = float(split_str[5]) - lmk0_y = float(split_str[6]) - lmk1_x = float(split_str[8]) - lmk1_y = float(split_str[9]) - lmk2_x = float(split_str[11]) - lmk2_y = float(split_str[12]) - lmk3_x = float(split_str[14]) - lmk3_y = float(split_str[15]) - lmk4_x = float(split_str[17]) - lmk4_y = float(split_str[18]) - lmk_ignore_flag = 0 if lmk0_x == -1 else 1 - gt_lmk_label = [ - lmk0_x, lmk0_y, lmk1_x, lmk1_y, lmk2_x, lmk2_y, lmk3_x, - lmk3_y, lmk4_x, lmk4_y - ] - result_boxs.append(gt_lmk_label) - result_boxs.append(lmk_ignore_flag) - file_dict[num_class].append(result_boxs) - - return list(file_dict.values()) - - -def widerface_label(with_background=True): - labels_map = {'face': 1} - if not with_background: - labels_map = {k: v - 1 for k, v in labels_map.items()} - return labels_map diff --git a/ppdet/data/transform/batch_operators.py b/ppdet/data/transform/batch_operators.py index 538344438..65cd76c7e 100644 --- a/ppdet/data/transform/batch_operators.py +++ b/ppdet/data/transform/batch_operators.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -24,6 +23,7 @@ except Exception: import logging import cv2 import numpy as np + from .operators import register_op, BaseOperator from .op_helper import jaccard_overlap, gaussian2D @@ -41,28 +41,15 @@ __all__ = [ @register_op class PadBatch(BaseOperator): - """ - Pad a batch of samples so they can be divisible by a stride. - The layout of each image should be 'CHW'. - Args: - pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure - height and width is divisible by `pad_to_stride`. - """ - def __init__(self, pad_to_stride=0, use_padded_im_info=True, pad_gt=False): super(PadBatch, self).__init__() self.pad_to_stride = pad_to_stride self.use_padded_im_info = use_padded_im_info self.pad_gt = pad_gt - def __call__(self, samples, context=None): - """ - Args: - samples (list): a batch of sample, each is dict. 
- """ + def __call__(self, samples): + coarsest_stride = self.pad_to_stride - #if coarsest_stride == 0: - # return samples max_shape = np.array([data['image'].shape for data in samples]).max( axis=0) @@ -82,9 +69,10 @@ class PadBatch(BaseOperator): data['image'] = padding_im if self.use_padded_im_info: data['im_info'][:2] = max_shape[1:3] + if self.pad_gt: gt_num = [] - if data['gt_poly'] is not None and len(data['gt_poly']) > 0: + if 'gt_poly' in data.keys(): pad_mask = True else: pad_mask = False @@ -93,6 +81,7 @@ class PadBatch(BaseOperator): poly_num = [] poly_part_num = [] point_num = [] + for data in samples: gt_num.append(data['gt_bbox'].shape[0]) if pad_mask: @@ -127,7 +116,6 @@ class PadBatch(BaseOperator): data['gt_bbox'] = gt_box_data data['gt_class'] = gt_class_data data['is_crowd'] = is_crowd_data - return samples @@ -156,7 +144,7 @@ class RandomShape(BaseOperator): ] if random_inter else [] self.resize_box = resize_box - def __call__(self, samples, context=None): + def __call__(self, samples): shape = np.random.choice(self.sizes) method = np.random.choice(self.interps) if self.random_inter \ else cv2.INTER_NEAREST @@ -191,7 +179,7 @@ class PadMultiScaleTest(BaseOperator): super(PadMultiScaleTest, self).__init__() self.pad_to_stride = pad_to_stride - def __call__(self, samples, context=None): + def __call__(self, samples): coarsest_stride = self.pad_to_stride if coarsest_stride == 0: return samples @@ -247,7 +235,7 @@ class Gt2YoloTarget(BaseOperator): self.num_classes = num_classes self.iou_thresh = iou_thresh - def __call__(self, samples, context=None): + def __call__(self, samples): assert len(self.anchor_masks) == len(self.downsample_ratios), \ "anchor_masks', and 'downsample_ratios' should have same length." @@ -430,7 +418,7 @@ class Gt2FCOSTarget(BaseOperator): inside_gt_box = np.min(clipped_box_reg_targets, axis=2) > 0 return inside_gt_box - def __call__(self, samples, context=None): + def __call__(self, samples): assert len(self.object_sizes_of_interest) == len(self.downsample_ratios), \ "object_sizes_of_interest', and 'downsample_ratios' should have same length." @@ -554,7 +542,7 @@ class Gt2TTFTarget(BaseOperator): self.num_classes = num_classes self.alpha = alpha - def __call__(self, samples, context=None): + def __call__(self, samples): output_size = samples[0]['image'].shape[1] feat_size = output_size // self.down_ratio for sample in samples: diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py index eb9f287fa..20b93eb05 100644 --- a/ppdet/data/transform/operators.py +++ b/ppdet/data/transform/operators.py @@ -33,12 +33,10 @@ import random import math import numpy as np import os - import cv2 from PIL import Image, ImageEnhance, ImageDraw -from ppdet.core.workspace import serializable -from ppdet.modeling.ops import AnchorGrid +from ppdet.core.workspace import register, serializable from .op_helper import (satisfy_sample_constraint, filter_and_process, generate_sample_bbox, clip_bbox, data_anchor_sampling, @@ -74,11 +72,12 @@ class BaseOperator(object): name = self.__class__.__name__ self._id = name + '_' + str(uuid.uuid4())[-6:] - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): """ Process a sample. 
Args: sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx} - context (dict): info about this sample processing Returns: result (dict): a processed sample """ @@ -106,10 +105,10 @@ class DecodeImage(BaseOperator): raise TypeError("{}: input type is invalid.".format(self)) if not isinstance(self.with_mixup, bool): raise TypeError("{}: input type is invalid.".format(self)) - if not isinstance(self.with_cutmix, bool): - raise TypeError("{}: input type is invalid.".format(self)) - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): """ load image if 'im_file' field is not empty but 'image' is""" if 'image' not in sample: with open(sample['im_file'], 'rb') as f: @@ -121,7 +120,9 @@ class DecodeImage(BaseOperator): if self.to_rgb: im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + sample['image'] = im + if 'h' not in sample: sample['h'] = im.shape[0] elif sample['h'] != im.shape[0]: @@ -142,12 +143,20 @@ class DecodeImage(BaseOperator): # make default im_info with [h, w, 1] sample['im_info'] = np.array( [im.shape[0], im.shape[1], 1.], dtype=np.float32) + # decode mixup image if self.with_mixup and 'mixup' in sample: - self.__call__(sample['mixup'], context) + self.__call__(sample['mixup']) + # decode cutmix image if self.with_cutmix and 'cutmix' in sample: - self.__call__(sample['cutmix'], context) + self.__call__(sample['cutmix']) + + # decode semantic label + if 'semantic' in sample.keys() and sample['semantic'] is not None: + sem_file = sample['semantic'] + sem = cv2.imread(sem_file, cv2.IMREAD_GRAYSCALE) + sample['semantic'] = sem.astype('int32') return sample @@ -188,7 +197,9 @@ class MultiscaleTestResize(BaseOperator): and isinstance(self.interp, int)): raise TypeError("{}: input type is invalid.".format(self)) - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): """ Resize the image numpy for multi-scale test. """ origin_ims = {} @@ -292,7 +303,9 @@ class ResizeImage(BaseOperator): int)): raise TypeError("{}: input type is invalid.".format(self)) - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): """ Resize the image numpy. """ im = sample['image'] @@ -332,6 +345,7 @@ class ResizeImage(BaseOperator): resize_w = selected_size resize_h = selected_size + if self.use_cv2: im = cv2.resize( im, @@ -340,6 +354,18 @@ class ResizeImage(BaseOperator): fx=im_scale_x, fy=im_scale_y, interpolation=self.interp) + if 'semantic' in sample.keys() and sample['semantic'] is not None: + semantic = sample['semantic'] + semantic = cv2.resize( + semantic.astype('float32'), + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=self.interp) + semantic = np.asarray(semantic).astype('int32') + semantic = np.expand_dims(semantic, 0) + sample['semantic'] = semantic else: if self.max_size != 0: raise TypeError( @@ -406,7 +432,9 @@ class RandomFlipImage(BaseOperator): gt_keypoint[:, i] = width - old_x - 1 return gt_keypoint - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): """Filp the image and bounding box. Operators: 1. Flip the image numpy. 
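With the context argument dropped throughout operators.py, every sample transform is now a plain callable over the sample dict, so a reader pipeline is simple function composition. A hedged sketch under that assumption; the operator arguments, file path, and compose helper are illustrative, not taken from the patch.

    # Hypothetical composition sketch: each operator takes and returns only the
    # sample dict, so the reader can chain them as plain callables.
    import numpy as np
    from ppdet.data.transform.operators import (DecodeImage, ResizeImage,
                                                NormalizeImage, Permute)

    sample_transforms = [
        DecodeImage(to_rgb=True),
        ResizeImage(target_size=800, max_size=1333, interp=1),
        NormalizeImage(mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225],
                       is_scale=True,
                       is_channel_first=False),
        Permute(to_bgr=False, channel_first=True),
    ]

    def compose(sample):
        for op in sample_transforms:
            sample = op(sample)
        return sample

    record = {'im_file': 'demo.jpg', 'im_id': np.array([0])}  # placeholder record
    # sample = compose(record)  # decodes, resizes, normalizes and transposes the image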
@@ -453,9 +481,15 @@ class RandomFlipImage(BaseOperator): if self.is_mask_flip and len(sample['gt_poly']) != 0: sample['gt_poly'] = self.flip_segms(sample['gt_poly'], height, width) + if 'gt_keypoint' in sample.keys(): sample['gt_keypoint'] = self.flip_keypoint( sample['gt_keypoint'], width) + + if 'semantic' in sample.keys() and sample[ + 'semantic'] is not None: + sample['semantic'] = sample['semantic'][:, ::-1] + sample['flipped'] = True sample['image'] = im sample = samples if batch_input else samples[0] @@ -479,7 +513,9 @@ class RandomErasingImage(BaseOperator): self.sh = sh self.r1 = r1 - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): samples = sample batch_input = True if not isinstance(samples, Sequence): @@ -563,7 +599,9 @@ class GridMaskOp(BaseOperator): prob=prob, upper_iter=upper_iter) - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): samples = sample batch_input = True if not isinstance(samples, Sequence): @@ -591,7 +629,9 @@ class AutoAugmentImage(BaseOperator): if not isinstance(self.is_normalized, bool): raise TypeError("{}: input type is invalid.".format(self)) - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): """ Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172 """ @@ -670,7 +710,9 @@ class NormalizeImage(BaseOperator): if reduce(lambda x, y: x * y, self.std) == 0: raise ValueError('{}: std is invalid!'.format(self)) - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): """Normalize the image. Operators: 1.(optional) Scale the image to [0,1] @@ -786,7 +828,9 @@ class RandomDistort(BaseOperator): img = Image.fromarray(img, mode='HSV').convert('RGB') return img - def __call__(self, sample, context): + def __call__( + self, + sample, ): """random distort the image""" ops = [ self.random_brightness, self.random_contrast, @@ -827,7 +871,9 @@ class ExpandImage(BaseOperator): self.mean = mean self.prob = prob - def __call__(self, sample, context): + def __call__( + self, + sample, ): """ Expand the image and modify bounding box. Operators: @@ -911,7 +957,9 @@ class CropImage(BaseOperator): self.satisfy_all = satisfy_all self.avoid_no_bbox = avoid_no_bbox - def __call__(self, sample, context): + def __call__( + self, + sample, ): """ Crop the image and modify bounding box. Operators: @@ -1007,7 +1055,9 @@ class CropImageWithDataAchorSampling(BaseOperator): self.avoid_no_bbox = avoid_no_bbox self.das_anchor_scales = np.array(das_anchor_scales) - def __call__(self, sample, context): + def __call__( + self, + sample, ): """ Crop the image and modify bounding box. 
Operators: @@ -1140,7 +1190,9 @@ class NormalizeBox(BaseOperator): def __init__(self): super(NormalizeBox, self).__init__() - def __call__(self, sample, context): + def __call__( + self, + sample, ): gt_bbox = sample['gt_bbox'] width = sample['w'] height = sample['h'] @@ -1180,7 +1232,9 @@ class Permute(BaseOperator): isinstance(self.channel_first, bool)): raise TypeError("{}: input type is invalid.".format(self)) - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): samples = sample batch_input = True if not isinstance(samples, Sequence): @@ -1229,7 +1283,9 @@ class MixupImage(BaseOperator): img2.astype('float32') * (1.0 - factor) return img.astype('uint8') - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): if 'mixup' not in sample: return sample factor = np.random.beta(self.alpha, self.beta) @@ -1312,7 +1368,9 @@ class CutmixImage(BaseOperator): img_1[bby1:bby2, bbx1:bbx2, :] = img2[bby1:bby2, bbx1:bbx2, :] return img_1 - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): if 'cutmix' not in sample: return sample factor = np.random.beta(self.alpha, self.beta) @@ -1371,10 +1429,12 @@ class RandomInterpImage(BaseOperator): for interp in interps: self.resizers.append(ResizeImage(target_size, max_size, interp)) - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): """Resise the image numpy by random resizer.""" resizer = random.choice(self.resizers) - return resizer(sample, context) + return resizer(sample, ) @register_op @@ -1393,7 +1453,9 @@ class Resize(BaseOperator): self.target_dim = target_dim self.interp = interp # 'random' for yolov3 - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): w = sample['w'] h = sample['h'] @@ -1517,7 +1579,9 @@ class ColorDistort(BaseOperator): img += delta return img - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): img = sample['image'] if self.random_apply: functions = [ @@ -1600,7 +1664,9 @@ class CornerRandColor(ColorDistort): img_mean *= (1 - alpha) img += img_mean - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): img = sample['image'] if self.is_scale: img = img.astype(np.float32, copy=False) @@ -1633,7 +1699,9 @@ class NormalizePermute(BaseOperator): self.mean = mean self.std = std - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): img = sample['image'] img = img.astype(np.float32) @@ -1707,7 +1775,9 @@ class RandomExpand(BaseOperator): _expand_rle(segm, x, y, height, width, ratio)) return expanded_segms - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): if np.random.uniform(0., 1.) 
< self.prob: return sample @@ -1839,7 +1909,9 @@ class RandomCrop(BaseOperator): crop_segms.append(_crop_rle(segm, crop, height, width)) return crop_segms - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0: return sample @@ -1986,22 +2058,23 @@ class PadBox(BaseOperator): self.num_max_boxes = num_max_boxes super(PadBox, self).__init__() - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): assert 'gt_bbox' in sample bbox = sample['gt_bbox'] gt_num = min(self.num_max_boxes, len(bbox)) num_max = self.num_max_boxes - fields = context['fields'] if context else [] pad_bbox = np.zeros((num_max, 4), dtype=np.float32) if gt_num > 0: pad_bbox[:gt_num, :] = bbox[:gt_num, :] sample['gt_bbox'] = pad_bbox - if 'gt_class' in fields: + if 'gt_class' in sample.keys(): pad_class = np.zeros((num_max), dtype=np.int32) if gt_num > 0: pad_class[:gt_num] = sample['gt_class'][:gt_num, 0] sample['gt_class'] = pad_class - if 'gt_score' in fields: + if 'gt_score' in sample.keys(): pad_score = np.zeros((num_max), dtype=np.float32) if gt_num > 0: pad_score[:gt_num] = sample['gt_score'][:gt_num, 0] @@ -2009,7 +2082,7 @@ class PadBox(BaseOperator): # in training, for example in op ExpandImage, # the bbox and gt_class is expandded, but the difficult is not, # so, judging by it's length - if 'is_difficult' in fields: + if 'is_difficult' in sample.keys(): pad_diff = np.zeros((num_max), dtype=np.int32) if gt_num > 0: pad_diff[:gt_num] = sample['difficult'][:gt_num, 0] @@ -2026,7 +2099,9 @@ class BboxXYXY2XYWH(BaseOperator): def __init__(self): super(BboxXYXY2XYWH, self).__init__() - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): assert 'gt_bbox' in sample bbox = sample['gt_bbox'] bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2] @@ -2050,7 +2125,9 @@ class Lighting(BaseOperator): self.eigval = np.array(eigval).astype('float32') self.eigvec = np.array(eigvec).astype('float32') - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): alpha = np.random.normal(scale=self.alphastd, size=(3, )) sample['image'] += np.dot(self.eigvec, self.eigval * alpha) return sample @@ -2088,7 +2165,9 @@ class CornerTarget(BaseOperator): self.gaussian_iou = gaussian_iou self.max_tag_len = max_tag_len - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): tl_heatmaps = np.zeros( (self.num_classes, self.output_size[0], self.output_size[1]), dtype=np.float32) @@ -2185,7 +2264,9 @@ class CornerCrop(BaseOperator): self.is_train = is_train self.input_size = input_size - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): im_h, im_w = int(sample['h']), int(sample['w']) if self.is_train: scale = np.random.choice(self.random_scales) @@ -2259,7 +2340,9 @@ class CornerRatio(BaseOperator): self.input_size = input_size self.output_size = output_size - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): scale = (self.input_size + 1) // self.output_size out_height, out_width = (sample['h'] + 1) // scale, ( sample['w'] + 1) // scale @@ -2289,7 +2372,9 @@ class RandomScaledCrop(BaseOperator): self.scale_range = scale_range self.interp = interp - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): w = sample['w'] h = sample['h'] random_scale = np.random.uniform(*self.scale_range) @@ -2338,7 +2423,9 @@ class ResizeAndPad(BaseOperator): self.target_dim = target_dim self.interp = interp - def 
__call__(self, sample, context=None): + def __call__( + self, + sample, ): w = sample['w'] h = sample['h'] interp = self.interp @@ -2363,109 +2450,6 @@ class ResizeAndPad(BaseOperator): return sample -@register_op -class TargetAssign(BaseOperator): - """Assign regression target and labels. - Args: - image_size (int or list): input image size, a single integer or list of - [h, w]. Default: 512 - min_level (int): min level of the feature pyramid. Default: 3 - max_level (int): max level of the feature pyramid. Default: 7 - anchor_base_scale (int): base anchor scale. Default: 4 - num_scales (int): number of anchor scales. Default: 3 - aspect_ratios (list): aspect ratios. - Default: [(1, 1), (1.4, 0.7), (0.7, 1.4)] - match_threshold (float): threshold for foreground IoU. Default: 0.5 - """ - - def __init__(self, - image_size=512, - min_level=3, - max_level=7, - anchor_base_scale=4, - num_scales=3, - aspect_ratios=[(1, 1), (1.4, 0.7), (0.7, 1.4)], - match_threshold=0.5): - super(TargetAssign, self).__init__() - assert image_size % 2 ** max_level == 0, \ - "image size should be multiple of the max level stride" - self.image_size = image_size - self.min_level = min_level - self.max_level = max_level - self.anchor_base_scale = anchor_base_scale - self.num_scales = num_scales - self.aspect_ratios = aspect_ratios - self.match_threshold = match_threshold - - @property - def anchors(self): - if not hasattr(self, '_anchors'): - anchor_grid = AnchorGrid(self.image_size, self.min_level, - self.max_level, self.anchor_base_scale, - self.num_scales, self.aspect_ratios) - self._anchors = np.concatenate(anchor_grid.generate()) - return self._anchors - - def iou_matrix(self, a, b): - tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2]) - br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) - area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2) - area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) - area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) - area_o = (area_a[:, np.newaxis] + area_b - area_i) - # return area_i / (area_o + 1e-10) - return np.where(area_i == 0., np.zeros_like(area_i), area_i / area_o) - - def match(self, anchors, gt_boxes): - # XXX put smaller matrix first would be a little bit faster - mat = self.iou_matrix(gt_boxes, anchors) - max_anchor_for_each_gt = mat.argmax(axis=1) - max_for_each_anchor = mat.max(axis=0) - anchor_to_gt = mat.argmax(axis=0) - anchor_to_gt[max_for_each_anchor < self.match_threshold] = -1 - # XXX ensure each gt has at least one anchor assigned, - # see `force_match_for_each_row` in TF implementation - one_hot = np.zeros_like(mat) - one_hot[np.arange(mat.shape[0]), max_anchor_for_each_gt] = 1. 
- max_anchor_indices = one_hot.sum(axis=0).nonzero()[0] - max_gt_indices = one_hot.argmax(axis=0)[max_anchor_indices] - anchor_to_gt[max_anchor_indices] = max_gt_indices - return anchor_to_gt - - def encode(self, anchors, boxes): - wha = anchors[..., 2:] - anchors[..., :2] + 1 - ca = anchors[..., :2] + wha * .5 - whb = boxes[..., 2:] - boxes[..., :2] + 1 - cb = boxes[..., :2] + whb * .5 - offsets = np.empty_like(anchors) - offsets[..., :2] = (cb - ca) / wha - offsets[..., 2:] = np.log(whb / wha) - return offsets - - def __call__(self, sample, context=None): - gt_boxes = sample['gt_bbox'] - gt_labels = sample['gt_class'] - labels = np.full((self.anchors.shape[0], 1), 0, dtype=np.int32) - targets = np.full((self.anchors.shape[0], 4), 0., dtype=np.float32) - sample['gt_label'] = labels - sample['gt_target'] = targets - - if len(gt_boxes) < 1: - sample['fg_num'] = np.array(0, dtype=np.int32) - return sample - - anchor_to_gt = self.match(self.anchors, gt_boxes) - matched_indices = (anchor_to_gt >= 0).nonzero()[0] - labels[matched_indices] = gt_labels[anchor_to_gt[matched_indices]] - - matched_boxes = gt_boxes[anchor_to_gt[matched_indices]] - matched_anchors = self.anchors[matched_indices] - matched_targets = self.encode(matched_anchors, matched_boxes) - targets[matched_indices] = matched_targets - sample['fg_num'] = np.array(len(matched_targets), dtype=np.int32) - return sample - - @register_op class DebugVisibleImage(BaseOperator): """ @@ -2482,7 +2466,9 @@ class DebugVisibleImage(BaseOperator): if not isinstance(self.is_normalized, bool): raise TypeError("{}: input type is invalid.".format(self)) - def __call__(self, sample, context=None): + def __call__( + self, + sample, ): image = Image.open(sample['im_file']).convert('RGB') out_file_name = sample['im_file'].split('/')[-1] width = sample['w'] diff --git a/ppdet/modeling/architecture/mask_rcnn.py b/ppdet/modeling/architecture/mask_rcnn.py index 6880a55f9..608bfd5a3 100644 --- a/ppdet/modeling/architecture/mask_rcnn.py +++ b/ppdet/modeling/architecture/mask_rcnn.py @@ -45,7 +45,7 @@ class MaskRCNN(BaseArch): def model_arch(self): # Backbone body_feats = self.backbone(self.inputs) - spatial_scale = None + spatial_scale = 1. / 16 # Neck if self.neck is not None: diff --git a/ppdet/modeling/architecture/meta_arch.py b/ppdet/modeling/architecture/meta_arch.py index b758e8163..8be6b663d 100644 --- a/ppdet/modeling/architecture/meta_arch.py +++ b/ppdet/modeling/architecture/meta_arch.py @@ -29,20 +29,14 @@ class BaseArch(Layer): raise "Now, only support train or infer mode!" return out - def build_inputs(self, data, input_def): - inputs = {} - for name in input_def: - inputs[name] = [] - batch_size = len(data) - for bs in range(batch_size): - for name, input in zip(input_def, data[bs]): - input_v = np.array(input)[np.newaxis, ...] 
- inputs[name].append(input_v) - for name in input_def: - inputs[name] = to_variable(np.concatenate(inputs[name])) - return inputs - - def model_arch(self, mode): + def build_inputs(self, inputs, inputs_keys): + out = {} + for i, k in enumerate(inputs_keys): + v = to_variable(inputs[i]) + out[k] = v + return out + + def model_arch(self, ): raise NotImplementedError("Should implement model_arch method!") def loss(self, ): diff --git a/ppdet/modeling/head/mask_head.py b/ppdet/modeling/head/mask_head.py index 3ab92daa4..d22292650 100644 --- a/ppdet/modeling/head/mask_head.py +++ b/ppdet/modeling/head/mask_head.py @@ -13,7 +13,7 @@ class MaskFeat(Layer): __inject__ = ['mask_roi_extractor'] def __init__(self, - mask_roi_extractor, + mask_roi_extractor=None, num_convs=1, feat_in=2048, feat_out=256, diff --git a/ppdet/modeling/mask.py b/ppdet/modeling/mask.py index e8dcf20e3..1968edc1d 100644 --- a/ppdet/modeling/mask.py +++ b/ppdet/modeling/mask.py @@ -47,7 +47,7 @@ class Mask(object): im_info=inputs['im_info'], gt_classes=inputs['gt_class'], is_crowd=inputs['is_crowd'], - gt_segms=inputs['gt_mask'], + gt_segms=inputs['gt_poly'], rois=proposals, rois_num=proposals_num, labels_int32=labels_int32) diff --git a/ppdet/optimizer.py b/ppdet/optimizer.py index 2016cda13..186a1b236 100644 --- a/ppdet/optimizer.py +++ b/ppdet/optimizer.py @@ -1,17 +1,3 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -28,12 +14,13 @@ from paddle.fluid.layers.ops import cos from ppdet.core.workspace import register, serializable -__all__ = ['LearningRate', 'OptimizerBuilder'] +__all__ = ['Optimize'] logger = logging.getLogger(__name__) @serializable +@register class PiecewiseDecay(object): """ Multi step learning rate decay @@ -43,7 +30,7 @@ class PiecewiseDecay(object): milestones (list): steps at which to decay learning rate """ - def __init__(self, gamma=[0.1, 0.01], milestones=[60000, 80000]): + def __init__(self, gamma=[0.1, 0.01], milestones=[8, 11]): super(PiecewiseDecay, self).__init__() if type(gamma) is not list: self.gamma = [] @@ -53,9 +40,13 @@ class PiecewiseDecay(object): self.gamma = gamma self.milestones = milestones - def __call__(self, base_lr=None, boundary=None, value=None): + def __call__(self, + base_lr=None, + boundary=None, + value=None, + step_per_epoch=None): if boundary is not None: - boundary.extend(self.milestones) + boundary.extend(self.milestones * int(step_per_epoch)) if value is not None: for i in self.gamma: @@ -65,6 +56,7 @@ class PiecewiseDecay(object): @serializable +@register class LinearWarmup(object): """ Warm up learning rate linearly @@ -89,11 +81,14 @@ class LinearWarmup(object): value.append(lr) if i > 0: boundary.append(i) + boundary.append(self.steps) + value.append(base_lr) return boundary, value +@serializable @register -class LearningRate(object): +class BaseLR(object): """ Learning Rate configuration @@ -101,26 +96,24 @@ class LearningRate(object): base_lr (float): base learning rate schedulers (list): learning rate schedulers """ - __category__ = 'optim' + __inject__ = ['decay', 'warmup'] - def __init__(self, - base_lr=0.01, - schedulers=[PiecewiseDecay(), LinearWarmup()]): - super(LearningRate, self).__init__() + def __init__(self, base_lr=0.01, decay=None, warmup=None): + super(BaseLR, self).__init__() self.base_lr = base_lr - self.schedulers = schedulers + self.decay = decay + self.warmup = warmup - def __call__(self): - # TODO: split warmup & decay + def __call__(self, step_per_epoch): # warmup - boundary, value = self.schedulers[1](self.base_lr) + boundary, value = self.warmup(self.base_lr) # decay - decay_lr = self.schedulers[0](self.base_lr, boundary, value) + decay_lr = self.decay(self.base_lr, boundary, value, step_per_epoch) return decay_lr @register -class OptimizerBuilder(): +class Optimize(): """ Build optimizer handles @@ -129,35 +122,40 @@ class OptimizerBuilder(): optimizer (object): an `Optimizer` instance """ __category__ = 'optim' + __inject__ = ['learning_rate'] def __init__(self, - clip_grad_by_norm=None, - regularizer={'type': 'L2', - 'factor': .0001}, - optimizer={'type': 'Momentum', - 'momentum': .9}): - self.clip_grad_by_norm = clip_grad_by_norm - self.regularizer = regularizer + learning_rate, + optimizer={'name': 'Momentum', + 'momentum': 0.9}, + regularizer={'name': 'L2', + 'factor': 0.0001}, + clip_grad_by_norm=None): + self.learning_rate = learning_rate self.optimizer = optimizer + self.regularizer = regularizer + self.clip_grad_by_norm = clip_grad_by_norm - def __call__(self, learning_rate, params=None): - if self.clip_grad_by_norm is not None: - fluid.clip.set_gradient_clip( - clip=fluid.clip.GradientClipByGlobalNorm( - clip_norm=self.clip_grad_by_norm)) + def __call__(self, params=None, step_per_epoch=1): if self.regularizer: - reg_type = self.regularizer['type'] + 'Decay' + reg_type = 
self.regularizer['name'] + 'Decay' reg_factor = self.regularizer['factor'] regularization = getattr(regularizer, reg_type)(reg_factor) else: regularization = None + if self.clip_grad_by_norm is not None: + fluid.clip.set_gradient_clip( + clip=fluid.clip.GradientClipByGlobalNorm( + clip_norm=self.clip_grad_by_norm)) + optim_args = self.optimizer.copy() - optim_type = optim_args['type'] - del optim_args['type'] + optim_type = optim_args['name'] + del optim_args['name'] op = getattr(optimizer, optim_type) - return op(learning_rate=learning_rate, + + return op(learning_rate=self.learning_rate(step_per_epoch), parameter_list=params, regularization=regularization, **optim_args) diff --git a/ppdet/py_op/bbox.py b/ppdet/py_op/bbox.py index dc34a77cd..39d929dfc 100755 --- a/ppdet/py_op/bbox.py +++ b/ppdet/py_op/bbox.py @@ -99,7 +99,7 @@ def clip_bbox(boxes, im_shape): @jit -def bbox_overlaps(bboxes1, bboxes2): +def compute_iou(bboxes1, bboxes2): w1 = np.maximum(bboxes1[:, 2] - bboxes1[:, 0] + 1, 0) h1 = np.maximum(bboxes1[:, 3] - bboxes1[:, 1] + 1, 0) w2 = np.maximum(bboxes2[:, 2] - bboxes2[:, 0] + 1, 0) diff --git a/ppdet/py_op/mask.py b/ppdet/py_op/mask.py index 9de446f85..4114bf2bd 100755 --- a/ppdet/py_op/mask.py +++ b/ppdet/py_op/mask.py @@ -122,7 +122,7 @@ def polys_to_boxes(polys): @jit -def bbox_overlaps_mask(boxes, query_boxes): +def compute_iou_mask(boxes, query_boxes): N = boxes.shape[0] K = query_boxes.shape[0] overlaps = np.zeros((N, K), dtype=boxes.dtype) diff --git a/ppdet/py_op/target.py b/ppdet/py_op/target.py index 0a36a3fdc..509c21587 100755 --- a/ppdet/py_op/target.py +++ b/ppdet/py_op/target.py @@ -89,7 +89,7 @@ def generate_rpn_anchor_target(anchors, @jit def label_anchor(anchors, gt_boxes): - iou = bbox_overlaps(anchors, gt_boxes) + iou = compute_iou(anchors, gt_boxes) # every gt's anchor's index gt_bbox_anchor_inds = iou.argmax(axis=0) @@ -249,7 +249,7 @@ def label_bbox(boxes, class_nums=81, is_cascade_rcnn=False): - iou = bbox_overlaps(boxes, gt_boxes) + iou = compute_iou(boxes, gt_boxes) # every roi's gt box's index roi_gt_bbox_inds = np.zeros((boxes.shape[0]), dtype=np.int32) @@ -384,7 +384,7 @@ def sample_mask(boxes, gt_polys, label_int32, gt_classes, is_crowd, num_classes, masks_fg = np.zeros((fg_inds.shape[0], resolution**2), dtype=np.int32) bbox_fg = boxes[fg_inds] - iou = bbox_overlaps_mask(bbox_fg, boxes_from_polys) + iou = compute_iou_mask(bbox_fg, boxes_from_polys) fg_polys_inds = np.argmax(iou, axis=1) for i in range(bbox_fg.shape[0]): diff --git a/tools/eval.py b/tools/eval.py index d437cd7cb..93658a56f 100755 --- a/tools/eval.py +++ b/tools/eval.py @@ -18,7 +18,6 @@ from ppdet.core.workspace import load_config, merge_config, create from ppdet.utils.check import check_gpu, check_version, check_config from ppdet.utils.cli import ArgsParser from ppdet.utils.eval_utils import coco_eval_results -from ppdet.data.reader import create_reader from ppdet.utils.checkpoint import load_dygraph_ckpt, save_dygraph_ckpt @@ -40,7 +39,15 @@ def parse_args(): return args -def run(FLAGS, cfg): +def run(FLAGS, cfg, place): + + if FLAGS.use_gpu: + devices_num = 1 + else: + devices_num = int(os.environ.get('CPU_NUM', 1)) + + # Data + eval_loader, _ = create('EvalReader')(cfg['worker_num'], place) # Model main_arch = cfg.architecture @@ -49,16 +56,9 @@ def run(FLAGS, cfg): # Init Model model = load_dygraph_ckpt(model, ckpt=cfg.weights) - # Data Reader - if FLAGS.use_gpu: - devices_num = 1 - else: - devices_num = int(os.environ.get('CPU_NUM', 1)) - eval_reader = 
create_reader(cfg.EvalReader, devices_num=devices_num) - # Run Eval outs_res = [] - for iter_id, data in enumerate(eval_reader()): + for iter_id, data in enumerate(eval_loader): start_time = time.time() # forward @@ -82,7 +82,6 @@ def main(): cfg = load_config(FLAGS.config) merge_config(FLAGS.opt) - check_config(cfg) check_gpu(cfg.use_gpu) check_version() @@ -90,7 +89,7 @@ def main(): .dev_id) if cfg.use_gpu else fluid.CPUPlace() with fluid.dygraph.guard(place): - run(FLAGS, cfg) + run(FLAGS, cfg, place) if __name__ == '__main__': diff --git a/tools/train.py b/tools/train.py index 27f13323b..c528e311e 100755 --- a/tools/train.py +++ b/tools/train.py @@ -17,7 +17,6 @@ import numpy as np from collections import deque import paddle.fluid as fluid from ppdet.core.workspace import load_config, merge_config, create -from ppdet.data.reader import create_reader from ppdet.utils.stats import TrainingStats from ppdet.utils.check import check_gpu, check_version, check_config from ppdet.utils.cli import ArgsParser @@ -86,7 +85,7 @@ def parse_args(): return args -def run(FLAGS, cfg): +def run(FLAGS, cfg, place): env = os.environ FLAGS.dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env if FLAGS.dist: @@ -99,13 +98,16 @@ def run(FLAGS, cfg): random.seed(0) np.random.seed(0) + # Data + train_loader, step_per_epoch = create('TrainReader')( + cfg['worker_num'], place, use_prefetch=cfg['use_prefetch']) + # Model main_arch = cfg.architecture model = create(cfg.architecture) # Optimizer - lr = create('LearningRate')() - optimizer = create('OptimizerBuilder')(lr, model.parameters()) + optimizer = create('Optimize')(model.parameters(), step_per_epoch) # Init Model & Optimzer model = load_dygraph_ckpt( @@ -120,65 +122,56 @@ def run(FLAGS, cfg): strategy = fluid.dygraph.parallel.prepare_context() model = fluid.dygraph.parallel.DataParallel(model, strategy) - # Data Reader + # Run Train start_iter = 0 - if cfg.use_gpu: - devices_num = fluid.core.get_cuda_device_count() - else: - devices_num = int(os.environ.get('CPU_NUM', 1)) - - train_reader = create_reader( - cfg.TrainReader, (cfg.max_iters - start_iter), cfg, devices_num=1) - time_stat = deque(maxlen=cfg.log_smooth_window) start_time = time.time() end_time = time.time() - # Run Train - for iter_id, data in enumerate(train_reader()): - - start_time = end_time - end_time = time.time() - time_stat.append(end_time - start_time) - time_cost = np.mean(time_stat) - eta_sec = (cfg.max_iters - iter_id) * time_cost - eta = str(datetime.timedelta(seconds=int(eta_sec))) - - # Model Forward - model.train() - outputs = model(data, cfg['TrainReader']['inputs_def']['fields'], - 'train') - - # Model Backward - loss = outputs['loss'] - if ParallelEnv().nranks > 1: - loss = model.scale_loss(loss) - loss.backward() - model.apply_collective_grads() - else: - loss.backward() - optimizer.minimize(loss) - model.clear_gradients() - curr_lr = optimizer.current_step_lr() - - if ParallelEnv().nranks < 2 or ParallelEnv().local_rank == 0: - # Log state - if iter_id == 0: - train_stats = TrainingStats(cfg.log_smooth_window, - outputs.keys()) - train_stats.update(outputs) - logs = train_stats.log() - if iter_id % cfg.log_iter == 0: - strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format( - iter_id, curr_lr, logs, time_cost, eta) - logger.info(strs) - # Save Stage - if iter_id > 0 and iter_id % int( - cfg.snapshot_iter) == 0 or iter_id == cfg.max_iters - 1: - cfg_name = os.path.basename(FLAGS.config).split('.')[0] - save_name = str( - iter_id) if iter_id != 
cfg.max_iters - 1 else "model_final" - save_dir = os.path.join(cfg.save_dir, cfg_name, save_name) - save_dygraph_ckpt(model, optimizer, save_dir) + for e_id in range(int(cfg.epoch)): + for iter_id, data in enumerate(train_loader): + start_time = end_time + end_time = time.time() + time_stat.append(end_time - start_time) + time_cost = np.mean(time_stat) + eta_sec = (cfg.epoch * step_per_epoch - iter_id) * time_cost + eta = str(datetime.timedelta(seconds=int(eta_sec))) + + # Model Forward + model.train() + outputs = model(data, cfg['TrainReader']['inputs_def']['fields'], + 'train') + + # Model Backward + loss = outputs['loss'] + if ParallelEnv().nranks > 1: + loss = model.scale_loss(loss) + loss.backward() + model.apply_collective_grads() + else: + loss.backward() + optimizer.minimize(loss) + model.clear_gradients() + curr_lr = optimizer.current_step_lr() + + if ParallelEnv().nranks < 2 or ParallelEnv().local_rank == 0: + # Log state + if iter_id == 0: + train_stats = TrainingStats(cfg.log_smooth_window, + outputs.keys()) + train_stats.update(outputs) + logs = train_stats.log() + if iter_id % cfg.log_iter == 0: + strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format( + iter_id, curr_lr, logs, time_cost, eta) + logger.info(strs) + + # Save Stage + if fluid.dygraph.parallel.Env().local_rank == 0: + cfg_name = os.path.basename(FLAGS.config).split('.')[0] + save_name = str(e_id + 1) if e_id + 1 != int( + cfg.epoch) else "model_final" + save_dir = os.path.join(cfg.save_dir, cfg_name, save_name) + save_dygraph_ckpt(model, optimizer, save_dir) def main(): @@ -186,7 +179,6 @@ def main(): cfg = load_config(FLAGS.config) merge_config(FLAGS.opt) - check_config(cfg) check_gpu(cfg.use_gpu) check_version() @@ -194,7 +186,7 @@ def main(): if cfg.use_gpu else fluid.CPUPlace() with fluid.dygraph.guard(place): - run(FLAGS, cfg) + run(FLAGS, cfg, place) if __name__ == "__main__": -- GitLab
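RandomFlipImage now also mirrors an optional per-sample semantic map ('semantic') along with the image, boxes, polygons and keypoints. A minimal sketch of a horizontal flip under those assumptions (the helper flip_horizontally and the exact box convention are illustrative, not lifted from the patch):

    import numpy as np

    def flip_horizontally(sample):
        # Mirror the image, the optional semantic segmentation map,
        # and xyxy boxes; mark the sample as flipped.
        im = sample['image']
        width = im.shape[1]
        sample['image'] = im[:, ::-1, :]
        if sample.get('semantic') is not None:
            sample['semantic'] = sample['semantic'][:, ::-1]
        if len(sample.get('gt_bbox', [])):
            oldx1 = sample['gt_bbox'][:, 0].copy()
            oldx2 = sample['gt_bbox'][:, 2].copy()
            sample['gt_bbox'][:, 0] = width - oldx2 - 1
            sample['gt_bbox'][:, 2] = width - oldx1 - 1
        sample['flipped'] = True
        return sample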
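The transform operators drop the reader `context` argument, so PadBox now decides which fields to pad by inspecting the sample dict itself. A minimal sketch of that padding, assuming a sample with 'gt_bbox' and an optional 'gt_class' column vector (the helper name pad_gt is made up for this note):

    import numpy as np

    def pad_gt(sample, num_max_boxes=50):
        # Pad ground-truth boxes (and class ids, if present) to a fixed
        # size so that samples can be stacked into a batch.
        bbox = sample['gt_bbox']
        gt_num = min(num_max_boxes, len(bbox))
        pad_bbox = np.zeros((num_max_boxes, 4), dtype=np.float32)
        if gt_num > 0:
            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
        sample['gt_bbox'] = pad_bbox
        if 'gt_class' in sample:
            pad_class = np.zeros((num_max_boxes, ), dtype=np.int32)
            if gt_num > 0:
                pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
            sample['gt_class'] = pad_class
        return sample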
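BaseArch.build_inputs now simply pairs the positional arrays produced by the loader with the configured field names and wraps each one as a dygraph variable. The standalone function below sketches the same idea (it assumes it is called inside fluid.dygraph.guard(); in the patch this is a method of BaseArch):

    import numpy as np
    from paddle.fluid.dygraph import to_variable

    def build_inputs(inputs, inputs_keys):
        # inputs: positional batch from the DataLoader
        # inputs_keys: field names from the reader config, in the same order
        return {k: to_variable(np.asarray(inputs[i]))
                for i, k in enumerate(inputs_keys)}

    # Typical call site, mirroring the patch:
    #   feed = build_inputs(data, cfg['TrainReader']['inputs_def']['fields'])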
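With training driven by epochs instead of max_iters, PiecewiseDecay milestones are given in epochs and converted to iteration boundaries using step_per_epoch, while LinearWarmup supplies the per-step ramp at the front of the schedule. The sketch below shows one way those boundary/value lists compose (function name and defaults are illustrative, not the exact BaseLR code):

    def lr_schedule(base_lr=0.01, start_factor=1.0 / 3, warmup_steps=500,
                    milestones=(8, 11), gamma=(0.1, 0.01), step_per_epoch=1000):
        boundary, value = [], []
        # Linear warmup: ramp from start_factor * base_lr up to base_lr.
        for i in range(warmup_steps + 1):
            alpha = i / float(warmup_steps)
            value.append(base_lr * (start_factor * (1. - alpha) + alpha))
            if i > 0:
                boundary.append(i)
        # Piecewise decay: epoch milestones converted to iteration counts.
        boundary.extend(int(m * step_per_epoch) for m in milestones)
        value.extend(base_lr * g for g in gamma)
        return boundary, value

    # The resulting lists feed e.g. fluid.layers.piecewise_decay(boundary, value).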
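Optimize resolves the optimizer and regularizer classes from the config by their 'name' key (previously 'type') with a plain attribute lookup on the fluid namespaces. A minimal sketch of that lookup, assuming fluid.optimizer.Momentum and fluid.regularizer.L2Decay are the configured choices (the helper name build_optimizer is made up for this note):

    from paddle.fluid import optimizer as fluid_optimizer
    from paddle.fluid import regularizer as fluid_regularizer

    def build_optimizer(lr, params,
                        optimizer_cfg={'name': 'Momentum', 'momentum': 0.9},
                        regularizer_cfg={'name': 'L2', 'factor': 0.0001}):
        # 'L2' + 'Decay' -> fluid.regularizer.L2Decay(factor)
        regularization = getattr(
            fluid_regularizer,
            regularizer_cfg['name'] + 'Decay')(regularizer_cfg['factor'])
        # 'Momentum' -> fluid.optimizer.Momentum; remaining keys are kwargs.
        optim_args = dict(optimizer_cfg)
        optim_cls = getattr(fluid_optimizer, optim_args.pop('name'))
        return optim_cls(learning_rate=lr,
                         parameter_list=params,
                         regularization=regularization,
                         **optim_args)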