提交 37f7e6ee 编写于 作者: S sunxl1988

test=dygraph reconstruct config and reader

上级 646996f4
architecture: CascadeRCNN
use_gpu: true
max_iters: 180000
log_smooth_window: 50
save_dir: output
snapshot_iter: 10000
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams
metric: COCO
weights: output/cascade_rcnn_r50_1x/model_final
num_classes: 81
num_stages: 3
open_debug: False
# Model Achitecture # Model Achitecture
CascadeRCNN: CascadeRCNN:
# model anchor info flow # model anchor info flow
...@@ -102,24 +89,3 @@ Mask: ...@@ -102,24 +89,3 @@ Mask:
resolution: 14 resolution: 14
mask_post_process: mask_post_process:
name: MaskPostProcess name: MaskPostProcess
# Train
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
_READER_: 'mask_reader.yml'
architecture: FasterRCNN
use_gpu: true
max_iters: 180000
log_smooth_window: 50
save_dir: output
snapshot_iter: 10000
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams
metric: COCO
weights: output/faster_rcnn_r50_1x/model_final
num_classes: 81
open_debug: False
# Model Achitecture # Model Achitecture
FasterRCNN: FasterRCNN:
# model anchor info flow # model anchor info flow
...@@ -84,24 +72,3 @@ Proposal: ...@@ -84,24 +72,3 @@ Proposal:
keep_top_k: 100 keep_top_k: 100
score_threshold: 0.05 score_threshold: 0.05
nms_threshold: 0.5 nms_threshold: 0.5
# Train
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
_READER_: 'faster_reader.yml'
architecture: MaskRCNN
use_gpu: true
max_iters: 180000
log_smooth_window: 50
save_dir: output
snapshot_iter: 10000
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams
metric: COCO
weights: output/mask_rcnn_r50_1x/model_final
num_classes: 81
open_debug: False
# Model Achitecture # Model Achitecture
MaskRCNN: MaskRCNN:
# model anchor info flow # model anchor info flow
...@@ -23,9 +11,12 @@ MaskRCNN: ...@@ -23,9 +11,12 @@ MaskRCNN:
mask_head: MaskHead mask_head: MaskHead
ResNet: ResNet:
norm_type: 'affine' # index 0 stands for res2
depth: 50 depth: 50
freeze_at: 'res2' norm_type: bn
freeze_at: 0
return_idx: [2]
num_stages: 3
RPNHead: RPNHead:
rpn_feat: rpn_feat:
...@@ -41,20 +32,23 @@ BBoxHead: ...@@ -41,20 +32,23 @@ BBoxHead:
name: RoIExtractor name: RoIExtractor
resolution: 14 resolution: 14
sampling_ratio: 0 sampling_ratio: 0
spatial_scale: 0.0625 start_level: 0
extractor_type: 'RoIAlign' end_level: 0
feat_in: 1024 head_feat:
feat_out: 512 name: Res5Feat
feat_in: 1024
feat_out: 512
with_pool: true
in_feat: 2048
MaskHead: MaskHead:
mask_feat: mask_feat:
name: MaskFeat name: MaskFeat
num_convs: 0
feat_in: 2048 feat_in: 2048
feat_out: 256 feat_out: 256
mask_stages: 1 share_bbox_feat: true
feat_in: 256 feat_in: 256
resolution: 14
mask_stages: 1
AnchorRPN: AnchorRPN:
anchor_generator: anchor_generator:
...@@ -80,7 +74,6 @@ Proposal: ...@@ -80,7 +74,6 @@ Proposal:
train_post_nms_top_n: 2000 train_post_nms_top_n: 2000
infer_pre_nms_top_n: 12000 infer_pre_nms_top_n: 12000
infer_post_nms_top_n: 2000 infer_post_nms_top_n: 2000
return_rois_num: True
proposal_target_generator: proposal_target_generator:
name: ProposalTargetGenerator name: ProposalTargetGenerator
batch_size_per_im: 512 batch_size_per_im: 512
...@@ -101,27 +94,7 @@ Proposal: ...@@ -101,27 +94,7 @@ Proposal:
Mask: Mask:
mask_target_generator: mask_target_generator:
name: MaskTargetGenerator name: MaskTargetGenerator
resolution: 14 mask_resolution: 14
mask_post_process: mask_post_process:
name: MaskPostProcess name: MaskPostProcess
mask_resolution: 14
# Train
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
_READER_: 'mask_reader.yml'
architecture: MaskRCNN
use_gpu: true
max_iters: 180000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
metric: COCO
weights: output/mask_rcnn_r50_fpn_1x/model_final
num_classes: 81
load_static_weights: True
# Model Achitecture # Model Achitecture
MaskRCNN: MaskRCNN:
# model anchor info flow # model anchor info flow
...@@ -38,7 +26,6 @@ FPN: ...@@ -38,7 +26,6 @@ FPN:
max_level: 4 max_level: 4
spatial_scale: [0.25, 0.125, 0.0625, 0.03125] spatial_scale: [0.25, 0.125, 0.0625, 0.03125]
RPNHead: RPNHead:
rpn_feat: rpn_feat:
name: RPNFeat name: RPNFeat
...@@ -55,10 +42,10 @@ BBoxHead: ...@@ -55,10 +42,10 @@ BBoxHead:
resolution: 7 resolution: 7
sampling_ratio: 2 sampling_ratio: 2
head_feat: head_feat:
name: TwoFCHead name: TwoFCFeat
in_dim: 256 in_dim: 256
mlp_dim: 1024 mlp_dim: 1024
in_feat: 1024 #in_feat: 1024
MaskHead: MaskHead:
mask_feat: mask_feat:
...@@ -78,7 +65,7 @@ AnchorRPN: ...@@ -78,7 +65,7 @@ AnchorRPN:
name: AnchorGeneratorRPN name: AnchorGeneratorRPN
aspect_ratios: [0.5, 1.0, 2.0] aspect_ratios: [0.5, 1.0, 2.0]
anchor_start_size: 32 anchor_start_size: 32
stride: [4., 4.] stride: [4.0, 4.0]
anchor_target_generator: anchor_target_generator:
name: AnchorTargetGeneratorRPN name: AnchorTargetGeneratorRPN
batch_size_per_im: 256 batch_size_per_im: 256
...@@ -120,25 +107,3 @@ Mask: ...@@ -120,25 +107,3 @@ Mask:
mask_post_process: mask_post_process:
name: MaskPostProcess name: MaskPostProcess
mask_resolution: 28 mask_resolution: 28
# Train
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
_READER_: 'mask_reader.yml'
architecture: YOLOv3
use_gpu: true
max_iters: 500000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
metric: COCO
pretrain_weights: https://paddlemodels.bj.bcebos.com/yolo/darknet53.pdparams
weights: output/yolov3_darknet/model_final
num_classes: 80
use_fine_grained_loss: false
open_debug: False
YOLOv3: YOLOv3:
anchor: AnchorYOLO anchor: AnchorYOLO
backbone: DarkNet backbone: DarkNet
...@@ -51,25 +38,3 @@ AnchorYOLO: ...@@ -51,25 +38,3 @@ AnchorYOLO:
nms_top_k: 1000 nms_top_k: 1000
normalized: false normalized: false
background_label: -1 background_label: -1
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 400000
- 450000
- !LinearWarmup
start_factor: 0.
steps: 4000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0005
type: L2
_READER_: 'yolov3_reader.yml'
architecture: CascadeRCNN
num_stages: 3
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams
weights: output/cascade_rcnn_r50_1x/model_final
use_gpu: true
epoch: 24
worker_num: 0
use_prefetch: False
log_smooth_window: 20
save_dir: output
metric: COCO
num_classes: 81
open_debug: False
_READER_: '../reader/mask_rcnn.yml'
_ARCHITECHTURE_: '../architechture/cascade_mask_rcnn.yml'
_OPTIMIZE_: '../optimize/rcnn.yml'
architecture: FasterRCNN
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams
weights: output/faster_rcnn_r50_1x/model_final
use_gpu: true
worker_num: 0
use_prefetch: False
epoch: 24
log_smooth_window: 20
save_dir: output
metric: COCO
num_classes: 81
open_debug: False
_READER_: '../reader/faster_rcnn.yml'
_ARCHITECHTURE_: '../architechture/faster_rcnn.yml'
_OPTIMIZE_: '../optimize/rcnn.yml'
architecture: MaskRCNN
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams
weights: output/mask_rcnn_r50_1x/model_final
use_gpu: true
epoch: 24
use_prefetch: False
worker_num: 0
log_smooth_window: 20
save_dir: output
metric: COCO
num_classes: 81
load_static_weights: true
_READER_: '../reader/mask_rcnn.yml'
_ARCHITECHTURE_: '../architechture/mask_rcnn.yml'
_OPTIMIZE_: '../optimize/rcnn.yml'
architecture: MaskRCNN
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/resnet50.pdparams
weights: output/mask_rcnn_r50_1x/model_final
use_gpu: true
epoch: 24
use_prefetch: False
worker_num: 0
log_smooth_window: 20
save_dir: output
metric: COCO
num_classes: 81
load_static_weights: true
_READER_: '../reader/mask_rcnn.yml'
_ARCHITECHTURE_: '../architechture/mask_rcnn_fpn.yml'
_OPTIMIZE_: '../optimize/rcnn.yml'
architecture: YOLOv3
pretrain_weights: https://paddlemodels.bj.bcebos.com/yolo/darknet53.pdparams
weights: output/yolov3_darknet/model_final
use_gpu: true
worker_num: 0
use_prefetch: False
epoch: 300
log_smooth_window: 20
save_dir: output
metric: COCO
num_classes: 80
use_fine_grained_loss: false
open_debug: False
_READER_: '../reader/yolo.yml'
_ARCHITECHTURE_: '../architechture/yolov3.yml'
_OPTIMIZE_: '../optimize/yolo.yml'
_BASE_: "configs/base/mask_rcnn_r50_1x.yml"
use_gpu: true
worker_num: 0
epoch: 24
log_smooth_window: 20
save_dir: output
metric: COCO
num_classes: 81
TrainReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
dataset:
name: COCODataset
dataset_dir: /home/ai/dataset/COCO17
_BASE_: "configs/base/mask_rcnn_r50_fpn_1x.yml"
use_gpu: true
worker_num: 0
epoch: 24
log_smooth_window: 20
save_dir: output
metric: COCO
num_classes: 81
weights: output/mask_r50_fpn_1x/model_final.pdparams
EvalReader:
dataset:
dataset_dir: /home/ai/dataset/COCO17
Optimize:
learning_rate:
name: BaseLR
base_lr: 0.01
decay:
name: PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
warmup:
name: LinearWarmup
start_factor: 0.3333333333333333
steps: 500
optimizer:
name: Momentum
momentum: 0.9
regularizer:
name: L2
factor: 0.0001
Optimize:
learning_rate:
name: BaseLR
base_lr: 0.01
decay:
name: PiecewiseDecay
gamma: 0.1
milestones: [200, 250]
warmup:
name: LinearWarmup
start_factor: 0.
steps: 4000
optimizer:
name: Momentum
momentum: 0.9
regularizer:
name: L2
factor: 0.0005
...@@ -2,94 +2,51 @@ TrainReader: ...@@ -2,94 +2,51 @@ TrainReader:
inputs_def: inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd'] fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd']
dataset: dataset:
!COCODataSet name: COCODataset
dataset_dir: /home/ai/dataset/COCO17/
image_dir: train2017 image_dir: train2017
anno_path: annotations/instances_train2017.json anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
sample_transforms: sample_transforms:
- !DecodeImage - DecodeImage: {to_rgb: true}
to_rgb: True - RandomFlipImage: {prob: 0.5}
- !RandomFlipImage - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
prob: 0.5 - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
- !NormalizeImage - Permute: {to_bgr: false, channel_first: true}
is_channel_first: false
is_scale: true
mean: [0.485,0.456,0.406]
std: [0.229, 0.224,0.225]
- !ResizeImage
target_size: 800
max_size: 1333
interp: 1
use_cv2: true
- !Permute
to_bgr: false
channel_first: true
batch_transforms: batch_transforms:
- !PadBatch - PadBatch: {pad_to_stride: 0, use_padded_im_info: false, pad_gt: True}
pad_to_stride: 0
use_padded_im_info: False
pad_gt: true
batch_size: 1 batch_size: 1
shuffle: true shuffle: true
worker_num: 2
use_process: false
EvalReader: EvalReader:
inputs_def: inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape'] fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset: dataset:
!COCODataSet name: COCODataset
image_dir: val2017 image_dir: val2017
anno_path: annotations/instances_val2017.json anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco dataset_dir: /home/ai/dataset/COCO17
sample_transforms: sample_transforms:
- !DecodeImage - DecodeImage: {to_rgb: true}
to_rgb: true - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- !NormalizeImage - ResizeImage: {interp: 1, max_size: 1333, target_size: 800, use_cv2: true}
is_channel_first: false - Permute: {channel_first: true, to_bgr: false}
is_scale: true
mean: [0.485,0.456,0.406]
std: [0.229, 0.224,0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms: batch_transforms:
- !PadBatch - PadBatch: {pad_to_stride: 0, use_padded_im_info: false, pad_gt: True}
pad_to_stride: 32
use_padded_im_info: false
pad_gt: True
batch_size: 2 batch_size: 2
shuffle: false shuffle: false
drop_empty: false drop_empty: false
worker_num: 2
TestReader: TestReader:
inputs_def: inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape'] fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset: dataset:
!ImageFolder name: ImageFolder
anno_path: annotations/instances_val2017.json anno_path: annotations/instances_val2017.json
sample_transforms: sample_transforms:
- !DecodeImage - DecodeImage: {to_rgb: true, with_mixup: false}
to_rgb: true - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
with_mixup: false - ResizeImage: {interp: 1, max_size: 1333, target_size: 800, use_cv2: true}
- !NormalizeImage - Permute: {channel_first: true, to_bgr: false}
is_channel_first: false
is_scale: true
mean: [0.485,0.456,0.406]
std: [0.229, 0.224,0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_size: 1 batch_size: 1
shuffle: false shuffle: false
drop_last: false
worker_num: 0
use_prefetch: False
TrainReader: TrainReader:
inputs_def: inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_mask'] fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
dataset: dataset:
!COCODataSet name: COCODataset
dataset_dir: dataset/coco
image_dir: train2017 image_dir: train2017
anno_path: annotations/instances_train2017.json anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
sample_transforms: sample_transforms:
- !DecodeImage - DecodeImage: {to_rgb: true}
to_rgb: true - RandomFlipImage: {prob: 0.5, is_mask_flip: true}
- !RandomFlipImage - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
prob: 0.5 - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
is_mask_flip: true - Permute: {to_bgr: false, channel_first: true}
- !NormalizeImage
is_channel_first: false
is_scale: true
mean: [0.485,0.456,0.406]
std: [0.229, 0.224,0.225]
- !ResizeImage
target_size: 800
max_size: 1333
interp: 1
use_cv2: true
- !Permute
to_bgr: false
channel_first: true
batch_transforms: batch_transforms:
- !PadBatch - PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: True}
pad_to_stride: 32
use_padded_im_info: false
pad_gt: True
batch_size: 1 batch_size: 1
shuffle: true shuffle: true
worker_num: 2
drop_last: false drop_last: false
use_process: false
EvalReader: EvalReader:
inputs_def: inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape'] fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset: dataset:
!COCODataSet name: COCODataset
dataset_dir: dataset/coco
image_dir: val2017 image_dir: val2017
anno_path: annotations/instances_val2017.json anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
sample_transforms: sample_transforms:
- !DecodeImage - DecodeImage: {to_rgb: true}
to_rgb: true - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- !NormalizeImage - ResizeImage: {interp: 1, max_size: 1333, target_size: 800, use_cv2: true}
is_channel_first: false - Permute: {channel_first: true, to_bgr: false}
is_scale: true
mean: [0.485,0.456,0.406]
std: [0.229, 0.224,0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_transforms: batch_transforms:
- !PadBatch - PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: True}
pad_to_stride: 32
use_padded_im_info: false
pad_gt: True
batch_size: 1 batch_size: 1
shuffle: false shuffle: false
drop_last: false drop_last: false
drop_empty: false drop_empty: false
worker_num: 2
TestReader: TestReader:
inputs_def: inputs_def:
fields: ['image', 'im_info', 'im_id', 'im_shape'] fields: ['image', 'im_info', 'im_id', 'im_shape']
dataset: dataset:
!ImageFolder name: ImageFolder
anno_path: annotations/instances_val2017.json anno_path: annotations/instances_val2017.json
sample_transforms: sample_transforms:
- !DecodeImage - DecodeImage: {to_rgb: true, with_mixup: false}
to_rgb: true - NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
with_mixup: false - ResizeImage: {interp: 1, max_size: 1333, target_size: 800, use_cv2: true}
- !NormalizeImage - Permute: {channel_first: true, to_bgr: false}
is_channel_first: false
is_scale: true
mean: [0.485,0.456,0.406]
std: [0.229, 0.224,0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
batch_size: 1 batch_size: 1
shuffle: false shuffle: false
drop_last: false drop_last: false
worker_num: 0
use_prefetch: False
TrainReader:
inputs_def:
fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
num_max_boxes: 50
dataset:
name: COCODataset
dataset_dir: dataset/coco
image_dir: train2017
anno_path: annotations/instances_train2017.json
with_background: false
sample_transforms:
- DecodeImage: {to_rgb: True, with_mixup: True}
- MixupImage: {alpha: 1.5, beta: 1.5}
- ColorDistort: {}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomCrop: {}
- RandomFlipImage: {is_normalized: false}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 50}
- BboxXYXY2XYWH: {}
batch_transforms:
- RandomShape: {sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_inter: True}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True, is_channel_first: false}
- Permute: {to_bgr: false, channel_first: True}
# Gt2YoloTarget is only used when use_fine_grained_loss set as true,
# this operator will be deleted automatically if use_fine_grained_loss
# is set as false
- Gt2YoloTarget: {
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]],
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]],
downsample_ratios: [32, 16, 8]}
batch_size: 8
shuffle: true
drop_last: true
EvalReader:
inputs_def:
fields: ['image', 'im_size', 'im_id']
num_max_boxes: 50
dataset:
name: COCODataset
dataset_dir: dataset/coco
image_dir: val2017
anno_path: annotations/instances_val2017.json
with_background: false
sample_transforms:
- DecodeImage: {to_rgb: True}
- ResizeImage: {target_size: 608, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True, is_channel_first: false}
- PadBox: {num_max_boxes: 50}
- Permute: {to_bgr: false, channel_first: True}
batch_size: 8
drop_empty: false
TestReader:
inputs_def:
image_shape: [3, 608, 608]
fields: ['image', 'im_size', 'im_id']
dataset:
name: ImageFolder
anno_path: annotations/instances_val2017.json
with_background: false
sample_transforms:
- DecodeImage: {to_rgb: True}
- ResizeImage: {target_size: 608, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True, is_channel_first: false}
- Permute: {to_bgr: false, channel_first: True}
batch_size: 1
TrainReader:
inputs_def:
fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
num_max_boxes: 50
dataset:
!COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
with_background: false
sample_transforms:
- !DecodeImage
to_rgb: True
with_mixup: True
- !MixupImage
alpha: 1.5
beta: 1.5
- !ColorDistort {}
- !RandomExpand
fill_value: [123.675, 116.28, 103.53]
- !RandomCrop {}
- !RandomFlipImage
is_normalized: false
- !NormalizeBox {}
- !PadBox
num_max_boxes: 50
- !BboxXYXY2XYWH {}
batch_transforms:
- !RandomShape
sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
random_inter: True
- !NormalizeImage
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
is_scale: True
is_channel_first: false
- !Permute
to_bgr: false
channel_first: True
# Gt2YoloTarget is only used when use_fine_grained_loss set as true,
# this operator will be deleted automatically if use_fine_grained_loss
# is set as false
- !Gt2YoloTarget
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
downsample_ratios: [32, 16, 8]
batch_size: 8
shuffle: true
mixup_epoch: 250
drop_last: true
worker_num: 8
bufsize: 16
use_process: true
EvalReader:
inputs_def:
fields: ['image', 'im_size', 'im_id']
num_max_boxes: 50
dataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
with_background: false
sample_transforms:
- !DecodeImage
to_rgb: True
- !ResizeImage
target_size: 608
interp: 2
- !NormalizeImage
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
is_scale: True
is_channel_first: false
- !PadBox
num_max_boxes: 50
- !Permute
to_bgr: false
channel_first: True
batch_size: 8
drop_empty: false
worker_num: 8
bufsize: 16
TestReader:
inputs_def:
image_shape: [3, 608, 608]
fields: ['image', 'im_size', 'im_id']
dataset:
!ImageFolder
anno_path: annotations/instances_val2017.json
with_background: false
sample_transforms:
- !DecodeImage
to_rgb: True
- !ResizeImage
target_size: 608
interp: 2
- !NormalizeImage
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
is_scale: True
is_channel_first: false
- !Permute
to_bgr: false
channel_first: True
batch_size: 1
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from .reader import *
from .source import * from .source import *
from .transform import * from .transform import *
from .sampler import *
from .loader import *
import copy
import traceback
import logging
import threading
import sys
if sys.version_info >= (3, 0):
import queue as Queue
else:
import Queue
import numpy as np
from paddle.io import DataLoader
from ppdet.core.workspace import register, serializable, create
from .sampler import DistributedBatchSampler
from .transform import operators
from .transform import batch_operators
logger = logging.getLogger(__name__)
class Compose(object):
def __init__(self, transforms, fields=None, from_=operators,
num_classes=81):
self.transforms = transforms
self.transforms_cls = []
for t in self.transforms:
for k, v in t.items():
print(k, v)
op_cls = getattr(from_, k)
self.transforms_cls.append(op_cls(**v))
if hasattr(op_cls, 'num_classes'):
op_cls.num_classes = num_classes
self.fields = fields
def __call__(self, data):
if self.fields is not None:
data_new = []
for item in data:
data_new.append(dict(zip(self.fields, item)))
data = data_new
for f in self.transforms_cls:
try:
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
logger.warn("fail to map op [{}] with error: {} and stack:\n{}".
format(f, e, str(stack_info)))
raise e
if self.fields is not None:
data_new = []
for item in data:
batch = []
for k in self.fields:
batch.append(item[k])
data_new.append(batch)
batch_size = len(data_new)
data_new = list(zip(*data_new))
if batch_size > 1:
data = [
np.array(item).astype(item[0].dtype) for item in data_new
]
else:
data = data_new
return data
class Prefetcher(threading.Thread):
def __init__(self, iterator, prefetch_num=1):
threading.Thread.__init__(self)
self.queue = Queue.Queue(prefetch_num)
self.iterator = iterator
self.daemon = True
self.start()
def run(self):
for item in self.iterator:
self.queue.put(item)
self.queue.put(None)
def next(self):
next_item = self.queue.get()
if next_item is None:
raise StopIteration
return next_item
# Python 3 compatibility
def __next__(self):
return self.next()
def __iter__(self):
return self
class DataLoaderPrefetch(DataLoader):
def __init__(self,
dataset,
batch_sampler,
collate_fn,
num_workers,
places,
return_list,
prefetch_num=1):
super(DataLoaderPrefetch, self).__init__(
dataset=dataset,
batch_sampler=batch_sampler,
collate_fn=collate_fn,
num_workers=num_workers,
places=places,
return_list=return_list)
self.prefetch_num = prefetch_num
def __iter__(self):
return Prefetcher(super().__iter__(), self.prefetch_num)
class BaseDataLoader(object):
__share__ = ['num_classes']
__inject__ = ['dataset']
def __init__(self,
inputs_def=None,
dataset=None,
sample_transforms=None,
batch_transforms=None,
batch_size=1,
shuffle=False,
drop_last=False,
drop_empty=True,
num_classes=81):
# dataset
self._dataset = dataset #create(dataset['name'])
self._dataset.parse_dataset()
# out fields
self._fields = copy.deepcopy(inputs_def[
'fields']) if inputs_def else None
# sample transform
self._sample_transforms = Compose(
sample_transforms, num_classes=num_classes)
# get data
self._dataset.set_out(self._sample_transforms, self._fields)
# batch transfrom
if batch_transforms:
self._batch_transforms = Compose(batch_transforms, self._fields,
batch_operators, num_classes)
# batch sampler
self._batch_sampler = DistributedBatchSampler(
self._dataset,
batch_size=batch_size,
shuffle=shuffle,
drop_last=drop_last)
self.batch_size = batch_size
def __call__(self,
worker_num,
device,
return_list=False,
use_prefetch=False,
prefetch_num=None):
if use_prefetch:
loader = DataLoaderPrefetch(
dataset=self._dataset,
batch_sampler=self._batch_sampler,
collate_fn=self._batch_transforms,
num_workers=worker_num,
places=device,
return_list=return_list,
prefetch_num=prefetch_num
if prefetch_num is not None else self.batch_size)
else:
loader = DataLoader(
dataset=self._dataset,
batch_sampler=self._batch_sampler,
collate_fn=self._batch_transforms,
num_workers=worker_num,
places=device,
return_list=return_list)
return loader, len(self._batch_sampler)
@register
class TrainReader(BaseDataLoader):
def __init__(self,
inputs_def=None,
dataset=None,
sample_transforms=None,
batch_transforms=None,
batch_size=1,
shuffle=False,
drop_last=False,
drop_empty=True,
num_classes=81):
super(TrainReader, self).__init__(
inputs_def, dataset, sample_transforms, batch_transforms,
batch_size, shuffle, drop_last, drop_empty, num_classes)
@register
class EvalReader(BaseDataLoader):
def __init__(self,
inputs_def=None,
dataset=None,
sample_transforms=None,
batch_transforms=None,
batch_size=1,
shuffle=False,
drop_last=False,
drop_empty=True,
num_classes=81):
super(EvalReader, self).__init__(inputs_def, dataset, sample_transforms,
batch_transforms, batch_size, shuffle,
drop_last, drop_empty, num_classes)
@register
class TestReader(BaseDataLoader):
def __init__(self,
inputs_def=None,
dataset=None,
sample_transforms=None,
batch_transforms=None,
batch_size=1,
shuffle=False,
drop_last=False,
drop_empty=True,
num_classes=81):
super(TestReader, self).__init__(inputs_def, dataset, sample_transforms,
batch_transforms, batch_size, shuffle,
drop_last, drop_empty, num_classes)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# function:
# transform samples in 'source' using 'worker'
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import six
if six.PY3:
from queue import Empty
else:
from Queue import Empty
import uuid
import logging
import signal
import threading
import traceback
logger = logging.getLogger(__name__)
main_pid = os.getpid()
worker_set = set()
class EndSignal(object):
""" signal used to notify worker to exit
"""
def __init__(self, id, errno=0, errmsg=''):
self.id = id
self.errno = errno
self.errmsg = errmsg
class ParallelMap(object):
"""
Transform samples to mapped samples which is similar to
'basic.MappedDataset', but multiple workers (threads or processes)
will be used
Notes:
this class is not thread-safe
"""
def __init__(self,
source,
worker,
worker_num,
bufsize=100,
use_process=False,
memsize='3G'):
self._worker_num = worker_num
self._bufsize = bufsize
self._use_process = use_process
if self._use_process and sys.platform == "win32":
logger.debug("Use multi-thread reader instead of "
"multi-process reader on Windows.")
self._use_process = False
if self._use_process and type(memsize) is str:
assert memsize[-1].lower() in ['g', 'm'], \
"invalid param for memsize[%s], should be " \
"ended with 'G' or 'g' or 'M' or 'm'" % (memsize)
power = 3 if memsize[-1].lower() == 'g' else 2
self._memsize = int(memsize[:-1]) * (1024**power)
self._started = False
self._source = source
self._worker = worker
self._exit = False
self._setup()
self._souce_drained = False
def __iter__(self):
return self
def __next__(self):
return self.next()
def _setup(self):
"""setup input/output queues and workers """
use_process = self._use_process
bufsize = self._bufsize
if use_process:
from .shared_queue import SharedQueue as Queue
from multiprocessing import Process as Worker
from multiprocessing import Event
memsize = self._memsize
self._inq = Queue(bufsize, memsize=memsize)
self._outq = Queue(bufsize, memsize=memsize)
else:
if six.PY3:
from queue import Queue
else:
from Queue import Queue
from threading import Thread as Worker
from threading import Event
self._inq = Queue(bufsize)
self._outq = Queue(bufsize)
consumer_num = self._worker_num
id = str(uuid.uuid4())[-3:]
self._producer = threading.Thread(
target=self._produce,
args=('producer-' + id, self._source, self._inq))
self._producer.daemon = True
self._consumers = []
self._consumer_endsig = {}
global worker_set
for i in range(consumer_num):
consumer_id = 'consumer-' + id + '-' + str(i)
p = Worker(
target=self._consume,
args=(consumer_id, self._inq, self._outq, self._worker))
self._consumers.append(p)
p.daemon = True
setattr(p, 'id', consumer_id)
if use_process:
worker_set.add(p)
self._epoch = -1
self._feeding_ev = Event()
self._produced = 0 # produced sample in self._produce
self._consumed = 0 # consumed sample in self.next
def _produce(self, id, source, inq):
"""Fetch data from source and feed it to 'inq' queue"""
endsig = EndSignal(id)
while True:
self._feeding_ev.wait()
if self._exit:
break
try:
s = source.next()
inq.put(s)
self._produced += 1
except StopIteration:
self._souce_drained = True
self._feeding_ev.clear()
self._feeding_ev.wait()
except Exception as e:
endsig.errno = -1
endsig.errmsg = "producer[{}] failed with error: {}" \
.format(id, str(e))
inq.put(endsig)
break
def _consume(self, id, inq, outq, worker):
"""Fetch data from 'inq', process it and put result to 'outq'"""
if self._use_process:
# handle SIGTERM signal to exit to prevent print stack frame
signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit())
endsig = EndSignal(id)
while True:
sample = inq.get()
if isinstance(sample, EndSignal):
endsig.errno = sample.errno
endsig.errmsg = "consumer[{}] exits for reason[{}]" \
.format(id, sample.errmsg)
outq.put(endsig)
break
try:
result = worker(sample)
outq.put(result)
except Exception as e:
endsig.errno = -2
endsig.errmsg = "consumer[{}] failed to map with error:[{}]" \
.format(id, str(e))
outq.put(endsig)
break
def drained(self):
assert self._epoch >= 0, "first epoch has not started yet"
return self._source.drained() and self._produced == self._consumed
def stop(self):
""" notify to exit
"""
self._exit = True
self._feeding_ev.set()
for _ in range(len(self._consumers)):
self._inq.put(EndSignal(0, "notify consumers to exit"))
def _consumer_healthy(self):
abnormal_num = 0
for w in self._consumers:
if not w.is_alive() and w.id not in self._consumer_endsig:
abnormal_num += 1
if self._use_process:
errmsg = "consumer[{}] exit abnormally with exitcode[{}]" \
.format(w.pid, w.exitcode)
else:
errmsg = "consumer[{}] exit abnormally".format(w.ident)
logger.warn(errmsg)
if abnormal_num > 0:
logger.warn("{} consumers have exited abnormally!!!" \
.format(abnormal_num))
return abnormal_num == 0
def next(self):
""" get next transformed sample
"""
if self._epoch < 0:
self.reset()
if self.drained():
raise StopIteration()
while not self._exit:
try:
sample = self._outq.get(timeout=3)
except Empty as e:
if not self._consumer_healthy():
raise StopIteration()
else:
continue
if isinstance(sample, EndSignal):
self._consumer_endsig[sample.id] = sample
logger.warn("recv endsignal from outq with errmsg[{}]" \
.format(sample.errmsg))
if len(self._consumer_endsig.keys()) < len(self._consumers):
self._inq.put(sample)
else:
self._exit = True
raise StopIteration("all consumers exited, no more samples")
else:
self._consumed += 1
return sample
raise StopIteration()
def reset(self):
""" reset for a new epoch of samples
"""
assert not self._exit, "cannot reset for already stopped dataset"
if self._epoch < 0:
self._epoch = 0
for w in self._consumers:
w.start()
self._producer.start()
else:
assert self._consumer_healthy(), "cannot start another pass of data" \
" for some consumers exited abnormally before!!!"
if not self.drained():
logger.warn("reset before epoch[{}] finishes".format(
self._epoch))
self._produced = self._produced - self._consumed
else:
self._produced = 0
self._epoch += 1
assert len(self._consumer_endsig.keys()) == 0, "some consumers already exited," \
+ " cannot start another epoch"
self._source.reset()
self._souce_drained = False
self._consumed = 0
self._feeding_ev.set()
# FIXME: fix me if you have better impliment
# handle terminate reader process, do not print stack frame
signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit())
# FIXME(dkp): KeyboardInterrupt should be handled inside ParallelMap
# and do such as: 1. exit workers 2. close queues 3. release shared
# memory, HACK KeyboardInterrupt with global signal.SIGINT handler
# here, should be refined later
def _term_workers(sig_num, frame):
global worker_set, main_pid
# only do subporcess killing in main process
if os.getpid() != main_pid:
return
logger.info("KeyboardInterrupt: main proc {} exit, kill subprocess {}" \
.format(os.getpid(), [w.pid for w in worker_set]))
for w in worker_set:
if w.pid is not None:
os.kill(w.pid, signal.SIGINT)
sys.exit()
signal.signal(signal.SIGINT, _term_workers)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import copy
import functools
import collections
import traceback
import numpy as np
import logging
from ppdet.core.workspace import register, serializable
from .parallel_map import ParallelMap
from .transform.batch_operators import Gt2YoloTarget
__all__ = ['Reader', 'create_reader']
logger = logging.getLogger(__name__)
class Compose(object):
def __init__(self, transforms, ctx=None):
self.transforms = transforms
self.ctx = ctx
def __call__(self, data):
ctx = self.ctx if self.ctx else {}
for f in self.transforms:
try:
data = f(data, ctx)
except Exception as e:
stack_info = traceback.format_exc()
logger.warn("fail to map op [{}] with error: {} and stack:\n{}".
format(f, e, str(stack_info)))
raise e
return data
def _calc_img_weights(roidbs):
""" calculate the probabilities of each sample
"""
imgs_cls = []
num_per_cls = {}
img_weights = []
for i, roidb in enumerate(roidbs):
img_cls = set([k for cls in roidbs[i]['gt_class'] for k in cls])
imgs_cls.append(img_cls)
for c in img_cls:
if c not in num_per_cls:
num_per_cls[c] = 1
else:
num_per_cls[c] += 1
for i in range(len(roidbs)):
weights = 0
for c in imgs_cls[i]:
weights += 1 / num_per_cls[c]
img_weights.append(weights)
# probabilities sum to 1
img_weights = img_weights / np.sum(img_weights)
return img_weights
def _has_empty(item):
def empty(x):
if isinstance(x, np.ndarray) and x.size == 0:
return True
elif isinstance(x, collections.Sequence) and len(x) == 0:
return True
else:
return False
if isinstance(item, collections.Sequence) and len(item) == 0:
return True
if item is None:
return True
if empty(item):
return True
return False
def _segm(samples):
assert 'gt_poly' in samples
segms = samples['gt_poly']
if 'is_crowd' in samples:
is_crowd = samples['is_crowd']
if len(segms) != 0:
assert len(segms) == is_crowd.shape[0]
gt_masks = []
valid = True
for i in range(len(segms)):
segm = segms[i]
gt_segm = []
if 'is_crowd' in samples and is_crowd[i]:
gt_segm.append([[0, 0]])
else:
for poly in segm:
if len(poly) == 0:
valid = False
break
gt_segm.append(np.array(poly).reshape(-1, 2))
if (not valid) or len(gt_segm) == 0:
break
gt_masks.append(gt_segm)
return gt_masks
def batch_arrange(batch_samples, fields):
def im_shape(samples, dim=3):
# hard code
assert 'h' in samples
assert 'w' in samples
if dim == 3: # RCNN, ..
return np.array((samples['h'], samples['w'], 1), dtype=np.float32)
else: # YOLOv3, ..
return np.array((samples['h'], samples['w']), dtype=np.int32)
arrange_batch = []
for samples in batch_samples:
one_ins = ()
for i, field in enumerate(fields):
if field == 'gt_mask':
one_ins += (_segm(samples), )
elif field == 'im_shape':
one_ins += (im_shape(samples), )
elif field == 'im_size':
one_ins += (im_shape(samples, 2), )
else:
if field == 'is_difficult':
field = 'difficult'
assert field in samples, '{} not in samples'.format(field)
one_ins += (samples[field], )
arrange_batch.append(one_ins)
return arrange_batch
@register
@serializable
class Reader(object):
"""
Args:
dataset (DataSet): DataSet object
sample_transforms (list of BaseOperator): a list of sample transforms
operators.
batch_transforms (list of BaseOperator): a list of batch transforms
operators.
batch_size (int): batch size.
shuffle (bool): whether shuffle dataset or not. Default False.
drop_last (bool): whether drop last batch or not. Default False.
drop_empty (bool): whether drop sample when it's gt is empty or not.
Default True.
mixup_epoch (int): mixup epoc number. Default is -1, meaning
not use mixup.
cutmix_epoch (int): cutmix epoc number. Default is -1, meaning
not use cutmix.
class_aware_sampling (bool): whether use class-aware sampling or not.
Default False.
worker_num (int): number of working threads/processes.
Default -1, meaning not use multi-threads/multi-processes.
use_process (bool): whether use multi-processes or not.
It only works when worker_num > 1. Default False.
bufsize (int): buffer size for multi-threads/multi-processes,
please note, one instance in buffer is one batch data.
memsize (str): size of shared memory used in result queue when
use_process is true. Default 3G.
inputs_def (dict): network input definition use to get input fields,
which is used to determine the order of returned data.
devices_num (int): number of devices.
"""
def __init__(self,
dataset=None,
sample_transforms=None,
batch_transforms=None,
batch_size=None,
shuffle=False,
drop_last=False,
drop_empty=True,
mixup_epoch=-1,
cutmix_epoch=-1,
class_aware_sampling=False,
worker_num=-1,
use_process=False,
use_fine_grained_loss=False,
num_classes=80,
bufsize=-1,
memsize='3G',
inputs_def=None,
devices_num=1):
self._dataset = dataset
self._roidbs = self._dataset.get_roidb()
self._fields = copy.deepcopy(inputs_def[
'fields']) if inputs_def else None
# transform
self._sample_transforms = Compose(sample_transforms,
{'fields': self._fields})
self._batch_transforms = None
if use_fine_grained_loss:
for bt in batch_transforms:
if isinstance(bt, Gt2YoloTarget):
bt.num_classes = num_classes
elif batch_transforms:
batch_transforms = [
bt for bt in batch_transforms
if not isinstance(bt, Gt2YoloTarget)
]
if batch_transforms:
self._batch_transforms = Compose(batch_transforms,
{'fields': self._fields})
# data
if inputs_def and inputs_def.get('multi_scale', False):
from ppdet.modeling.architectures.input_helper import multiscale_def
im_shape = inputs_def[
'image_shape'] if 'image_shape' in inputs_def else [
3, None, None
]
_, ms_fields = multiscale_def(im_shape, inputs_def['num_scales'],
inputs_def['use_flip'])
self._fields += ms_fields
self._batch_size = batch_size
self._shuffle = shuffle
self._drop_last = drop_last
self._drop_empty = drop_empty
# sampling
self._mixup_epoch = mixup_epoch
self._cutmix_epoch = cutmix_epoch
self._class_aware_sampling = class_aware_sampling
self._load_img = False
self._sample_num = len(self._roidbs)
if self._class_aware_sampling:
self.img_weights = _calc_img_weights(self._roidbs)
self._indexes = None
self._pos = -1
self._epoch = -1
self._curr_iter = 0
# multi-process
self._worker_num = worker_num
self._parallel = None
if self._worker_num > -1:
task = functools.partial(self.worker, self._drop_empty)
bufsize = devices_num * 2 if bufsize == -1 else bufsize
self._parallel = ParallelMap(self, task, worker_num, bufsize,
use_process, memsize)
def __call__(self):
if self._worker_num > -1:
return self._parallel
else:
return self
def __iter__(self):
return self
def reset(self):
"""implementation of Dataset.reset
"""
if self._epoch < 0:
self._epoch = 0
else:
self._epoch += 1
self.indexes = [i for i in range(self.size())]
if self._class_aware_sampling:
self.indexes = np.random.choice(
self._sample_num,
self._sample_num,
replace=True,
p=self.img_weights)
if self._shuffle:
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
np.random.seed(self._epoch + trainer_id)
np.random.shuffle(self.indexes)
if self._mixup_epoch > 0 and len(self.indexes) < 2:
logger.debug("Disable mixup for dataset samples "
"less than 2 samples")
self._mixup_epoch = -1
if self._cutmix_epoch > 0 and len(self.indexes) < 2:
logger.info("Disable cutmix for dataset samples "
"less than 2 samples")
self._cutmix_epoch = -1
self._pos = 0
def __next__(self):
return self.next()
def next(self):
if self._epoch < 0:
self.reset()
if self.drained():
raise StopIteration
batch = self._load_batch()
self._curr_iter += 1
if self._drop_last and len(batch) < self._batch_size:
raise StopIteration
if self._worker_num > -1:
return batch
else:
return self.worker(self._drop_empty, batch)
def _load_batch(self):
batch = []
bs = 0
while bs != self._batch_size:
if self._pos >= self.size():
break
pos = self.indexes[self._pos]
sample = copy.deepcopy(self._roidbs[pos])
sample["curr_iter"] = self._curr_iter
self._pos += 1
if self._drop_empty and self._fields and 'gt_mask' in self._fields:
if _has_empty(_segm(sample)):
#logger.warn('gt_mask is empty or not valid in {}'.format(
# sample['im_file']))
continue
if self._drop_empty and self._fields and 'gt_bbox' in self._fields:
if _has_empty(sample['gt_bbox']):
#logger.warn('gt_bbox {} is empty or not valid in {}, '
# 'drop this sample'.format(
# sample['im_file'], sample['gt_bbox']))
continue
if self._load_img:
sample['image'] = self._load_image(sample['im_file'])
if self._epoch < self._mixup_epoch:
num = len(self.indexes)
mix_idx = np.random.randint(1, num)
mix_idx = self.indexes[(mix_idx + self._pos - 1) % num]
sample['mixup'] = copy.deepcopy(self._roidbs[mix_idx])
sample['mixup']["curr_iter"] = self._curr_iter
if self._load_img:
sample['mixup']['image'] = self._load_image(sample['mixup'][
'im_file'])
if self._epoch < self._cutmix_epoch:
num = len(self.indexes)
mix_idx = np.random.randint(1, num)
sample['cutmix'] = copy.deepcopy(self._roidbs[mix_idx])
sample['cutmix']["curr_iter"] = self._curr_iter
if self._load_img:
sample['cutmix']['image'] = self._load_image(sample[
'cutmix']['im_file'])
batch.append(sample)
bs += 1
return batch
def worker(self, drop_empty=True, batch_samples=None):
"""
sample transform and batch transform.
"""
batch = []
for sample in batch_samples:
sample = self._sample_transforms(sample)
if drop_empty and 'gt_bbox' in sample:
if _has_empty(sample['gt_bbox']):
#logger.warn('gt_bbox {} is empty or not valid in {}, '
# 'drop this sample'.format(
# sample['im_file'], sample['gt_bbox']))
continue
batch.append(sample)
if len(batch) > 0 and self._batch_transforms:
batch = self._batch_transforms(batch)
if len(batch) > 0 and self._fields:
batch = batch_arrange(batch, self._fields)
return batch
def _load_image(self, filename):
with open(filename, 'rb') as f:
return f.read()
def size(self):
""" implementation of Dataset.size
"""
return self._sample_num
def drained(self):
""" implementation of Dataset.drained
"""
assert self._epoch >= 0, 'The first epoch has not begin!'
return self._pos >= self.size()
def stop(self):
if self._parallel:
self._parallel.stop()
def create_reader(cfg, max_iter=0, global_cfg=None, devices_num=1):
"""
Return iterable data reader.
Args:
max_iter (int): number of iterations.
"""
if not isinstance(cfg, dict):
raise TypeError("The config should be a dict when creating reader.")
# synchornize use_fine_grained_loss/num_classes from global_cfg to reader cfg
if global_cfg:
cfg['use_fine_grained_loss'] = getattr(global_cfg,
'use_fine_grained_loss', False)
cfg['num_classes'] = getattr(global_cfg, 'num_classes', 80)
cfg['devices_num'] = devices_num
reader = Reader(**cfg)()
def _reader():
n = 0
while True:
for _batch in reader:
if len(_batch) > 0:
yield _batch
n += 1
if max_iter > 0 and n == max_iter:
return
reader.reset()
if max_iter <= 0:
return
return _reader
import os
import sys
import six
import time
import math
import socket
import contextlib
import numpy as np
from paddle import fluid
from paddle.io import BatchSampler
from paddle.fluid.layers import collective
from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
_parallel_context_initialized = False
class DistributedBatchSampler(BatchSampler):
def __init__(self, dataset, batch_size, shuffle=False, drop_last=False):
self.dataset = dataset
assert isinstance(batch_size, int) and batch_size > 0, \
"batch_size should be a positive integer"
self.batch_size = batch_size
assert isinstance(shuffle, bool), \
"shuffle should be a boolean value"
self.shuffle = shuffle
assert isinstance(drop_last, bool), \
"drop_last should be a boolean number"
self.drop_last = drop_last
self.nranks = ParallelEnv().nranks
self.local_rank = ParallelEnv().local_rank
self.epoch = 0
self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.nranks))
self.total_size = self.num_samples * self.nranks
def __iter__(self):
num_samples = len(self.dataset)
indices = np.arange(num_samples).tolist()
indices += indices[:(self.total_size - len(indices))]
assert len(indices) == self.total_size
if self.shuffle:
np.random.RandomState(self.epoch).shuffle(indices)
self.epoch += 1
# subsample
def _get_indices_by_batch_size(indices):
subsampled_indices = []
last_batch_size = self.total_size % (self.batch_size * self.nranks)
assert last_batch_size % self.nranks == 0
last_local_batch_size = last_batch_size // self.nranks
for i in range(self.local_rank * self.batch_size,
len(indices) - last_batch_size,
self.batch_size * self.nranks):
subsampled_indices.extend(indices[i:i + self.batch_size])
indices = indices[len(indices) - last_batch_size:]
subsampled_indices.extend(indices[
self.local_rank * last_local_batch_size:(
self.local_rank + 1) * last_local_batch_size])
return subsampled_indices
if self.nranks > 1:
indices = _get_indices_by_batch_size(indices)
assert len(indices) == self.num_samples
_sample_iter = iter(indices)
batch_indices = []
for idx in _sample_iter:
batch_indices.append(idx)
if len(batch_indices) == self.batch_size:
yield batch_indices
batch_indices = []
if not self.drop_last and len(batch_indices) > 0:
yield batch_indices
def __len__(self):
num_samples = self.num_samples
num_samples += int(not self.drop_last) * (self.batch_size - 1)
return num_samples // self.batch_size
def set_epoch(self, epoch):
self.epoch = epoch
def _all_gather(x, nranks, ring_id=0, use_calc_stream=True):
return collective._c_allgather(
x, nranks, ring_id=ring_id, use_calc_stream=use_calc_stream)
def wait_server_ready(endpoints):
assert not isinstance(endpoints, six.string_types)
while True:
all_ok = True
not_ready_endpoints = []
for ep in endpoints:
ip_port = ep.split(":")
with contextlib.closing(
socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex((ip_port[0], int(ip_port[1])))
if result != 0:
all_ok = False
not_ready_endpoints.append(ep)
if not all_ok:
time.sleep(3)
else:
break
def init_communicator(program, rank, nranks, wait_port, current_endpoint,
endpoints):
if nranks < 2:
return
other_endpoints = endpoints[:]
other_endpoints.remove(current_endpoint)
if rank == 0 and wait_port:
wait_server_ready(other_endpoints)
block = program.global_block()
nccl_id_var = block.create_var(
name=fluid.unique_name.generate('nccl_id'),
persistable=True,
type=fluid.core.VarDesc.VarType.RAW)
block.append_op(
type='c_gen_nccl_id',
inputs={},
outputs={'Out': nccl_id_var},
attrs={
'rank': rank,
'endpoint': current_endpoint,
'other_endpoints': other_endpoints
})
block.append_op(
type='c_comm_init',
inputs={'X': nccl_id_var},
outputs={},
attrs={
'nranks': nranks,
'rank': rank,
'ring_id': 0,
})
def prepare_distributed_context(place=None):
if place is None:
place = fluid.CUDAPlace(ParallelEnv().dev_id) if ParallelEnv().nranks > 1 \
else fluid.CUDAPlace(0)
strategy = ParallelStrategy()
strategy.nranks = ParallelEnv().nranks
strategy.local_rank = ParallelEnv().local_rank
strategy.trainer_endpoints = ParallelEnv().trainer_endpoints
strategy.current_endpoint = ParallelEnv().current_endpoint
if strategy.nranks < 2:
return
global _parallel_context_initialized
if not _parallel_context_initialized and isinstance(place, fluid.CUDAPlace):
def _init_context():
communicator_prog = fluid.Program()
init_communicator(communicator_prog, strategy.local_rank,
strategy.nranks, True, strategy.current_endpoint,
strategy.trainer_endpoints)
exe = fluid.Executor(place)
exe.run(communicator_prog)
fluid.disable_dygraph()
_init_context()
fluid.enable_dygraph(place)
else:
assert ("Only support CUDAPlace for now.")
_parallel_context_initialized = True
return strategy
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
__all__ = ['SharedBuffer', 'SharedMemoryMgr', 'SharedQueue']
from .sharedmemory import SharedBuffer
from .sharedmemory import SharedMemoryMgr
from .sharedmemory import SharedMemoryError
from .queue import SharedQueue
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import sys
import six
if six.PY3:
import pickle
from io import BytesIO as StringIO
from queue import Empty
else:
import cPickle as pickle
from cStringIO import StringIO
from Queue import Empty
import logging
import traceback
import multiprocessing as mp
from multiprocessing.queues import Queue
from .sharedmemory import SharedMemoryMgr
logger = logging.getLogger(__name__)
class SharedQueueError(ValueError):
""" SharedQueueError
"""
pass
class SharedQueue(Queue):
""" a Queue based on shared memory to communicate data between Process,
and it's interface is compatible with 'multiprocessing.queues.Queue'
"""
def __init__(self, maxsize=0, mem_mgr=None, memsize=None, pagesize=None):
""" init
"""
if six.PY3:
super(SharedQueue, self).__init__(maxsize, ctx=mp.get_context())
else:
super(SharedQueue, self).__init__(maxsize)
if mem_mgr is not None:
self._shared_mem = mem_mgr
else:
self._shared_mem = SharedMemoryMgr(
capacity=memsize, pagesize=pagesize)
def put(self, obj, **kwargs):
""" put an object to this queue
"""
obj = pickle.dumps(obj, -1)
buff = None
try:
buff = self._shared_mem.malloc(len(obj))
buff.put(obj)
super(SharedQueue, self).put(buff, **kwargs)
except Exception as e:
stack_info = traceback.format_exc()
err_msg = 'failed to put a element to SharedQueue '\
'with stack info[%s]' % (stack_info)
logger.warn(err_msg)
if buff is not None:
buff.free()
raise e
def get(self, **kwargs):
""" get an object from this queue
"""
buff = None
try:
buff = super(SharedQueue, self).get(**kwargs)
data = buff.get()
return pickle.load(StringIO(data))
except Empty as e:
raise e
except Exception as e:
stack_info = traceback.format_exc()
err_msg = 'failed to get element from SharedQueue '\
'with stack info[%s]' % (stack_info)
logger.warn(err_msg)
raise e
finally:
if buff is not None:
buff.free()
def release(self):
self._shared_mem.release()
self._shared_mem = None
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# utils for memory management which is allocated on sharedmemory,
# note that these structures may not be thread-safe
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import os
import time
import math
import struct
import sys
import six
if six.PY3:
import pickle
else:
import cPickle as pickle
import json
import uuid
import random
import numpy as np
import weakref
import logging
from multiprocessing import Lock
from multiprocessing import RawArray
logger = logging.getLogger(__name__)
class SharedMemoryError(ValueError):
""" SharedMemoryError
"""
pass
class SharedBufferError(SharedMemoryError):
""" SharedBufferError
"""
pass
class MemoryFullError(SharedMemoryError):
""" MemoryFullError
"""
def __init__(self, errmsg=''):
super(MemoryFullError, self).__init__()
self.errmsg = errmsg
def memcopy(dst, src, offset=0, length=None):
""" copy data from 'src' to 'dst' in bytes
"""
length = length if length is not None else len(src)
assert type(dst) == np.ndarray, 'invalid type for "dst" in memcopy'
if type(src) is not np.ndarray:
if type(src) is str and six.PY3:
src = src.encode()
src = np.frombuffer(src, dtype='uint8', count=len(src))
dst[:] = src[offset:offset + length]
class SharedBuffer(object):
""" Buffer allocated from SharedMemoryMgr, and it stores data on shared memory
note that:
every instance of this should be freed explicitely by calling 'self.free'
"""
def __init__(self, owner, capacity, pos, size=0, alloc_status=''):
""" Init
Args:
owner (str): manager to own this buffer
capacity (int): capacity in bytes for this buffer
pos (int): page position in shared memory
size (int): bytes already used
alloc_status (str): debug info about allocator when allocate this
"""
self._owner = owner
self._cap = capacity
self._pos = pos
self._size = size
self._alloc_status = alloc_status
assert self._pos >= 0 and self._cap > 0, \
"invalid params[%d:%d] to construct SharedBuffer" \
% (self._pos, self._cap)
def owner(self):
""" get owner
"""
return SharedMemoryMgr.get_mgr(self._owner)
def put(self, data, override=False):
""" put data to this buffer
Args:
data (str): data to be stored in this buffer
Returns:
None
Raises:
SharedMemoryError when not enough space in this buffer
"""
assert type(data) in [str, bytes], \
'invalid type[%s] for SharedBuffer::put' % (str(type(data)))
if self._size > 0 and not override:
raise SharedBufferError('already has already been setted before')
if self.capacity() < len(data):
raise SharedBufferError('data[%d] is larger than size of buffer[%s]'\
% (len(data), str(self)))
self.owner().put_data(self, data)
self._size = len(data)
def get(self, offset=0, size=None, no_copy=True):
""" get the data stored this buffer
Args:
offset (int): position for the start point to 'get'
size (int): size to get
Returns:
data (np.ndarray('uint8')): user's data in numpy
which is passed in by 'put'
None: if no data stored in
"""
offset = offset if offset >= 0 else self._size + offset
if self._size <= 0:
return None
size = self._size if size is None else size
assert offset + size <= self._cap, 'invalid offset[%d] '\
'or size[%d] for capacity[%d]' % (offset, size, self._cap)
return self.owner().get_data(self, offset, size, no_copy=no_copy)
def size(self):
""" bytes of used memory
"""
return self._size
def resize(self, size):
""" resize the used memory to 'size', should not be greater than capacity
"""
assert size >= 0 and size <= self._cap, \
"invalid size[%d] for resize" % (size)
self._size = size
def capacity(self):
""" size of allocated memory
"""
return self._cap
def __str__(self):
""" human readable format
"""
return "SharedBuffer(owner:%s, pos:%d, size:%d, "\
"capacity:%d, alloc_status:[%s], pid:%d)" \
% (str(self._owner), self._pos, self._size, \
self._cap, self._alloc_status, os.getpid())
def free(self):
""" free this buffer to it's owner
"""
if self._owner is not None:
self.owner().free(self)
self._owner = None
self._cap = 0
self._pos = -1
self._size = 0
return True
else:
return False
class PageAllocator(object):
""" allocator used to malloc and free shared memory which
is split into pages
"""
s_allocator_header = 12
def __init__(self, base, total_pages, page_size):
""" init
"""
self._magic_num = 1234321000 + random.randint(100, 999)
self._base = base
self._total_pages = total_pages
self._page_size = page_size
header_pages = int(
math.ceil((total_pages + self.s_allocator_header) / page_size))
self._header_pages = header_pages
self._free_pages = total_pages - header_pages
self._header_size = self._header_pages * page_size
self._reset()
def _dump_alloc_info(self, fname):
hpages, tpages, pos, used = self.header()
start = self.s_allocator_header
end = start + self._page_size * hpages
alloc_flags = self._base[start:end].tostring()
info = {
'magic_num': self._magic_num,
'header_pages': hpages,
'total_pages': tpages,
'pos': pos,
'used': used
}
info['alloc_flags'] = alloc_flags
fname = fname + '.' + str(uuid.uuid4())[:6]
with open(fname, 'wb') as f:
f.write(pickle.dumps(info, -1))
logger.warn('dump alloc info to file[%s]' % (fname))
def _reset(self):
alloc_page_pos = self._header_pages
used_pages = self._header_pages
header_info = struct.pack(
str('III'), self._magic_num, alloc_page_pos, used_pages)
assert len(header_info) == self.s_allocator_header, \
'invalid size of header_info'
memcopy(self._base[0:self.s_allocator_header], header_info)
self.set_page_status(0, self._header_pages, '1')
self.set_page_status(self._header_pages, self._free_pages, '0')
def header(self):
""" get header info of this allocator
"""
header_str = self._base[0:self.s_allocator_header].tostring()
magic, pos, used = struct.unpack(str('III'), header_str)
assert magic == self._magic_num, \
'invalid header magic[%d] in shared memory' % (magic)
return self._header_pages, self._total_pages, pos, used
def empty(self):
""" are all allocatable pages available
"""
header_pages, pages, pos, used = self.header()
return header_pages == used
def full(self):
""" are all allocatable pages used
"""
header_pages, pages, pos, used = self.header()
return header_pages + used == pages
def __str__(self):
header_pages, pages, pos, used = self.header()
desc = '{page_info[magic:%d,total:%d,used:%d,header:%d,alloc_pos:%d,pagesize:%d]}' \
% (self._magic_num, pages, used, header_pages, pos, self._page_size)
return 'PageAllocator:%s' % (desc)
def set_alloc_info(self, alloc_pos, used_pages):
""" set allocating position to new value
"""
memcopy(self._base[4:12], struct.pack(str('II'), alloc_pos, used_pages))
def set_page_status(self, start, page_num, status):
""" set pages from 'start' to 'end' with new same status 'status'
"""
assert status in ['0', '1'], 'invalid status[%s] for page status '\
'in allocator[%s]' % (status, str(self))
start += self.s_allocator_header
end = start + page_num
assert start >= 0 and end <= self._header_size, 'invalid end[%d] of pages '\
'in allocator[%s]' % (end, str(self))
memcopy(self._base[start:end], str(status * page_num))
def get_page_status(self, start, page_num, ret_flag=False):
start += self.s_allocator_header
end = start + page_num
assert start >= 0 and end <= self._header_size, 'invalid end[%d] of pages '\
'in allocator[%s]' % (end, str(self))
status = self._base[start:end].tostring().decode()
if ret_flag:
return status
zero_num = status.count('0')
if zero_num == 0:
return (page_num, 1)
else:
return (zero_num, 0)
def malloc_page(self, page_num):
header_pages, pages, pos, used = self.header()
end = pos + page_num
if end > pages:
pos = self._header_pages
end = pos + page_num
start_pos = pos
flags = ''
while True:
flags = self.get_page_status(pos, page_num, ret_flag=True)
if flags.count('0') == page_num:
break
# not found enough pages, so shift to next few pages
free_pos = flags.rfind('1') + 1
pos += free_pos
end = pos + page_num
if end > pages:
pos = self._header_pages
end = pos + page_num
flags = ''
# not found available pages after scan all pages
if pos <= start_pos and end >= start_pos:
logger.debug('not found available pages after scan all pages')
break
page_status = (flags.count('0'), 0)
if page_status != (page_num, 0):
free_pages = self._total_pages - used
if free_pages == 0:
err_msg = 'all pages have been used:%s' % (str(self))
else:
err_msg = 'not found enough pages[avail:%d, expect:%d] '\
'with total free pages[%d]' % (page_status[0], page_num, free_pages)
err_msg = 'failed to malloc %d pages at pos[%d] for reason[%s] '\
'and allocator status[%s]' % (page_num, pos, err_msg, str(self))
raise MemoryFullError(err_msg)
self.set_page_status(pos, page_num, '1')
used += page_num
self.set_alloc_info(end, used)
return pos
def free_page(self, start, page_num):
""" free 'page_num' pages start from 'start'
"""
page_status = self.get_page_status(start, page_num)
assert page_status == (page_num, 1), \
'invalid status[%s] when free [%d, %d]' \
% (str(page_status), start, page_num)
self.set_page_status(start, page_num, '0')
_, _, pos, used = self.header()
used -= page_num
self.set_alloc_info(pos, used)
DEFAULT_SHARED_MEMORY_SIZE = 1024 * 1024 * 1024
class SharedMemoryMgr(object):
""" manage a continouse block of memory, provide
'malloc' to allocate new buffer, and 'free' to free buffer
"""
s_memory_mgrs = weakref.WeakValueDictionary()
s_mgr_num = 0
s_log_statis = False
@classmethod
def get_mgr(cls, id):
""" get a SharedMemoryMgr with size of 'capacity'
"""
assert id in cls.s_memory_mgrs, 'invalid id[%s] for memory managers' % (
id)
return cls.s_memory_mgrs[id]
def __init__(self, capacity=None, pagesize=None):
""" init
"""
logger.debug('create SharedMemoryMgr')
pagesize = 64 * 1024 if pagesize is None else pagesize
assert type(pagesize) is int, "invalid type of pagesize[%s]" \
% (str(pagesize))
capacity = DEFAULT_SHARED_MEMORY_SIZE if capacity is None else capacity
assert type(capacity) is int, "invalid type of capacity[%s]" \
% (str(capacity))
assert capacity > 0, '"size of shared memory should be greater than 0'
self._released = False
self._cap = capacity
self._page_size = pagesize
assert self._cap % self._page_size == 0, \
"capacity[%d] and pagesize[%d] are not consistent" \
% (self._cap, self._page_size)
self._total_pages = self._cap // self._page_size
self._pid = os.getpid()
SharedMemoryMgr.s_mgr_num += 1
self._id = self._pid * 100 + SharedMemoryMgr.s_mgr_num
SharedMemoryMgr.s_memory_mgrs[self._id] = self
self._locker = Lock()
self._setup()
def _setup(self):
self._shared_mem = RawArray('c', self._cap)
self._base = np.frombuffer(
self._shared_mem, dtype='uint8', count=self._cap)
self._locker.acquire()
try:
self._allocator = PageAllocator(self._base, self._total_pages,
self._page_size)
finally:
self._locker.release()
def malloc(self, size, wait=True):
""" malloc a new SharedBuffer
Args:
size (int): buffer size to be malloc
wait (bool): whether to wait when no enough memory
Returns:
SharedBuffer
Raises:
SharedMemoryError when not found available memory
"""
page_num = int(math.ceil(size / self._page_size))
size = page_num * self._page_size
start = None
ct = 0
errmsg = ''
while True:
self._locker.acquire()
try:
start = self._allocator.malloc_page(page_num)
alloc_status = str(self._allocator)
except MemoryFullError as e:
start = None
errmsg = e.errmsg
if not wait:
raise e
finally:
self._locker.release()
if start is None:
time.sleep(0.1)
if ct % 100 == 0:
logger.warn('not enough space for reason[%s]' % (errmsg))
ct += 1
else:
break
return SharedBuffer(self._id, size, start, alloc_status=alloc_status)
def free(self, shared_buf):
""" free a SharedBuffer
Args:
shared_buf (SharedBuffer): buffer to be freed
Returns:
None
Raises:
SharedMemoryError when failed to release this buffer
"""
assert shared_buf._owner == self._id, "invalid shared_buf[%s] "\
"for it's not allocated from me[%s]" % (str(shared_buf), str(self))
cap = shared_buf.capacity()
start_page = shared_buf._pos
page_num = cap // self._page_size
#maybe we don't need this lock here
self._locker.acquire()
try:
self._allocator.free_page(start_page, page_num)
finally:
self._locker.release()
def put_data(self, shared_buf, data):
""" fill 'data' into 'shared_buf'
"""
assert len(data) <= shared_buf.capacity(), 'too large data[%d] '\
'for this buffer[%s]' % (len(data), str(shared_buf))
start = shared_buf._pos * self._page_size
end = start + len(data)
assert start >= 0 and end <= self._cap, "invalid start "\
"position[%d] when put data to buff:%s" % (start, str(shared_buf))
self._base[start:end] = np.frombuffer(data, 'uint8', len(data))
def get_data(self, shared_buf, offset, size, no_copy=True):
""" extract 'data' from 'shared_buf' in range [offset, offset + size)
"""
start = shared_buf._pos * self._page_size
start += offset
if no_copy:
return self._base[start:start + size]
else:
return self._base[start:start + size].tostring()
def __str__(self):
return 'SharedMemoryMgr:{id:%d, %s}' % (self._id, str(self._allocator))
def __del__(self):
if SharedMemoryMgr.s_log_statis:
logger.info('destroy [%s]' % (self))
if not self._released and not self._allocator.empty():
logger.debug('not empty when delete this SharedMemoryMgr[%s]' %
(self))
else:
self._released = True
if self._id in SharedMemoryMgr.s_memory_mgrs:
del SharedMemoryMgr.s_memory_mgrs[self._id]
SharedMemoryMgr.s_mgr_num -= 1
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. from . import dataset
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import coco from . import coco
from . import voc #from . import voc
from . import widerface #from . import widerface
from .dataset import *
from .coco import * from .coco import *
from .voc import * #from .voc import *
from .widerface import * #from .widerface import *
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os import os
import numpy as np import numpy as np
import logging
from .dataset import DataSet
from ppdet.core.workspace import register, serializable from ppdet.core.workspace import register, serializable
from .dataset import DetDataset
import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@register @register
@serializable @serializable
class COCODataSet(DataSet): class COCODataset(DetDataset):
"""
Load COCO records with annotations in json file 'anno_path'
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): json file path.
sample_num (int): number of samples to load, -1 means all.
with_background (bool): whether load background as a class.
if True, total class number will be 81. default True.
"""
def __init__(self, def __init__(self,
dataset_dir=None,
image_dir=None, image_dir=None,
anno_path=None, anno_path=None,
dataset_dir=None, with_background=True,
sample_num=-1, sample_num=-1):
with_background=True): super(COCODataset, self).__init__(dataset_dir, image_dir, anno_path,
super(COCODataSet, self).__init__( with_background, sample_num)
image_dir=image_dir,
anno_path=anno_path,
dataset_dir=dataset_dir,
sample_num=sample_num,
with_background=with_background)
self.anno_path = anno_path
self.sample_num = sample_num
self.with_background = with_background
# `roidbs` is list of dict whose structure is:
# {
# 'im_file': im_fname, # image file name
# 'im_id': img_id, # image id
# 'h': im_h, # height of image
# 'w': im_w, # width
# 'is_crowd': is_crowd,
# 'gt_score': gt_score,
# 'gt_class': gt_class,
# 'gt_bbox': gt_bbox,
# 'gt_poly': gt_poly,
# }
self.roidbs = None
# a dict used to map category name to class id
self.cname2cid = None
self.load_image_only = False self.load_image_only = False
self.load_semantic = False
#self.parse_dataset()
def load_roidb_and_cname2cid(self): def parse_dataset(self):
anno_path = os.path.join(self.dataset_dir, self.anno_path) anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir) image_dir = os.path.join(self.dataset_dir, self.image_dir)
...@@ -104,11 +57,11 @@ class COCODataSet(DataSet): ...@@ -104,11 +57,11 @@ class COCODataSet(DataSet):
im_w = float(img_anno['width']) im_w = float(img_anno['width'])
im_h = float(img_anno['height']) im_h = float(img_anno['height'])
im_fname = os.path.join(image_dir, im_path = os.path.join(image_dir,
im_fname) if image_dir else im_fname im_fname) if image_dir else im_fname
if not os.path.exists(im_fname): if not os.path.exists(im_path):
logger.warn('Illegal image file: {}, and it will be ' logger.warn('Illegal image file: {}, and it will be '
'ignored'.format(im_fname)) 'ignored'.format(im_path))
continue continue
if im_w < 0 or im_h < 0: if im_w < 0 or im_h < 0:
...@@ -118,7 +71,7 @@ class COCODataSet(DataSet): ...@@ -118,7 +71,7 @@ class COCODataSet(DataSet):
continue continue
coco_rec = { coco_rec = {
'im_file': im_fname, 'im_file': im_path,
'im_id': np.array([img_id]), 'im_id': np.array([img_id]),
'h': im_h, 'h': im_h,
'w': im_w, 'w': im_w,
...@@ -127,6 +80,7 @@ class COCODataSet(DataSet): ...@@ -127,6 +80,7 @@ class COCODataSet(DataSet):
if not self.load_image_only: if not self.load_image_only:
ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False) ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
instances = coco.loadAnns(ins_anno_ids) instances = coco.loadAnns(ins_anno_ids)
bboxes = [] bboxes = []
for inst in instances: for inst in instances:
x, y, box_w, box_h = inst['bbox'] x, y, box_w, box_h = inst['bbox']
...@@ -134,7 +88,6 @@ class COCODataSet(DataSet): ...@@ -134,7 +88,6 @@ class COCODataSet(DataSet):
y1 = max(0, y) y1 = max(0, y)
x2 = min(im_w - 1, x1 + max(0, box_w - 1)) x2 = min(im_w - 1, x1 + max(0, box_w - 1))
y2 = min(im_h - 1, y1 + max(0, box_h - 1)) y2 = min(im_h - 1, y1 + max(0, box_h - 1))
if inst['area'] > 0 and x2 >= x1 and y2 >= y1: if inst['area'] > 0 and x2 >= x1 and y2 >= y1:
inst['clean_bbox'] = [x1, y1, x2, y2] inst['clean_bbox'] = [x1, y1, x2, y2]
bboxes.append(inst) bboxes.append(inst)
...@@ -143,7 +96,6 @@ class COCODataSet(DataSet): ...@@ -143,7 +96,6 @@ class COCODataSet(DataSet):
'Found an invalid bbox in annotations: im_id: {}, ' 'Found an invalid bbox in annotations: im_id: {}, '
'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format( 'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
img_id, float(inst['area']), x1, y1, x2, y2)) img_id, float(inst['area']), x1, y1, x2, y2))
num_bbox = len(bboxes) num_bbox = len(bboxes)
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32) gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
...@@ -168,9 +120,14 @@ class COCODataSet(DataSet): ...@@ -168,9 +120,14 @@ class COCODataSet(DataSet):
'gt_score': gt_score, 'gt_score': gt_score,
'gt_poly': gt_poly, 'gt_poly': gt_poly,
}) })
# TODO: remove load_semantic
if self.load_semantic:
seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
'train2017', im_fname[:-3] + 'png')
coco_rec.update({'semantic': seg_path})
logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format( logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
im_fname, img_id, im_h, im_w)) im_path, img_id, im_h, im_w))
records.append(coco_rec) records.append(coco_rec)
ct += 1 ct += 1
if self.sample_num > 0 and ct >= self.sample_num: if self.sample_num > 0 and ct >= self.sample_num:
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os import os
import numpy as np import numpy as np
from collections import OrderedDict
try: try:
from collections.abc import Sequence from collections.abc import Sequence
except Exception: except Exception:
from collections import Sequence from collections import Sequence
from paddle.io import Dataset
from ppdet.core.workspace import register, serializable from ppdet.core.workspace import register, serializable
from ppdet.utils.download import get_dataset_path from ppdet.utils.download import get_dataset_path
@serializable @serializable
class DataSet(object): class DetDataset(Dataset):
"""
Dataset, e.g., coco, pascal voc
Args:
annotation (str): annotation file path
image_dir (str): directory where image files are stored
shuffle (bool): shuffle samples
"""
def __init__(self, def __init__(self,
dataset_dir=None, dataset_dir=None,
image_dir=None, image_dir=None,
anno_path=None, anno_path=None,
sample_num=-1,
with_background=True, with_background=True,
use_default_label=None, sample_num=-1,
**kwargs): **kwargs):
super(DataSet, self).__init__() super(DetDataset, self).__init__()
self.dataset_dir = dataset_dir if dataset_dir is not None else ''
self.anno_path = anno_path self.anno_path = anno_path
self.image_dir = image_dir if image_dir is not None else '' self.image_dir = image_dir if image_dir is not None else ''
self.dataset_dir = dataset_dir if dataset_dir is not None else ''
self.sample_num = sample_num
self.with_background = with_background self.with_background = with_background
self.use_default_label = use_default_label self.sample_num = sample_num
self.cname2cid = None
self._imid2path = None
def load_roidb_and_cname2cid(self):
"""load dataset"""
raise NotImplementedError('%s.load_roidb_and_cname2cid not available' %
(self.__class__.__name__))
def get_roidb(self):
if not self.roidbs:
data_dir = get_dataset_path(self.dataset_dir, self.anno_path,
self.image_dir)
if data_dir:
self.dataset_dir = data_dir
self.load_roidb_and_cname2cid()
return self.roidbs
def get_cname2cid(self):
if not self.cname2cid:
self.load_roidb_and_cname2cid()
return self.cname2cid
def get_anno(self):
if self.anno_path is None:
return
return os.path.join(self.dataset_dir, self.anno_path)
def get_imid2path(self):
return self._imid2path
def __len__(self, ):
return len(self.roidbs)
def _is_valid_file(f, extensions=('.jpg', '.jpeg', '.png', '.bmp')): def __getitem__(self, idx):
return f.lower().endswith(extensions) # data batch
roidb = self.roidbs[idx]
# data augment
roidb = self.transform(roidb)
# data item
out = OrderedDict()
for k in self.fields:
out[k] = roidb[k]
return out.values()
def set_out(self, sample_transform, fields):
self.transform = sample_transform
self.fields = fields
def _make_dataset(dir): def parse_dataset(self):
dir = os.path.expanduser(dir) raise NotImplemented(
if not os.path.isdir(d): "Need to implement parse_dataset method of Dataset")
raise ('{} should be a dir'.format(dir))
images = []
for root, _, fnames in sorted(os.walk(dir, followlinks=True)):
for fname in sorted(fnames):
path = os.path.join(root, fname)
if is_valid_file(path):
images.append(path)
return images
@register @register
@serializable @serializable
class ImageFolder(DataSet): class ImageFolder(DetDataset):
"""
Args:
dataset_dir (str): root directory for dataset.
image_dir(list|str): list of image folders or list of image files
anno_path (str): annotation file path.
samples (int): number of samples to load, -1 means all
"""
def __init__(self, def __init__(self,
dataset_dir=None, dataset_dir=None,
image_dir=None, image_dir=None,
anno_path=None, anno_path=None,
sample_num=-1,
with_background=True, with_background=True,
use_default_label=None, sample_num=-1,
**kwargs): **kwargs):
super(ImageFolder, self).__init__(dataset_dir, image_dir, anno_path, super(ImageFolder, self).__init__(dataset_dir, image_dir, anno_path,
sample_num, with_background, with_background, sample_num)
use_default_label)
self.roidbs = None
self._imid2path = {}
def get_roidb(self): def parse_dataset(self):
if not self.roidbs:
self.roidbs = self._load_images()
return self.roidbs
def set_images(self, images):
self.image_dir = images
self.roidbs = self._load_images()
def _parse(self):
image_dir = self.image_dir image_dir = self.image_dir
if not isinstance(image_dir, Sequence): if not isinstance(image_dir, Sequence):
image_dir = [image_dir] image_dir = [image_dir]
...@@ -145,20 +73,4 @@ class ImageFolder(DataSet): ...@@ -145,20 +73,4 @@ class ImageFolder(DataSet):
images.extend(_make_dataset(im_dir)) images.extend(_make_dataset(im_dir))
elif os.path.isfile(im_dir) and _is_valid_file(im_dir): elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
images.append(im_dir) images.append(im_dir)
return images self.roidbs = images
def _load_images(self):
images = self._parse()
ct = 0
records = []
for image in images:
assert image != '' and os.path.isfile(image), \
"Image {} not found".format(image)
if self.sample_num > 0 and ct >= self.sample_num:
break
rec = {'im_id': np.array([ct]), 'im_file': image}
self._imid2path[ct] = image
ct += 1
records.append(rec)
assert len(records) > 0, "No image file found"
return records
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import xml.etree.ElementTree as ET
from ppdet.core.workspace import register, serializable
from .dataset import DataSet
import logging
logger = logging.getLogger(__name__)
@register
@serializable
class VOCDataSet(DataSet):
"""
Load dataset with PascalVOC format.
Notes:
`anno_path` must contains xml file and image file path for annotations.
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): voc annotation file path.
sample_num (int): number of samples to load, -1 means all.
use_default_label (bool): whether use the default mapping of
label to integer index. Default True.
with_background (bool): whether load background as a class,
default True.
label_list (str): if use_default_label is False, will load
mapping between category and class index.
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
sample_num=-1,
use_default_label=True,
with_background=True,
label_list='label_list.txt'):
super(VOCDataSet, self).__init__(
image_dir=image_dir,
anno_path=anno_path,
sample_num=sample_num,
dataset_dir=dataset_dir,
with_background=with_background)
# roidbs is list of dict whose structure is:
# {
# 'im_file': im_fname, # image file name
# 'im_id': im_id, # image id
# 'h': im_h, # height of image
# 'w': im_w, # width
# 'is_crowd': is_crowd,
# 'gt_class': gt_class,
# 'gt_score': gt_score,
# 'gt_bbox': gt_bbox,
# 'difficult': difficult
# }
self.roidbs = None
# 'cname2id' is a dict to map category name to class id
self.cname2cid = None
self.use_default_label = use_default_label
self.label_list = label_list
def load_roidb_and_cname2cid(self):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
# mapping category name to class id
# if with_background is True:
# background:0, first_class:1, second_class:2, ...
# if with_background is False:
# first_class:0, second_class:1, ...
records = []
ct = 0
cname2cid = {}
if not self.use_default_label:
label_path = os.path.join(self.dataset_dir, self.label_list)
if not os.path.exists(label_path):
raise ValueError("label_list {} does not exists".format(
label_path))
with open(label_path, 'r') as fr:
label_id = int(self.with_background)
for line in fr.readlines():
cname2cid[line.strip()] = label_id
label_id += 1
else:
cname2cid = pascalvoc_label(self.with_background)
with open(anno_path, 'r') as fr:
while True:
line = fr.readline()
if not line:
break
img_file, xml_file = [os.path.join(image_dir, x) \
for x in line.strip().split()[:2]]
if not os.path.exists(img_file):
logger.warn(
'Illegal image file: {}, and it will be ignored'.format(
img_file))
continue
if not os.path.isfile(xml_file):
logger.warn('Illegal xml file: {}, and it will be ignored'.
format(xml_file))
continue
tree = ET.parse(xml_file)
if tree.find('id') is None:
im_id = np.array([ct])
else:
im_id = np.array([int(tree.find('id').text)])
objs = tree.findall('object')
im_w = float(tree.find('size').find('width').text)
im_h = float(tree.find('size').find('height').text)
if im_w < 0 or im_h < 0:
logger.warn(
'Illegal width: {} or height: {} in annotation, '
'and {} will be ignored'.format(im_w, im_h, xml_file))
continue
gt_bbox = []
gt_class = []
gt_score = []
is_crowd = []
difficult = []
for i, obj in enumerate(objs):
cname = obj.find('name').text
_difficult = int(obj.find('difficult').text)
x1 = float(obj.find('bndbox').find('xmin').text)
y1 = float(obj.find('bndbox').find('ymin').text)
x2 = float(obj.find('bndbox').find('xmax').text)
y2 = float(obj.find('bndbox').find('ymax').text)
x1 = max(0, x1)
y1 = max(0, y1)
x2 = min(im_w - 1, x2)
y2 = min(im_h - 1, y2)
if x2 > x1 and y2 > y1:
gt_bbox.append([x1, y1, x2, y2])
gt_class.append([cname2cid[cname]])
gt_score.append([1.])
is_crowd.append([0])
difficult.append([_difficult])
else:
logger.warn(
'Found an invalid bbox in annotations: xml_file: {}'
', x1: {}, y1: {}, x2: {}, y2: {}.'.format(
xml_file, x1, y1, x2, y2))
gt_bbox = np.array(gt_bbox).astype('float32')
gt_class = np.array(gt_class).astype('int32')
gt_score = np.array(gt_score).astype('float32')
is_crowd = np.array(is_crowd).astype('int32')
difficult = np.array(difficult).astype('int32')
voc_rec = {
'im_file': img_file,
'im_id': im_id,
'h': im_h,
'w': im_w,
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_score': gt_score,
'gt_bbox': gt_bbox,
'difficult': difficult
}
if len(objs) != 0:
records.append(voc_rec)
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
assert len(records) > 0, 'not found any voc record in %s' % (
self.anno_path)
logger.debug('{} samples in file {}'.format(ct, anno_path))
self.roidbs, self.cname2cid = records, cname2cid
def pascalvoc_label(with_background=True):
labels_map = {
'aeroplane': 1,
'bicycle': 2,
'bird': 3,
'boat': 4,
'bottle': 5,
'bus': 6,
'car': 7,
'cat': 8,
'chair': 9,
'cow': 10,
'diningtable': 11,
'dog': 12,
'horse': 13,
'motorbike': 14,
'person': 15,
'pottedplant': 16,
'sheep': 17,
'sofa': 18,
'train': 19,
'tvmonitor': 20
}
if not with_background:
labels_map = {k: v - 1 for k, v in labels_map.items()}
return labels_map
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import logging
logger = logging.getLogger(__name__)
from ppdet.core.workspace import register, serializable
from .dataset import DataSet
@register
@serializable
class WIDERFaceDataSet(DataSet):
"""
Load WiderFace records with 'anno_path'
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): root directory for voc annotation data
sample_num (int): number of samples to load, -1 means all
with_background (bool): whether load background as a class.
if True, total class number will be 2. default True.
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
sample_num=-1,
with_background=True,
with_lmk=False):
super(WIDERFaceDataSet, self).__init__(
image_dir=image_dir,
anno_path=anno_path,
sample_num=sample_num,
dataset_dir=dataset_dir,
with_background=with_background)
self.anno_path = anno_path
self.sample_num = sample_num
self.with_background = with_background
self.roidbs = None
self.cname2cid = None
self.with_lmk = with_lmk
def load_roidb_and_cname2cid(self):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
txt_file = anno_path
records = []
ct = 0
file_lists = self._load_file_list(txt_file)
cname2cid = widerface_label(self.with_background)
for item in file_lists:
im_fname = item[0]
im_id = np.array([ct])
gt_bbox = np.zeros((len(item) - 1, 4), dtype=np.float32)
gt_class = np.ones((len(item) - 1, 1), dtype=np.int32)
gt_lmk_labels = np.zeros((len(item) - 1, 10), dtype=np.float32)
lmk_ignore_flag = np.zeros((len(item) - 1, 1), dtype=np.int32)
for index_box in range(len(item)):
if index_box < 1:
continue
gt_bbox[index_box - 1] = item[index_box][0]
if self.with_lmk:
gt_lmk_labels[index_box - 1] = item[index_box][1]
lmk_ignore_flag[index_box - 1] = item[index_box][2]
im_fname = os.path.join(image_dir,
im_fname) if image_dir else im_fname
widerface_rec = {
'im_file': im_fname,
'im_id': im_id,
'gt_bbox': gt_bbox,
'gt_class': gt_class,
}
if self.with_lmk:
widerface_rec['gt_keypoint'] = gt_lmk_labels
widerface_rec['keypoint_ignore'] = lmk_ignore_flag
if len(item) != 0:
records.append(widerface_rec)
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
assert len(records) > 0, 'not found any widerface in %s' % (anno_path)
logger.debug('{} samples in file {}'.format(ct, anno_path))
self.roidbs, self.cname2cid = records, cname2cid
def _load_file_list(self, input_txt):
with open(input_txt, 'r') as f_dir:
lines_input_txt = f_dir.readlines()
file_dict = {}
num_class = 0
for i in range(len(lines_input_txt)):
line_txt = lines_input_txt[i].strip('\n\t\r')
if '.jpg' in line_txt:
if i != 0:
num_class += 1
file_dict[num_class] = []
file_dict[num_class].append(line_txt)
if '.jpg' not in line_txt:
if len(line_txt) <= 6:
continue
result_boxs = []
split_str = line_txt.split(' ')
xmin = float(split_str[0])
ymin = float(split_str[1])
w = float(split_str[2])
h = float(split_str[3])
# Filter out wrong labels
if w < 0 or h < 0:
logger.warn('Illegal box with w: {}, h: {} in '
'img: {}, and it will be ignored'.format(
w, h, file_dict[num_class][0]))
continue
xmin = max(0, xmin)
ymin = max(0, ymin)
xmax = xmin + w
ymax = ymin + h
gt_bbox = [xmin, ymin, xmax, ymax]
result_boxs.append(gt_bbox)
if self.with_lmk:
assert len(split_str) > 18, 'When `with_lmk=True`, the number' \
'of characters per line in the annotation file should' \
'exceed 18.'
lmk0_x = float(split_str[5])
lmk0_y = float(split_str[6])
lmk1_x = float(split_str[8])
lmk1_y = float(split_str[9])
lmk2_x = float(split_str[11])
lmk2_y = float(split_str[12])
lmk3_x = float(split_str[14])
lmk3_y = float(split_str[15])
lmk4_x = float(split_str[17])
lmk4_y = float(split_str[18])
lmk_ignore_flag = 0 if lmk0_x == -1 else 1
gt_lmk_label = [
lmk0_x, lmk0_y, lmk1_x, lmk1_y, lmk2_x, lmk2_y, lmk3_x,
lmk3_y, lmk4_x, lmk4_y
]
result_boxs.append(gt_lmk_label)
result_boxs.append(lmk_ignore_flag)
file_dict[num_class].append(result_boxs)
return list(file_dict.values())
def widerface_label(with_background=True):
labels_map = {'face': 1}
if not with_background:
labels_map = {k: v - 1 for k, v in labels_map.items()}
return labels_map
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
...@@ -24,6 +23,7 @@ except Exception: ...@@ -24,6 +23,7 @@ except Exception:
import logging import logging
import cv2 import cv2
import numpy as np import numpy as np
from .operators import register_op, BaseOperator from .operators import register_op, BaseOperator
from .op_helper import jaccard_overlap, gaussian2D from .op_helper import jaccard_overlap, gaussian2D
...@@ -41,28 +41,15 @@ __all__ = [ ...@@ -41,28 +41,15 @@ __all__ = [
@register_op @register_op
class PadBatch(BaseOperator): class PadBatch(BaseOperator):
"""
Pad a batch of samples so they can be divisible by a stride.
The layout of each image should be 'CHW'.
Args:
pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
height and width is divisible by `pad_to_stride`.
"""
def __init__(self, pad_to_stride=0, use_padded_im_info=True, pad_gt=False): def __init__(self, pad_to_stride=0, use_padded_im_info=True, pad_gt=False):
super(PadBatch, self).__init__() super(PadBatch, self).__init__()
self.pad_to_stride = pad_to_stride self.pad_to_stride = pad_to_stride
self.use_padded_im_info = use_padded_im_info self.use_padded_im_info = use_padded_im_info
self.pad_gt = pad_gt self.pad_gt = pad_gt
def __call__(self, samples, context=None): def __call__(self, samples):
"""
Args:
samples (list): a batch of sample, each is dict.
"""
coarsest_stride = self.pad_to_stride coarsest_stride = self.pad_to_stride
#if coarsest_stride == 0:
# return samples
max_shape = np.array([data['image'].shape for data in samples]).max( max_shape = np.array([data['image'].shape for data in samples]).max(
axis=0) axis=0)
...@@ -82,9 +69,10 @@ class PadBatch(BaseOperator): ...@@ -82,9 +69,10 @@ class PadBatch(BaseOperator):
data['image'] = padding_im data['image'] = padding_im
if self.use_padded_im_info: if self.use_padded_im_info:
data['im_info'][:2] = max_shape[1:3] data['im_info'][:2] = max_shape[1:3]
if self.pad_gt: if self.pad_gt:
gt_num = [] gt_num = []
if data['gt_poly'] is not None and len(data['gt_poly']) > 0: if 'gt_poly' in data.keys():
pad_mask = True pad_mask = True
else: else:
pad_mask = False pad_mask = False
...@@ -93,6 +81,7 @@ class PadBatch(BaseOperator): ...@@ -93,6 +81,7 @@ class PadBatch(BaseOperator):
poly_num = [] poly_num = []
poly_part_num = [] poly_part_num = []
point_num = [] point_num = []
for data in samples: for data in samples:
gt_num.append(data['gt_bbox'].shape[0]) gt_num.append(data['gt_bbox'].shape[0])
if pad_mask: if pad_mask:
...@@ -127,7 +116,6 @@ class PadBatch(BaseOperator): ...@@ -127,7 +116,6 @@ class PadBatch(BaseOperator):
data['gt_bbox'] = gt_box_data data['gt_bbox'] = gt_box_data
data['gt_class'] = gt_class_data data['gt_class'] = gt_class_data
data['is_crowd'] = is_crowd_data data['is_crowd'] = is_crowd_data
return samples return samples
...@@ -156,7 +144,7 @@ class RandomShape(BaseOperator): ...@@ -156,7 +144,7 @@ class RandomShape(BaseOperator):
] if random_inter else [] ] if random_inter else []
self.resize_box = resize_box self.resize_box = resize_box
def __call__(self, samples, context=None): def __call__(self, samples):
shape = np.random.choice(self.sizes) shape = np.random.choice(self.sizes)
method = np.random.choice(self.interps) if self.random_inter \ method = np.random.choice(self.interps) if self.random_inter \
else cv2.INTER_NEAREST else cv2.INTER_NEAREST
...@@ -191,7 +179,7 @@ class PadMultiScaleTest(BaseOperator): ...@@ -191,7 +179,7 @@ class PadMultiScaleTest(BaseOperator):
super(PadMultiScaleTest, self).__init__() super(PadMultiScaleTest, self).__init__()
self.pad_to_stride = pad_to_stride self.pad_to_stride = pad_to_stride
def __call__(self, samples, context=None): def __call__(self, samples):
coarsest_stride = self.pad_to_stride coarsest_stride = self.pad_to_stride
if coarsest_stride == 0: if coarsest_stride == 0:
return samples return samples
...@@ -247,7 +235,7 @@ class Gt2YoloTarget(BaseOperator): ...@@ -247,7 +235,7 @@ class Gt2YoloTarget(BaseOperator):
self.num_classes = num_classes self.num_classes = num_classes
self.iou_thresh = iou_thresh self.iou_thresh = iou_thresh
def __call__(self, samples, context=None): def __call__(self, samples):
assert len(self.anchor_masks) == len(self.downsample_ratios), \ assert len(self.anchor_masks) == len(self.downsample_ratios), \
"anchor_masks', and 'downsample_ratios' should have same length." "anchor_masks', and 'downsample_ratios' should have same length."
...@@ -430,7 +418,7 @@ class Gt2FCOSTarget(BaseOperator): ...@@ -430,7 +418,7 @@ class Gt2FCOSTarget(BaseOperator):
inside_gt_box = np.min(clipped_box_reg_targets, axis=2) > 0 inside_gt_box = np.min(clipped_box_reg_targets, axis=2) > 0
return inside_gt_box return inside_gt_box
def __call__(self, samples, context=None): def __call__(self, samples):
assert len(self.object_sizes_of_interest) == len(self.downsample_ratios), \ assert len(self.object_sizes_of_interest) == len(self.downsample_ratios), \
"object_sizes_of_interest', and 'downsample_ratios' should have same length." "object_sizes_of_interest', and 'downsample_ratios' should have same length."
...@@ -554,7 +542,7 @@ class Gt2TTFTarget(BaseOperator): ...@@ -554,7 +542,7 @@ class Gt2TTFTarget(BaseOperator):
self.num_classes = num_classes self.num_classes = num_classes
self.alpha = alpha self.alpha = alpha
def __call__(self, samples, context=None): def __call__(self, samples):
output_size = samples[0]['image'].shape[1] output_size = samples[0]['image'].shape[1]
feat_size = output_size // self.down_ratio feat_size = output_size // self.down_ratio
for sample in samples: for sample in samples:
......
...@@ -33,12 +33,10 @@ import random ...@@ -33,12 +33,10 @@ import random
import math import math
import numpy as np import numpy as np
import os import os
import cv2 import cv2
from PIL import Image, ImageEnhance, ImageDraw from PIL import Image, ImageEnhance, ImageDraw
from ppdet.core.workspace import serializable from ppdet.core.workspace import register, serializable
from ppdet.modeling.ops import AnchorGrid
from .op_helper import (satisfy_sample_constraint, filter_and_process, from .op_helper import (satisfy_sample_constraint, filter_and_process,
generate_sample_bbox, clip_bbox, data_anchor_sampling, generate_sample_bbox, clip_bbox, data_anchor_sampling,
...@@ -74,11 +72,12 @@ class BaseOperator(object): ...@@ -74,11 +72,12 @@ class BaseOperator(object):
name = self.__class__.__name__ name = self.__class__.__name__
self._id = name + '_' + str(uuid.uuid4())[-6:] self._id = name + '_' + str(uuid.uuid4())[-6:]
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
""" Process a sample. """ Process a sample.
Args: Args:
sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx} sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
context (dict): info about this sample processing
Returns: Returns:
result (dict): a processed sample result (dict): a processed sample
""" """
...@@ -106,10 +105,10 @@ class DecodeImage(BaseOperator): ...@@ -106,10 +105,10 @@ class DecodeImage(BaseOperator):
raise TypeError("{}: input type is invalid.".format(self)) raise TypeError("{}: input type is invalid.".format(self))
if not isinstance(self.with_mixup, bool): if not isinstance(self.with_mixup, bool):
raise TypeError("{}: input type is invalid.".format(self)) raise TypeError("{}: input type is invalid.".format(self))
if not isinstance(self.with_cutmix, bool):
raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
""" load image if 'im_file' field is not empty but 'image' is""" """ load image if 'im_file' field is not empty but 'image' is"""
if 'image' not in sample: if 'image' not in sample:
with open(sample['im_file'], 'rb') as f: with open(sample['im_file'], 'rb') as f:
...@@ -121,7 +120,9 @@ class DecodeImage(BaseOperator): ...@@ -121,7 +120,9 @@ class DecodeImage(BaseOperator):
if self.to_rgb: if self.to_rgb:
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
sample['image'] = im sample['image'] = im
if 'h' not in sample: if 'h' not in sample:
sample['h'] = im.shape[0] sample['h'] = im.shape[0]
elif sample['h'] != im.shape[0]: elif sample['h'] != im.shape[0]:
...@@ -142,12 +143,20 @@ class DecodeImage(BaseOperator): ...@@ -142,12 +143,20 @@ class DecodeImage(BaseOperator):
# make default im_info with [h, w, 1] # make default im_info with [h, w, 1]
sample['im_info'] = np.array( sample['im_info'] = np.array(
[im.shape[0], im.shape[1], 1.], dtype=np.float32) [im.shape[0], im.shape[1], 1.], dtype=np.float32)
# decode mixup image # decode mixup image
if self.with_mixup and 'mixup' in sample: if self.with_mixup and 'mixup' in sample:
self.__call__(sample['mixup'], context) self.__call__(sample['mixup'])
# decode cutmix image # decode cutmix image
if self.with_cutmix and 'cutmix' in sample: if self.with_cutmix and 'cutmix' in sample:
self.__call__(sample['cutmix'], context) self.__call__(sample['cutmix'])
# decode semantic label
if 'semantic' in sample.keys() and sample['semantic'] is not None:
sem_file = sample['semantic']
sem = cv2.imread(sem_file, cv2.IMREAD_GRAYSCALE)
sample['semantic'] = sem.astype('int32')
return sample return sample
...@@ -188,7 +197,9 @@ class MultiscaleTestResize(BaseOperator): ...@@ -188,7 +197,9 @@ class MultiscaleTestResize(BaseOperator):
and isinstance(self.interp, int)): and isinstance(self.interp, int)):
raise TypeError("{}: input type is invalid.".format(self)) raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
""" Resize the image numpy for multi-scale test. """ Resize the image numpy for multi-scale test.
""" """
origin_ims = {} origin_ims = {}
...@@ -292,7 +303,9 @@ class ResizeImage(BaseOperator): ...@@ -292,7 +303,9 @@ class ResizeImage(BaseOperator):
int)): int)):
raise TypeError("{}: input type is invalid.".format(self)) raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
""" Resize the image numpy. """ Resize the image numpy.
""" """
im = sample['image'] im = sample['image']
...@@ -332,6 +345,7 @@ class ResizeImage(BaseOperator): ...@@ -332,6 +345,7 @@ class ResizeImage(BaseOperator):
resize_w = selected_size resize_w = selected_size
resize_h = selected_size resize_h = selected_size
if self.use_cv2: if self.use_cv2:
im = cv2.resize( im = cv2.resize(
im, im,
...@@ -340,6 +354,18 @@ class ResizeImage(BaseOperator): ...@@ -340,6 +354,18 @@ class ResizeImage(BaseOperator):
fx=im_scale_x, fx=im_scale_x,
fy=im_scale_y, fy=im_scale_y,
interpolation=self.interp) interpolation=self.interp)
if 'semantic' in sample.keys() and sample['semantic'] is not None:
semantic = sample['semantic']
semantic = cv2.resize(
semantic.astype('float32'),
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
semantic = np.asarray(semantic).astype('int32')
semantic = np.expand_dims(semantic, 0)
sample['semantic'] = semantic
else: else:
if self.max_size != 0: if self.max_size != 0:
raise TypeError( raise TypeError(
...@@ -406,7 +432,9 @@ class RandomFlipImage(BaseOperator): ...@@ -406,7 +432,9 @@ class RandomFlipImage(BaseOperator):
gt_keypoint[:, i] = width - old_x - 1 gt_keypoint[:, i] = width - old_x - 1
return gt_keypoint return gt_keypoint
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
"""Filp the image and bounding box. """Filp the image and bounding box.
Operators: Operators:
1. Flip the image numpy. 1. Flip the image numpy.
...@@ -453,9 +481,15 @@ class RandomFlipImage(BaseOperator): ...@@ -453,9 +481,15 @@ class RandomFlipImage(BaseOperator):
if self.is_mask_flip and len(sample['gt_poly']) != 0: if self.is_mask_flip and len(sample['gt_poly']) != 0:
sample['gt_poly'] = self.flip_segms(sample['gt_poly'], sample['gt_poly'] = self.flip_segms(sample['gt_poly'],
height, width) height, width)
if 'gt_keypoint' in sample.keys(): if 'gt_keypoint' in sample.keys():
sample['gt_keypoint'] = self.flip_keypoint( sample['gt_keypoint'] = self.flip_keypoint(
sample['gt_keypoint'], width) sample['gt_keypoint'], width)
if 'semantic' in sample.keys() and sample[
'semantic'] is not None:
sample['semantic'] = sample['semantic'][:, ::-1]
sample['flipped'] = True sample['flipped'] = True
sample['image'] = im sample['image'] = im
sample = samples if batch_input else samples[0] sample = samples if batch_input else samples[0]
...@@ -479,7 +513,9 @@ class RandomErasingImage(BaseOperator): ...@@ -479,7 +513,9 @@ class RandomErasingImage(BaseOperator):
self.sh = sh self.sh = sh
self.r1 = r1 self.r1 = r1
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
samples = sample samples = sample
batch_input = True batch_input = True
if not isinstance(samples, Sequence): if not isinstance(samples, Sequence):
...@@ -563,7 +599,9 @@ class GridMaskOp(BaseOperator): ...@@ -563,7 +599,9 @@ class GridMaskOp(BaseOperator):
prob=prob, prob=prob,
upper_iter=upper_iter) upper_iter=upper_iter)
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
samples = sample samples = sample
batch_input = True batch_input = True
if not isinstance(samples, Sequence): if not isinstance(samples, Sequence):
...@@ -591,7 +629,9 @@ class AutoAugmentImage(BaseOperator): ...@@ -591,7 +629,9 @@ class AutoAugmentImage(BaseOperator):
if not isinstance(self.is_normalized, bool): if not isinstance(self.is_normalized, bool):
raise TypeError("{}: input type is invalid.".format(self)) raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
""" """
Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172 Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172
""" """
...@@ -670,7 +710,9 @@ class NormalizeImage(BaseOperator): ...@@ -670,7 +710,9 @@ class NormalizeImage(BaseOperator):
if reduce(lambda x, y: x * y, self.std) == 0: if reduce(lambda x, y: x * y, self.std) == 0:
raise ValueError('{}: std is invalid!'.format(self)) raise ValueError('{}: std is invalid!'.format(self))
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
"""Normalize the image. """Normalize the image.
Operators: Operators:
1.(optional) Scale the image to [0,1] 1.(optional) Scale the image to [0,1]
...@@ -786,7 +828,9 @@ class RandomDistort(BaseOperator): ...@@ -786,7 +828,9 @@ class RandomDistort(BaseOperator):
img = Image.fromarray(img, mode='HSV').convert('RGB') img = Image.fromarray(img, mode='HSV').convert('RGB')
return img return img
def __call__(self, sample, context): def __call__(
self,
sample, ):
"""random distort the image""" """random distort the image"""
ops = [ ops = [
self.random_brightness, self.random_contrast, self.random_brightness, self.random_contrast,
...@@ -827,7 +871,9 @@ class ExpandImage(BaseOperator): ...@@ -827,7 +871,9 @@ class ExpandImage(BaseOperator):
self.mean = mean self.mean = mean
self.prob = prob self.prob = prob
def __call__(self, sample, context): def __call__(
self,
sample, ):
""" """
Expand the image and modify bounding box. Expand the image and modify bounding box.
Operators: Operators:
...@@ -911,7 +957,9 @@ class CropImage(BaseOperator): ...@@ -911,7 +957,9 @@ class CropImage(BaseOperator):
self.satisfy_all = satisfy_all self.satisfy_all = satisfy_all
self.avoid_no_bbox = avoid_no_bbox self.avoid_no_bbox = avoid_no_bbox
def __call__(self, sample, context): def __call__(
self,
sample, ):
""" """
Crop the image and modify bounding box. Crop the image and modify bounding box.
Operators: Operators:
...@@ -1007,7 +1055,9 @@ class CropImageWithDataAchorSampling(BaseOperator): ...@@ -1007,7 +1055,9 @@ class CropImageWithDataAchorSampling(BaseOperator):
self.avoid_no_bbox = avoid_no_bbox self.avoid_no_bbox = avoid_no_bbox
self.das_anchor_scales = np.array(das_anchor_scales) self.das_anchor_scales = np.array(das_anchor_scales)
def __call__(self, sample, context): def __call__(
self,
sample, ):
""" """
Crop the image and modify bounding box. Crop the image and modify bounding box.
Operators: Operators:
...@@ -1140,7 +1190,9 @@ class NormalizeBox(BaseOperator): ...@@ -1140,7 +1190,9 @@ class NormalizeBox(BaseOperator):
def __init__(self): def __init__(self):
super(NormalizeBox, self).__init__() super(NormalizeBox, self).__init__()
def __call__(self, sample, context): def __call__(
self,
sample, ):
gt_bbox = sample['gt_bbox'] gt_bbox = sample['gt_bbox']
width = sample['w'] width = sample['w']
height = sample['h'] height = sample['h']
...@@ -1180,7 +1232,9 @@ class Permute(BaseOperator): ...@@ -1180,7 +1232,9 @@ class Permute(BaseOperator):
isinstance(self.channel_first, bool)): isinstance(self.channel_first, bool)):
raise TypeError("{}: input type is invalid.".format(self)) raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
samples = sample samples = sample
batch_input = True batch_input = True
if not isinstance(samples, Sequence): if not isinstance(samples, Sequence):
...@@ -1229,7 +1283,9 @@ class MixupImage(BaseOperator): ...@@ -1229,7 +1283,9 @@ class MixupImage(BaseOperator):
img2.astype('float32') * (1.0 - factor) img2.astype('float32') * (1.0 - factor)
return img.astype('uint8') return img.astype('uint8')
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
if 'mixup' not in sample: if 'mixup' not in sample:
return sample return sample
factor = np.random.beta(self.alpha, self.beta) factor = np.random.beta(self.alpha, self.beta)
...@@ -1312,7 +1368,9 @@ class CutmixImage(BaseOperator): ...@@ -1312,7 +1368,9 @@ class CutmixImage(BaseOperator):
img_1[bby1:bby2, bbx1:bbx2, :] = img2[bby1:bby2, bbx1:bbx2, :] img_1[bby1:bby2, bbx1:bbx2, :] = img2[bby1:bby2, bbx1:bbx2, :]
return img_1 return img_1
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
if 'cutmix' not in sample: if 'cutmix' not in sample:
return sample return sample
factor = np.random.beta(self.alpha, self.beta) factor = np.random.beta(self.alpha, self.beta)
...@@ -1371,10 +1429,12 @@ class RandomInterpImage(BaseOperator): ...@@ -1371,10 +1429,12 @@ class RandomInterpImage(BaseOperator):
for interp in interps: for interp in interps:
self.resizers.append(ResizeImage(target_size, max_size, interp)) self.resizers.append(ResizeImage(target_size, max_size, interp))
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
"""Resise the image numpy by random resizer.""" """Resise the image numpy by random resizer."""
resizer = random.choice(self.resizers) resizer = random.choice(self.resizers)
return resizer(sample, context) return resizer(sample, )
@register_op @register_op
...@@ -1393,7 +1453,9 @@ class Resize(BaseOperator): ...@@ -1393,7 +1453,9 @@ class Resize(BaseOperator):
self.target_dim = target_dim self.target_dim = target_dim
self.interp = interp # 'random' for yolov3 self.interp = interp # 'random' for yolov3
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
w = sample['w'] w = sample['w']
h = sample['h'] h = sample['h']
...@@ -1517,7 +1579,9 @@ class ColorDistort(BaseOperator): ...@@ -1517,7 +1579,9 @@ class ColorDistort(BaseOperator):
img += delta img += delta
return img return img
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
img = sample['image'] img = sample['image']
if self.random_apply: if self.random_apply:
functions = [ functions = [
...@@ -1600,7 +1664,9 @@ class CornerRandColor(ColorDistort): ...@@ -1600,7 +1664,9 @@ class CornerRandColor(ColorDistort):
img_mean *= (1 - alpha) img_mean *= (1 - alpha)
img += img_mean img += img_mean
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
img = sample['image'] img = sample['image']
if self.is_scale: if self.is_scale:
img = img.astype(np.float32, copy=False) img = img.astype(np.float32, copy=False)
...@@ -1633,7 +1699,9 @@ class NormalizePermute(BaseOperator): ...@@ -1633,7 +1699,9 @@ class NormalizePermute(BaseOperator):
self.mean = mean self.mean = mean
self.std = std self.std = std
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
img = sample['image'] img = sample['image']
img = img.astype(np.float32) img = img.astype(np.float32)
...@@ -1707,7 +1775,9 @@ class RandomExpand(BaseOperator): ...@@ -1707,7 +1775,9 @@ class RandomExpand(BaseOperator):
_expand_rle(segm, x, y, height, width, ratio)) _expand_rle(segm, x, y, height, width, ratio))
return expanded_segms return expanded_segms
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
if np.random.uniform(0., 1.) < self.prob: if np.random.uniform(0., 1.) < self.prob:
return sample return sample
...@@ -1839,7 +1909,9 @@ class RandomCrop(BaseOperator): ...@@ -1839,7 +1909,9 @@ class RandomCrop(BaseOperator):
crop_segms.append(_crop_rle(segm, crop, height, width)) crop_segms.append(_crop_rle(segm, crop, height, width))
return crop_segms return crop_segms
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0: if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
return sample return sample
...@@ -1986,22 +2058,23 @@ class PadBox(BaseOperator): ...@@ -1986,22 +2058,23 @@ class PadBox(BaseOperator):
self.num_max_boxes = num_max_boxes self.num_max_boxes = num_max_boxes
super(PadBox, self).__init__() super(PadBox, self).__init__()
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
assert 'gt_bbox' in sample assert 'gt_bbox' in sample
bbox = sample['gt_bbox'] bbox = sample['gt_bbox']
gt_num = min(self.num_max_boxes, len(bbox)) gt_num = min(self.num_max_boxes, len(bbox))
num_max = self.num_max_boxes num_max = self.num_max_boxes
fields = context['fields'] if context else []
pad_bbox = np.zeros((num_max, 4), dtype=np.float32) pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
if gt_num > 0: if gt_num > 0:
pad_bbox[:gt_num, :] = bbox[:gt_num, :] pad_bbox[:gt_num, :] = bbox[:gt_num, :]
sample['gt_bbox'] = pad_bbox sample['gt_bbox'] = pad_bbox
if 'gt_class' in fields: if 'gt_class' in sample.keys():
pad_class = np.zeros((num_max), dtype=np.int32) pad_class = np.zeros((num_max), dtype=np.int32)
if gt_num > 0: if gt_num > 0:
pad_class[:gt_num] = sample['gt_class'][:gt_num, 0] pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
sample['gt_class'] = pad_class sample['gt_class'] = pad_class
if 'gt_score' in fields: if 'gt_score' in sample.keys():
pad_score = np.zeros((num_max), dtype=np.float32) pad_score = np.zeros((num_max), dtype=np.float32)
if gt_num > 0: if gt_num > 0:
pad_score[:gt_num] = sample['gt_score'][:gt_num, 0] pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
...@@ -2009,7 +2082,7 @@ class PadBox(BaseOperator): ...@@ -2009,7 +2082,7 @@ class PadBox(BaseOperator):
# in training, for example in op ExpandImage, # in training, for example in op ExpandImage,
# the bbox and gt_class is expandded, but the difficult is not, # the bbox and gt_class is expandded, but the difficult is not,
# so, judging by it's length # so, judging by it's length
if 'is_difficult' in fields: if 'is_difficult' in sample.keys():
pad_diff = np.zeros((num_max), dtype=np.int32) pad_diff = np.zeros((num_max), dtype=np.int32)
if gt_num > 0: if gt_num > 0:
pad_diff[:gt_num] = sample['difficult'][:gt_num, 0] pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
...@@ -2026,7 +2099,9 @@ class BboxXYXY2XYWH(BaseOperator): ...@@ -2026,7 +2099,9 @@ class BboxXYXY2XYWH(BaseOperator):
def __init__(self): def __init__(self):
super(BboxXYXY2XYWH, self).__init__() super(BboxXYXY2XYWH, self).__init__()
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
assert 'gt_bbox' in sample assert 'gt_bbox' in sample
bbox = sample['gt_bbox'] bbox = sample['gt_bbox']
bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2] bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2]
...@@ -2050,7 +2125,9 @@ class Lighting(BaseOperator): ...@@ -2050,7 +2125,9 @@ class Lighting(BaseOperator):
self.eigval = np.array(eigval).astype('float32') self.eigval = np.array(eigval).astype('float32')
self.eigvec = np.array(eigvec).astype('float32') self.eigvec = np.array(eigvec).astype('float32')
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
alpha = np.random.normal(scale=self.alphastd, size=(3, )) alpha = np.random.normal(scale=self.alphastd, size=(3, ))
sample['image'] += np.dot(self.eigvec, self.eigval * alpha) sample['image'] += np.dot(self.eigvec, self.eigval * alpha)
return sample return sample
...@@ -2088,7 +2165,9 @@ class CornerTarget(BaseOperator): ...@@ -2088,7 +2165,9 @@ class CornerTarget(BaseOperator):
self.gaussian_iou = gaussian_iou self.gaussian_iou = gaussian_iou
self.max_tag_len = max_tag_len self.max_tag_len = max_tag_len
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
tl_heatmaps = np.zeros( tl_heatmaps = np.zeros(
(self.num_classes, self.output_size[0], self.output_size[1]), (self.num_classes, self.output_size[0], self.output_size[1]),
dtype=np.float32) dtype=np.float32)
...@@ -2185,7 +2264,9 @@ class CornerCrop(BaseOperator): ...@@ -2185,7 +2264,9 @@ class CornerCrop(BaseOperator):
self.is_train = is_train self.is_train = is_train
self.input_size = input_size self.input_size = input_size
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
im_h, im_w = int(sample['h']), int(sample['w']) im_h, im_w = int(sample['h']), int(sample['w'])
if self.is_train: if self.is_train:
scale = np.random.choice(self.random_scales) scale = np.random.choice(self.random_scales)
...@@ -2259,7 +2340,9 @@ class CornerRatio(BaseOperator): ...@@ -2259,7 +2340,9 @@ class CornerRatio(BaseOperator):
self.input_size = input_size self.input_size = input_size
self.output_size = output_size self.output_size = output_size
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
scale = (self.input_size + 1) // self.output_size scale = (self.input_size + 1) // self.output_size
out_height, out_width = (sample['h'] + 1) // scale, ( out_height, out_width = (sample['h'] + 1) // scale, (
sample['w'] + 1) // scale sample['w'] + 1) // scale
...@@ -2289,7 +2372,9 @@ class RandomScaledCrop(BaseOperator): ...@@ -2289,7 +2372,9 @@ class RandomScaledCrop(BaseOperator):
self.scale_range = scale_range self.scale_range = scale_range
self.interp = interp self.interp = interp
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
w = sample['w'] w = sample['w']
h = sample['h'] h = sample['h']
random_scale = np.random.uniform(*self.scale_range) random_scale = np.random.uniform(*self.scale_range)
...@@ -2338,7 +2423,9 @@ class ResizeAndPad(BaseOperator): ...@@ -2338,7 +2423,9 @@ class ResizeAndPad(BaseOperator):
self.target_dim = target_dim self.target_dim = target_dim
self.interp = interp self.interp = interp
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
w = sample['w'] w = sample['w']
h = sample['h'] h = sample['h']
interp = self.interp interp = self.interp
...@@ -2363,109 +2450,6 @@ class ResizeAndPad(BaseOperator): ...@@ -2363,109 +2450,6 @@ class ResizeAndPad(BaseOperator):
return sample return sample
@register_op
class TargetAssign(BaseOperator):
"""Assign regression target and labels.
Args:
image_size (int or list): input image size, a single integer or list of
[h, w]. Default: 512
min_level (int): min level of the feature pyramid. Default: 3
max_level (int): max level of the feature pyramid. Default: 7
anchor_base_scale (int): base anchor scale. Default: 4
num_scales (int): number of anchor scales. Default: 3
aspect_ratios (list): aspect ratios.
Default: [(1, 1), (1.4, 0.7), (0.7, 1.4)]
match_threshold (float): threshold for foreground IoU. Default: 0.5
"""
def __init__(self,
image_size=512,
min_level=3,
max_level=7,
anchor_base_scale=4,
num_scales=3,
aspect_ratios=[(1, 1), (1.4, 0.7), (0.7, 1.4)],
match_threshold=0.5):
super(TargetAssign, self).__init__()
assert image_size % 2 ** max_level == 0, \
"image size should be multiple of the max level stride"
self.image_size = image_size
self.min_level = min_level
self.max_level = max_level
self.anchor_base_scale = anchor_base_scale
self.num_scales = num_scales
self.aspect_ratios = aspect_ratios
self.match_threshold = match_threshold
@property
def anchors(self):
if not hasattr(self, '_anchors'):
anchor_grid = AnchorGrid(self.image_size, self.min_level,
self.max_level, self.anchor_base_scale,
self.num_scales, self.aspect_ratios)
self._anchors = np.concatenate(anchor_grid.generate())
return self._anchors
def iou_matrix(self, a, b):
tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
area_o = (area_a[:, np.newaxis] + area_b - area_i)
# return area_i / (area_o + 1e-10)
return np.where(area_i == 0., np.zeros_like(area_i), area_i / area_o)
def match(self, anchors, gt_boxes):
# XXX put smaller matrix first would be a little bit faster
mat = self.iou_matrix(gt_boxes, anchors)
max_anchor_for_each_gt = mat.argmax(axis=1)
max_for_each_anchor = mat.max(axis=0)
anchor_to_gt = mat.argmax(axis=0)
anchor_to_gt[max_for_each_anchor < self.match_threshold] = -1
# XXX ensure each gt has at least one anchor assigned,
# see `force_match_for_each_row` in TF implementation
one_hot = np.zeros_like(mat)
one_hot[np.arange(mat.shape[0]), max_anchor_for_each_gt] = 1.
max_anchor_indices = one_hot.sum(axis=0).nonzero()[0]
max_gt_indices = one_hot.argmax(axis=0)[max_anchor_indices]
anchor_to_gt[max_anchor_indices] = max_gt_indices
return anchor_to_gt
def encode(self, anchors, boxes):
wha = anchors[..., 2:] - anchors[..., :2] + 1
ca = anchors[..., :2] + wha * .5
whb = boxes[..., 2:] - boxes[..., :2] + 1
cb = boxes[..., :2] + whb * .5
offsets = np.empty_like(anchors)
offsets[..., :2] = (cb - ca) / wha
offsets[..., 2:] = np.log(whb / wha)
return offsets
def __call__(self, sample, context=None):
gt_boxes = sample['gt_bbox']
gt_labels = sample['gt_class']
labels = np.full((self.anchors.shape[0], 1), 0, dtype=np.int32)
targets = np.full((self.anchors.shape[0], 4), 0., dtype=np.float32)
sample['gt_label'] = labels
sample['gt_target'] = targets
if len(gt_boxes) < 1:
sample['fg_num'] = np.array(0, dtype=np.int32)
return sample
anchor_to_gt = self.match(self.anchors, gt_boxes)
matched_indices = (anchor_to_gt >= 0).nonzero()[0]
labels[matched_indices] = gt_labels[anchor_to_gt[matched_indices]]
matched_boxes = gt_boxes[anchor_to_gt[matched_indices]]
matched_anchors = self.anchors[matched_indices]
matched_targets = self.encode(matched_anchors, matched_boxes)
targets[matched_indices] = matched_targets
sample['fg_num'] = np.array(len(matched_targets), dtype=np.int32)
return sample
@register_op @register_op
class DebugVisibleImage(BaseOperator): class DebugVisibleImage(BaseOperator):
""" """
...@@ -2482,7 +2466,9 @@ class DebugVisibleImage(BaseOperator): ...@@ -2482,7 +2466,9 @@ class DebugVisibleImage(BaseOperator):
if not isinstance(self.is_normalized, bool): if not isinstance(self.is_normalized, bool):
raise TypeError("{}: input type is invalid.".format(self)) raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, sample, context=None): def __call__(
self,
sample, ):
image = Image.open(sample['im_file']).convert('RGB') image = Image.open(sample['im_file']).convert('RGB')
out_file_name = sample['im_file'].split('/')[-1] out_file_name = sample['im_file'].split('/')[-1]
width = sample['w'] width = sample['w']
......
...@@ -45,7 +45,7 @@ class MaskRCNN(BaseArch): ...@@ -45,7 +45,7 @@ class MaskRCNN(BaseArch):
def model_arch(self): def model_arch(self):
# Backbone # Backbone
body_feats = self.backbone(self.inputs) body_feats = self.backbone(self.inputs)
spatial_scale = None spatial_scale = 1. / 16
# Neck # Neck
if self.neck is not None: if self.neck is not None:
......
...@@ -29,20 +29,14 @@ class BaseArch(Layer): ...@@ -29,20 +29,14 @@ class BaseArch(Layer):
raise "Now, only support train or infer mode!" raise "Now, only support train or infer mode!"
return out return out
def build_inputs(self, data, input_def): def build_inputs(self, inputs, inputs_keys):
inputs = {} out = {}
for name in input_def: for i, k in enumerate(inputs_keys):
inputs[name] = [] v = to_variable(inputs[i])
batch_size = len(data) out[k] = v
for bs in range(batch_size): return out
for name, input in zip(input_def, data[bs]):
input_v = np.array(input)[np.newaxis, ...] def model_arch(self, ):
inputs[name].append(input_v)
for name in input_def:
inputs[name] = to_variable(np.concatenate(inputs[name]))
return inputs
def model_arch(self, mode):
raise NotImplementedError("Should implement model_arch method!") raise NotImplementedError("Should implement model_arch method!")
def loss(self, ): def loss(self, ):
......
...@@ -13,7 +13,7 @@ class MaskFeat(Layer): ...@@ -13,7 +13,7 @@ class MaskFeat(Layer):
__inject__ = ['mask_roi_extractor'] __inject__ = ['mask_roi_extractor']
def __init__(self, def __init__(self,
mask_roi_extractor, mask_roi_extractor=None,
num_convs=1, num_convs=1,
feat_in=2048, feat_in=2048,
feat_out=256, feat_out=256,
......
...@@ -47,7 +47,7 @@ class Mask(object): ...@@ -47,7 +47,7 @@ class Mask(object):
im_info=inputs['im_info'], im_info=inputs['im_info'],
gt_classes=inputs['gt_class'], gt_classes=inputs['gt_class'],
is_crowd=inputs['is_crowd'], is_crowd=inputs['is_crowd'],
gt_segms=inputs['gt_mask'], gt_segms=inputs['gt_poly'],
rois=proposals, rois=proposals,
rois_num=proposals_num, rois_num=proposals_num,
labels_int32=labels_int32) labels_int32=labels_int32)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
...@@ -28,12 +14,13 @@ from paddle.fluid.layers.ops import cos ...@@ -28,12 +14,13 @@ from paddle.fluid.layers.ops import cos
from ppdet.core.workspace import register, serializable from ppdet.core.workspace import register, serializable
__all__ = ['LearningRate', 'OptimizerBuilder'] __all__ = ['Optimize']
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@serializable @serializable
@register
class PiecewiseDecay(object): class PiecewiseDecay(object):
""" """
Multi step learning rate decay Multi step learning rate decay
...@@ -43,7 +30,7 @@ class PiecewiseDecay(object): ...@@ -43,7 +30,7 @@ class PiecewiseDecay(object):
milestones (list): steps at which to decay learning rate milestones (list): steps at which to decay learning rate
""" """
def __init__(self, gamma=[0.1, 0.01], milestones=[60000, 80000]): def __init__(self, gamma=[0.1, 0.01], milestones=[8, 11]):
super(PiecewiseDecay, self).__init__() super(PiecewiseDecay, self).__init__()
if type(gamma) is not list: if type(gamma) is not list:
self.gamma = [] self.gamma = []
...@@ -53,9 +40,13 @@ class PiecewiseDecay(object): ...@@ -53,9 +40,13 @@ class PiecewiseDecay(object):
self.gamma = gamma self.gamma = gamma
self.milestones = milestones self.milestones = milestones
def __call__(self, base_lr=None, boundary=None, value=None): def __call__(self,
base_lr=None,
boundary=None,
value=None,
step_per_epoch=None):
if boundary is not None: if boundary is not None:
boundary.extend(self.milestones) boundary.extend(self.milestones * int(step_per_epoch))
if value is not None: if value is not None:
for i in self.gamma: for i in self.gamma:
...@@ -65,6 +56,7 @@ class PiecewiseDecay(object): ...@@ -65,6 +56,7 @@ class PiecewiseDecay(object):
@serializable @serializable
@register
class LinearWarmup(object): class LinearWarmup(object):
""" """
Warm up learning rate linearly Warm up learning rate linearly
...@@ -89,11 +81,14 @@ class LinearWarmup(object): ...@@ -89,11 +81,14 @@ class LinearWarmup(object):
value.append(lr) value.append(lr)
if i > 0: if i > 0:
boundary.append(i) boundary.append(i)
boundary.append(self.steps)
value.append(base_lr)
return boundary, value return boundary, value
@serializable
@register @register
class LearningRate(object): class BaseLR(object):
""" """
Learning Rate configuration Learning Rate configuration
...@@ -101,26 +96,24 @@ class LearningRate(object): ...@@ -101,26 +96,24 @@ class LearningRate(object):
base_lr (float): base learning rate base_lr (float): base learning rate
schedulers (list): learning rate schedulers schedulers (list): learning rate schedulers
""" """
__category__ = 'optim' __inject__ = ['decay', 'warmup']
def __init__(self, def __init__(self, base_lr=0.01, decay=None, warmup=None):
base_lr=0.01, super(BaseLR, self).__init__()
schedulers=[PiecewiseDecay(), LinearWarmup()]):
super(LearningRate, self).__init__()
self.base_lr = base_lr self.base_lr = base_lr
self.schedulers = schedulers self.decay = decay
self.warmup = warmup
def __call__(self): def __call__(self, step_per_epoch):
# TODO: split warmup & decay
# warmup # warmup
boundary, value = self.schedulers[1](self.base_lr) boundary, value = self.warmup(self.base_lr)
# decay # decay
decay_lr = self.schedulers[0](self.base_lr, boundary, value) decay_lr = self.decay(self.base_lr, boundary, value, step_per_epoch)
return decay_lr return decay_lr
@register @register
class OptimizerBuilder(): class Optimize():
""" """
Build optimizer handles Build optimizer handles
...@@ -129,35 +122,40 @@ class OptimizerBuilder(): ...@@ -129,35 +122,40 @@ class OptimizerBuilder():
optimizer (object): an `Optimizer` instance optimizer (object): an `Optimizer` instance
""" """
__category__ = 'optim' __category__ = 'optim'
__inject__ = ['learning_rate']
def __init__(self, def __init__(self,
clip_grad_by_norm=None, learning_rate,
regularizer={'type': 'L2', optimizer={'name': 'Momentum',
'factor': .0001}, 'momentum': 0.9},
optimizer={'type': 'Momentum', regularizer={'name': 'L2',
'momentum': .9}): 'factor': 0.0001},
self.clip_grad_by_norm = clip_grad_by_norm clip_grad_by_norm=None):
self.regularizer = regularizer self.learning_rate = learning_rate
self.optimizer = optimizer self.optimizer = optimizer
self.regularizer = regularizer
self.clip_grad_by_norm = clip_grad_by_norm
def __call__(self, learning_rate, params=None): def __call__(self, params=None, step_per_epoch=1):
if self.clip_grad_by_norm is not None:
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(
clip_norm=self.clip_grad_by_norm))
if self.regularizer: if self.regularizer:
reg_type = self.regularizer['type'] + 'Decay' reg_type = self.regularizer['name'] + 'Decay'
reg_factor = self.regularizer['factor'] reg_factor = self.regularizer['factor']
regularization = getattr(regularizer, reg_type)(reg_factor) regularization = getattr(regularizer, reg_type)(reg_factor)
else: else:
regularization = None regularization = None
if self.clip_grad_by_norm is not None:
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(
clip_norm=self.clip_grad_by_norm))
optim_args = self.optimizer.copy() optim_args = self.optimizer.copy()
optim_type = optim_args['type'] optim_type = optim_args['name']
del optim_args['type'] del optim_args['name']
op = getattr(optimizer, optim_type) op = getattr(optimizer, optim_type)
return op(learning_rate=learning_rate,
return op(learning_rate=self.learning_rate(step_per_epoch),
parameter_list=params, parameter_list=params,
regularization=regularization, regularization=regularization,
**optim_args) **optim_args)
...@@ -99,7 +99,7 @@ def clip_bbox(boxes, im_shape): ...@@ -99,7 +99,7 @@ def clip_bbox(boxes, im_shape):
@jit @jit
def bbox_overlaps(bboxes1, bboxes2): def compute_iou(bboxes1, bboxes2):
w1 = np.maximum(bboxes1[:, 2] - bboxes1[:, 0] + 1, 0) w1 = np.maximum(bboxes1[:, 2] - bboxes1[:, 0] + 1, 0)
h1 = np.maximum(bboxes1[:, 3] - bboxes1[:, 1] + 1, 0) h1 = np.maximum(bboxes1[:, 3] - bboxes1[:, 1] + 1, 0)
w2 = np.maximum(bboxes2[:, 2] - bboxes2[:, 0] + 1, 0) w2 = np.maximum(bboxes2[:, 2] - bboxes2[:, 0] + 1, 0)
......
...@@ -122,7 +122,7 @@ def polys_to_boxes(polys): ...@@ -122,7 +122,7 @@ def polys_to_boxes(polys):
@jit @jit
def bbox_overlaps_mask(boxes, query_boxes): def compute_iou_mask(boxes, query_boxes):
N = boxes.shape[0] N = boxes.shape[0]
K = query_boxes.shape[0] K = query_boxes.shape[0]
overlaps = np.zeros((N, K), dtype=boxes.dtype) overlaps = np.zeros((N, K), dtype=boxes.dtype)
......
...@@ -89,7 +89,7 @@ def generate_rpn_anchor_target(anchors, ...@@ -89,7 +89,7 @@ def generate_rpn_anchor_target(anchors,
@jit @jit
def label_anchor(anchors, gt_boxes): def label_anchor(anchors, gt_boxes):
iou = bbox_overlaps(anchors, gt_boxes) iou = compute_iou(anchors, gt_boxes)
# every gt's anchor's index # every gt's anchor's index
gt_bbox_anchor_inds = iou.argmax(axis=0) gt_bbox_anchor_inds = iou.argmax(axis=0)
...@@ -249,7 +249,7 @@ def label_bbox(boxes, ...@@ -249,7 +249,7 @@ def label_bbox(boxes,
class_nums=81, class_nums=81,
is_cascade_rcnn=False): is_cascade_rcnn=False):
iou = bbox_overlaps(boxes, gt_boxes) iou = compute_iou(boxes, gt_boxes)
# every roi's gt box's index # every roi's gt box's index
roi_gt_bbox_inds = np.zeros((boxes.shape[0]), dtype=np.int32) roi_gt_bbox_inds = np.zeros((boxes.shape[0]), dtype=np.int32)
...@@ -384,7 +384,7 @@ def sample_mask(boxes, gt_polys, label_int32, gt_classes, is_crowd, num_classes, ...@@ -384,7 +384,7 @@ def sample_mask(boxes, gt_polys, label_int32, gt_classes, is_crowd, num_classes,
masks_fg = np.zeros((fg_inds.shape[0], resolution**2), dtype=np.int32) masks_fg = np.zeros((fg_inds.shape[0], resolution**2), dtype=np.int32)
bbox_fg = boxes[fg_inds] bbox_fg = boxes[fg_inds]
iou = bbox_overlaps_mask(bbox_fg, boxes_from_polys) iou = compute_iou_mask(bbox_fg, boxes_from_polys)
fg_polys_inds = np.argmax(iou, axis=1) fg_polys_inds = np.argmax(iou, axis=1)
for i in range(bbox_fg.shape[0]): for i in range(bbox_fg.shape[0]):
......
...@@ -18,7 +18,6 @@ from ppdet.core.workspace import load_config, merge_config, create ...@@ -18,7 +18,6 @@ from ppdet.core.workspace import load_config, merge_config, create
from ppdet.utils.check import check_gpu, check_version, check_config from ppdet.utils.check import check_gpu, check_version, check_config
from ppdet.utils.cli import ArgsParser from ppdet.utils.cli import ArgsParser
from ppdet.utils.eval_utils import coco_eval_results from ppdet.utils.eval_utils import coco_eval_results
from ppdet.data.reader import create_reader
from ppdet.utils.checkpoint import load_dygraph_ckpt, save_dygraph_ckpt from ppdet.utils.checkpoint import load_dygraph_ckpt, save_dygraph_ckpt
...@@ -40,7 +39,15 @@ def parse_args(): ...@@ -40,7 +39,15 @@ def parse_args():
return args return args
def run(FLAGS, cfg): def run(FLAGS, cfg, place):
if FLAGS.use_gpu:
devices_num = 1
else:
devices_num = int(os.environ.get('CPU_NUM', 1))
# Data
eval_loader, _ = create('EvalReader')(cfg['worker_num'], place)
# Model # Model
main_arch = cfg.architecture main_arch = cfg.architecture
...@@ -49,16 +56,9 @@ def run(FLAGS, cfg): ...@@ -49,16 +56,9 @@ def run(FLAGS, cfg):
# Init Model # Init Model
model = load_dygraph_ckpt(model, ckpt=cfg.weights) model = load_dygraph_ckpt(model, ckpt=cfg.weights)
# Data Reader
if FLAGS.use_gpu:
devices_num = 1
else:
devices_num = int(os.environ.get('CPU_NUM', 1))
eval_reader = create_reader(cfg.EvalReader, devices_num=devices_num)
# Run Eval # Run Eval
outs_res = [] outs_res = []
for iter_id, data in enumerate(eval_reader()): for iter_id, data in enumerate(eval_loader):
start_time = time.time() start_time = time.time()
# forward # forward
...@@ -82,7 +82,6 @@ def main(): ...@@ -82,7 +82,6 @@ def main():
cfg = load_config(FLAGS.config) cfg = load_config(FLAGS.config)
merge_config(FLAGS.opt) merge_config(FLAGS.opt)
check_config(cfg)
check_gpu(cfg.use_gpu) check_gpu(cfg.use_gpu)
check_version() check_version()
...@@ -90,7 +89,7 @@ def main(): ...@@ -90,7 +89,7 @@ def main():
.dev_id) if cfg.use_gpu else fluid.CPUPlace() .dev_id) if cfg.use_gpu else fluid.CPUPlace()
with fluid.dygraph.guard(place): with fluid.dygraph.guard(place):
run(FLAGS, cfg) run(FLAGS, cfg, place)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -17,7 +17,6 @@ import numpy as np ...@@ -17,7 +17,6 @@ import numpy as np
from collections import deque from collections import deque
import paddle.fluid as fluid import paddle.fluid as fluid
from ppdet.core.workspace import load_config, merge_config, create from ppdet.core.workspace import load_config, merge_config, create
from ppdet.data.reader import create_reader
from ppdet.utils.stats import TrainingStats from ppdet.utils.stats import TrainingStats
from ppdet.utils.check import check_gpu, check_version, check_config from ppdet.utils.check import check_gpu, check_version, check_config
from ppdet.utils.cli import ArgsParser from ppdet.utils.cli import ArgsParser
...@@ -86,7 +85,7 @@ def parse_args(): ...@@ -86,7 +85,7 @@ def parse_args():
return args return args
def run(FLAGS, cfg): def run(FLAGS, cfg, place):
env = os.environ env = os.environ
FLAGS.dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env FLAGS.dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
if FLAGS.dist: if FLAGS.dist:
...@@ -99,13 +98,16 @@ def run(FLAGS, cfg): ...@@ -99,13 +98,16 @@ def run(FLAGS, cfg):
random.seed(0) random.seed(0)
np.random.seed(0) np.random.seed(0)
# Data
train_loader, step_per_epoch = create('TrainReader')(
cfg['worker_num'], place, use_prefetch=cfg['use_prefetch'])
# Model # Model
main_arch = cfg.architecture main_arch = cfg.architecture
model = create(cfg.architecture) model = create(cfg.architecture)
# Optimizer # Optimizer
lr = create('LearningRate')() optimizer = create('Optimize')(model.parameters(), step_per_epoch)
optimizer = create('OptimizerBuilder')(lr, model.parameters())
# Init Model & Optimzer # Init Model & Optimzer
model = load_dygraph_ckpt( model = load_dygraph_ckpt(
...@@ -120,65 +122,56 @@ def run(FLAGS, cfg): ...@@ -120,65 +122,56 @@ def run(FLAGS, cfg):
strategy = fluid.dygraph.parallel.prepare_context() strategy = fluid.dygraph.parallel.prepare_context()
model = fluid.dygraph.parallel.DataParallel(model, strategy) model = fluid.dygraph.parallel.DataParallel(model, strategy)
# Data Reader # Run Train
start_iter = 0 start_iter = 0
if cfg.use_gpu:
devices_num = fluid.core.get_cuda_device_count()
else:
devices_num = int(os.environ.get('CPU_NUM', 1))
train_reader = create_reader(
cfg.TrainReader, (cfg.max_iters - start_iter), cfg, devices_num=1)
time_stat = deque(maxlen=cfg.log_smooth_window) time_stat = deque(maxlen=cfg.log_smooth_window)
start_time = time.time() start_time = time.time()
end_time = time.time() end_time = time.time()
# Run Train for e_id in range(int(cfg.epoch)):
for iter_id, data in enumerate(train_reader()): for iter_id, data in enumerate(train_loader):
start_time = end_time
start_time = end_time end_time = time.time()
end_time = time.time() time_stat.append(end_time - start_time)
time_stat.append(end_time - start_time) time_cost = np.mean(time_stat)
time_cost = np.mean(time_stat) eta_sec = (cfg.epoch * step_per_epoch - iter_id) * time_cost
eta_sec = (cfg.max_iters - iter_id) * time_cost eta = str(datetime.timedelta(seconds=int(eta_sec)))
eta = str(datetime.timedelta(seconds=int(eta_sec)))
# Model Forward
# Model Forward model.train()
model.train() outputs = model(data, cfg['TrainReader']['inputs_def']['fields'],
outputs = model(data, cfg['TrainReader']['inputs_def']['fields'], 'train')
'train')
# Model Backward
# Model Backward loss = outputs['loss']
loss = outputs['loss'] if ParallelEnv().nranks > 1:
if ParallelEnv().nranks > 1: loss = model.scale_loss(loss)
loss = model.scale_loss(loss) loss.backward()
loss.backward() model.apply_collective_grads()
model.apply_collective_grads() else:
else: loss.backward()
loss.backward() optimizer.minimize(loss)
optimizer.minimize(loss) model.clear_gradients()
model.clear_gradients() curr_lr = optimizer.current_step_lr()
curr_lr = optimizer.current_step_lr()
if ParallelEnv().nranks < 2 or ParallelEnv().local_rank == 0:
if ParallelEnv().nranks < 2 or ParallelEnv().local_rank == 0: # Log state
# Log state if iter_id == 0:
if iter_id == 0: train_stats = TrainingStats(cfg.log_smooth_window,
train_stats = TrainingStats(cfg.log_smooth_window, outputs.keys())
outputs.keys()) train_stats.update(outputs)
train_stats.update(outputs) logs = train_stats.log()
logs = train_stats.log() if iter_id % cfg.log_iter == 0:
if iter_id % cfg.log_iter == 0: strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format(
strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format( iter_id, curr_lr, logs, time_cost, eta)
iter_id, curr_lr, logs, time_cost, eta) logger.info(strs)
logger.info(strs)
# Save Stage # Save Stage
if iter_id > 0 and iter_id % int( if fluid.dygraph.parallel.Env().local_rank == 0:
cfg.snapshot_iter) == 0 or iter_id == cfg.max_iters - 1: cfg_name = os.path.basename(FLAGS.config).split('.')[0]
cfg_name = os.path.basename(FLAGS.config).split('.')[0] save_name = str(e_id + 1) if e_id + 1 != int(
save_name = str( cfg.epoch) else "model_final"
iter_id) if iter_id != cfg.max_iters - 1 else "model_final" save_dir = os.path.join(cfg.save_dir, cfg_name, save_name)
save_dir = os.path.join(cfg.save_dir, cfg_name, save_name) save_dygraph_ckpt(model, optimizer, save_dir)
save_dygraph_ckpt(model, optimizer, save_dir)
def main(): def main():
...@@ -186,7 +179,6 @@ def main(): ...@@ -186,7 +179,6 @@ def main():
cfg = load_config(FLAGS.config) cfg = load_config(FLAGS.config)
merge_config(FLAGS.opt) merge_config(FLAGS.opt)
check_config(cfg)
check_gpu(cfg.use_gpu) check_gpu(cfg.use_gpu)
check_version() check_version()
...@@ -194,7 +186,7 @@ def main(): ...@@ -194,7 +186,7 @@ def main():
if cfg.use_gpu else fluid.CPUPlace() if cfg.use_gpu else fluid.CPUPlace()
with fluid.dygraph.guard(place): with fluid.dygraph.guard(place):
run(FLAGS, cfg) run(FLAGS, cfg, place)
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册