未验证 提交 e527466d 编写于 作者: W wangxinxin08 提交者: GitHub

unify data transform (#2227)

上级 5b6bebf2
worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: {}
- RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlipOp: {prob: 0.5}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: true}
- PadBatch: {pad_to_stride: 32, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
......@@ -15,12 +15,12 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......@@ -29,12 +29,12 @@ EvalReader:
TestReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: {}
- RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlipOp: {prob: 0.5}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: true}
- PadBatch: {pad_to_stride: 32, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
......@@ -15,12 +15,12 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......@@ -29,12 +29,12 @@ EvalReader:
TestReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: {}
- RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlipOp: {prob: 0.5}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: true}
- PadBatch: {pad_to_stride: 32, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
......@@ -15,12 +15,12 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......@@ -29,12 +29,12 @@ EvalReader:
TestReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: {}
- RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlipOp: {prob: 0.5}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: -1., pad_gt: true}
- PadBatch: {pad_to_stride: -1., pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
......@@ -15,12 +15,12 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: -1., pad_gt: false}
- PadBatch: {pad_to_stride: -1., pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......@@ -29,12 +29,12 @@ EvalReader:
TestReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: -1., pad_gt: false}
- PadBatch: {pad_to_stride: -1., pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: {}
- RandomFlipOp: {prob: 0.5}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
- PermuteOp: {}
- Decode: {}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Resize: {target_size: [800, 1333], keep_ratio: true, interp: 1}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 128}
- PadBatch: {pad_to_stride: 128}
- Gt2FCOSTarget:
object_sizes_boundary: [64, 128, 256, 512]
center_sampling_radius: 1.5
......@@ -20,23 +20,23 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {}
- Decode: {}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 128}
- PadBatch: {pad_to_stride: 128}
batch_size: 1
shuffle: false
TestReader:
sample_transforms:
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {}
- Decode: {}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 128}
- PadBatch: {pad_to_stride: 128}
batch_size: 1
shuffle: false
worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: {}
- RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlipOp: {prob: 0.5}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: true}
- PadBatch: {pad_to_stride: 32, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......@@ -28,12 +28,12 @@ EvalReader:
TestReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
- PadBatch: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: {}
- RandomResizeOp: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlipOp: {prob: 0.5}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: -1., pad_gt: true}
- PadBatch: {pad_to_stride: -1., pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
......@@ -15,12 +15,12 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: -1., pad_gt: false}
- PadBatch: {pad_to_stride: -1., pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......@@ -29,12 +29,12 @@ EvalReader:
TestReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: -1., pad_gt: false}
- PadBatch: {pad_to_stride: -1., pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......
......@@ -3,20 +3,20 @@ TrainReader:
inputs_def:
num_max_boxes: 50
sample_transforms:
- DecodeOp: {}
- MixupOp: {alpha: 1.5, beta: 1.5}
- RandomDistortOp: {}
- RandomExpandOp: {fill_value: [123.675, 116.28, 103.53]}
- RandomCropOp: {}
- RandomFlipOp: {}
- Decode: {}
- Mixup: {alpha: 1.5, beta: 1.5}
- RandomDistort: {}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomCrop: {}
- RandomFlip: {}
batch_transforms:
- BatchRandomResizeOp: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
- NormalizeBoxOp: {}
- PadBoxOp: {num_max_boxes: 50}
- BboxXYXY2XYWHOp: {}
- NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- PermuteOp: {}
- Gt2YoloTargetOp: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
- BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 50}
- BboxXYXY2XYWH: {}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
- Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
batch_size: 24
shuffle: true
drop_last: true
......@@ -25,10 +25,10 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- PermuteOp: {}
- Decode: {}
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
batch_size: 8
drop_empty: false
......@@ -36,8 +36,8 @@ TestReader:
inputs_def:
image_shape: [3, 608, 608]
sample_transforms:
- DecodeOp: {}
- ResizeOp: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- PermuteOp: {}
- Decode: {}
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
batch_size: 1
worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: {}
- Decode: {}
- Poly2Mask: {}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- RandomFlipOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- PermuteOp: {}
- Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- RandomFlip: {}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32}
- Gt2Solov2TargetOp: {num_grids: [40, 36, 24, 16, 12],
- PadBatch: {pad_to_stride: 32}
- Gt2Solov2Target: {num_grids: [40, 36, 24, 16, 12],
scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]],
coord_sigma: 0.2}
batch_size: 2
......@@ -19,12 +19,12 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {}
- Decode: {}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32}
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false
......@@ -33,12 +33,12 @@ EvalReader:
TestReader:
sample_transforms:
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- PermuteOp: {}
- Decode: {}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
- Permute: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32}
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false
......@@ -3,17 +3,17 @@ TrainReader:
inputs_def:
num_max_boxes: 90
sample_transforms:
- DecodeOp: {}
- RandomDistortOp: {brightness: [0.5, 1.125, 0.875], random_apply: False}
- RandomExpandOp: {fill_value: [127.5, 127.5, 127.5]}
- RandomCropOp: {allow_no_crop: Fasle}
- RandomFlipOp: {}
- ResizeOp: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeBoxOp: {}
- PadBoxOp: {num_max_boxes: 90}
- Decode: {}
- RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
- RandomExpand: {fill_value: [127.5, 127.5, 127.5]}
- RandomCrop: {allow_no_crop: Fasle}
- RandomFlip: {}
- Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 90}
batch_transforms:
- NormalizeImageOp: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
- PermuteOp: {}
- NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
- Permute: {}
batch_size: 32
shuffle: true
drop_last: true
......@@ -21,10 +21,10 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeImageOp: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
- PermuteOp: {}
- Decode: {}
- Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
- Permute: {}
batch_size: 1
drop_empty: false
......@@ -33,8 +33,8 @@ TestReader:
inputs_def:
image_shape: [3, 300, 300]
sample_transforms:
- DecodeOp: {}
- ResizeOp: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeImageOp: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
- PermuteOp: {}
- Decode: {}
- Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
- Permute: {}
batch_size: 1
......@@ -4,18 +4,18 @@ TrainReader:
num_max_boxes: 90
sample_transforms:
- DecodeOp: {}
- RandomDistortOp: {brightness: [0.5, 1.125, 0.875], random_apply: False}
- RandomExpandOp: {fill_value: [104., 117., 123.]}
- RandomCropOp: {allow_no_crop: true}
- RandomFlipOp: {}
- ResizeOp: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeBoxOp: {}
- PadBoxOp: {num_max_boxes: 90}
- Decode: {}
- RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
- RandomExpand: {fill_value: [104., 117., 123.]}
- RandomCrop: {allow_no_crop: true}
- RandomFlip: {}
- Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 90}
batch_transforms:
- NormalizeImageOp: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
- PermuteOp: {}
- NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
- Permute: {}
batch_size: 8
shuffle: true
......@@ -24,10 +24,10 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeImageOp: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
- PermuteOp: {}
- Decode: {}
- Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
- Permute: {}
batch_size: 1
drop_empty: false
......@@ -35,8 +35,8 @@ TestReader:
inputs_def:
image_shape: [3, 300, 300]
sample_transforms:
- DecodeOp: {}
- ResizeOp: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeImageOp: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
- PermuteOp: {}
- Decode: {}
- Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
- Permute: {}
batch_size: 1
......@@ -3,17 +3,17 @@ TrainReader:
inputs_def:
num_max_boxes: 90
sample_transforms:
- DecodeOp: {}
- RandomDistortOp: {brightness: [0.5, 1.125, 0.875], random_apply: False}
- RandomExpandOp: {fill_value: [123.675, 116.28, 103.53]}
- RandomCropOp: {allow_no_crop: Fasle}
- RandomFlipOp: {}
- ResizeOp: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeBoxOp: {}
- PadBoxOp: {num_max_boxes: 90}
- Decode: {}
- RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomCrop: {allow_no_crop: Fasle}
- RandomFlip: {}
- Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 90}
batch_transforms:
- NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
- PermuteOp: {}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
- Permute: {}
batch_size: 64
shuffle: true
drop_last: true
......@@ -21,10 +21,10 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
- PermuteOp: {}
- Decode: {}
- Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
- Permute: {}
batch_size: 1
drop_empty: false
......@@ -33,8 +33,8 @@ TestReader:
inputs_def:
image_shape: [3, 300, 300]
sample_transforms:
- DecodeOp: {}
- ResizeOp: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
- PermuteOp: {}
- Decode: {}
- Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
- Permute: {}
batch_size: 1
......@@ -3,17 +3,17 @@ TrainReader:
inputs_def:
num_max_boxes: 90
sample_transforms:
- DecodeOp: {}
- RandomDistortOp: {brightness: [0.5, 1.125, 0.875], random_apply: False}
- RandomExpandOp: {fill_value: [123.675, 116.28, 103.53]}
- RandomCropOp: {allow_no_crop: Fasle}
- RandomFlipOp: {}
- ResizeOp: {target_size: [320, 320], keep_ratio: False, interp: 1}
- NormalizeBoxOp: {}
- PadBoxOp: {num_max_boxes: 90}
- Decode: {}
- RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomCrop: {allow_no_crop: Fasle}
- RandomFlip: {}
- Resize: {target_size: [320, 320], keep_ratio: False, interp: 1}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 90}
batch_transforms:
- NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
- PermuteOp: {}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
- Permute: {}
batch_size: 64
shuffle: true
drop_last: true
......@@ -21,10 +21,10 @@ TrainReader:
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {target_size: [320, 320], keep_ratio: False, interp: 1}
- NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
- PermuteOp: {}
- Decode: {}
- Resize: {target_size: [320, 320], keep_ratio: False, interp: 1}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
- Permute: {}
batch_size: 1
drop_empty: false
......@@ -33,8 +33,8 @@ TestReader:
inputs_def:
image_shape: [3, 320, 320]
sample_transforms:
- DecodeOp: {}
- ResizeOp: {target_size: [320, 320], keep_ratio: False, interp: 1}
- NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
- PermuteOp: {}
- Decode: {}
- Resize: {target_size: [320, 320], keep_ratio: False, interp: 1}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
- Permute: {}
batch_size: 1
worker_num: 2
TrainReader:
sample_transforms:
- DecodeOp: {}
- RandomFlipOp: {prob: 0.5}
- ResizeOp: {interp: 1, target_size: [512, 512], keep_ratio: False}
- NormalizeImageOp: {mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375], is_scale: false}
- PermuteOp: {}
- Decode: {}
- RandomFlip: {prob: 0.5}
- Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
- NormalizeImage: {mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375], is_scale: false}
- Permute: {}
batch_transforms:
- Gt2TTFTargetOp: {down_ratio: 4}
- PadBatchOp: {pad_to_stride: 32, pad_gt: true}
- Gt2TTFTarget: {down_ratio: 4}
- PadBatch: {pad_to_stride: 32, pad_gt: true}
batch_size: 12
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 1, target_size: [512, 512], keep_ratio: False}
- NormalizeImageOp: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
- NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
- Permute: {}
batch_size: 1
drop_last: false
drop_empty: false
TestReader:
sample_transforms:
- DecodeOp: {}
- ResizeOp: {interp: 1, target_size: [512, 512], keep_ratio: False}
- NormalizeImageOp: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
- PermuteOp: {}
- Decode: {}
- Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
- NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
- Permute: {}
batch_size: 1
drop_last: false
drop_empty: false
......@@ -3,20 +3,20 @@ TrainReader:
inputs_def:
num_max_boxes: 50
sample_transforms:
- DecodeOp: {}
- MixupOp: {alpha: 1.5, beta: 1.5}
- RandomDistortOp: {}
- RandomExpandOp: {fill_value: [123.675, 116.28, 103.53]}
- RandomCropOp: {}
- RandomFlipOp: {}
- Decode: {}
- Mixup: {alpha: 1.5, beta: 1.5}
- RandomDistort: {}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomCrop: {}
- RandomFlip: {}
batch_transforms:
- BatchRandomResizeOp: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
- NormalizeBoxOp: {}
- PadBoxOp: {num_max_boxes: 50}
- BboxXYXY2XYWHOp: {}
- NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- PermuteOp: {}
- Gt2YoloTargetOp: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
- BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 50}
- BboxXYXY2XYWH: {}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
- Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
batch_size: 8
shuffle: true
drop_last: true
......@@ -27,10 +27,10 @@ EvalReader:
inputs_def:
num_max_boxes: 50
sample_transforms:
- DecodeOp: {}
- ResizeOp: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- PermuteOp: {}
- Decode: {}
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
batch_size: 1
drop_empty: false
......@@ -38,8 +38,8 @@ TestReader:
inputs_def:
image_shape: [3, 608, 608]
sample_transforms:
- DecodeOp: {}
- ResizeOp: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- PermuteOp: {}
- Decode: {}
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
batch_size: 1
......@@ -14,14 +14,9 @@
from . import operators
from . import batch_operators
from . import operator
from . import batch_operator
# TODO: operators and batch_operators will be replaced by operator and batch_operator
from .operators import *
from .operator import *
from .batch_operators import *
from .batch_operator import *
__all__ = []
__all__ += registered_ops
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
import cv2
import numpy as np
from .operator import register_op, BaseOperator, ResizeOp
from .op_helper import jaccard_overlap, gaussian2D
from scipy import ndimage
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = [
'PadBatchOp',
'BatchRandomResizeOp',
'Gt2YoloTargetOp',
'Gt2FCOSTargetOp',
'Gt2TTFTargetOp',
'Gt2Solov2TargetOp',
]
@register_op
class PadBatchOp(BaseOperator):
"""
Pad a batch of samples so they can be divisible by a stride.
The layout of each image should be 'CHW'.
Args:
pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
height and width is divisible by `pad_to_stride`.
"""
def __init__(self, pad_to_stride=0, pad_gt=False):
super(PadBatchOp, self).__init__()
self.pad_to_stride = pad_to_stride
self.pad_gt = pad_gt
def __call__(self, samples, context=None):
"""
Args:
samples (list): a batch of sample, each is dict.
"""
coarsest_stride = self.pad_to_stride
max_shape = np.array([data['image'].shape for data in samples]).max(
axis=0)
if coarsest_stride > 0:
max_shape[1] = int(
np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
max_shape[2] = int(
np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
padding_batch = []
for data in samples:
im = data['image']
im_c, im_h, im_w = im.shape[:]
padding_im = np.zeros(
(im_c, max_shape[1], max_shape[2]), dtype=np.float32)
padding_im[:, :im_h, :im_w] = im
data['image'] = padding_im
if 'semantic' in data and data['semantic'] is not None:
semantic = data['semantic']
padding_sem = np.zeros(
(1, max_shape[1], max_shape[2]), dtype=np.float32)
padding_sem[:, :im_h, :im_w] = semantic
data['semantic'] = padding_sem
if 'gt_segm' in data and data['gt_segm'] is not None:
gt_segm = data['gt_segm']
padding_segm = np.zeros(
(gt_segm.shape[0], max_shape[1], max_shape[2]),
dtype=np.uint8)
padding_segm[:, :im_h, :im_w] = gt_segm
data['gt_segm'] = padding_segm
if self.pad_gt:
gt_num = []
if 'gt_poly' in data and data['gt_poly'] is not None and len(data[
'gt_poly']) > 0:
pad_mask = True
else:
pad_mask = False
if pad_mask:
poly_num = []
poly_part_num = []
point_num = []
for data in samples:
gt_num.append(data['gt_bbox'].shape[0])
if pad_mask:
poly_num.append(len(data['gt_poly']))
for poly in data['gt_poly']:
poly_part_num.append(int(len(poly)))
for p_p in poly:
point_num.append(int(len(p_p) / 2))
gt_num_max = max(gt_num)
for i, data in enumerate(samples):
gt_box_data = -np.ones([gt_num_max, 4], dtype=np.float32)
gt_class_data = -np.ones([gt_num_max], dtype=np.int32)
is_crowd_data = np.ones([gt_num_max], dtype=np.int32)
if pad_mask:
poly_num_max = max(poly_num)
poly_part_num_max = max(poly_part_num)
point_num_max = max(point_num)
gt_masks_data = -np.ones(
[poly_num_max, poly_part_num_max, point_num_max, 2],
dtype=np.float32)
gt_num = data['gt_bbox'].shape[0]
gt_box_data[0:gt_num, :] = data['gt_bbox']
gt_class_data[0:gt_num] = np.squeeze(data['gt_class'])
is_crowd_data[0:gt_num] = np.squeeze(data['is_crowd'])
if pad_mask:
for j, poly in enumerate(data['gt_poly']):
for k, p_p in enumerate(poly):
pp_np = np.array(p_p).reshape(-1, 2)
gt_masks_data[j, k, :pp_np.shape[0], :] = pp_np
data['gt_poly'] = gt_masks_data
data['gt_bbox'] = gt_box_data
data['gt_class'] = gt_class_data
data['is_crowd'] = is_crowd_data
return samples
@register_op
class BatchRandomResizeOp(BaseOperator):
"""
Resize image to target size randomly. random target_size and interpolation method
Args:
target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
keep_ratio (bool): whether keep_raio or not, default true
interp (int): the interpolation method
random_size (bool): whether random select target size of image
random_interp (bool): whether random select interpolation method
"""
def __init__(self,
target_size,
keep_ratio,
interp=cv2.INTER_NEAREST,
random_size=True,
random_interp=False):
super(BatchRandomResizeOp, self).__init__()
self.keep_ratio = keep_ratio
self.interps = [
cv2.INTER_NEAREST,
cv2.INTER_LINEAR,
cv2.INTER_AREA,
cv2.INTER_CUBIC,
cv2.INTER_LANCZOS4,
]
self.interp = interp
assert isinstance(target_size, (
int, Sequence)), "target_size must be int, list or tuple"
if random_size and not isinstance(target_size, list):
raise TypeError(
"Type of target_size is invalid when random_size is True. Must be List, now is {}".
format(type(target_size)))
self.target_size = target_size
self.random_size = random_size
self.random_interp = random_interp
def __call__(self, samples, context=None):
if self.random_size:
target_size = np.random.choice(self.target_size)
else:
target_size = self.target_size
if self.random_interp:
interp = np.random.choice(self.interps)
else:
interp = self.interp
resizer = ResizeOp(
target_size, keep_ratio=self.keep_ratio, interp=interp)
return resizer(samples, context=context)
@register_op
class Gt2YoloTargetOp(BaseOperator):
"""
Generate YOLOv3 targets by groud truth data, this operator is only used in
fine grained YOLOv3 loss mode
"""
def __init__(self,
anchors,
anchor_masks,
downsample_ratios,
num_classes=80,
iou_thresh=1.):
super(Gt2YoloTargetOp, self).__init__()
self.anchors = anchors
self.anchor_masks = anchor_masks
self.downsample_ratios = downsample_ratios
self.num_classes = num_classes
self.iou_thresh = iou_thresh
def __call__(self, samples, context=None):
assert len(self.anchor_masks) == len(self.downsample_ratios), \
"anchor_masks', and 'downsample_ratios' should have same length."
h, w = samples[0]['image'].shape[1:3]
an_hw = np.array(self.anchors) / np.array([[w, h]])
for sample in samples:
# im, gt_bbox, gt_class, gt_score = sample
im = sample['image']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
if 'gt_score' not in sample:
sample['gt_score'] = np.ones(
(gt_bbox.shape[0], 1), dtype=np.float32)
gt_score = sample['gt_score']
for i, (
mask, downsample_ratio
) in enumerate(zip(self.anchor_masks, self.downsample_ratios)):
grid_h = int(h / downsample_ratio)
grid_w = int(w / downsample_ratio)
target = np.zeros(
(len(mask), 6 + self.num_classes, grid_h, grid_w),
dtype=np.float32)
for b in range(gt_bbox.shape[0]):
gx, gy, gw, gh = gt_bbox[b, :]
cls = gt_class[b]
score = gt_score[b]
if gw <= 0. or gh <= 0. or score <= 0.:
continue
# find best match anchor index
best_iou = 0.
best_idx = -1
for an_idx in range(an_hw.shape[0]):
iou = jaccard_overlap(
[0., 0., gw, gh],
[0., 0., an_hw[an_idx, 0], an_hw[an_idx, 1]])
if iou > best_iou:
best_iou = iou
best_idx = an_idx
gi = int(gx * grid_w)
gj = int(gy * grid_h)
# gtbox should be regresed in this layes if best match
# anchor index in anchor mask of this layer
if best_idx in mask:
best_n = mask.index(best_idx)
# x, y, w, h, scale
target[best_n, 0, gj, gi] = gx * grid_w - gi
target[best_n, 1, gj, gi] = gy * grid_h - gj
target[best_n, 2, gj, gi] = np.log(
gw * w / self.anchors[best_idx][0])
target[best_n, 3, gj, gi] = np.log(
gh * h / self.anchors[best_idx][1])
target[best_n, 4, gj, gi] = 2.0 - gw * gh
# objectness record gt_score
target[best_n, 5, gj, gi] = score
# classification
target[best_n, 6 + cls, gj, gi] = 1.
# For non-matched anchors, calculate the target if the iou
# between anchor and gt is larger than iou_thresh
if self.iou_thresh < 1:
for idx, mask_i in enumerate(mask):
if mask_i == best_idx: continue
iou = jaccard_overlap(
[0., 0., gw, gh],
[0., 0., an_hw[mask_i, 0], an_hw[mask_i, 1]])
if iou > self.iou_thresh and target[idx, 5, gj,
gi] == 0.:
# x, y, w, h, scale
target[idx, 0, gj, gi] = gx * grid_w - gi
target[idx, 1, gj, gi] = gy * grid_h - gj
target[idx, 2, gj, gi] = np.log(
gw * w / self.anchors[mask_i][0])
target[idx, 3, gj, gi] = np.log(
gh * h / self.anchors[mask_i][1])
target[idx, 4, gj, gi] = 2.0 - gw * gh
# objectness record gt_score
target[idx, 5, gj, gi] = score
# classification
target[idx, 6 + cls, gj, gi] = 1.
sample['target{}'.format(i)] = target
# remove useless gt_class and gt_score after target calculated
sample.pop('gt_class')
sample.pop('gt_score')
return samples
@register_op
class Gt2FCOSTargetOp(BaseOperator):
"""
Generate FCOS targets by groud truth data
"""
def __init__(self,
object_sizes_boundary,
center_sampling_radius,
downsample_ratios,
norm_reg_targets=False):
super(Gt2FCOSTargetOp, self).__init__()
self.center_sampling_radius = center_sampling_radius
self.downsample_ratios = downsample_ratios
self.INF = np.inf
self.object_sizes_boundary = [-1] + object_sizes_boundary + [self.INF]
object_sizes_of_interest = []
for i in range(len(self.object_sizes_boundary) - 1):
object_sizes_of_interest.append([
self.object_sizes_boundary[i], self.object_sizes_boundary[i + 1]
])
self.object_sizes_of_interest = object_sizes_of_interest
self.norm_reg_targets = norm_reg_targets
def _compute_points(self, w, h):
"""
compute the corresponding points in each feature map
:param h: image height
:param w: image width
:return: points from all feature map
"""
locations = []
for stride in self.downsample_ratios:
shift_x = np.arange(0, w, stride).astype(np.float32)
shift_y = np.arange(0, h, stride).astype(np.float32)
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shift_x = shift_x.flatten()
shift_y = shift_y.flatten()
location = np.stack([shift_x, shift_y], axis=1) + stride // 2
locations.append(location)
num_points_each_level = [len(location) for location in locations]
locations = np.concatenate(locations, axis=0)
return locations, num_points_each_level
def _convert_xywh2xyxy(self, gt_bbox, w, h):
"""
convert the bounding box from style xywh to xyxy
:param gt_bbox: bounding boxes normalized into [0, 1]
:param w: image width
:param h: image height
:return: bounding boxes in xyxy style
"""
bboxes = gt_bbox.copy()
bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * w
bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * h
bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
return bboxes
def _check_inside_boxes_limited(self, gt_bbox, xs, ys,
num_points_each_level):
"""
check if points is within the clipped boxes
:param gt_bbox: bounding boxes
:param xs: horizontal coordinate of points
:param ys: vertical coordinate of points
:return: the mask of points is within gt_box or not
"""
bboxes = np.reshape(
gt_bbox, newshape=[1, gt_bbox.shape[0], gt_bbox.shape[1]])
bboxes = np.tile(bboxes, reps=[xs.shape[0], 1, 1])
ct_x = (bboxes[:, :, 0] + bboxes[:, :, 2]) / 2
ct_y = (bboxes[:, :, 1] + bboxes[:, :, 3]) / 2
beg = 0
clipped_box = bboxes.copy()
for lvl, stride in enumerate(self.downsample_ratios):
end = beg + num_points_each_level[lvl]
stride_exp = self.center_sampling_radius * stride
clipped_box[beg:end, :, 0] = np.maximum(
bboxes[beg:end, :, 0], ct_x[beg:end, :] - stride_exp)
clipped_box[beg:end, :, 1] = np.maximum(
bboxes[beg:end, :, 1], ct_y[beg:end, :] - stride_exp)
clipped_box[beg:end, :, 2] = np.minimum(
bboxes[beg:end, :, 2], ct_x[beg:end, :] + stride_exp)
clipped_box[beg:end, :, 3] = np.minimum(
bboxes[beg:end, :, 3], ct_y[beg:end, :] + stride_exp)
beg = end
l_res = xs - clipped_box[:, :, 0]
r_res = clipped_box[:, :, 2] - xs
t_res = ys - clipped_box[:, :, 1]
b_res = clipped_box[:, :, 3] - ys
clipped_box_reg_targets = np.stack([l_res, t_res, r_res, b_res], axis=2)
inside_gt_box = np.min(clipped_box_reg_targets, axis=2) > 0
return inside_gt_box
def __call__(self, samples, context=None):
assert len(self.object_sizes_of_interest) == len(self.downsample_ratios), \
"object_sizes_of_interest', and 'downsample_ratios' should have same length."
for sample in samples:
# im, gt_bbox, gt_class, gt_score = sample
im = sample['image']
bboxes = sample['gt_bbox']
gt_class = sample['gt_class']
# calculate the locations
h, w = im.shape[1:3]
points, num_points_each_level = self._compute_points(w, h)
object_scale_exp = []
for i, num_pts in enumerate(num_points_each_level):
object_scale_exp.append(
np.tile(
np.array([self.object_sizes_of_interest[i]]),
reps=[num_pts, 1]))
object_scale_exp = np.concatenate(object_scale_exp, axis=0)
gt_area = (bboxes[:, 2] - bboxes[:, 0]) * (
bboxes[:, 3] - bboxes[:, 1])
xs, ys = points[:, 0], points[:, 1]
xs = np.reshape(xs, newshape=[xs.shape[0], 1])
xs = np.tile(xs, reps=[1, bboxes.shape[0]])
ys = np.reshape(ys, newshape=[ys.shape[0], 1])
ys = np.tile(ys, reps=[1, bboxes.shape[0]])
l_res = xs - bboxes[:, 0]
r_res = bboxes[:, 2] - xs
t_res = ys - bboxes[:, 1]
b_res = bboxes[:, 3] - ys
reg_targets = np.stack([l_res, t_res, r_res, b_res], axis=2)
if self.center_sampling_radius > 0:
is_inside_box = self._check_inside_boxes_limited(
bboxes, xs, ys, num_points_each_level)
else:
is_inside_box = np.min(reg_targets, axis=2) > 0
# check if the targets is inside the corresponding level
max_reg_targets = np.max(reg_targets, axis=2)
lower_bound = np.tile(
np.expand_dims(
object_scale_exp[:, 0], axis=1),
reps=[1, max_reg_targets.shape[1]])
high_bound = np.tile(
np.expand_dims(
object_scale_exp[:, 1], axis=1),
reps=[1, max_reg_targets.shape[1]])
is_match_current_level = \
(max_reg_targets > lower_bound) & \
(max_reg_targets < high_bound)
points2gtarea = np.tile(
np.expand_dims(
gt_area, axis=0), reps=[xs.shape[0], 1])
points2gtarea[is_inside_box == 0] = self.INF
points2gtarea[is_match_current_level == 0] = self.INF
points2min_area = points2gtarea.min(axis=1)
points2min_area_ind = points2gtarea.argmin(axis=1)
labels = gt_class[points2min_area_ind] + 1
labels[points2min_area == self.INF] = 0
reg_targets = reg_targets[range(xs.shape[0]), points2min_area_ind]
ctn_targets = np.sqrt((reg_targets[:, [0, 2]].min(axis=1) / \
reg_targets[:, [0, 2]].max(axis=1)) * \
(reg_targets[:, [1, 3]].min(axis=1) / \
reg_targets[:, [1, 3]].max(axis=1))).astype(np.float32)
ctn_targets = np.reshape(
ctn_targets, newshape=[ctn_targets.shape[0], 1])
ctn_targets[labels <= 0] = 0
pos_ind = np.nonzero(labels != 0)
reg_targets_pos = reg_targets[pos_ind[0], :]
split_sections = []
beg = 0
for lvl in range(len(num_points_each_level)):
end = beg + num_points_each_level[lvl]
split_sections.append(end)
beg = end
labels_by_level = np.split(labels, split_sections, axis=0)
reg_targets_by_level = np.split(reg_targets, split_sections, axis=0)
ctn_targets_by_level = np.split(ctn_targets, split_sections, axis=0)
for lvl in range(len(self.downsample_ratios)):
grid_w = int(np.ceil(w / self.downsample_ratios[lvl]))
grid_h = int(np.ceil(h / self.downsample_ratios[lvl]))
if self.norm_reg_targets:
sample['reg_target{}'.format(lvl)] = \
np.reshape(
reg_targets_by_level[lvl] / \
self.downsample_ratios[lvl],
newshape=[grid_h, grid_w, 4])
else:
sample['reg_target{}'.format(lvl)] = np.reshape(
reg_targets_by_level[lvl],
newshape=[grid_h, grid_w, 4])
sample['labels{}'.format(lvl)] = np.reshape(
labels_by_level[lvl], newshape=[grid_h, grid_w, 1])
sample['centerness{}'.format(lvl)] = np.reshape(
ctn_targets_by_level[lvl], newshape=[grid_h, grid_w, 1])
sample.pop('is_crowd')
sample.pop('gt_class')
sample.pop('gt_bbox')
return samples
@register_op
class Gt2TTFTargetOp(BaseOperator):
__shared__ = ['num_classes']
"""
Gt2TTFTarget
Generate TTFNet targets by ground truth data
Args:
num_classes(int): the number of classes.
down_ratio(int): the down ratio from images to heatmap, 4 by default.
alpha(float): the alpha parameter to generate gaussian target.
0.54 by default.
"""
def __init__(self, num_classes=80, down_ratio=4, alpha=0.54):
super(Gt2TTFTargetOp, self).__init__()
self.down_ratio = down_ratio
self.num_classes = num_classes
self.alpha = alpha
def __call__(self, samples, context=None):
output_size = samples[0]['image'].shape[1]
feat_size = output_size // self.down_ratio
for sample in samples:
heatmap = np.zeros(
(self.num_classes, feat_size, feat_size), dtype='float32')
box_target = np.ones(
(4, feat_size, feat_size), dtype='float32') * -1
reg_weight = np.zeros((1, feat_size, feat_size), dtype='float32')
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
bbox_w = gt_bbox[:, 2] - gt_bbox[:, 0] + 1
bbox_h = gt_bbox[:, 3] - gt_bbox[:, 1] + 1
area = bbox_w * bbox_h
boxes_areas_log = np.log(area)
boxes_ind = np.argsort(boxes_areas_log, axis=0)[::-1]
boxes_area_topk_log = boxes_areas_log[boxes_ind]
gt_bbox = gt_bbox[boxes_ind]
gt_class = gt_class[boxes_ind]
feat_gt_bbox = gt_bbox / self.down_ratio
feat_gt_bbox = np.clip(feat_gt_bbox, 0, feat_size - 1)
feat_hs, feat_ws = (feat_gt_bbox[:, 3] - feat_gt_bbox[:, 1],
feat_gt_bbox[:, 2] - feat_gt_bbox[:, 0])
ct_inds = np.stack(
[(gt_bbox[:, 0] + gt_bbox[:, 2]) / 2,
(gt_bbox[:, 1] + gt_bbox[:, 3]) / 2],
axis=1) / self.down_ratio
h_radiuses_alpha = (feat_hs / 2. * self.alpha).astype('int32')
w_radiuses_alpha = (feat_ws / 2. * self.alpha).astype('int32')
for k in range(len(gt_bbox)):
cls_id = gt_class[k]
fake_heatmap = np.zeros((feat_size, feat_size), dtype='float32')
self.draw_truncate_gaussian(fake_heatmap, ct_inds[k],
h_radiuses_alpha[k],
w_radiuses_alpha[k])
heatmap[cls_id] = np.maximum(heatmap[cls_id], fake_heatmap)
box_target_inds = fake_heatmap > 0
box_target[:, box_target_inds] = gt_bbox[k][:, None]
local_heatmap = fake_heatmap[box_target_inds]
ct_div = np.sum(local_heatmap)
local_heatmap *= boxes_area_topk_log[k]
reg_weight[0, box_target_inds] = local_heatmap / ct_div
sample['ttf_heatmap'] = heatmap
sample['ttf_box_target'] = box_target
sample['ttf_reg_weight'] = reg_weight
return samples
def draw_truncate_gaussian(self, heatmap, center, h_radius, w_radius):
h, w = 2 * h_radius + 1, 2 * w_radius + 1
sigma_x = w / 6
sigma_y = h / 6
gaussian = gaussian2D((h, w), sigma_x, sigma_y)
x, y = int(center[0]), int(center[1])
height, width = heatmap.shape[0:2]
left, right = min(x, w_radius), min(width - x, w_radius + 1)
top, bottom = min(y, h_radius), min(height - y, h_radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian[h_radius - top:h_radius + bottom, w_radius -
left:w_radius + right]
if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
heatmap[y - top:y + bottom, x - left:x + right] = np.maximum(
masked_heatmap, masked_gaussian)
return heatmap
@register_op
class Gt2Solov2TargetOp(BaseOperator):
"""Assign mask target and labels in SOLOv2 network.
Args:
num_grids (list): The list of feature map grids size.
scale_ranges (list): The list of mask boundary range.
coord_sigma (float): The coefficient of coordinate area length.
sampling_ratio (float): The ratio of down sampling.
"""
def __init__(self,
num_grids=[40, 36, 24, 16, 12],
scale_ranges=[[1, 96], [48, 192], [96, 384], [192, 768],
[384, 2048]],
coord_sigma=0.2,
sampling_ratio=4.0):
super(Gt2Solov2TargetOp, self).__init__()
self.num_grids = num_grids
self.scale_ranges = scale_ranges
self.coord_sigma = coord_sigma
self.sampling_ratio = sampling_ratio
def _scale_size(self, im, scale):
h, w = im.shape[:2]
new_size = (int(w * float(scale) + 0.5), int(h * float(scale) + 0.5))
resized_img = cv2.resize(
im, None, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
return resized_img
def __call__(self, samples, context=None):
sample_id = 0
max_ins_num = [0] * len(self.num_grids)
for sample in samples:
gt_bboxes_raw = sample['gt_bbox']
gt_labels_raw = sample['gt_class'] + 1
im_c, im_h, im_w = sample['image'].shape[:]
gt_masks_raw = sample['gt_segm'].astype(np.uint8)
mask_feat_size = [
int(im_h / self.sampling_ratio), int(im_w / self.sampling_ratio)
]
gt_areas = np.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
(gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
ins_ind_label_list = []
idx = 0
for (lower_bound, upper_bound), num_grid \
in zip(self.scale_ranges, self.num_grids):
hit_indices = ((gt_areas >= lower_bound) &
(gt_areas <= upper_bound)).nonzero()[0]
num_ins = len(hit_indices)
ins_label = []
grid_order = []
cate_label = np.zeros([num_grid, num_grid], dtype=np.int64)
ins_ind_label = np.zeros([num_grid**2], dtype=np.bool)
if num_ins == 0:
ins_label = np.zeros(
[1, mask_feat_size[0], mask_feat_size[1]],
dtype=np.uint8)
ins_ind_label_list.append(ins_ind_label)
sample['cate_label{}'.format(idx)] = cate_label.flatten()
sample['ins_label{}'.format(idx)] = ins_label
sample['grid_order{}'.format(idx)] = np.asarray(
[sample_id * num_grid * num_grid + 0], dtype=np.int32)
idx += 1
continue
gt_bboxes = gt_bboxes_raw[hit_indices]
gt_labels = gt_labels_raw[hit_indices]
gt_masks = gt_masks_raw[hit_indices, ...]
half_ws = 0.5 * (
gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.coord_sigma
half_hs = 0.5 * (
gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.coord_sigma
for seg_mask, gt_label, half_h, half_w in zip(
gt_masks, gt_labels, half_hs, half_ws):
if seg_mask.sum() == 0:
continue
# mass center
upsampled_size = (mask_feat_size[0] * 4,
mask_feat_size[1] * 4)
center_h, center_w = ndimage.measurements.center_of_mass(
seg_mask)
coord_w = int(
(center_w / upsampled_size[1]) // (1. / num_grid))
coord_h = int(
(center_h / upsampled_size[0]) // (1. / num_grid))
# left, top, right, down
top_box = max(0,
int(((center_h - half_h) / upsampled_size[0])
// (1. / num_grid)))
down_box = min(num_grid - 1,
int(((center_h + half_h) / upsampled_size[0])
// (1. / num_grid)))
left_box = max(0,
int(((center_w - half_w) / upsampled_size[1])
// (1. / num_grid)))
right_box = min(num_grid - 1,
int(((center_w + half_w) /
upsampled_size[1]) // (1. / num_grid)))
top = max(top_box, coord_h - 1)
down = min(down_box, coord_h + 1)
left = max(coord_w - 1, left_box)
right = min(right_box, coord_w + 1)
cate_label[top:(down + 1), left:(right + 1)] = gt_label
seg_mask = self._scale_size(
seg_mask, scale=1. / self.sampling_ratio)
for i in range(top, down + 1):
for j in range(left, right + 1):
label = int(i * num_grid + j)
cur_ins_label = np.zeros(
[mask_feat_size[0], mask_feat_size[1]],
dtype=np.uint8)
cur_ins_label[:seg_mask.shape[0], :seg_mask.shape[
1]] = seg_mask
ins_label.append(cur_ins_label)
ins_ind_label[label] = True
grid_order.append(sample_id * num_grid * num_grid +
label)
if ins_label == []:
ins_label = np.zeros(
[1, mask_feat_size[0], mask_feat_size[1]],
dtype=np.uint8)
ins_ind_label_list.append(ins_ind_label)
sample['cate_label{}'.format(idx)] = cate_label.flatten()
sample['ins_label{}'.format(idx)] = ins_label
sample['grid_order{}'.format(idx)] = np.asarray(
[sample_id * num_grid * num_grid + 0], dtype=np.int32)
else:
ins_label = np.stack(ins_label, axis=0)
ins_ind_label_list.append(ins_ind_label)
sample['cate_label{}'.format(idx)] = cate_label.flatten()
sample['ins_label{}'.format(idx)] = ins_label
sample['grid_order{}'.format(idx)] = np.asarray(
grid_order, dtype=np.int32)
assert len(grid_order) > 0
max_ins_num[idx] = max(
max_ins_num[idx],
sample['ins_label{}'.format(idx)].shape[0])
idx += 1
ins_ind_labels = np.concatenate([
ins_ind_labels_level_img
for ins_ind_labels_level_img in ins_ind_label_list
])
fg_num = np.sum(ins_ind_labels)
sample['fg_num'] = fg_num
sample_id += 1
sample.pop('is_crowd')
sample.pop('gt_class')
sample.pop('gt_bbox')
sample.pop('gt_poly')
sample.pop('gt_segm')
# padding batch
for data in samples:
for idx in range(len(self.num_grids)):
gt_ins_data = np.zeros(
[
max_ins_num[idx],
data['ins_label{}'.format(idx)].shape[1],
data['ins_label{}'.format(idx)].shape[2]
],
dtype=np.uint8)
gt_ins_data[0:data['ins_label{}'.format(idx)].shape[
0], :, :] = data['ins_label{}'.format(idx)]
gt_grid_order = np.zeros([max_ins_num[idx]], dtype=np.int32)
gt_grid_order[0:data['grid_order{}'.format(idx)].shape[
0]] = data['grid_order{}'.format(idx)]
data['ins_label{}'.format(idx)] = gt_ins_data
data['grid_order{}'.format(idx)] = gt_grid_order
return samples
......@@ -23,20 +23,20 @@ except Exception:
import cv2
import numpy as np
from .operator import register_op, BaseOperator
from .operators import register_op, BaseOperator, Resize
from .op_helper import jaccard_overlap, gaussian2D
from .operators import NormalizeImage, Permute
from scipy import ndimage
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = [
'PadBatch',
'RandomShape',
'PadMultiScaleTest',
'BatchRandomResize',
'Gt2YoloTarget',
'Gt2FCOSTarget',
'Gt2TTFTarget',
'Gt2Solov2Target',
]
......@@ -50,20 +50,17 @@ class PadBatch(BaseOperator):
height and width is divisible by `pad_to_stride`.
"""
def __init__(self, pad_to_stride=0, use_padded_im_info=True, pad_gt=False):
def __init__(self, pad_to_stride=0, pad_gt=False):
super(PadBatch, self).__init__()
self.pad_to_stride = pad_to_stride
self.use_padded_im_info = use_padded_im_info
self.pad_gt = pad_gt
def __call__(self, samples):
def __call__(self, samples, context=None):
"""
Args:
samples (list): a batch of sample, each is dict.
"""
coarsest_stride = self.pad_to_stride
#if coarsest_stride == 0:
# return samples
max_shape = np.array([data['image'].shape for data in samples]).max(
axis=0)
......@@ -81,8 +78,20 @@ class PadBatch(BaseOperator):
(im_c, max_shape[1], max_shape[2]), dtype=np.float32)
padding_im[:, :im_h, :im_w] = im
data['image'] = padding_im
if self.use_padded_im_info:
data['im_info'][:2] = max_shape[1:3]
if 'semantic' in data and data['semantic'] is not None:
semantic = data['semantic']
padding_sem = np.zeros(
(1, max_shape[1], max_shape[2]), dtype=np.float32)
padding_sem[:, :im_h, :im_w] = semantic
data['semantic'] = padding_sem
if 'gt_segm' in data and data['gt_segm'] is not None:
gt_segm = data['gt_segm']
padding_segm = np.zeros(
(gt_segm.shape[0], max_shape[1], max_shape[2]),
dtype=np.uint8)
padding_segm[:, :im_h, :im_w] = gt_segm
data['gt_segm'] = padding_segm
if self.pad_gt:
gt_num = []
if 'gt_poly' in data and data['gt_poly'] is not None and len(data[
......@@ -106,9 +115,10 @@ class PadBatch(BaseOperator):
gt_num_max = max(gt_num)
for i, data in enumerate(samples):
gt_box_data = np.zeros([gt_num_max, 4], dtype=np.float32)
gt_class_data = np.zeros([gt_num_max], dtype=np.int32)
gt_box_data = -np.ones([gt_num_max, 4], dtype=np.float32)
gt_class_data = -np.ones([gt_num_max], dtype=np.int32)
is_crowd_data = np.ones([gt_num_max], dtype=np.int32)
if pad_mask:
poly_num_max = max(poly_num)
poly_part_num_max = max(poly_part_num)
......@@ -135,99 +145,56 @@ class PadBatch(BaseOperator):
@register_op
class RandomShape(BaseOperator):
class BatchRandomResize(BaseOperator):
"""
Randomly reshape a batch. If random_inter is True, also randomly
select one an interpolation algorithm [cv2.INTER_NEAREST, cv2.INTER_LINEAR,
cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]. If random_inter is
False, use cv2.INTER_NEAREST.
Resize image to target size randomly. random target_size and interpolation method
Args:
sizes (list): list of int, random choose a size from these
random_inter (bool): whether to randomly interpolation, defalut true.
target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
keep_ratio (bool): whether keep_raio or not, default true
interp (int): the interpolation method
random_size (bool): whether random select target size of image
random_interp (bool): whether random select interpolation method
"""
def __init__(self, sizes=[], random_inter=False, resize_box=False):
super(RandomShape, self).__init__()
self.sizes = sizes
self.random_inter = random_inter
def __init__(self,
target_size,
keep_ratio,
interp=cv2.INTER_NEAREST,
random_size=True,
random_interp=False):
super(BatchRandomResize, self).__init__()
self.keep_ratio = keep_ratio
self.interps = [
cv2.INTER_NEAREST,
cv2.INTER_LINEAR,
cv2.INTER_AREA,
cv2.INTER_CUBIC,
cv2.INTER_LANCZOS4,
] if random_inter else []
self.resize_box = resize_box
def __call__(self, samples):
shape = np.random.choice(self.sizes)
method = np.random.choice(self.interps) if self.random_inter \
else cv2.INTER_NEAREST
for i in range(len(samples)):
im = samples[i]['image']
h, w = im.shape[:2]
scale_x = float(shape) / w
scale_y = float(shape) / h
im = cv2.resize(
im, None, None, fx=scale_x, fy=scale_y, interpolation=method)
samples[i]['image'] = im
if self.resize_box and 'gt_bbox' in samples[i] and len(samples[0][
'gt_bbox']) > 0:
scale_array = np.array([scale_x, scale_y] * 2, dtype=np.float32)
samples[i]['gt_bbox'] = np.clip(samples[i]['gt_bbox'] *
scale_array, 0,
float(shape) - 1)
return samples
@register_op
class PadMultiScaleTest(BaseOperator):
"""
Pad the image so they can be divisible by a stride for multi-scale testing.
Args:
pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
height and width is divisible by `pad_to_stride`.
"""
def __init__(self, pad_to_stride=0):
super(PadMultiScaleTest, self).__init__()
self.pad_to_stride = pad_to_stride
def __call__(self, samples):
coarsest_stride = self.pad_to_stride
if coarsest_stride == 0:
return samples
]
self.interp = interp
assert isinstance(target_size, (
int, Sequence)), "target_size must be int, list or tuple"
if random_size and not isinstance(target_size, list):
raise TypeError(
"Type of target_size is invalid when random_size is True. Must be List, now is {}".
format(type(target_size)))
self.target_size = target_size
self.random_size = random_size
self.random_interp = random_interp
def __call__(self, samples, context=None):
if self.random_size:
target_size = np.random.choice(self.target_size)
else:
target_size = self.target_size
batch_input = True
if not isinstance(samples, Sequence):
batch_input = False
samples = [samples]
if len(samples) != 1:
raise ValueError("Batch size must be 1 when using multiscale test, "
"but now batch size is {}".format(len(samples)))
for i in range(len(samples)):
sample = samples[i]
for k in sample.keys():
# hard code
if k.startswith('image'):
im = sample[k]
im_c, im_h, im_w = im.shape
max_h = int(
np.ceil(im_h / coarsest_stride) * coarsest_stride)
max_w = int(
np.ceil(im_w / coarsest_stride) * coarsest_stride)
padding_im = np.zeros(
(im_c, max_h, max_w), dtype=np.float32)
if self.random_interp:
interp = np.random.choice(self.interps)
else:
interp = self.interp
padding_im[:, :im_h, :im_w] = im
sample[k] = padding_im
info_name = 'im_info' if k == 'image' else 'im_info_' + k
# update im_info
sample[info_name][:2] = [max_h, max_w]
if not batch_input:
samples = samples[0]
return samples
resizer = Resize(target_size, keep_ratio=self.keep_ratio, interp=interp)
return resizer(samples, context=context)
@register_op
......@@ -250,7 +217,7 @@ class Gt2YoloTarget(BaseOperator):
self.num_classes = num_classes
self.iou_thresh = iou_thresh
def __call__(self, samples):
def __call__(self, samples, context=None):
assert len(self.anchor_masks) == len(self.downsample_ratios), \
"anchor_masks', and 'downsample_ratios' should have same length."
......@@ -261,6 +228,9 @@ class Gt2YoloTarget(BaseOperator):
im = sample['image']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
if 'gt_score' not in sample:
sample['gt_score'] = np.ones(
(gt_bbox.shape[0], 1), dtype=np.float32)
gt_score = sample['gt_score']
for i, (
mask, downsample_ratio
......@@ -336,6 +306,11 @@ class Gt2YoloTarget(BaseOperator):
# classification
target[idx, 6 + cls, gj, gi] = 1.
sample['target{}'.format(i)] = target
# remove useless gt_class and gt_score after target calculated
sample.pop('gt_class')
sample.pop('gt_score')
return samples
......@@ -434,22 +409,17 @@ class Gt2FCOSTarget(BaseOperator):
inside_gt_box = np.min(clipped_box_reg_targets, axis=2) > 0
return inside_gt_box
def __call__(self, samples):
def __call__(self, samples, context=None):
assert len(self.object_sizes_of_interest) == len(self.downsample_ratios), \
"object_sizes_of_interest', and 'downsample_ratios' should have same length."
for sample in samples:
# im, gt_bbox, gt_class = sample
# im, gt_bbox, gt_class, gt_score = sample
im = sample['image']
im_info = sample['im_info']
bboxes = sample['gt_bbox']
gt_class = sample['gt_class']
bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * np.floor(im_info[1]) / \
np.floor(im_info[1] / im_info[2])
bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * np.floor(im_info[0]) / \
np.floor(im_info[0] / im_info[2])
# calculate the locations
h, w = sample['image'].shape[1:3]
h, w = im.shape[1:3]
points, num_points_each_level = self._compute_points(w, h)
object_scale_exp = []
for i, num_pts in enumerate(num_points_each_level):
......@@ -544,6 +514,7 @@ class Gt2FCOSTarget(BaseOperator):
@register_op
class Gt2TTFTarget(BaseOperator):
__shared__ = ['num_classes']
"""
Gt2TTFTarget
Generate TTFNet targets by ground truth data
......@@ -555,13 +526,13 @@ class Gt2TTFTarget(BaseOperator):
0.54 by default.
"""
def __init__(self, num_classes, down_ratio=4, alpha=0.54):
def __init__(self, num_classes=80, down_ratio=4, alpha=0.54):
super(Gt2TTFTarget, self).__init__()
self.down_ratio = down_ratio
self.num_classes = num_classes
self.alpha = alpha
def __call__(self, samples):
def __call__(self, samples, context=None):
output_size = samples[0]['image'].shape[1]
feat_size = output_size // self.down_ratio
for sample in samples:
......@@ -636,3 +607,183 @@ class Gt2TTFTarget(BaseOperator):
heatmap[y - top:y + bottom, x - left:x + right] = np.maximum(
masked_heatmap, masked_gaussian)
return heatmap
@register_op
class Gt2Solov2Target(BaseOperator):
"""Assign mask target and labels in SOLOv2 network.
Args:
num_grids (list): The list of feature map grids size.
scale_ranges (list): The list of mask boundary range.
coord_sigma (float): The coefficient of coordinate area length.
sampling_ratio (float): The ratio of down sampling.
"""
def __init__(self,
num_grids=[40, 36, 24, 16, 12],
scale_ranges=[[1, 96], [48, 192], [96, 384], [192, 768],
[384, 2048]],
coord_sigma=0.2,
sampling_ratio=4.0):
super(Gt2Solov2Target, self).__init__()
self.num_grids = num_grids
self.scale_ranges = scale_ranges
self.coord_sigma = coord_sigma
self.sampling_ratio = sampling_ratio
def _scale_size(self, im, scale):
h, w = im.shape[:2]
new_size = (int(w * float(scale) + 0.5), int(h * float(scale) + 0.5))
resized_img = cv2.resize(
im, None, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
return resized_img
def __call__(self, samples, context=None):
sample_id = 0
max_ins_num = [0] * len(self.num_grids)
for sample in samples:
gt_bboxes_raw = sample['gt_bbox']
gt_labels_raw = sample['gt_class'] + 1
im_c, im_h, im_w = sample['image'].shape[:]
gt_masks_raw = sample['gt_segm'].astype(np.uint8)
mask_feat_size = [
int(im_h / self.sampling_ratio), int(im_w / self.sampling_ratio)
]
gt_areas = np.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
(gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
ins_ind_label_list = []
idx = 0
for (lower_bound, upper_bound), num_grid \
in zip(self.scale_ranges, self.num_grids):
hit_indices = ((gt_areas >= lower_bound) &
(gt_areas <= upper_bound)).nonzero()[0]
num_ins = len(hit_indices)
ins_label = []
grid_order = []
cate_label = np.zeros([num_grid, num_grid], dtype=np.int64)
ins_ind_label = np.zeros([num_grid**2], dtype=np.bool)
if num_ins == 0:
ins_label = np.zeros(
[1, mask_feat_size[0], mask_feat_size[1]],
dtype=np.uint8)
ins_ind_label_list.append(ins_ind_label)
sample['cate_label{}'.format(idx)] = cate_label.flatten()
sample['ins_label{}'.format(idx)] = ins_label
sample['grid_order{}'.format(idx)] = np.asarray(
[sample_id * num_grid * num_grid + 0], dtype=np.int32)
idx += 1
continue
gt_bboxes = gt_bboxes_raw[hit_indices]
gt_labels = gt_labels_raw[hit_indices]
gt_masks = gt_masks_raw[hit_indices, ...]
half_ws = 0.5 * (
gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.coord_sigma
half_hs = 0.5 * (
gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.coord_sigma
for seg_mask, gt_label, half_h, half_w in zip(
gt_masks, gt_labels, half_hs, half_ws):
if seg_mask.sum() == 0:
continue
# mass center
upsampled_size = (mask_feat_size[0] * 4,
mask_feat_size[1] * 4)
center_h, center_w = ndimage.measurements.center_of_mass(
seg_mask)
coord_w = int(
(center_w / upsampled_size[1]) // (1. / num_grid))
coord_h = int(
(center_h / upsampled_size[0]) // (1. / num_grid))
# left, top, right, down
top_box = max(0,
int(((center_h - half_h) / upsampled_size[0])
// (1. / num_grid)))
down_box = min(num_grid - 1,
int(((center_h + half_h) / upsampled_size[0])
// (1. / num_grid)))
left_box = max(0,
int(((center_w - half_w) / upsampled_size[1])
// (1. / num_grid)))
right_box = min(num_grid - 1,
int(((center_w + half_w) /
upsampled_size[1]) // (1. / num_grid)))
top = max(top_box, coord_h - 1)
down = min(down_box, coord_h + 1)
left = max(coord_w - 1, left_box)
right = min(right_box, coord_w + 1)
cate_label[top:(down + 1), left:(right + 1)] = gt_label
seg_mask = self._scale_size(
seg_mask, scale=1. / self.sampling_ratio)
for i in range(top, down + 1):
for j in range(left, right + 1):
label = int(i * num_grid + j)
cur_ins_label = np.zeros(
[mask_feat_size[0], mask_feat_size[1]],
dtype=np.uint8)
cur_ins_label[:seg_mask.shape[0], :seg_mask.shape[
1]] = seg_mask
ins_label.append(cur_ins_label)
ins_ind_label[label] = True
grid_order.append(sample_id * num_grid * num_grid +
label)
if ins_label == []:
ins_label = np.zeros(
[1, mask_feat_size[0], mask_feat_size[1]],
dtype=np.uint8)
ins_ind_label_list.append(ins_ind_label)
sample['cate_label{}'.format(idx)] = cate_label.flatten()
sample['ins_label{}'.format(idx)] = ins_label
sample['grid_order{}'.format(idx)] = np.asarray(
[sample_id * num_grid * num_grid + 0], dtype=np.int32)
else:
ins_label = np.stack(ins_label, axis=0)
ins_ind_label_list.append(ins_ind_label)
sample['cate_label{}'.format(idx)] = cate_label.flatten()
sample['ins_label{}'.format(idx)] = ins_label
sample['grid_order{}'.format(idx)] = np.asarray(
grid_order, dtype=np.int32)
assert len(grid_order) > 0
max_ins_num[idx] = max(
max_ins_num[idx],
sample['ins_label{}'.format(idx)].shape[0])
idx += 1
ins_ind_labels = np.concatenate([
ins_ind_labels_level_img
for ins_ind_labels_level_img in ins_ind_label_list
])
fg_num = np.sum(ins_ind_labels)
sample['fg_num'] = fg_num
sample_id += 1
sample.pop('is_crowd')
sample.pop('gt_class')
sample.pop('gt_bbox')
sample.pop('gt_poly')
sample.pop('gt_segm')
# padding batch
for data in samples:
for idx in range(len(self.num_grids)):
gt_ins_data = np.zeros(
[
max_ins_num[idx],
data['ins_label{}'.format(idx)].shape[1],
data['ins_label{}'.format(idx)].shape[2]
],
dtype=np.uint8)
gt_ins_data[0:data['ins_label{}'.format(idx)].shape[
0], :, :] = data['ins_label{}'.format(idx)]
gt_grid_order = np.zeros([max_ins_num[idx]], dtype=np.int32)
gt_grid_order[0:data['grid_order{}'.format(idx)].shape[
0]] = data['grid_order{}'.format(idx)]
data['ins_label{}'.format(idx)] = gt_ins_data
data['grid_order{}'.format(idx)] = gt_grid_order
return samples
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# function:
# operators to process sample,
# eg: decode/resize/crop image
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from numbers import Number, Integral
import uuid
import random
import math
import numpy as np
import os
import copy
import cv2
from PIL import Image, ImageEnhance, ImageDraw
from ppdet.core.workspace import serializable
from ppdet.modeling.layers import AnchorGrid
from .op_helper import (satisfy_sample_constraint, filter_and_process,
generate_sample_bbox, clip_bbox, data_anchor_sampling,
satisfy_sample_constraint_coverage, crop_image_sampling,
generate_sample_bbox_square, bbox_area_sampling,
is_poly, gaussian_radius, draw_gaussian)
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
registered_ops = []
def register_op(cls):
registered_ops.append(cls.__name__)
if not hasattr(BaseOperator, cls.__name__):
setattr(BaseOperator, cls.__name__, cls)
else:
raise KeyError("The {} class has been registered.".format(cls.__name__))
return serializable(cls)
class BboxError(ValueError):
pass
class ImageError(ValueError):
pass
class BaseOperator(object):
def __init__(self, name=None):
if name is None:
name = self.__class__.__name__
self._id = name + '_' + str(uuid.uuid4())[-6:]
def apply(self, sample, context=None):
""" Process a sample.
Args:
sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
context (dict): info about this sample processing
Returns:
result (dict): a processed sample
"""
return sample
def __call__(self, sample, context=None):
""" Process a sample.
Args:
sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
context (dict): info about this sample processing
Returns:
result (dict): a processed sample
"""
if isinstance(sample, Sequence):
for i in range(len(sample)):
sample[i] = self.apply(sample[i], context)
else:
sample = self.apply(sample, context)
return sample
def __str__(self):
return str(self._id)
@register_op
class DecodeOp(BaseOperator):
def __init__(self):
""" Transform the image data to numpy format following the rgb format
"""
super(DecodeOp, self).__init__()
def apply(self, sample, context=None):
""" load image if 'im_file' field is not empty but 'image' is"""
if 'image' not in sample:
with open(sample['im_file'], 'rb') as f:
sample['image'] = f.read()
sample.pop('im_file')
im = sample['image']
data = np.frombuffer(im, dtype='uint8')
im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
sample['image'] = im
if 'h' not in sample:
sample['h'] = im.shape[0]
elif sample['h'] != im.shape[0]:
logger.warn(
"The actual image height: {} is not equal to the "
"height: {} in annotation, and update sample['h'] by actual "
"image height.".format(im.shape[0], sample['h']))
sample['h'] = im.shape[0]
if 'w' not in sample:
sample['w'] = im.shape[1]
elif sample['w'] != im.shape[1]:
logger.warn(
"The actual image width: {} is not equal to the "
"width: {} in annotation, and update sample['w'] by actual "
"image width.".format(im.shape[1], sample['w']))
sample['w'] = im.shape[1]
sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
return sample
@register_op
class PermuteOp(BaseOperator):
def __init__(self):
"""
Change the channel to be (C, H, W)
"""
super(PermuteOp, self).__init__()
def apply(self, sample, context=None):
im = sample['image']
im = im.transpose((2, 0, 1))
sample['image'] = im
return sample
@register_op
class LightingOp(BaseOperator):
"""
Lighting the imagen by eigenvalues and eigenvectors
Args:
eigval (list): eigenvalues
eigvec (list): eigenvectors
alphastd (float): random weight of lighting, 0.1 by default
"""
def __init__(self, eigval, eigvec, alphastd=0.1):
super(LightingOp, self).__init__()
self.alphastd = alphastd
self.eigval = np.array(eigval).astype('float32')
self.eigvec = np.array(eigvec).astype('float32')
def apply(self, sample, context=None):
alpha = np.random.normal(scale=self.alphastd, size=(3, ))
sample['image'] += np.dot(self.eigvec, self.eigval * alpha)
return sample
@register_op
class RandomErasingImageOp(BaseOperator):
def __init__(self, prob=0.5, lower=0.02, higher=0.4, aspect_ratio=0.3):
"""
Random Erasing Data Augmentation, see https://arxiv.org/abs/1708.04896
Args:
prob (float): probability to carry out random erasing
lower (float): lower limit of the erasing area ratio
heigher (float): upper limit of the erasing area ratio
aspect_ratio (float): aspect ratio of the erasing region
"""
super(RandomErasingImageOp, self).__init__()
self.prob = prob
self.lower = lower
self.heigher = heigher
self.aspect_ratio = aspect_ratio
def apply(self, sample):
gt_bbox = sample['gt_bbox']
im = sample['image']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image is not a numpy array.".format(self))
if len(im.shape) != 3:
raise ImageError("{}: image is not 3-dimensional.".format(self))
for idx in range(gt_bbox.shape[0]):
if self.prob <= np.random.rand():
continue
x1, y1, x2, y2 = gt_bbox[idx, :]
w_bbox = x2 - x1 + 1
h_bbox = y2 - y1 + 1
area = w_bbox * h_bbox
target_area = random.uniform(self.lower, self.higher) * area
aspect_ratio = random.uniform(self.aspect_ratio,
1 / self.aspect_ratio)
h = int(round(math.sqrt(target_area * aspect_ratio)))
w = int(round(math.sqrt(target_area / aspect_ratio)))
if w < w_bbox and h < h_bbox:
off_y1 = random.randint(0, int(h_bbox - h))
off_x1 = random.randint(0, int(w_bbox - w))
im[int(y1 + off_y1):int(y1 + off_y1 + h), int(x1 + off_x1):int(
x1 + off_x1 + w), :] = 0
sample['image'] = im
return sample
@register_op
class NormalizeImageOp(BaseOperator):
def __init__(self, mean=[0.485, 0.456, 0.406], std=[1, 1, 1],
is_scale=True):
"""
Args:
mean (list): the pixel mean
std (list): the pixel variance
"""
super(NormalizeImageOp, self).__init__()
self.mean = mean
self.std = std
self.is_scale = is_scale
if not (isinstance(self.mean, list) and isinstance(self.std, list) and
isinstance(self.is_scale, bool)):
raise TypeError("{}: input type is invalid.".format(self))
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise ValueError('{}: std is invalid!'.format(self))
def apply(self, sample, context=None):
"""Normalize the image.
Operators:
1.(optional) Scale the image to [0,1]
2. Each pixel minus mean and is divided by std
"""
im = sample['image']
im = im.astype(np.float32, copy=False)
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
if self.is_scale:
im = im / 255.0
im -= mean
im /= std
sample['image'] = im
return sample
@register_op
class GridMask(BaseOperator):
def __init__(self,
use_h=True,
use_w=True,
rotate=1,
offset=False,
ratio=0.5,
mode=1,
prob=0.7,
upper_iter=360000):
"""
GridMask Data Augmentation, see https://arxiv.org/abs/2001.04086
Args:
use_h (bool): whether to mask vertically
use_w (boo;): whether to mask horizontally
rotate (float): angle for the mask to rotate
offset (float): mask offset
ratio (float): mask ratio
mode (int): gridmask mode
prob (float): max probability to carry out gridmask
upper_iter (int): suggested to be equal to global max_iter
"""
super(GridMask, self).__init__()
self.use_h = use_h
self.use_w = use_w
self.rotate = rotate
self.offset = offset
self.ratio = ratio
self.mode = mode
self.prob = prob
self.upper_iter = upper_iter
from .gridmask_utils import GridMask
self.gridmask_op = GridMask(
use_h,
use_w,
rotate=rotate,
offset=offset,
ratio=ratio,
mode=mode,
prob=prob,
upper_iter=upper_iter)
def apply(self, sample, context=None):
sample['image'] = self.gridmask_op(sample['image'], sample['curr_iter'])
return sample
@register_op
class RandomDistortOp(BaseOperator):
"""Random color distortion.
Args:
hue (list): hue settings. in [lower, upper, probability] format.
saturation (list): saturation settings. in [lower, upper, probability] format.
contrast (list): contrast settings. in [lower, upper, probability] format.
brightness (list): brightness settings. in [lower, upper, probability] format.
random_apply (bool): whether to apply in random (yolo) or fixed (SSD)
order.
count (int): the number of doing distrot
random_channel (bool): whether to swap channels randomly
"""
def __init__(self,
hue=[-18, 18, 0.5],
saturation=[0.5, 1.5, 0.5],
contrast=[0.5, 1.5, 0.5],
brightness=[0.5, 1.5, 0.5],
random_apply=True,
count=4,
random_channel=False):
super(RandomDistortOp, self).__init__()
self.hue = hue
self.saturation = saturation
self.contrast = contrast
self.brightness = brightness
self.random_apply = random_apply
self.count = count
self.random_channel = random_channel
def apply_hue(self, img):
low, high, prob = self.hue
if np.random.uniform(0., 1.) < prob:
return img
img = img.astype(np.float32)
# it works, but result differ from HSV version
delta = np.random.uniform(low, high)
u = np.cos(delta * np.pi)
w = np.sin(delta * np.pi)
bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
[0.211, -0.523, 0.311]])
ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
[1.0, -1.107, 1.705]])
t = np.dot(np.dot(ityiq, bt), tyiq).T
img = np.dot(img, t)
return img
def apply_saturation(self, img):
low, high, prob = self.saturation
if np.random.uniform(0., 1.) < prob:
return img
delta = np.random.uniform(low, high)
img = img.astype(np.float32)
# it works, but result differ from HSV version
gray = img * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
gray = gray.sum(axis=2, keepdims=True)
gray *= (1.0 - delta)
img *= delta
img += gray
return img
def apply_contrast(self, img):
low, high, prob = self.contrast
if np.random.uniform(0., 1.) < prob:
return img
delta = np.random.uniform(low, high)
img = img.astype(np.float32)
img *= delta
return img
def apply_brightness(self, img):
low, high, prob = self.brightness
if np.random.uniform(0., 1.) < prob:
return img
delta = np.random.uniform(low, high)
img = img.astype(np.float32)
img += delta
return img
def apply(self, sample, context=None):
img = sample['image']
if self.random_apply:
functions = [
self.apply_brightness, self.apply_contrast,
self.apply_saturation, self.apply_hue
]
distortions = np.random.permutation(functions)[:self.count]
for func in distortions:
img = func(img)
sample['image'] = img
return sample
img = self.apply_brightness(img)
mode = np.random.randint(0, 2)
if mode:
img = self.apply_contrast(img)
img = self.apply_saturation(img)
img = self.apply_hue(img)
if not mode:
img = self.apply_contrast(img)
if self.random_channel:
if np.random.randint(0, 2):
img = img[..., np.random.permutation(3)]
sample['image'] = img
return sample
@register_op
class AutoAugmentOp(BaseOperator):
def __init__(self, autoaug_type="v1"):
"""
Args:
autoaug_type (str): autoaug type, support v0, v1, v2, v3, test
"""
super(AutoAugmentOp, self).__init__()
self.autoaug_type = autoaug_type
def apply(self, sample, context=None):
"""
Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172
"""
im = sample['image']
gt_bbox = sample['gt_bbox']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image is not a numpy array.".format(self))
if len(im.shape) != 3:
raise ImageError("{}: image is not 3-dimensional.".format(self))
if len(gt_bbox) == 0:
return sample
height, width, _ = im.shape
norm_gt_bbox = np.ones_like(gt_bbox, dtype=np.float32)
norm_gt_bbox[:, 0] = gt_bbox[:, 1] / float(height)
norm_gt_bbox[:, 1] = gt_bbox[:, 0] / float(width)
norm_gt_bbox[:, 2] = gt_bbox[:, 3] / float(height)
norm_gt_bbox[:, 3] = gt_bbox[:, 2] / float(width)
from .autoaugment_utils import distort_image_with_autoaugment
im, norm_gt_bbox = distort_image_with_autoaugment(im, norm_gt_bbox,
self.autoaug_type)
gt_bbox[:, 0] = norm_gt_bbox[:, 1] * float(width)
gt_bbox[:, 1] = norm_gt_bbox[:, 0] * float(height)
gt_bbox[:, 2] = norm_gt_bbox[:, 3] * float(width)
gt_bbox[:, 3] = norm_gt_bbox[:, 2] * float(height)
sample['image'] = im
sample['gt_bbox'] = gt_bbox
return sample
@register_op
class RandomFlipOp(BaseOperator):
def __init__(self, prob=0.5):
"""
Args:
prob (float): the probability of flipping image
"""
super(RandomFlipOp, self).__init__()
self.prob = prob
if not (isinstance(self.prob, float)):
raise TypeError("{}: input type is invalid.".format(self))
def apply_segm(self, segms, height, width):
def _flip_poly(poly, width):
flipped_poly = np.array(poly)
flipped_poly[0::2] = width - np.array(poly[0::2])
return flipped_poly.tolist()
def _flip_rle(rle, height, width):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, height, width)
mask = mask_util.decode(rle)
mask = mask[:, ::-1]
rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
return rle
flipped_segms = []
for segm in segms:
if is_poly(segm):
# Polygon format
flipped_segms.append([_flip_poly(poly, width) for poly in segm])
else:
# RLE format
import pycocotools.mask as mask_util
flipped_segms.append(_flip_rle(segm, height, width))
return flipped_segms
def apply_keypoint(self, gt_keypoint, width):
for i in range(gt_keypoint.shape[1]):
if i % 2 == 0:
old_x = gt_keypoint[:, i].copy()
gt_keypoint[:, i] = width - old_x
return gt_keypoint
def apply_image(self, image):
return image[:, ::-1, :]
def apply_bbox(self, bbox, width):
oldx1 = bbox[:, 0].copy()
oldx2 = bbox[:, 2].copy()
bbox[:, 0] = width - oldx2
bbox[:, 2] = width - oldx1
return bbox
def apply(self, sample, context=None):
"""Filp the image and bounding box.
Operators:
1. Flip the image numpy.
2. Transform the bboxes' x coordinates.
(Must judge whether the coordinates are normalized!)
3. Transform the segmentations' x coordinates.
(Must judge whether the coordinates are normalized!)
Output:
sample: the image, bounding box and segmentation part
in sample are flipped.
"""
if np.random.uniform(0, 1) < self.prob:
im = sample['image']
height, width = im.shape[:2]
im = self.apply_image(im)
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], width)
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], height,
width)
if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
sample['gt_keypoint'] = self.apply_keypoint(
sample['gt_keypoint'], width)
if 'semantic' in sample and sample['semantic']:
sample['semantic'] = sample['semantic'][:, ::-1]
if 'gt_segm' in sample and sample['gt_segm'].any():
sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
sample['flipped'] = True
sample['image'] = im
return sample
@register_op
class ResizeOp(BaseOperator):
def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
"""
Resize image to target size. if keep_ratio is True,
resize the image's long side to the maximum of target_size
if keep_ratio is False, resize the image to target size(h, w)
Args:
target_size (int|list): image target size
keep_ratio (bool): whether keep_ratio or not, default true
interp (int): the interpolation method
"""
super(ResizeOp, self).__init__()
self.keep_ratio = keep_ratio
self.interp = interp
if not isinstance(target_size, (Integral, Sequence)):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
format(type(target_size)))
if isinstance(target_size, Integral):
target_size = [target_size, target_size]
self.target_size = target_size
def apply_image(self, image, scale):
im_scale_x, im_scale_y = scale
return cv2.resize(
image,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
def apply_bbox(self, bbox, scale, size):
im_scale_x, im_scale_y = scale
resize_w, resize_h = size
bbox[:, 0::2] *= im_scale_x
bbox[:, 1::2] *= im_scale_y
bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w)
bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
return bbox
def apply_segm(self, segms, im_size, scale):
def _resize_poly(poly, im_scale_x, im_scale_y):
resized_poly = np.array(poly)
resized_poly[0::2] *= im_scale_x
resized_poly[1::2] *= im_scale_y
return resized_poly.tolist()
def _resize_rle(rle, im_h, im_w, im_scale_x, im_scale_y):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, im_h, im_w)
mask = mask_util.decode(rle)
mask = cv2.resize(
image,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
return rle
im_h, im_w = im_size
im_scale_x, im_scale_y = scale
resized_segms = []
for segm in segms:
if is_poly(segm):
# Polygon format
resized_segms.append([
_resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
])
else:
# RLE format
import pycocotools.mask as mask_util
resized_segms.append(
_resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))
return resized_segms
def apply(self, sample, context=None):
""" Resize the image numpy.
"""
im = sample['image']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.".format(self))
if len(im.shape) != 3:
raise ImageError('{}: image is not 3-dimensional.'.format(self))
# apply image
im_shape = im.shape
if self.keep_ratio:
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
target_size_min = np.min(self.target_size)
target_size_max = np.max(self.target_size)
im_scale = min(target_size_min / im_size_min,
target_size_max / im_size_max)
resize_h = im_scale * float(im_shape[0])
resize_w = im_scale * float(im_shape[1])
im_scale_x = im_scale
im_scale_y = im_scale
else:
resize_h, resize_w = self.target_size
im_scale_y = resize_h / im_shape[0]
im_scale_x = resize_w / im_shape[1]
im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
sample['image'] = im
sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
if 'scale_factor' in sample:
scale_factor = sample['scale_factor']
sample['scale_factor'] = np.asarray(
[scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
dtype=np.float32)
else:
sample['scale_factor'] = np.asarray(
[im_scale_y, im_scale_x], dtype=np.float32)
# apply bbox
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'],
[im_scale_x, im_scale_y],
[resize_w, resize_h])
# apply polygon
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
[im_scale_x, im_scale_y])
# apply semantic
if 'semantic' in sample and sample['semantic']:
semantic = sample['semantic']
semantic = cv2.resize(
semantic.astype('float32'),
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
semantic = np.asarray(semantic).astype('int32')
semantic = np.expand_dims(semantic, 0)
sample['semantic'] = semantic
# apply gt_segm
if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
masks = [
cv2.resize(
gt_segm,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=cv2.INTER_NEAREST)
for gt_segm in sample['gt_segm']
]
sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
return sample
@register_op
class MultiscaleTestResizeOp(BaseOperator):
def __init__(self,
origin_target_size=[800, 1333],
target_size=[],
interp=cv2.INTER_LINEAR,
use_flip=True):
"""
Rescale image to the each size in target size, and capped at max_size.
Args:
origin_target_size (list): origin target size of image
target_size (list): A list of target sizes of image.
interp (int): the interpolation method.
use_flip (bool): whether use flip augmentation.
"""
super(MultiscaleTestResizeOp, self).__init__()
self.interp = interp
self.use_flip = use_flip
if not isinstance(target_size, Sequence):
raise TypeError(
"Type of target_size is invalid. Must be List or Tuple, now is {}".
format(type(target_size)))
self.target_size = target_size
if not isinstance(origin_target_size, Sequence):
raise TypeError(
"Type of origin_target_size is invalid. Must be List or Tuple, now is {}".
format(type(origin_target_size)))
self.origin_target_size = origin_target_size
def apply(self, sample, context=None):
""" Resize the image numpy for multi-scale test.
"""
samples = []
resizer = ResizeOp(
self.origin_target_size, keep_ratio=True, interp=self.interp)
samples.append(resizer(sample.copy(), context))
if self.use_flip:
flipper = RandomFlipOp(1.1)
samples.append(flipper(sample.copy(), context=context))
for size in self.target_size:
resizer = ResizeOp(size, keep_ratio=True, interp=self.interp)
samples.append(resizer(sample.copy(), context))
return samples
@register_op
class RandomResizeOp(BaseOperator):
def __init__(self,
target_size,
keep_ratio=True,
interp=cv2.INTER_LINEAR,
random_size=True,
random_interp=False):
"""
Resize image to target size randomly. random target_size and interpolation method
Args:
target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
keep_ratio (bool): whether keep_raio or not, default true
interp (int): the interpolation method
random_size (bool): whether random select target size of image
random_interp (bool): whether random select interpolation method
"""
super(RandomResizeOp, self).__init__()
self.keep_ratio = keep_ratio
self.interp = interp
self.interps = [
cv2.INTER_NEAREST,
cv2.INTER_LINEAR,
cv2.INTER_AREA,
cv2.INTER_CUBIC,
cv2.INTER_LANCZOS4,
]
assert isinstance(target_size, (
Integral, Sequence)), "target_size must be Integer, List or Tuple"
if random_size and not isinstance(target_size, Sequence):
raise TypeError(
"Type of target_size is invalid when random_size is True. Must be List or Tuple, now is {}".
format(type(target_size)))
self.target_size = target_size
self.random_size = random_size
self.random_interp = random_interp
def apply(self, sample, context=None):
""" Resize the image numpy.
"""
if self.random_size:
target_size = random.choice(self.target_size)
else:
target_size = self.target_size
if self.random_interp:
interp = random.choice(self.interps)
else:
interp = self.interp
resizer = ResizeOp(target_size, self.keep_ratio, interp)
return resizer(sample, context=context)
@register_op
class RandomExpandOp(BaseOperator):
"""Random expand the canvas.
Args:
ratio (float): maximum expansion ratio.
prob (float): probability to expand.
fill_value (list): color value used to fill the canvas. in RGB order.
"""
def __init__(self, ratio=4., prob=0.5, fill_value=(127.5, 127.5, 127.5)):
super(RandomExpandOp, self).__init__()
assert ratio > 1.01, "expand ratio must be larger than 1.01"
self.ratio = ratio
self.prob = prob
assert isinstance(fill_value, (Number, Sequence)), \
"fill value must be either float or sequence"
if isinstance(fill_value, Number):
fill_value = (fill_value, ) * 3
if not isinstance(fill_value, tuple):
fill_value = tuple(fill_value)
self.fill_value = fill_value
def apply(self, sample, context=None):
if np.random.uniform(0., 1.) < self.prob:
return sample
im = sample['image']
height, width = im.shape[:2]
ratio = np.random.uniform(1., self.ratio)
h = int(height * ratio)
w = int(width * ratio)
if not h > height or not w > width:
return sample
y = np.random.randint(0, h - height)
x = np.random.randint(0, w - width)
offsets, size = [x, y], [h, w]
pad = Pad(size,
pad_mode=-1,
offsets=offsets,
fill_value=self.fill_value)
return pad(sample, context=context)
@register_op
class CropWithSampling(BaseOperator):
def __init__(self, batch_sampler, satisfy_all=False, avoid_no_bbox=True):
"""
Args:
batch_sampler (list): Multiple sets of different
parameters for cropping.
satisfy_all (bool): whether all boxes must satisfy.
e.g.[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
[max sample, max trial, min scale, max scale,
min aspect ratio, max aspect ratio,
min overlap, max overlap]
avoid_no_bbox (bool): whether to to avoid the
situation where the box does not appear.
"""
super(CropWithSampling, self).__init__()
self.batch_sampler = batch_sampler
self.satisfy_all = satisfy_all
self.avoid_no_bbox = avoid_no_bbox
def apply(self, sample, context):
"""
Crop the image and modify bounding box.
Operators:
1. Scale the image width and height.
2. Crop the image according to a radom sample.
3. Rescale the bounding box.
4. Determine if the new bbox is satisfied in the new image.
Returns:
sample: the image, bounding box are replaced.
"""
assert 'image' in sample, "image data not found"
im = sample['image']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
im_height, im_width = im.shape[:2]
gt_score = None
if 'gt_score' in sample:
gt_score = sample['gt_score']
sampled_bbox = []
gt_bbox = gt_bbox.tolist()
for sampler in self.batch_sampler:
found = 0
for i in range(sampler[1]):
if found >= sampler[0]:
break
sample_bbox = generate_sample_bbox(sampler)
if satisfy_sample_constraint(sampler, sample_bbox, gt_bbox,
self.satisfy_all):
sampled_bbox.append(sample_bbox)
found = found + 1
im = np.array(im)
while sampled_bbox:
idx = int(np.random.uniform(0, len(sampled_bbox)))
sample_bbox = sampled_bbox.pop(idx)
sample_bbox = clip_bbox(sample_bbox)
crop_bbox, crop_class, crop_score = \
filter_and_process(sample_bbox, gt_bbox, gt_class, scores=gt_score)
if self.avoid_no_bbox:
if len(crop_bbox) < 1:
continue
xmin = int(sample_bbox[0] * im_width)
xmax = int(sample_bbox[2] * im_width)
ymin = int(sample_bbox[1] * im_height)
ymax = int(sample_bbox[3] * im_height)
im = im[ymin:ymax, xmin:xmax]
sample['image'] = im
sample['gt_bbox'] = crop_bbox
sample['gt_class'] = crop_class
sample['gt_score'] = crop_score
return sample
return sample
@register_op
class CropWithDataAchorSampling(BaseOperator):
def __init__(self,
batch_sampler,
anchor_sampler=None,
target_size=None,
das_anchor_scales=[16, 32, 64, 128],
sampling_prob=0.5,
min_size=8.,
avoid_no_bbox=True):
"""
Args:
anchor_sampler (list): anchor_sampling sets of different
parameters for cropping.
batch_sampler (list): Multiple sets of different
parameters for cropping.
e.g.[[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]]
[[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]]
[max sample, max trial, min scale, max scale,
min aspect ratio, max aspect ratio,
min overlap, max overlap, min coverage, max coverage]
target_size (bool): target image size.
das_anchor_scales (list[float]): a list of anchor scales in data
anchor smapling.
min_size (float): minimum size of sampled bbox.
avoid_no_bbox (bool): whether to to avoid the
situation where the box does not appear.
"""
super(CropWithDataAchorSampling, self).__init__()
self.anchor_sampler = anchor_sampler
self.batch_sampler = batch_sampler
self.target_size = target_size
self.sampling_prob = sampling_prob
self.min_size = min_size
self.avoid_no_bbox = avoid_no_bbox
self.das_anchor_scales = np.array(das_anchor_scales)
def apply(self, sample, context):
"""
Crop the image and modify bounding box.
Operators:
1. Scale the image width and height.
2. Crop the image according to a radom sample.
3. Rescale the bounding box.
4. Determine if the new bbox is satisfied in the new image.
Returns:
sample: the image, bounding box are replaced.
"""
assert 'image' in sample, "image data not found"
im = sample['image']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
image_height, image_width = im.shape[:2]
gt_score = None
if 'gt_score' in sample:
gt_score = sample['gt_score']
sampled_bbox = []
gt_bbox = gt_bbox.tolist()
prob = np.random.uniform(0., 1.)
if prob > self.sampling_prob: # anchor sampling
assert self.anchor_sampler
for sampler in self.anchor_sampler:
found = 0
for i in range(sampler[1]):
if found >= sampler[0]:
break
sample_bbox = data_anchor_sampling(
gt_bbox, image_width, image_height,
self.das_anchor_scales, self.target_size)
if sample_bbox == 0:
break
if satisfy_sample_constraint_coverage(sampler, sample_bbox,
gt_bbox):
sampled_bbox.append(sample_bbox)
found = found + 1
im = np.array(im)
while sampled_bbox:
idx = int(np.random.uniform(0, len(sampled_bbox)))
sample_bbox = sampled_bbox.pop(idx)
if 'gt_keypoint' in sample.keys():
keypoints = (sample['gt_keypoint'],
sample['keypoint_ignore'])
crop_bbox, crop_class, crop_score, gt_keypoints = \
filter_and_process(sample_bbox, gt_bbox, gt_class,
scores=gt_score,
keypoints=keypoints)
else:
crop_bbox, crop_class, crop_score = filter_and_process(
sample_bbox, gt_bbox, gt_class, scores=gt_score)
crop_bbox, crop_class, crop_score = bbox_area_sampling(
crop_bbox, crop_class, crop_score, self.target_size,
self.min_size)
if self.avoid_no_bbox:
if len(crop_bbox) < 1:
continue
im = crop_image_sampling(im, sample_bbox, image_width,
image_height, self.target_size)
sample['image'] = im
sample['gt_bbox'] = crop_bbox
sample['gt_class'] = crop_class
sample['gt_score'] = crop_score
if 'gt_keypoint' in sample.keys():
sample['gt_keypoint'] = gt_keypoints[0]
sample['keypoint_ignore'] = gt_keypoints[1]
return sample
return sample
else:
for sampler in self.batch_sampler:
found = 0
for i in range(sampler[1]):
if found >= sampler[0]:
break
sample_bbox = generate_sample_bbox_square(
sampler, image_width, image_height)
if satisfy_sample_constraint_coverage(sampler, sample_bbox,
gt_bbox):
sampled_bbox.append(sample_bbox)
found = found + 1
im = np.array(im)
while sampled_bbox:
idx = int(np.random.uniform(0, len(sampled_bbox)))
sample_bbox = sampled_bbox.pop(idx)
sample_bbox = clip_bbox(sample_bbox)
if 'gt_keypoint' in sample.keys():
keypoints = (sample['gt_keypoint'],
sample['keypoint_ignore'])
crop_bbox, crop_class, crop_score, gt_keypoints = \
filter_and_process(sample_bbox, gt_bbox, gt_class,
scores=gt_score,
keypoints=keypoints)
else:
crop_bbox, crop_class, crop_score = filter_and_process(
sample_bbox, gt_bbox, gt_class, scores=gt_score)
# sampling bbox according the bbox area
crop_bbox, crop_class, crop_score = bbox_area_sampling(
crop_bbox, crop_class, crop_score, self.target_size,
self.min_size)
if self.avoid_no_bbox:
if len(crop_bbox) < 1:
continue
xmin = int(sample_bbox[0] * image_width)
xmax = int(sample_bbox[2] * image_width)
ymin = int(sample_bbox[1] * image_height)
ymax = int(sample_bbox[3] * image_height)
im = im[ymin:ymax, xmin:xmax]
sample['image'] = im
sample['gt_bbox'] = crop_bbox
sample['gt_class'] = crop_class
sample['gt_score'] = crop_score
if 'gt_keypoint' in sample.keys():
sample['gt_keypoint'] = gt_keypoints[0]
sample['keypoint_ignore'] = gt_keypoints[1]
return sample
return sample
@register_op
class RandomCropOp(BaseOperator):
"""Random crop image and bboxes.
Args:
aspect_ratio (list): aspect ratio of cropped region.
in [min, max] format.
thresholds (list): iou thresholds for decide a valid bbox crop.
scaling (list): ratio between a cropped region and the original image.
in [min, max] format.
num_attempts (int): number of tries before giving up.
allow_no_crop (bool): allow return without actually cropping them.
cover_all_box (bool): ensure all bboxes are covered in the final crop.
is_mask_crop(bool): whether crop the segmentation.
"""
def __init__(self,
aspect_ratio=[.5, 2.],
thresholds=[.0, .1, .3, .5, .7, .9],
scaling=[.3, 1.],
num_attempts=50,
allow_no_crop=True,
cover_all_box=False,
is_mask_crop=False):
super(RandomCropOp, self).__init__()
self.aspect_ratio = aspect_ratio
self.thresholds = thresholds
self.scaling = scaling
self.num_attempts = num_attempts
self.allow_no_crop = allow_no_crop
self.cover_all_box = cover_all_box
self.is_mask_crop = is_mask_crop
def crop_segms(self, segms, valid_ids, crop, height, width):
def _crop_poly(segm, crop):
xmin, ymin, xmax, ymax = crop
crop_coord = [xmin, ymin, xmin, ymax, xmax, ymax, xmax, ymin]
crop_p = np.array(crop_coord).reshape(4, 2)
crop_p = Polygon(crop_p)
crop_segm = list()
for poly in segm:
poly = np.array(poly).reshape(len(poly) // 2, 2)
polygon = Polygon(poly)
if not polygon.is_valid:
exterior = polygon.exterior
multi_lines = exterior.intersection(exterior)
polygons = shapely.ops.polygonize(multi_lines)
polygon = MultiPolygon(polygons)
multi_polygon = list()
if isinstance(polygon, MultiPolygon):
multi_polygon = copy.deepcopy(polygon)
else:
multi_polygon.append(copy.deepcopy(polygon))
for per_polygon in multi_polygon:
inter = per_polygon.intersection(crop_p)
if not inter:
continue
if isinstance(inter, (MultiPolygon, GeometryCollection)):
for part in inter:
if not isinstance(part, Polygon):
continue
part = np.squeeze(
np.array(part.exterior.coords[:-1]).reshape(1,
-1))
part[0::2] -= xmin
part[1::2] -= ymin
crop_segm.append(part.tolist())
elif isinstance(inter, Polygon):
crop_poly = np.squeeze(
np.array(inter.exterior.coords[:-1]).reshape(1, -1))
crop_poly[0::2] -= xmin
crop_poly[1::2] -= ymin
crop_segm.append(crop_poly.tolist())
else:
continue
return crop_segm
def _crop_rle(rle, crop, height, width):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, height, width)
mask = mask_util.decode(rle)
mask = mask[crop[1]:crop[3], crop[0]:crop[2]]
rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
return rle
crop_segms = []
for id in valid_ids:
segm = segms[id]
if is_poly(segm):
import copy
import shapely.ops
from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
logging.getLogger("shapely").setLevel(logging.WARNING)
# Polygon format
crop_segms.append(_crop_poly(segm, crop))
else:
# RLE format
import pycocotools.mask as mask_util
crop_segms.append(_crop_rle(segm, crop, height, width))
return crop_segms
def apply(self, sample, context=None):
if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
return sample
h, w = sample['image'].shape[:2]
gt_bbox = sample['gt_bbox']
# NOTE Original method attempts to generate one candidate for each
# threshold then randomly sample one from the resulting list.
# Here a short circuit approach is taken, i.e., randomly choose a
# threshold and attempt to find a valid crop, and simply return the
# first one found.
# The probability is not exactly the same, kinda resembling the
# "Monty Hall" problem. Actually carrying out the attempts will affect
# observability (just like opening doors in the "Monty Hall" game).
thresholds = list(self.thresholds)
if self.allow_no_crop:
thresholds.append('no_crop')
np.random.shuffle(thresholds)
for thresh in thresholds:
if thresh == 'no_crop':
return sample
found = False
for i in range(self.num_attempts):
scale = np.random.uniform(*self.scaling)
if self.aspect_ratio is not None:
min_ar, max_ar = self.aspect_ratio
aspect_ratio = np.random.uniform(
max(min_ar, scale**2), min(max_ar, scale**-2))
h_scale = scale / np.sqrt(aspect_ratio)
w_scale = scale * np.sqrt(aspect_ratio)
else:
h_scale = np.random.uniform(*self.scaling)
w_scale = np.random.uniform(*self.scaling)
crop_h = h * h_scale
crop_w = w * w_scale
if self.aspect_ratio is None:
if crop_h / crop_w < 0.5 or crop_h / crop_w > 2.0:
continue
crop_h = int(crop_h)
crop_w = int(crop_w)
crop_y = np.random.randint(0, h - crop_h)
crop_x = np.random.randint(0, w - crop_w)
crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
iou = self._iou_matrix(
gt_bbox, np.array(
[crop_box], dtype=np.float32))
if iou.max() < thresh:
continue
if self.cover_all_box and iou.min() < thresh:
continue
cropped_box, valid_ids = self._crop_box_with_center_constraint(
gt_bbox, np.array(
crop_box, dtype=np.float32))
if valid_ids.size > 0:
found = True
break
if found:
if self.is_mask_crop and 'gt_poly' in sample and len(sample[
'gt_poly']) > 0:
crop_polys = self.crop_segms(
sample['gt_poly'],
valid_ids,
np.array(
crop_box, dtype=np.int64),
h,
w)
if [] in crop_polys:
delete_id = list()
valid_polys = list()
for id, crop_poly in enumerate(crop_polys):
if crop_poly == []:
delete_id.append(id)
else:
valid_polys.append(crop_poly)
valid_ids = np.delete(valid_ids, delete_id)
if len(valid_polys) == 0:
return sample
sample['gt_poly'] = valid_polys
else:
sample['gt_poly'] = crop_polys
if 'gt_segm' in sample:
sample['gt_segm'] = self._crop_segm(sample['gt_segm'],
crop_box)
sample['gt_segm'] = np.take(
sample['gt_segm'], valid_ids, axis=0)
sample['image'] = self._crop_image(sample['image'], crop_box)
sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
sample['gt_class'] = np.take(
sample['gt_class'], valid_ids, axis=0)
if 'gt_score' in sample:
sample['gt_score'] = np.take(
sample['gt_score'], valid_ids, axis=0)
if 'is_crowd' in sample:
sample['is_crowd'] = np.take(
sample['is_crowd'], valid_ids, axis=0)
return sample
return sample
def _iou_matrix(self, a, b):
tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
area_o = (area_a[:, np.newaxis] + area_b - area_i)
return area_i / (area_o + 1e-10)
def _crop_box_with_center_constraint(self, box, crop):
cropped_box = box.copy()
cropped_box[:, :2] = np.maximum(box[:, :2], crop[:2])
cropped_box[:, 2:] = np.minimum(box[:, 2:], crop[2:])
cropped_box[:, :2] -= crop[:2]
cropped_box[:, 2:] -= crop[:2]
centers = (box[:, :2] + box[:, 2:]) / 2
valid = np.logical_and(crop[:2] <= centers,
centers < crop[2:]).all(axis=1)
valid = np.logical_and(
valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
return cropped_box, np.where(valid)[0]
def _crop_image(self, img, crop):
x1, y1, x2, y2 = crop
return img[y1:y2, x1:x2, :]
def _crop_segm(self, segm, crop):
x1, y1, x2, y2 = crop
return segm[:, y1:y2, x1:x2]
@register_op
class RandomScaledCropOp(BaseOperator):
"""Resize image and bbox based on long side (with optional random scaling),
then crop or pad image to target size.
Args:
target_dim (int): target size.
scale_range (list): random scale range.
interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
"""
def __init__(self,
target_dim=512,
scale_range=[.1, 2.],
interp=cv2.INTER_LINEAR):
super(RandomScaledCropOp, self).__init__()
self.target_dim = target_dim
self.scale_range = scale_range
self.interp = interp
def apply(self, sample, context=None):
img = sample['image']
h, w = img.shape[:2]
random_scale = np.random.uniform(*self.scale_range)
dim = self.target_dim
random_dim = int(dim * random_scale)
dim_max = max(h, w)
scale = random_dim / dim_max
resize_w = w * scale
resize_h = h * scale
offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))
img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interp)
img = np.array(img)
canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
offset_y:offset_y + dim, offset_x:offset_x + dim, :]
sample['image'] = canvas
sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
scale_factor = sample['sacle_factor']
sample['scale_factor'] = np.asarray(
[scale_factor[0] * scale, scale_factor[1] * scale],
dtype=np.float32)
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
scale_array = np.array([scale, scale] * 2, dtype=np.float32)
shift_array = np.array([offset_x, offset_y] * 2, dtype=np.float32)
boxes = sample['gt_bbox'] * scale_array - shift_array
boxes = np.clip(boxes, 0, dim - 1)
# filter boxes with no area
area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
valid = (area > 1.).nonzero()[0]
sample['gt_bbox'] = boxes[valid]
sample['gt_class'] = sample['gt_class'][valid]
return sample
@register_op
class CutmixOp(BaseOperator):
def __init__(self, alpha=1.5, beta=1.5):
"""
CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
Cutmix image and gt_bbbox/gt_score
Args:
alpha (float): alpha parameter of beta distribute
beta (float): beta parameter of beta distribute
"""
super(CutmixOp, self).__init__()
self.alpha = alpha
self.beta = beta
if self.alpha <= 0.0:
raise ValueError("alpha shold be positive in {}".format(self))
if self.beta <= 0.0:
raise ValueError("beta shold be positive in {}".format(self))
def apply_image(self, img1, img2, factor):
""" _rand_bbox """
h = max(img1.shape[0], img2.shape[0])
w = max(img1.shape[1], img2.shape[1])
cut_rat = np.sqrt(1. - factor)
cut_w = np.int(w * cut_rat)
cut_h = np.int(h * cut_rat)
# uniform
cx = np.random.randint(w)
cy = np.random.randint(h)
bbx1 = np.clip(cx - cut_w // 2, 0, w - 1)
bby1 = np.clip(cy - cut_h // 2, 0, h - 1)
bbx2 = np.clip(cx + cut_w // 2, 0, w - 1)
bby2 = np.clip(cy + cut_h // 2, 0, h - 1)
img_1 = np.zeros((h, w, img1.shape[2]), 'float32')
img_1[:img1.shape[0], :img1.shape[1], :] = \
img1.astype('float32')
img_2 = np.zeros((h, w, img2.shape[2]), 'float32')
img_2[:img2.shape[0], :img2.shape[1], :] = \
img2.astype('float32')
img_1[bby1:bby2, bbx1:bbx2, :] = img2[bby1:bby2, bbx1:bbx2, :]
return img_1
def __call__(self, sample, context=None):
if not isinstance(sample, Sequence):
return sample
assert len(sample) == 2, 'cutmix need two samples'
factor = np.random.beta(self.alpha, self.beta)
factor = max(0.0, min(1.0, factor))
if factor >= 1.0:
return sample[0]
if factor <= 0.0:
return sample[1]
img1 = sample[0]['image']
img2 = sample[1]['image']
img = self.apply_image(img1, img2, factor)
gt_bbox1 = sample[0]['gt_bbox']
gt_bbox2 = sample[1]['gt_bbox']
gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
gt_class1 = sample[0]['gt_class']
gt_class2 = sample[1]['gt_class']
gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
gt_score1 = sample[0]['gt_score']
gt_score2 = sample[1]['gt_score']
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
sample = sample[0]
sample['image'] = img
sample['gt_bbox'] = gt_bbox
sample['gt_score'] = gt_score
sample['gt_class'] = gt_class
return sample
@register_op
class MixupOp(BaseOperator):
def __init__(self, alpha=1.5, beta=1.5):
""" Mixup image and gt_bbbox/gt_score
Args:
alpha (float): alpha parameter of beta distribute
beta (float): beta parameter of beta distribute
"""
super(MixupOp, self).__init__()
self.alpha = alpha
self.beta = beta
if self.alpha <= 0.0:
raise ValueError("alpha shold be positive in {}".format(self))
if self.beta <= 0.0:
raise ValueError("beta shold be positive in {}".format(self))
def apply_image(self, img1, img2, factor):
h = max(img1.shape[0], img2.shape[0])
w = max(img1.shape[1], img2.shape[1])
img = np.zeros((h, w, img1.shape[2]), 'float32')
img[:img1.shape[0], :img1.shape[1], :] = \
img1.astype('float32') * factor
img[:img2.shape[0], :img2.shape[1], :] += \
img2.astype('float32') * (1.0 - factor)
return img.astype('uint8')
def __call__(self, sample, context=None):
if not isinstance(sample, Sequence):
return sample
assert len(sample) == 2, 'mixup need two samples'
factor = np.random.beta(self.alpha, self.beta)
factor = max(0.0, min(1.0, factor))
if factor >= 1.0:
return sample[0]
if factor <= 0.0:
return sample[1]
im = self.apply_image(sample[0]['image'], sample[1]['image'], factor)
result = copy.deepcopy(sample[0])
result['image'] = im
# apply bbox and score
if 'gt_bbox' in sample[0]:
gt_bbox1 = sample[0]['gt_bbox']
gt_bbox2 = sample[1]['gt_bbox']
gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
result['gt_bbox'] = gt_bbox
if 'gt_class' in sample[0]:
gt_class1 = sample[0]['gt_class']
gt_class2 = sample[1]['gt_class']
gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
result['gt_class'] = gt_class
gt_score1 = np.ones_like(sample[0]['gt_class'])
gt_score2 = np.ones_like(sample[1]['gt_class'])
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
result['gt_score'] = gt_score
if 'is_crowd' in sample[0]:
is_crowd1 = sample[0]['is_crowd']
is_crowd2 = sample[1]['is_crowd']
is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
result['is_crowd'] = is_crowd
if 'difficult' in sample[0]:
is_difficult1 = sample[0]['difficult']
is_difficult2 = sample[1]['difficult']
is_difficult = np.concatenate(
(is_difficult1, is_difficult2), axis=0)
result['difficult'] = is_difficult
return result
@register_op
class NormalizeBoxOp(BaseOperator):
"""Transform the bounding box's coornidates to [0,1]."""
def __init__(self):
super(NormalizeBoxOp, self).__init__()
def apply(self, sample, context):
im = sample['image']
gt_bbox = sample['gt_bbox']
height, width, _ = im.shape
for i in range(gt_bbox.shape[0]):
gt_bbox[i][0] = gt_bbox[i][0] / width
gt_bbox[i][1] = gt_bbox[i][1] / height
gt_bbox[i][2] = gt_bbox[i][2] / width
gt_bbox[i][3] = gt_bbox[i][3] / height
sample['gt_bbox'] = gt_bbox
if 'gt_keypoint' in sample.keys():
gt_keypoint = sample['gt_keypoint']
for i in range(gt_keypoint.shape[1]):
if i % 2:
gt_keypoint[:, i] = gt_keypoint[:, i] / height
else:
gt_keypoint[:, i] = gt_keypoint[:, i] / width
sample['gt_keypoint'] = gt_keypoint
return sample
@register_op
class BboxXYXY2XYWHOp(BaseOperator):
"""
Convert bbox XYXY format to XYWH format.
"""
def __init__(self):
super(BboxXYXY2XYWHOp, self).__init__()
def apply(self, sample, context=None):
assert 'gt_bbox' in sample
bbox = sample['gt_bbox']
bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2]
bbox[:, :2] = bbox[:, :2] + bbox[:, 2:4] / 2.
sample['gt_bbox'] = bbox
return sample
@register_op
class PadBoxOp(BaseOperator):
def __init__(self, num_max_boxes=50):
"""
Pad zeros to bboxes if number of bboxes is less than num_max_boxes.
Args:
num_max_boxes (int): the max number of bboxes
"""
self.num_max_boxes = num_max_boxes
super(PadBoxOp, self).__init__()
def apply(self, sample, context=None):
assert 'gt_bbox' in sample
bbox = sample['gt_bbox']
gt_num = min(self.num_max_boxes, len(bbox))
num_max = self.num_max_boxes
# fields = context['fields'] if context else []
pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
if gt_num > 0:
pad_bbox[:gt_num, :] = bbox[:gt_num, :]
sample['gt_bbox'] = pad_bbox
if 'gt_class' in sample:
pad_class = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
sample['gt_class'] = pad_class
if 'gt_score' in sample:
pad_score = np.zeros((num_max, ), dtype=np.float32)
if gt_num > 0:
pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
sample['gt_score'] = pad_score
# in training, for example in op ExpandImage,
# the bbox and gt_class is expandded, but the difficult is not,
# so, judging by it's length
if 'difficult' in sample:
pad_diff = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
sample['difficult'] = pad_diff
if 'is_crowd' in sample:
pad_crowd = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
sample['is_crowd'] = pad_crowd
return sample
@register_op
class DebugVisibleImageOp(BaseOperator):
"""
In debug mode, visualize images according to `gt_box`.
(Currently only supported when not cropping and flipping image.)
"""
def __init__(self, output_dir='output/debug', is_normalized=False):
super(DebugVisibleImageOp, self).__init__()
self.is_normalized = is_normalized
self.output_dir = output_dir
if not os.path.isdir(output_dir):
os.makedirs(output_dir)
if not isinstance(self.is_normalized, bool):
raise TypeError("{}: input type is invalid.".format(self))
def apply(self, sample, context=None):
image = Image.open(sample['im_file']).convert('RGB')
out_file_name = sample['im_file'].split('/')[-1]
width = sample['w']
height = sample['h']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
draw = ImageDraw.Draw(image)
for i in range(gt_bbox.shape[0]):
if self.is_normalized:
gt_bbox[i][0] = gt_bbox[i][0] * width
gt_bbox[i][1] = gt_bbox[i][1] * height
gt_bbox[i][2] = gt_bbox[i][2] * width
gt_bbox[i][3] = gt_bbox[i][3] * height
xmin, ymin, xmax, ymax = gt_bbox[i]
draw.line(
[(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
(xmin, ymin)],
width=2,
fill='green')
# draw label
text = str(gt_class[i][0])
tw, th = draw.textsize(text)
draw.rectangle(
[(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
if 'gt_keypoint' in sample.keys():
gt_keypoint = sample['gt_keypoint']
if self.is_normalized:
for i in range(gt_keypoint.shape[1]):
if i % 2:
gt_keypoint[:, i] = gt_keypoint[:, i] * height
else:
gt_keypoint[:, i] = gt_keypoint[:, i] * width
for i in range(gt_keypoint.shape[0]):
keypoint = gt_keypoint[i]
for j in range(int(keypoint.shape[0] / 2)):
x1 = round(keypoint[2 * j]).astype(np.int32)
y1 = round(keypoint[2 * j + 1]).astype(np.int32)
draw.ellipse(
(x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
save_path = os.path.join(self.output_dir, out_file_name)
image.save(save_path, quality=95)
return sample
@register_op
class Pad(BaseOperator):
def __init__(self,
size=None,
size_divisor=32,
pad_mode=0,
offsets=None,
fill_value=(127.5, 127.5, 127.5)):
"""
Pad image to a specified size or multiple of size_divisor. random target_size and interpolation method
Args:
size (int, Sequence): image target size, if None, pad to multiple of size_divisor, default None
size_divisor (int): size divisor, default 32
pad_mode (int): pad mode, currently only supports four modes [-1, 0, 1, 2]. if -1, use specified offsets
if 0, only pad to right and bottom. if 1, pad according to center. if 2, only pad left and top
fill_value (bool): rgb value of pad area, default (127.5, 127.5, 127.5)
"""
super(Pad, self).__init__()
if not isinstance(size, (int, Sequence)):
raise TypeError(
"Type of target_size is invalid when random_size is True. \
Must be List, now is {}".format(type(size)))
if isinstance(size, int):
size = [size, size]
assert pad_mode in [
-1, 0, 1, 2
], 'currently only supports four modes [-1, 0, 1, 2]'
assert pad_mode == -1 and offsets, 'if pad_mode is -1, offsets should not be None'
self.size = size
self.size_divisor = size_divisor
self.pad_mode = pad_mode
self.fill_value = fill_value
self.offsets = offsets
def apply_segm(self, segms, offsets, im_size, size):
def _expand_poly(poly, x, y):
expanded_poly = np.array(poly)
expanded_poly[0::2] += x
expanded_poly[1::2] += y
return expanded_poly.tolist()
def _expand_rle(rle, x, y, height, width, h, w):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, height, width)
mask = mask_util.decode(rle)
expanded_mask = np.full((h, w), 0).astype(mask.dtype)
expanded_mask[y:y + height, x:x + width] = mask
rle = mask_util.encode(
np.array(
expanded_mask, order='F', dtype=np.uint8))
return rle
x, y = offsets
height, width = im_size
h, w = size
expanded_segms = []
for segm in segms:
if is_poly(segm):
# Polygon format
expanded_segms.append(
[_expand_poly(poly, x, y) for poly in segm])
else:
# RLE format
import pycocotools.mask as mask_util
expanded_segms.append(
_expand_rle(segm, x, y, height, width, h, w))
return expanded_segms
def apply_bbox(self, bbox, offsets):
return bbox + np.array(offsets * 2, dtype=np.float32)
def apply_keypoint(self, keypoints, offsets):
n = len(keypoints[0]) // 2
return keypoints + np.array(offsets * n, dtype=np.float32)
def apply_image(self, image, offsets, im_size, size):
x, y = offsets
im_h, im_w = im_size
h, w = size
canvas = np.ones((h, w, 3), dtype=np.float32)
canvas *= np.array(self.fill_value, dtype=np.float32)
canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
return canvas
def apply(self, sample, context=None):
im = sample['image']
im_h, im_w = im.shape[:2]
if self.size:
h, w = self.size
assert (
im_h < h and im_w < w
), '(h, w) of target size should be greater than (im_h, im_w)'
else:
h = np.ceil(im_h // self.size_divisor) * self.size_divisor
w = np.ceil(im_w / self.size_divisor) * self.size_divisor
if h == im_h and w == im_w:
return sample
if self.pad_mode == -1:
offset_x, offset_y = self.offsets
elif self.pad_mode == 0:
offset_y, offset_x = 0, 0
elif self.pad_mode == 1:
offset_y, offset_x = (h - im_h) // 2, (w - im_w) // 2
else:
offset_y, offset_x = h - im_h, w - im_w
offsets, im_size, size = [offset_x, offset_y], [im_h, im_w], [h, w]
sample['image'] = self.apply_image(im, offsets, im_size, size)
if self.pad_mode == 0:
return sample
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], offsets)
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], offsets,
im_size, size)
if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
sample['gt_keypoint'] = self.apply_keypoint(sample['gt_keypoint'],
offsets)
return sample
@register_op
class Poly2Mask(BaseOperator):
"""
gt poly to mask annotations
"""
def __init__(self):
super(Poly2Mask, self).__init__()
import pycocotools.mask as maskUtils
self.maskutils = maskUtils
def _poly2mask(self, mask_ann, img_h, img_w):
if isinstance(mask_ann, list):
# polygon -- a single object might consist of multiple parts
# we merge all parts into one mask rle code
rles = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
rle = self.maskutils.merge(rles)
elif isinstance(mask_ann['counts'], list):
# uncompressed RLE
rle = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
else:
# rle
rle = mask_ann
mask = self.maskutils.decode(rle)
return mask
def apply(self, sample, context=None):
assert 'gt_poly' in sample
im_h = sample['h']
im_w = sample['w']
masks = [
self._poly2mask(gt_poly, im_h, im_w)
for gt_poly in sample['gt_poly']
]
sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
return sample
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -25,20 +25,20 @@ try:
except Exception:
from collections import Sequence
from numbers import Number
from numbers import Number, Integral
import uuid
import random
import math
import numpy as np
import os
import copy
import cv2
from PIL import Image, ImageEnhance, ImageDraw
from ppdet.core.workspace import serializable
from ppdet.modeling.layers import AnchorGrid
from .operator import register_op, BaseOperator, BboxError, ImageError
from .op_helper import (satisfy_sample_constraint, filter_and_process,
generate_sample_bbox, clip_bbox, data_anchor_sampling,
......@@ -49,40 +49,81 @@ from .op_helper import (satisfy_sample_constraint, filter_and_process,
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
registered_ops = []
@register_op
class DecodeImage(BaseOperator):
def __init__(self, to_rgb=True, with_mixup=False, with_cutmix=False):
""" Transform the image data to numpy format.
def register_op(cls):
registered_ops.append(cls.__name__)
if not hasattr(BaseOperator, cls.__name__):
setattr(BaseOperator, cls.__name__, cls)
else:
raise KeyError("The {} class has been registered.".format(cls.__name__))
return serializable(cls)
class BboxError(ValueError):
pass
class ImageError(ValueError):
pass
class BaseOperator(object):
def __init__(self, name=None):
if name is None:
name = self.__class__.__name__
self._id = name + '_' + str(uuid.uuid4())[-6:]
def apply(self, sample, context=None):
""" Process a sample.
Args:
to_rgb (bool): whether to convert BGR to RGB
with_mixup (bool): whether or not to mixup image and gt_bbbox/gt_score
with_cutmix (bool): whether or not to cutmix image and gt_bbbox/gt_score
sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
context (dict): info about this sample processing
Returns:
result (dict): a processed sample
"""
return sample
super(DecodeImage, self).__init__()
self.to_rgb = to_rgb
self.with_mixup = with_mixup
self.with_cutmix = with_cutmix
if not isinstance(self.to_rgb, bool):
raise TypeError("{}: input type is invalid.".format(self))
if not isinstance(self.with_mixup, bool):
raise TypeError("{}: input type is invalid.".format(self))
if not isinstance(self.with_cutmix, bool):
raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, sample, context=None):
""" Process a sample.
Args:
sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
context (dict): info about this sample processing
Returns:
result (dict): a processed sample
"""
if isinstance(sample, Sequence):
for i in range(len(sample)):
sample[i] = self.apply(sample[i], context)
else:
sample = self.apply(sample, context)
return sample
def __str__(self):
return str(self._id)
@register_op
class Decode(BaseOperator):
def __init__(self):
""" Transform the image data to numpy format following the rgb format
"""
super(Decode, self).__init__()
def __call__(self, sample):
def apply(self, sample, context=None):
""" load image if 'im_file' field is not empty but 'image' is"""
if 'image' not in sample:
with open(sample['im_file'], 'rb') as f:
sample['image'] = f.read()
sample.pop('im_file')
im = sample['image']
data = np.frombuffer(im, dtype='uint8')
im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
if self.to_rgb:
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
sample['image'] = im
if 'h' not in sample:
sample['h'] = im.shape[0]
......@@ -101,353 +142,66 @@ class DecodeImage(BaseOperator):
"image width.".format(im.shape[1], sample['w']))
sample['w'] = im.shape[1]
# make default im_info with [h, w, 1]
sample['im_info'] = np.array(
[im.shape[0], im.shape[1], 1.], dtype=np.float32)
# decode mixup image
if self.with_mixup and 'mixup' in sample:
self.__call__(sample['mixup'])
# decode cutmix image
if self.with_cutmix and 'cutmix' in sample:
self.__call__(sample['cutmix'])
return sample
@register_op
class MultiscaleTestResize(BaseOperator):
def __init__(self,
origin_target_size=800,
origin_max_size=1333,
target_size=[],
max_size=2000,
interp=cv2.INTER_LINEAR,
use_flip=True):
"""
Rescale image to the each size in target size, and capped at max_size.
Args:
origin_target_size(int): original target size of image's short side.
origin_max_size(int): original max size of image.
target_size (list): A list of target sizes of image's short side.
max_size (int): the max size of image.
interp (int): the interpolation method.
use_flip (bool): whether use flip augmentation.
"""
super(MultiscaleTestResize, self).__init__()
self.origin_target_size = int(origin_target_size)
self.origin_max_size = int(origin_max_size)
self.max_size = int(max_size)
self.interp = int(interp)
self.use_flip = use_flip
if not isinstance(target_size, list):
raise TypeError(
"Type of target_size is invalid. Must be List, now is {}".
format(type(target_size)))
self.target_size = target_size
if not (isinstance(self.origin_target_size, int) and isinstance(
self.origin_max_size, int) and isinstance(self.max_size, int)
and isinstance(self.interp, int)):
raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, sample):
""" Resize the image numpy for multi-scale test.
"""
origin_ims = {}
im = sample['image']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.".format(self))
if len(im.shape) != 3:
raise ImageError('{}: image is not 3-dimensional.'.format(self))
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
if float(im_size_min) == 0:
raise ZeroDivisionError('{}: min size of image is 0'.format(self))
base_name_list = ['image']
origin_ims['image'] = im
if self.use_flip:
sample['image_flip'] = im[:, ::-1, :]
base_name_list.append('image_flip')
origin_ims['image_flip'] = sample['image_flip']
for base_name in base_name_list:
im_scale = float(self.origin_target_size) / float(im_size_min)
# Prevent the biggest axis from being more than max_size
if np.round(im_scale * im_size_max) > self.origin_max_size:
im_scale = float(self.origin_max_size) / float(im_size_max)
im_scale_x = im_scale
im_scale_y = im_scale
resize_w = np.round(im_scale_x * float(im_shape[1]))
resize_h = np.round(im_scale_y * float(im_shape[0]))
im_resize = cv2.resize(
origin_ims[base_name],
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
sample[base_name] = im_resize
info_name = 'im_info' if base_name == 'image' else 'im_info_image_flip'
sample[base_name] = im_resize
sample[info_name] = np.array(
[resize_h, resize_w, im_scale], dtype=np.float32)
for i, size in enumerate(self.target_size):
im_scale = float(size) / float(im_size_min)
if np.round(im_scale * im_size_max) > self.max_size:
im_scale = float(self.max_size) / float(im_size_max)
im_scale_x = im_scale
im_scale_y = im_scale
resize_w = np.round(im_scale_x * float(im_shape[1]))
resize_h = np.round(im_scale_y * float(im_shape[0]))
im_resize = cv2.resize(
origin_ims[base_name],
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
im_info = [resize_h, resize_w, im_scale]
# hard-code here, must be consistent with
# ppdet/modeling/architectures/input_helper.py
name = base_name + '_scale_' + str(i)
info_name = 'im_info_' + name
sample[name] = im_resize
sample[info_name] = np.array(
[resize_h, resize_w, im_scale], dtype=np.float32)
sample['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
sample['scale_factor'] = np.array([1., 1.], dtype=np.float32)
return sample
@register_op
class ResizeImage(BaseOperator):
def __init__(self,
target_size=0,
max_size=0,
interp=cv2.INTER_LINEAR,
use_cv2=True):
class Permute(BaseOperator):
def __init__(self):
"""
Rescale image to the specified target size, and capped at max_size
if max_size != 0.
If target_size is list, selected a scale randomly as the specified
target size.
Args:
target_size (int|list): the target size of image's short side,
multi-scale training is adopted when type is list.
max_size (int): the max size of image
interp (int): the interpolation method
use_cv2 (bool): use the cv2 interpolation method or use PIL
interpolation method
Change the channel to be (C, H, W)
"""
super(ResizeImage, self).__init__()
self.max_size = int(max_size)
self.interp = int(interp)
self.use_cv2 = use_cv2
if not (isinstance(target_size, int) or isinstance(target_size, list)):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List, now is {}".
format(type(target_size)))
self.target_size = target_size
if not (isinstance(self.max_size, int) and isinstance(self.interp,
int)):
raise TypeError("{}: input type is invalid.".format(self))
super(Permute, self).__init__()
def __call__(self, sample, context=None):
""" Resize the image numpy.
"""
def apply(self, sample, context=None):
im = sample['image']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.".format(self))
if len(im.shape) != 3:
raise ImageError('{}: image is not 3-dimensional.'.format(self))
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
if isinstance(self.target_size, list):
# Case for multi-scale training
selected_size = random.choice(self.target_size)
else:
selected_size = self.target_size
if float(im_size_min) == 0:
raise ZeroDivisionError('{}: min size of image is 0'.format(self))
if self.max_size != 0:
im_scale = float(selected_size) / float(im_size_min)
# Prevent the biggest axis from being more than max_size
if np.round(im_scale * im_size_max) > self.max_size:
im_scale = float(self.max_size) / float(im_size_max)
im_scale_x = im_scale
im_scale_y = im_scale
resize_w = im_scale_x * float(im_shape[1])
resize_h = im_scale_y * float(im_shape[0])
im_info = [resize_h, resize_w, im_scale]
if 'im_info' in sample and sample['im_info'][2] != 1.:
sample['im_info'] = np.append(
list(sample['im_info']), im_info).astype(np.float32)
else:
sample['im_info'] = np.array(im_info).astype(np.float32)
else:
im_scale_x = float(selected_size) / float(im_shape[1])
im_scale_y = float(selected_size) / float(im_shape[0])
resize_w = selected_size
resize_h = selected_size
if self.use_cv2:
im = cv2.resize(
im,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
else:
if self.max_size != 0:
raise TypeError(
'If you set max_size to cap the maximum size of image,'
'please set use_cv2 to True to resize the image.')
im = im.astype('uint8')
im = Image.fromarray(im)
im = im.resize((int(resize_w), int(resize_h)), self.interp)
im = np.array(im)
im = im.transpose((2, 0, 1))
sample['image'] = im
return sample
@register_op
class RandomFlipImage(BaseOperator):
def __init__(self, prob=0.5, is_normalized=False, is_mask_flip=False):
class Lighting(BaseOperator):
"""
Lighting the imagen by eigenvalues and eigenvectors
Args:
prob (float): the probability of flipping image
is_normalized (bool): whether the bbox scale to [0,1]
is_mask_flip (bool): whether flip the segmentation
eigval (list): eigenvalues
eigvec (list): eigenvectors
alphastd (float): random weight of lighting, 0.1 by default
"""
super(RandomFlipImage, self).__init__()
self.prob = prob
self.is_normalized = is_normalized
self.is_mask_flip = is_mask_flip
if not (isinstance(self.prob, float) and
isinstance(self.is_normalized, bool) and
isinstance(self.is_mask_flip, bool)):
raise TypeError("{}: input type is invalid.".format(self))
def flip_segms(self, segms, height, width):
def _flip_poly(poly, width):
flipped_poly = np.array(poly)
flipped_poly[0::2] = width - np.array(poly[0::2]) - 1
return flipped_poly.tolist()
def _flip_rle(rle, height, width):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, height, width)
mask = mask_util.decode(rle)
mask = mask[:, ::-1]
rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
return rle
flipped_segms = []
for segm in segms:
if is_poly(segm):
# Polygon format
flipped_segms.append([_flip_poly(poly, width) for poly in segm])
else:
# RLE format
import pycocotools.mask as mask_util
flipped_segms.append(_flip_rle(segm, height, width))
return flipped_segms
def flip_keypoint(self, gt_keypoint, width):
for i in range(gt_keypoint.shape[1]):
if i % 2 == 0:
old_x = gt_keypoint[:, i].copy()
if self.is_normalized:
gt_keypoint[:, i] = 1 - old_x
else:
gt_keypoint[:, i] = width - old_x - 1
return gt_keypoint
def __call__(self, sample):
"""Filp the image and bounding box.
Operators:
1. Flip the image numpy.
2. Transform the bboxes' x coordinates.
(Must judge whether the coordinates are normalized!)
3. Transform the segmentations' x coordinates.
(Must judge whether the coordinates are normalized!)
Output:
sample: the image, bounding box and segmentation part
in sample are flipped.
"""
def __init__(self, eigval, eigvec, alphastd=0.1):
super(Lighting, self).__init__()
self.alphastd = alphastd
self.eigval = np.array(eigval).astype('float32')
self.eigvec = np.array(eigvec).astype('float32')
samples = sample
batch_input = True
if not isinstance(samples, Sequence):
batch_input = False
samples = [samples]
for sample in samples:
gt_bbox = sample['gt_bbox']
im = sample['image']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image is not a numpy array.".format(self))
if len(im.shape) != 3:
raise ImageError("{}: image is not 3-dimensional.".format(self))
height, width, _ = im.shape
if np.random.uniform(0, 1) < self.prob:
im = im[:, ::-1, :]
if gt_bbox.shape[0] == 0:
return sample
oldx1 = gt_bbox[:, 0].copy()
oldx2 = gt_bbox[:, 2].copy()
if self.is_normalized:
gt_bbox[:, 0] = 1 - oldx2
gt_bbox[:, 2] = 1 - oldx1
else:
gt_bbox[:, 0] = width - oldx2 - 1
gt_bbox[:, 2] = width - oldx1 - 1
if gt_bbox.shape[0] != 0 and (
gt_bbox[:, 2] < gt_bbox[:, 0]).all():
m = "{}: invalid box, x2 should be greater than x1".format(
self)
raise BboxError(m)
sample['gt_bbox'] = gt_bbox
if self.is_mask_flip and len(sample['gt_poly']) != 0:
sample['gt_poly'] = self.flip_segms(sample['gt_poly'],
height, width)
if 'gt_keypoint' in sample.keys():
sample['gt_keypoint'] = self.flip_keypoint(
sample['gt_keypoint'], width)
sample['flipped'] = True
sample['image'] = im
sample = samples if batch_input else samples[0]
def apply(self, sample, context=None):
alpha = np.random.normal(scale=self.alphastd, size=(3, ))
sample['image'] += np.dot(self.eigvec, self.eigval * alpha)
return sample
@register_op
class RandomErasingImage(BaseOperator):
def __init__(self, prob=0.5, sl=0.02, sh=0.4, r1=0.3):
def __init__(self, prob=0.5, lower=0.02, higher=0.4, aspect_ratio=0.3):
"""
Random Erasing Data Augmentation, see https://arxiv.org/abs/1708.04896
Args:
prob (float): probability to carry out random erasing
sl (float): lower limit of the erasing area ratio
sh (float): upper limit of the erasing area ratio
r1 (float): aspect ratio of the erasing region
lower (float): lower limit of the erasing area ratio
heigher (float): upper limit of the erasing area ratio
aspect_ratio (float): aspect ratio of the erasing region
"""
super(RandomErasingImage, self).__init__()
self.prob = prob
self.sl = sl
self.sh = sh
self.r1 = r1
def __call__(self, sample):
samples = sample
batch_input = True
if not isinstance(samples, Sequence):
batch_input = False
samples = [samples]
for sample in samples:
self.lower = lower
self.heigher = heigher
self.aspect_ratio = aspect_ratio
def apply(self, sample):
gt_bbox = sample['gt_bbox']
im = sample['image']
if not isinstance(im, np.ndarray):
......@@ -464,8 +218,9 @@ class RandomErasingImage(BaseOperator):
h_bbox = y2 - y1 + 1
area = w_bbox * h_bbox
target_area = random.uniform(self.sl, self.sh) * area
aspect_ratio = random.uniform(self.r1, 1 / self.r1)
target_area = random.uniform(self.lower, self.higher) * area
aspect_ratio = random.uniform(self.aspect_ratio,
1 / self.aspect_ratio)
h = int(round(math.sqrt(target_area * aspect_ratio)))
w = int(round(math.sqrt(target_area / aspect_ratio)))
......@@ -473,16 +228,55 @@ class RandomErasingImage(BaseOperator):
if w < w_bbox and h < h_bbox:
off_y1 = random.randint(0, int(h_bbox - h))
off_x1 = random.randint(0, int(w_bbox - w))
im[int(y1 + off_y1):int(y1 + off_y1 + h), int(x1 + off_x1):
int(x1 + off_x1 + w), :] = 0
im[int(y1 + off_y1):int(y1 + off_y1 + h), int(x1 + off_x1):int(
x1 + off_x1 + w), :] = 0
sample['image'] = im
return sample
sample = samples if batch_input else samples[0]
@register_op
class NormalizeImage(BaseOperator):
def __init__(self, mean=[0.485, 0.456, 0.406], std=[1, 1, 1],
is_scale=True):
"""
Args:
mean (list): the pixel mean
std (list): the pixel variance
"""
super(NormalizeImage, self).__init__()
self.mean = mean
self.std = std
self.is_scale = is_scale
if not (isinstance(self.mean, list) and isinstance(self.std, list) and
isinstance(self.is_scale, bool)):
raise TypeError("{}: input type is invalid.".format(self))
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise ValueError('{}: std is invalid!'.format(self))
def apply(self, sample, context=None):
"""Normalize the image.
Operators:
1.(optional) Scale the image to [0,1]
2. Each pixel minus mean and is divided by std
"""
im = sample['image']
im = im.astype(np.float32, copy=False)
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
if self.is_scale:
im = im / 255.0
im -= mean
im /= std
sample['image'] = im
return sample
@register_op
class GridMaskOp(BaseOperator):
class GridMask(BaseOperator):
def __init__(self,
use_h=True,
use_w=True,
......@@ -504,7 +298,7 @@ class GridMaskOp(BaseOperator):
prob (float): max probability to carry out gridmask
upper_iter (int): suggested to be equal to global max_iter
"""
super(GridMaskOp, self).__init__()
super(GridMask, self).__init__()
self.use_h = use_h
self.use_w = use_w
self.rotate = rotate
......@@ -525,880 +319,23 @@ class GridMaskOp(BaseOperator):
prob=prob,
upper_iter=upper_iter)
def __call__(self, sample):
samples = sample
batch_input = True
if not isinstance(samples, Sequence):
batch_input = False
samples = [samples]
for sample in samples:
sample['image'] = self.gridmask_op(sample['image'],
sample['curr_iter'])
if not batch_input:
samples = samples[0]
def apply(self, sample, context=None):
sample['image'] = self.gridmask_op(sample['image'], sample['curr_iter'])
return sample
@register_op
class AutoAugmentImage(BaseOperator):
def __init__(self, is_normalized=False, autoaug_type="v1"):
"""
class RandomDistort(BaseOperator):
"""Random color distortion.
Args:
is_normalized (bool): whether the bbox scale to [0,1]
autoaug_type (str): autoaug type, support v0, v1, v2, v3, test
"""
super(AutoAugmentImage, self).__init__()
self.is_normalized = is_normalized
self.autoaug_type = autoaug_type
if not isinstance(self.is_normalized, bool):
raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, sample):
"""
Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172
"""
samples = sample
batch_input = True
if not isinstance(samples, Sequence):
batch_input = False
samples = [samples]
for sample in samples:
gt_bbox = sample['gt_bbox']
im = sample['image']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image is not a numpy array.".format(self))
if len(im.shape) != 3:
raise ImageError("{}: image is not 3-dimensional.".format(self))
if len(gt_bbox) == 0:
continue
# gt_boxes : [x1, y1, x2, y2]
# norm_gt_boxes: [y1, x1, y2, x2]
height, width, _ = im.shape
norm_gt_bbox = np.ones_like(gt_bbox, dtype=np.float32)
if not self.is_normalized:
norm_gt_bbox[:, 0] = gt_bbox[:, 1] / float(height)
norm_gt_bbox[:, 1] = gt_bbox[:, 0] / float(width)
norm_gt_bbox[:, 2] = gt_bbox[:, 3] / float(height)
norm_gt_bbox[:, 3] = gt_bbox[:, 2] / float(width)
else:
norm_gt_bbox[:, 0] = gt_bbox[:, 1]
norm_gt_bbox[:, 1] = gt_bbox[:, 0]
norm_gt_bbox[:, 2] = gt_bbox[:, 3]
norm_gt_bbox[:, 3] = gt_bbox[:, 2]
from .autoaugment_utils import distort_image_with_autoaugment
im, norm_gt_bbox = distort_image_with_autoaugment(im, norm_gt_bbox,
self.autoaug_type)
if not self.is_normalized:
gt_bbox[:, 0] = norm_gt_bbox[:, 1] * float(width)
gt_bbox[:, 1] = norm_gt_bbox[:, 0] * float(height)
gt_bbox[:, 2] = norm_gt_bbox[:, 3] * float(width)
gt_bbox[:, 3] = norm_gt_bbox[:, 2] * float(height)
else:
gt_bbox[:, 0] = norm_gt_bbox[:, 1]
gt_bbox[:, 1] = norm_gt_bbox[:, 0]
gt_bbox[:, 2] = norm_gt_bbox[:, 3]
gt_bbox[:, 3] = norm_gt_bbox[:, 2]
sample['gt_bbox'] = gt_bbox
sample['image'] = im
sample = samples if batch_input else samples[0]
return sample
@register_op
class NormalizeImage(BaseOperator):
def __init__(self,
mean=[0.485, 0.456, 0.406],
std=[1, 1, 1],
is_scale=True,
is_channel_first=True):
"""
Args:
mean (list): the pixel mean
std (list): the pixel variance
"""
super(NormalizeImage, self).__init__()
self.mean = mean
self.std = std
self.is_scale = is_scale
self.is_channel_first = is_channel_first
if not (isinstance(self.mean, list) and isinstance(self.std, list) and
isinstance(self.is_scale, bool)):
raise TypeError("{}: input type is invalid.".format(self))
from functools import reduce
if reduce(lambda x, y: x * y, self.std) == 0:
raise ValueError('{}: std is invalid!'.format(self))
def __call__(self, sample):
"""Normalize the image.
Operators:
1.(optional) Scale the image to [0,1]
2. Each pixel minus mean and is divided by std
"""
samples = sample
batch_input = True
if not isinstance(samples, Sequence):
batch_input = False
samples = [samples]
for sample in samples:
for k in sample.keys():
# hard code
if k.startswith('image'):
im = sample[k]
im = im.astype(np.float32, copy=False)
if self.is_channel_first:
mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
std = np.array(self.std)[:, np.newaxis, np.newaxis]
else:
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
if self.is_scale:
im = im / 255.0
im -= mean
im /= std
sample[k] = im
if not batch_input:
samples = samples[0]
return samples
@register_op
class RandomDistort(BaseOperator):
def __init__(self,
brightness_lower=0.5,
brightness_upper=1.5,
contrast_lower=0.5,
contrast_upper=1.5,
saturation_lower=0.5,
saturation_upper=1.5,
hue_lower=-18,
hue_upper=18,
brightness_prob=0.5,
contrast_prob=0.5,
saturation_prob=0.5,
hue_prob=0.5,
count=4,
is_order=False):
"""
Args:
brightness_lower/ brightness_upper (float): the brightness
between brightness_lower and brightness_upper
contrast_lower/ contrast_upper (float): the contrast between
contrast_lower and contrast_lower
saturation_lower/ saturation_upper (float): the saturation
between saturation_lower and saturation_upper
hue_lower/ hue_upper (float): the hue between
hue_lower and hue_upper
brightness_prob (float): the probability of changing brightness
contrast_prob (float): the probability of changing contrast
saturation_prob (float): the probability of changing saturation
hue_prob (float): the probability of changing hue
count (int): the kinds of doing distrot
is_order (bool): whether determine the order of distortion
"""
super(RandomDistort, self).__init__()
self.brightness_lower = brightness_lower
self.brightness_upper = brightness_upper
self.contrast_lower = contrast_lower
self.contrast_upper = contrast_upper
self.saturation_lower = saturation_lower
self.saturation_upper = saturation_upper
self.hue_lower = hue_lower
self.hue_upper = hue_upper
self.brightness_prob = brightness_prob
self.contrast_prob = contrast_prob
self.saturation_prob = saturation_prob
self.hue_prob = hue_prob
self.count = count
self.is_order = is_order
def random_brightness(self, img):
brightness_delta = np.random.uniform(self.brightness_lower,
self.brightness_upper)
prob = np.random.uniform(0, 1)
if prob < self.brightness_prob:
img = ImageEnhance.Brightness(img).enhance(brightness_delta)
return img
def random_contrast(self, img):
contrast_delta = np.random.uniform(self.contrast_lower,
self.contrast_upper)
prob = np.random.uniform(0, 1)
if prob < self.contrast_prob:
img = ImageEnhance.Contrast(img).enhance(contrast_delta)
return img
def random_saturation(self, img):
saturation_delta = np.random.uniform(self.saturation_lower,
self.saturation_upper)
prob = np.random.uniform(0, 1)
if prob < self.saturation_prob:
img = ImageEnhance.Color(img).enhance(saturation_delta)
return img
def random_hue(self, img):
hue_delta = np.random.uniform(self.hue_lower, self.hue_upper)
prob = np.random.uniform(0, 1)
if prob < self.hue_prob:
img = np.array(img.convert('HSV'))
img[:, :, 0] = img[:, :, 0] + hue_delta
img = Image.fromarray(img, mode='HSV').convert('RGB')
return img
def __call__(self, sample):
"""random distort the image"""
ops = [
self.random_brightness, self.random_contrast,
self.random_saturation, self.random_hue
]
if self.is_order:
prob = np.random.uniform(0, 1)
if prob < 0.5:
ops = [
self.random_brightness,
self.random_saturation,
self.random_hue,
self.random_contrast,
]
else:
ops = random.sample(ops, self.count)
assert 'image' in sample, "image data not found"
im = sample['image']
im = Image.fromarray(im)
for id in range(self.count):
im = ops[id](im)
im = np.asarray(im)
sample['image'] = im
return sample
@register_op
class ExpandImage(BaseOperator):
def __init__(self, max_ratio, prob, mean=[127.5, 127.5, 127.5]):
"""
Args:
max_ratio (float): the ratio of expanding
prob (float): the probability of expanding image
mean (list): the pixel mean
"""
super(ExpandImage, self).__init__()
self.max_ratio = max_ratio
self.mean = mean
self.prob = prob
def __call__(self, sample):
"""
Expand the image and modify bounding box.
Operators:
1. Scale the image width and height.
2. Construct new images with new height and width.
3. Fill the new image with the mean.
4. Put original imge into new image.
5. Rescale the bounding box.
6. Determine if the new bbox is satisfied in the new image.
Returns:
sample: the image, bounding box are replaced.
"""
prob = np.random.uniform(0, 1)
assert 'image' in sample, 'not found image data'
im = sample['image']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
im_width = sample['w']
im_height = sample['h']
if prob < self.prob:
if self.max_ratio - 1 >= 0.01:
expand_ratio = np.random.uniform(1, self.max_ratio)
height = int(im_height * expand_ratio)
width = int(im_width * expand_ratio)
h_off = math.floor(np.random.uniform(0, height - im_height))
w_off = math.floor(np.random.uniform(0, width - im_width))
expand_bbox = [
-w_off / im_width, -h_off / im_height,
(width - w_off) / im_width, (height - h_off) / im_height
]
expand_im = np.ones((height, width, 3))
expand_im = np.uint8(expand_im * np.squeeze(self.mean))
expand_im = Image.fromarray(expand_im)
im = Image.fromarray(im)
expand_im.paste(im, (int(w_off), int(h_off)))
expand_im = np.asarray(expand_im)
if 'gt_keypoint' in sample.keys(
) and 'keypoint_ignore' in sample.keys():
keypoints = (sample['gt_keypoint'],
sample['keypoint_ignore'])
gt_bbox, gt_class, _, gt_keypoints = filter_and_process(
expand_bbox, gt_bbox, gt_class, keypoints=keypoints)
sample['gt_keypoint'] = gt_keypoints[0]
sample['keypoint_ignore'] = gt_keypoints[1]
else:
gt_bbox, gt_class, _ = filter_and_process(expand_bbox,
gt_bbox, gt_class)
sample['image'] = expand_im
sample['gt_bbox'] = gt_bbox
sample['gt_class'] = gt_class
sample['w'] = width
sample['h'] = height
return sample
@register_op
class CropImage(BaseOperator):
def __init__(self, batch_sampler, satisfy_all=False, avoid_no_bbox=True):
"""
Args:
batch_sampler (list): Multiple sets of different
parameters for cropping.
satisfy_all (bool): whether all boxes must satisfy.
e.g.[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
[max sample, max trial, min scale, max scale,
min aspect ratio, max aspect ratio,
min overlap, max overlap]
avoid_no_bbox (bool): whether to to avoid the
situation where the box does not appear.
"""
super(CropImage, self).__init__()
self.batch_sampler = batch_sampler
self.satisfy_all = satisfy_all
self.avoid_no_bbox = avoid_no_bbox
def __call__(self, sample):
"""
Crop the image and modify bounding box.
Operators:
1. Scale the image width and height.
2. Crop the image according to a radom sample.
3. Rescale the bounding box.
4. Determine if the new bbox is satisfied in the new image.
Returns:
sample: the image, bounding box are replaced.
"""
assert 'image' in sample, "image data not found"
im = sample['image']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
im_width = sample['w']
im_height = sample['h']
gt_score = None
if 'gt_score' in sample:
gt_score = sample['gt_score']
sampled_bbox = []
gt_bbox = gt_bbox.tolist()
for sampler in self.batch_sampler:
found = 0
for i in range(sampler[1]):
if found >= sampler[0]:
break
sample_bbox = generate_sample_bbox(sampler)
if satisfy_sample_constraint(sampler, sample_bbox, gt_bbox,
self.satisfy_all):
sampled_bbox.append(sample_bbox)
found = found + 1
im = np.array(im)
while sampled_bbox:
idx = int(np.random.uniform(0, len(sampled_bbox)))
sample_bbox = sampled_bbox.pop(idx)
sample_bbox = clip_bbox(sample_bbox)
crop_bbox, crop_class, crop_score = \
filter_and_process(sample_bbox, gt_bbox, gt_class, scores=gt_score)
if self.avoid_no_bbox:
if len(crop_bbox) < 1:
continue
xmin = int(sample_bbox[0] * im_width)
xmax = int(sample_bbox[2] * im_width)
ymin = int(sample_bbox[1] * im_height)
ymax = int(sample_bbox[3] * im_height)
im = im[ymin:ymax, xmin:xmax]
sample['image'] = im
sample['gt_bbox'] = crop_bbox
sample['gt_class'] = crop_class
sample['gt_score'] = crop_score
return sample
return sample
@register_op
class CropImageWithDataAchorSampling(BaseOperator):
def __init__(self,
batch_sampler,
anchor_sampler=None,
target_size=None,
das_anchor_scales=[16, 32, 64, 128],
sampling_prob=0.5,
min_size=8.,
avoid_no_bbox=True):
"""
Args:
anchor_sampler (list): anchor_sampling sets of different
parameters for cropping.
batch_sampler (list): Multiple sets of different
parameters for cropping.
e.g.[[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]]
[[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]]
[max sample, max trial, min scale, max scale,
min aspect ratio, max aspect ratio,
min overlap, max overlap, min coverage, max coverage]
target_size (bool): target image size.
das_anchor_scales (list[float]): a list of anchor scales in data
anchor smapling.
min_size (float): minimum size of sampled bbox.
avoid_no_bbox (bool): whether to to avoid the
situation where the box does not appear.
"""
super(CropImageWithDataAchorSampling, self).__init__()
self.anchor_sampler = anchor_sampler
self.batch_sampler = batch_sampler
self.target_size = target_size
self.sampling_prob = sampling_prob
self.min_size = min_size
self.avoid_no_bbox = avoid_no_bbox
self.das_anchor_scales = np.array(das_anchor_scales)
def __call__(self, sample):
"""
Crop the image and modify bounding box.
Operators:
1. Scale the image width and height.
2. Crop the image according to a radom sample.
3. Rescale the bounding box.
4. Determine if the new bbox is satisfied in the new image.
Returns:
sample: the image, bounding box are replaced.
"""
assert 'image' in sample, "image data not found"
im = sample['image']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
image_width = sample['w']
image_height = sample['h']
gt_score = None
if 'gt_score' in sample:
gt_score = sample['gt_score']
sampled_bbox = []
gt_bbox = gt_bbox.tolist()
prob = np.random.uniform(0., 1.)
if prob > self.sampling_prob: # anchor sampling
assert self.anchor_sampler
for sampler in self.anchor_sampler:
found = 0
for i in range(sampler[1]):
if found >= sampler[0]:
break
sample_bbox = data_anchor_sampling(
gt_bbox, image_width, image_height,
self.das_anchor_scales, self.target_size)
if sample_bbox == 0:
break
if satisfy_sample_constraint_coverage(sampler, sample_bbox,
gt_bbox):
sampled_bbox.append(sample_bbox)
found = found + 1
im = np.array(im)
while sampled_bbox:
idx = int(np.random.uniform(0, len(sampled_bbox)))
sample_bbox = sampled_bbox.pop(idx)
if 'gt_keypoint' in sample.keys():
keypoints = (sample['gt_keypoint'],
sample['keypoint_ignore'])
crop_bbox, crop_class, crop_score, gt_keypoints = \
filter_and_process(sample_bbox, gt_bbox, gt_class,
scores=gt_score,
keypoints=keypoints)
else:
crop_bbox, crop_class, crop_score = filter_and_process(
sample_bbox, gt_bbox, gt_class, scores=gt_score)
crop_bbox, crop_class, crop_score = bbox_area_sampling(
crop_bbox, crop_class, crop_score, self.target_size,
self.min_size)
if self.avoid_no_bbox:
if len(crop_bbox) < 1:
continue
im = crop_image_sampling(im, sample_bbox, image_width,
image_height, self.target_size)
sample['image'] = im
sample['gt_bbox'] = crop_bbox
sample['gt_class'] = crop_class
sample['gt_score'] = crop_score
if 'gt_keypoint' in sample.keys():
sample['gt_keypoint'] = gt_keypoints[0]
sample['keypoint_ignore'] = gt_keypoints[1]
return sample
return sample
else:
for sampler in self.batch_sampler:
found = 0
for i in range(sampler[1]):
if found >= sampler[0]:
break
sample_bbox = generate_sample_bbox_square(
sampler, image_width, image_height)
if satisfy_sample_constraint_coverage(sampler, sample_bbox,
gt_bbox):
sampled_bbox.append(sample_bbox)
found = found + 1
im = np.array(im)
while sampled_bbox:
idx = int(np.random.uniform(0, len(sampled_bbox)))
sample_bbox = sampled_bbox.pop(idx)
sample_bbox = clip_bbox(sample_bbox)
if 'gt_keypoint' in sample.keys():
keypoints = (sample['gt_keypoint'],
sample['keypoint_ignore'])
crop_bbox, crop_class, crop_score, gt_keypoints = \
filter_and_process(sample_bbox, gt_bbox, gt_class,
scores=gt_score,
keypoints=keypoints)
else:
crop_bbox, crop_class, crop_score = filter_and_process(
sample_bbox, gt_bbox, gt_class, scores=gt_score)
# sampling bbox according the bbox area
crop_bbox, crop_class, crop_score = bbox_area_sampling(
crop_bbox, crop_class, crop_score, self.target_size,
self.min_size)
if self.avoid_no_bbox:
if len(crop_bbox) < 1:
continue
xmin = int(sample_bbox[0] * image_width)
xmax = int(sample_bbox[2] * image_width)
ymin = int(sample_bbox[1] * image_height)
ymax = int(sample_bbox[3] * image_height)
im = im[ymin:ymax, xmin:xmax]
sample['image'] = im
sample['gt_bbox'] = crop_bbox
sample['gt_class'] = crop_class
sample['gt_score'] = crop_score
if 'gt_keypoint' in sample.keys():
sample['gt_keypoint'] = gt_keypoints[0]
sample['keypoint_ignore'] = gt_keypoints[1]
return sample
return sample
@register_op
class NormalizeBox(BaseOperator):
"""Transform the bounding box's coornidates to [0,1]."""
def __init__(self):
super(NormalizeBox, self).__init__()
def __call__(self, sample):
gt_bbox = sample['gt_bbox']
width = sample['w']
height = sample['h']
for i in range(gt_bbox.shape[0]):
gt_bbox[i][0] = gt_bbox[i][0] / width
gt_bbox[i][1] = gt_bbox[i][1] / height
gt_bbox[i][2] = gt_bbox[i][2] / width
gt_bbox[i][3] = gt_bbox[i][3] / height
sample['gt_bbox'] = gt_bbox
if 'gt_keypoint' in sample.keys():
gt_keypoint = sample['gt_keypoint']
for i in range(gt_keypoint.shape[1]):
if i % 2:
gt_keypoint[:, i] = gt_keypoint[:, i] / height
else:
gt_keypoint[:, i] = gt_keypoint[:, i] / width
sample['gt_keypoint'] = gt_keypoint
return sample
@register_op
class Permute(BaseOperator):
def __init__(self, to_bgr=True, channel_first=True):
"""
Change the channel.
Args:
to_bgr (bool): confirm whether to convert RGB to BGR
channel_first (bool): confirm whether to change channel
"""
super(Permute, self).__init__()
self.to_bgr = to_bgr
self.channel_first = channel_first
if not (isinstance(self.to_bgr, bool) and
isinstance(self.channel_first, bool)):
raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, sample, context=None):
samples = sample
batch_input = True
if not isinstance(samples, Sequence):
batch_input = False
samples = [samples]
for sample in samples:
assert 'image' in sample, "image data not found"
for k in sample.keys():
# hard code
if k.startswith('image'):
im = sample[k]
if self.channel_first:
im = np.swapaxes(im, 1, 2)
im = np.swapaxes(im, 1, 0)
if self.to_bgr:
im = im[[2, 1, 0], :, :]
sample[k] = im
if not batch_input:
samples = samples[0]
return samples
@register_op
class MixupImage(BaseOperator):
def __init__(self, alpha=1.5, beta=1.5):
""" Mixup image and gt_bbbox/gt_score
Args:
alpha (float): alpha parameter of beta distribute
beta (float): beta parameter of beta distribute
"""
super(MixupImage, self).__init__()
self.alpha = alpha
self.beta = beta
if self.alpha <= 0.0:
raise ValueError("alpha shold be positive in {}".format(self))
if self.beta <= 0.0:
raise ValueError("beta shold be positive in {}".format(self))
def _mixup_img(self, img1, img2, factor):
h = max(img1.shape[0], img2.shape[0])
w = max(img1.shape[1], img2.shape[1])
img = np.zeros((h, w, img1.shape[2]), 'float32')
img[:img1.shape[0], :img1.shape[1], :] = \
img1.astype('float32') * factor
img[:img2.shape[0], :img2.shape[1], :] += \
img2.astype('float32') * (1.0 - factor)
return img.astype('uint8')
def __call__(self, sample, context=None):
if 'mixup' not in sample:
return sample
factor = np.random.beta(self.alpha, self.beta)
factor = max(0.0, min(1.0, factor))
if factor >= 1.0:
sample.pop('mixup')
return sample
if factor <= 0.0:
return sample['mixup']
im = self._mixup_img(sample['image'], sample['mixup']['image'], factor)
gt_bbox1 = sample['gt_bbox']
gt_bbox2 = sample['mixup']['gt_bbox']
gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
gt_class1 = sample['gt_class']
gt_class2 = sample['mixup']['gt_class']
gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
gt_score1 = sample['gt_score']
gt_score2 = sample['mixup']['gt_score']
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
is_crowd1 = sample['is_crowd']
is_crowd2 = sample['mixup']['is_crowd']
is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
sample['image'] = im
sample['gt_bbox'] = gt_bbox
sample['gt_score'] = gt_score
sample['gt_class'] = gt_class
sample['is_crowd'] = is_crowd
sample['h'] = im.shape[0]
sample['w'] = im.shape[1]
sample.pop('mixup')
return sample
@register_op
class CutmixImage(BaseOperator):
def __init__(self, alpha=1.5, beta=1.5):
"""
CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://https://arxiv.org/abs/1905.04899
Cutmix image and gt_bbbox/gt_score
Args:
alpha (float): alpha parameter of beta distribute
beta (float): beta parameter of beta distribute
"""
super(CutmixImage, self).__init__()
self.alpha = alpha
self.beta = beta
if self.alpha <= 0.0:
raise ValueError("alpha shold be positive in {}".format(self))
if self.beta <= 0.0:
raise ValueError("beta shold be positive in {}".format(self))
def _rand_bbox(self, img1, img2, factor):
""" _rand_bbox """
h = max(img1.shape[0], img2.shape[0])
w = max(img1.shape[1], img2.shape[1])
cut_rat = np.sqrt(1. - factor)
cut_w = np.int(w * cut_rat)
cut_h = np.int(h * cut_rat)
# uniform
cx = np.random.randint(w)
cy = np.random.randint(h)
bbx1 = np.clip(cx - cut_w // 2, 0, w)
bby1 = np.clip(cy - cut_h // 2, 0, h)
bbx2 = np.clip(cx + cut_w // 2, 0, w)
bby2 = np.clip(cy + cut_h // 2, 0, h)
img_1 = np.zeros((h, w, img1.shape[2]), 'float32')
img_1[:img1.shape[0], :img1.shape[1], :] = \
img1.astype('float32')
img_2 = np.zeros((h, w, img2.shape[2]), 'float32')
img_2[:img2.shape[0], :img2.shape[1], :] = \
img2.astype('float32')
img_1[bby1:bby2, bbx1:bbx2, :] = img2[bby1:bby2, bbx1:bbx2, :]
return img_1
def __call__(self, sample, context=None):
if 'cutmix' not in sample:
return sample
factor = np.random.beta(self.alpha, self.beta)
factor = max(0.0, min(1.0, factor))
if factor >= 1.0:
sample.pop('cutmix')
return sample
if factor <= 0.0:
return sample['cutmix']
img1 = sample['image']
img2 = sample['cutmix']['image']
img = self._rand_bbox(img1, img2, factor)
gt_bbox1 = sample['gt_bbox']
gt_bbox2 = sample['cutmix']['gt_bbox']
gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
gt_class1 = sample['gt_class']
gt_class2 = sample['cutmix']['gt_class']
gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
gt_score1 = sample['gt_score']
gt_score2 = sample['cutmix']['gt_score']
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
sample['image'] = img
sample['gt_bbox'] = gt_bbox
sample['gt_score'] = gt_score
sample['gt_class'] = gt_class
sample['h'] = img.shape[0]
sample['w'] = img.shape[1]
sample.pop('cutmix')
return sample
@register_op
class RandomInterpImage(BaseOperator):
def __init__(self, target_size=0, max_size=0):
"""
Random reisze image by multiply interpolate method.
Args:
target_size (int): the taregt size of image's short side
max_size (int): the max size of image
"""
super(RandomInterpImage, self).__init__()
self.target_size = target_size
self.max_size = max_size
if not (isinstance(self.target_size, int) and
isinstance(self.max_size, int)):
raise TypeError('{}: input type is invalid.'.format(self))
interps = [
cv2.INTER_NEAREST,
cv2.INTER_LINEAR,
cv2.INTER_AREA,
cv2.INTER_CUBIC,
cv2.INTER_LANCZOS4,
]
self.resizers = []
for interp in interps:
self.resizers.append(ResizeImage(target_size, max_size, interp))
def __call__(self, sample, context=None):
"""Resise the image numpy by random resizer."""
resizer = random.choice(self.resizers)
return resizer(sample, context)
@register_op
class Resize(BaseOperator):
"""Resize image and bbox.
Args:
target_dim (int or list): target size, can be a single number or a list
(for random shape).
interp (int or str): interpolation method, can be an integer or
'random' (for randomized interpolation).
default to `cv2.INTER_LINEAR`.
"""
def __init__(self, target_dim=[], interp=cv2.INTER_LINEAR):
super(Resize, self).__init__()
self.target_dim = target_dim
self.interp = interp # 'random' for yolov3
def __call__(self, sample, context=None):
w = sample['w']
h = sample['h']
interp = self.interp
if interp == 'random':
interp = np.random.choice(range(5))
if isinstance(self.target_dim, Sequence):
dim = np.random.choice(self.target_dim)
else:
dim = self.target_dim
resize_w = resize_h = dim
scale_x = dim / w
scale_y = dim / h
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
scale_array = np.array([scale_x, scale_y] * 2, dtype=np.float32)
sample['gt_bbox'] = np.clip(sample['gt_bbox'] * scale_array, 0,
dim - 1)
sample['scale_factor'] = [scale_x, scale_y] * 2
sample['h'] = resize_h
sample['w'] = resize_w
sample['image'] = cv2.resize(
sample['image'], (resize_w, resize_h), interpolation=interp)
return sample
@register_op
class ColorDistort(BaseOperator):
"""Random color distortion.
Args:
hue (list): hue settings.
in [lower, upper, probability] format.
saturation (list): saturation settings.
in [lower, upper, probability] format.
contrast (list): contrast settings.
in [lower, upper, probability] format.
brightness (list): brightness settings.
in [lower, upper, probability] format.
random_apply (bool): whether to apply in random (yolo) or fixed (SSD)
order.
hsv_format (bool): whether to convert color from BGR to HSV
random_channel (bool): whether to swap channels randomly
hue (list): hue settings. in [lower, upper, probability] format.
saturation (list): saturation settings. in [lower, upper, probability] format.
contrast (list): contrast settings. in [lower, upper, probability] format.
brightness (list): brightness settings. in [lower, upper, probability] format.
random_apply (bool): whether to apply in random (yolo) or fixed (SSD)
order.
count (int): the number of doing distrot
random_channel (bool): whether to swap channels randomly
"""
def __init__(self,
......@@ -1407,15 +344,15 @@ class ColorDistort(BaseOperator):
contrast=[0.5, 1.5, 0.5],
brightness=[0.5, 1.5, 0.5],
random_apply=True,
hsv_format=False,
count=4,
random_channel=False):
super(ColorDistort, self).__init__()
super(RandomDistort, self).__init__()
self.hue = hue
self.saturation = saturation
self.contrast = contrast
self.brightness = brightness
self.random_apply = random_apply
self.hsv_format = hsv_format
self.count = count
self.random_channel = random_channel
def apply_hue(self, img):
......@@ -1424,13 +361,7 @@ class ColorDistort(BaseOperator):
return img
img = img.astype(np.float32)
if self.hsv_format:
img[..., 0] += random.uniform(low, high)
img[..., 0][img[..., 0] > 360] -= 360
img[..., 0][img[..., 0] < 0] += 360
return img
# XXX works, but result differ from HSV version
# it works, but result differ from HSV version
delta = np.random.uniform(low, high)
u = np.cos(delta * np.pi)
w = np.sin(delta * np.pi)
......@@ -1449,9 +380,7 @@ class ColorDistort(BaseOperator):
return img
delta = np.random.uniform(low, high)
img = img.astype(np.float32)
if self.hsv_format:
img[..., 1] *= delta
return img
# it works, but result differ from HSV version
gray = img * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
gray = gray.sum(axis=2, keepdims=True)
gray *= (1.0 - delta)
......@@ -1464,7 +393,6 @@ class ColorDistort(BaseOperator):
if np.random.uniform(0., 1.) < prob:
return img
delta = np.random.uniform(low, high)
img = img.astype(np.float32)
img *= delta
return img
......@@ -1474,139 +402,446 @@ class ColorDistort(BaseOperator):
if np.random.uniform(0., 1.) < prob:
return img
delta = np.random.uniform(low, high)
img = img.astype(np.float32)
img += delta
return img
def __call__(self, sample, context=None):
def apply(self, sample, context=None):
img = sample['image']
if self.random_apply:
functions = [
self.apply_brightness,
self.apply_contrast,
self.apply_saturation,
self.apply_hue,
self.apply_brightness, self.apply_contrast,
self.apply_saturation, self.apply_hue
]
distortions = np.random.permutation(functions)
distortions = np.random.permutation(functions)[:self.count]
for func in distortions:
img = func(img)
sample['image'] = img
return sample
img = self.apply_brightness(img)
img = self.apply_brightness(img)
mode = np.random.randint(0, 2)
if mode:
img = self.apply_contrast(img)
img = self.apply_saturation(img)
img = self.apply_hue(img)
if not mode:
img = self.apply_contrast(img)
if self.random_channel:
if np.random.randint(0, 2):
img = img[..., np.random.permutation(3)]
sample['image'] = img
return sample
@register_op
class AutoAugment(BaseOperator):
def __init__(self, autoaug_type="v1"):
"""
Args:
autoaug_type (str): autoaug type, support v0, v1, v2, v3, test
"""
super(AutoAugment, self).__init__()
self.autoaug_type = autoaug_type
def apply(self, sample, context=None):
"""
Learning Data Augmentation Strategies for Object Detection, see https://arxiv.org/abs/1906.11172
"""
im = sample['image']
gt_bbox = sample['gt_bbox']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image is not a numpy array.".format(self))
if len(im.shape) != 3:
raise ImageError("{}: image is not 3-dimensional.".format(self))
if len(gt_bbox) == 0:
return sample
height, width, _ = im.shape
norm_gt_bbox = np.ones_like(gt_bbox, dtype=np.float32)
norm_gt_bbox[:, 0] = gt_bbox[:, 1] / float(height)
norm_gt_bbox[:, 1] = gt_bbox[:, 0] / float(width)
norm_gt_bbox[:, 2] = gt_bbox[:, 3] / float(height)
norm_gt_bbox[:, 3] = gt_bbox[:, 2] / float(width)
from .autoaugment_utils import distort_image_with_autoaugment
im, norm_gt_bbox = distort_image_with_autoaugment(im, norm_gt_bbox,
self.autoaug_type)
gt_bbox[:, 0] = norm_gt_bbox[:, 1] * float(width)
gt_bbox[:, 1] = norm_gt_bbox[:, 0] * float(height)
gt_bbox[:, 2] = norm_gt_bbox[:, 3] * float(width)
gt_bbox[:, 3] = norm_gt_bbox[:, 2] * float(height)
sample['image'] = im
sample['gt_bbox'] = gt_bbox
return sample
@register_op
class RandomFlip(BaseOperator):
def __init__(self, prob=0.5):
"""
Args:
prob (float): the probability of flipping image
"""
super(RandomFlip, self).__init__()
self.prob = prob
if not (isinstance(self.prob, float)):
raise TypeError("{}: input type is invalid.".format(self))
def apply_segm(self, segms, height, width):
def _flip_poly(poly, width):
flipped_poly = np.array(poly)
flipped_poly[0::2] = width - np.array(poly[0::2])
return flipped_poly.tolist()
def _flip_rle(rle, height, width):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, height, width)
mask = mask_util.decode(rle)
mask = mask[:, ::-1]
rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
return rle
flipped_segms = []
for segm in segms:
if is_poly(segm):
# Polygon format
flipped_segms.append([_flip_poly(poly, width) for poly in segm])
else:
# RLE format
import pycocotools.mask as mask_util
flipped_segms.append(_flip_rle(segm, height, width))
return flipped_segms
def apply_keypoint(self, gt_keypoint, width):
for i in range(gt_keypoint.shape[1]):
if i % 2 == 0:
old_x = gt_keypoint[:, i].copy()
gt_keypoint[:, i] = width - old_x
return gt_keypoint
def apply_image(self, image):
return image[:, ::-1, :]
def apply_bbox(self, bbox, width):
oldx1 = bbox[:, 0].copy()
oldx2 = bbox[:, 2].copy()
bbox[:, 0] = width - oldx2
bbox[:, 2] = width - oldx1
return bbox
def apply(self, sample, context=None):
"""Filp the image and bounding box.
Operators:
1. Flip the image numpy.
2. Transform the bboxes' x coordinates.
(Must judge whether the coordinates are normalized!)
3. Transform the segmentations' x coordinates.
(Must judge whether the coordinates are normalized!)
Output:
sample: the image, bounding box and segmentation part
in sample are flipped.
"""
if np.random.uniform(0, 1) < self.prob:
im = sample['image']
height, width = im.shape[:2]
im = self.apply_image(im)
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], width)
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], height,
width)
if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
sample['gt_keypoint'] = self.apply_keypoint(
sample['gt_keypoint'], width)
if 'semantic' in sample and sample['semantic']:
sample['semantic'] = sample['semantic'][:, ::-1]
if 'gt_segm' in sample and sample['gt_segm'].any():
sample['gt_segm'] = sample['gt_segm'][:, :, ::-1]
sample['flipped'] = True
sample['image'] = im
return sample
@register_op
class Resize(BaseOperator):
def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
"""
Resize image to target size. if keep_ratio is True,
resize the image's long side to the maximum of target_size
if keep_ratio is False, resize the image to target size(h, w)
Args:
target_size (int|list): image target size
keep_ratio (bool): whether keep_ratio or not, default true
interp (int): the interpolation method
"""
super(Resize, self).__init__()
self.keep_ratio = keep_ratio
self.interp = interp
if not isinstance(target_size, (Integral, Sequence)):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
format(type(target_size)))
if isinstance(target_size, Integral):
target_size = [target_size, target_size]
self.target_size = target_size
def apply_image(self, image, scale):
im_scale_x, im_scale_y = scale
return cv2.resize(
image,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
def apply_bbox(self, bbox, scale, size):
im_scale_x, im_scale_y = scale
resize_w, resize_h = size
bbox[:, 0::2] *= im_scale_x
bbox[:, 1::2] *= im_scale_y
bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, resize_w)
bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, resize_h)
return bbox
def apply_segm(self, segms, im_size, scale):
def _resize_poly(poly, im_scale_x, im_scale_y):
resized_poly = np.array(poly)
resized_poly[0::2] *= im_scale_x
resized_poly[1::2] *= im_scale_y
return resized_poly.tolist()
def _resize_rle(rle, im_h, im_w, im_scale_x, im_scale_y):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, im_h, im_w)
mask = mask_util.decode(rle)
mask = cv2.resize(
image,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
return rle
im_h, im_w = im_size
im_scale_x, im_scale_y = scale
resized_segms = []
for segm in segms:
if is_poly(segm):
# Polygon format
resized_segms.append([
_resize_poly(poly, im_scale_x, im_scale_y) for poly in segm
])
else:
# RLE format
import pycocotools.mask as mask_util
resized_segms.append(
_resize_rle(segm, im_h, im_w, im_scale_x, im_scale_y))
return resized_segms
def apply(self, sample, context=None):
""" Resize the image numpy.
"""
im = sample['image']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.".format(self))
if len(im.shape) != 3:
raise ImageError('{}: image is not 3-dimensional.'.format(self))
# apply image
im_shape = im.shape
if self.keep_ratio:
if np.random.randint(0, 2):
img = self.apply_contrast(img)
if self.hsv_format:
img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
img = self.apply_saturation(img)
img = self.apply_hue(img)
if self.hsv_format:
img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB)
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
target_size_min = np.min(self.target_size)
target_size_max = np.max(self.target_size)
im_scale = min(target_size_min / im_size_min,
target_size_max / im_size_max)
resize_h = im_scale * float(im_shape[0])
resize_w = im_scale * float(im_shape[1])
im_scale_x = im_scale
im_scale_y = im_scale
else:
if self.hsv_format:
img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
img = self.apply_saturation(img)
img = self.apply_hue(img)
if self.hsv_format:
img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB)
img = self.apply_contrast(img)
resize_h, resize_w = self.target_size
im_scale_y = resize_h / im_shape[0]
im_scale_x = resize_w / im_shape[1]
im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
sample['image'] = im
sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
if 'scale_factor' in sample:
scale_factor = sample['scale_factor']
sample['scale_factor'] = np.asarray(
[scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
dtype=np.float32)
else:
sample['scale_factor'] = np.asarray(
[im_scale_y, im_scale_x], dtype=np.float32)
# apply bbox
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'],
[im_scale_x, im_scale_y],
[resize_w, resize_h])
# apply polygon
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], im_shape[:2],
[im_scale_x, im_scale_y])
# apply semantic
if 'semantic' in sample and sample['semantic']:
semantic = sample['semantic']
semantic = cv2.resize(
semantic.astype('float32'),
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
semantic = np.asarray(semantic).astype('int32')
semantic = np.expand_dims(semantic, 0)
sample['semantic'] = semantic
# apply gt_segm
if 'gt_segm' in sample and len(sample['gt_segm']) > 0:
masks = [
cv2.resize(
gt_segm,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=cv2.INTER_NEAREST)
for gt_segm in sample['gt_segm']
]
sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
if self.random_channel:
if np.random.randint(0, 2):
img = img[..., np.random.permutation(3)]
sample['image'] = img
return sample
@register_op
class CornerRandColor(ColorDistort):
"""Random color for CornerNet series models.
class MultiscaleTestResize(BaseOperator):
def __init__(self,
origin_target_size=[800, 1333],
target_size=[],
interp=cv2.INTER_LINEAR,
use_flip=True):
"""
Rescale image to the each size in target size, and capped at max_size.
Args:
saturation (float): saturation settings.
contrast (float): contrast settings.
brightness (float): brightness settings.
is_scale (bool): whether to scale the input image.
origin_target_size (list): origin target size of image
target_size (list): A list of target sizes of image.
interp (int): the interpolation method.
use_flip (bool): whether use flip augmentation.
"""
super(MultiscaleTestResize, self).__init__()
self.interp = interp
self.use_flip = use_flip
def __init__(self,
saturation=0.4,
contrast=0.4,
brightness=0.4,
is_scale=True):
super(CornerRandColor, self).__init__(
saturation=saturation, contrast=contrast, brightness=brightness)
self.is_scale = is_scale
if not isinstance(target_size, Sequence):
raise TypeError(
"Type of target_size is invalid. Must be List or Tuple, now is {}".
format(type(target_size)))
self.target_size = target_size
def apply_saturation(self, img, img_gray):
alpha = 1. + np.random.uniform(
low=-self.saturation, high=self.saturation)
self._blend(alpha, img, img_gray[:, :, None])
return img
if not isinstance(origin_target_size, Sequence):
raise TypeError(
"Type of origin_target_size is invalid. Must be List or Tuple, now is {}".
format(type(origin_target_size)))
def apply_contrast(self, img, img_gray):
alpha = 1. + np.random.uniform(low=-self.contrast, high=self.contrast)
img_mean = img_gray.mean()
self._blend(alpha, img, img_mean)
return img
self.origin_target_size = origin_target_size
def apply_brightness(self, img, img_gray):
alpha = 1 + np.random.uniform(
low=-self.brightness, high=self.brightness)
img *= alpha
return img
def apply(self, sample, context=None):
""" Resize the image numpy for multi-scale test.
"""
samples = []
resizer = Resize(
self.origin_target_size, keep_ratio=True, interp=self.interp)
samples.append(resizer(sample.copy(), context))
if self.use_flip:
flipper = RandomFlip(1.1)
samples.append(flipper(sample.copy(), context=context))
def _blend(self, alpha, img, img_mean):
img *= alpha
img_mean *= (1 - alpha)
img += img_mean
for size in self.target_size:
resizer = Resize(size, keep_ratio=True, interp=self.interp)
samples.append(resizer(sample.copy(), context))
def __call__(self, sample, context=None):
img = sample['image']
if self.is_scale:
img = img.astype(np.float32, copy=False)
img /= 255.
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
functions = [
self.apply_brightness,
self.apply_contrast,
self.apply_saturation,
]
distortions = np.random.permutation(functions)
for func in distortions:
img = func(img, img_gray)
sample['image'] = img
return sample
return samples
@register_op
class NormalizePermute(BaseOperator):
"""Normalize and permute channel order.
class RandomResize(BaseOperator):
def __init__(self,
target_size,
keep_ratio=True,
interp=cv2.INTER_LINEAR,
random_size=True,
random_interp=False):
"""
Resize image to target size randomly. random target_size and interpolation method
Args:
mean (list): mean values in RGB order.
std (list): std values in RGB order.
target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
keep_ratio (bool): whether keep_raio or not, default true
interp (int): the interpolation method
random_size (bool): whether random select target size of image
random_interp (bool): whether random select interpolation method
"""
super(RandomResize, self).__init__()
self.keep_ratio = keep_ratio
self.interp = interp
self.interps = [
cv2.INTER_NEAREST,
cv2.INTER_LINEAR,
cv2.INTER_AREA,
cv2.INTER_CUBIC,
cv2.INTER_LANCZOS4,
]
assert isinstance(target_size, (
Integral, Sequence)), "target_size must be Integer, List or Tuple"
if random_size and not isinstance(target_size, Sequence):
raise TypeError(
"Type of target_size is invalid when random_size is True. Must be List or Tuple, now is {}".
format(type(target_size)))
self.target_size = target_size
self.random_size = random_size
self.random_interp = random_interp
def __init__(self,
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.120, 57.375]):
super(NormalizePermute, self).__init__()
self.mean = mean
self.std = std
def apply(self, sample, context=None):
""" Resize the image numpy.
"""
if self.random_size:
target_size = random.choice(self.target_size)
else:
target_size = self.target_size
def __call__(self, sample, context=None):
img = sample['image']
img = img.astype(np.float32)
if self.random_interp:
interp = random.choice(self.interps)
else:
interp = self.interp
img = img.transpose((2, 0, 1))
mean = np.array(self.mean, dtype=np.float32)
std = np.array(self.std, dtype=np.float32)
invstd = 1. / std
for v, m, s in zip(img, mean, invstd):
v.__isub__(m).__imul__(s)
sample['image'] = img
return sample
resizer = Resize(target_size, self.keep_ratio, interp)
return resizer(sample, context=context)
@register_op
......@@ -1616,14 +851,9 @@ class RandomExpand(BaseOperator):
ratio (float): maximum expansion ratio.
prob (float): probability to expand.
fill_value (list): color value used to fill the canvas. in RGB order.
is_mask_expand(bool): whether expand the segmentation.
"""
def __init__(self,
ratio=4.,
prob=0.5,
fill_value=(127.5, ) * 3,
is_mask_expand=False):
def __init__(self, ratio=4., prob=0.5, fill_value=(127.5, 127.5, 127.5)):
super(RandomExpand, self).__init__()
assert ratio > 1.01, "expand ratio must be larger than 1.01"
self.ratio = ratio
......@@ -1635,68 +865,273 @@ class RandomExpand(BaseOperator):
if not isinstance(fill_value, tuple):
fill_value = tuple(fill_value)
self.fill_value = fill_value
self.is_mask_expand = is_mask_expand
def expand_segms(self, segms, x, y, height, width, ratio):
def _expand_poly(poly, x, y):
expanded_poly = np.array(poly)
expanded_poly[0::2] += x
expanded_poly[1::2] += y
return expanded_poly.tolist()
def apply(self, sample, context=None):
if np.random.uniform(0., 1.) < self.prob:
return sample
def _expand_rle(rle, x, y, height, width, ratio):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, height, width)
mask = mask_util.decode(rle)
expanded_mask = np.full((int(height * ratio), int(width * ratio)),
0).astype(mask.dtype)
expanded_mask[y:y + height, x:x + width] = mask
rle = mask_util.encode(
np.array(
expanded_mask, order='F', dtype=np.uint8))
return rle
im = sample['image']
height, width = im.shape[:2]
ratio = np.random.uniform(1., self.ratio)
h = int(height * ratio)
w = int(width * ratio)
if not h > height or not w > width:
return sample
y = np.random.randint(0, h - height)
x = np.random.randint(0, w - width)
offsets, size = [x, y], [h, w]
pad = Pad(size,
pad_mode=-1,
offsets=offsets,
fill_value=self.fill_value)
return pad(sample, context=context)
@register_op
class CropWithSampling(BaseOperator):
def __init__(self, batch_sampler, satisfy_all=False, avoid_no_bbox=True):
"""
Args:
batch_sampler (list): Multiple sets of different
parameters for cropping.
satisfy_all (bool): whether all boxes must satisfy.
e.g.[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
[max sample, max trial, min scale, max scale,
min aspect ratio, max aspect ratio,
min overlap, max overlap]
avoid_no_bbox (bool): whether to to avoid the
situation where the box does not appear.
"""
super(CropWithSampling, self).__init__()
self.batch_sampler = batch_sampler
self.satisfy_all = satisfy_all
self.avoid_no_bbox = avoid_no_bbox
def apply(self, sample, context):
"""
Crop the image and modify bounding box.
Operators:
1. Scale the image width and height.
2. Crop the image according to a radom sample.
3. Rescale the bounding box.
4. Determine if the new bbox is satisfied in the new image.
Returns:
sample: the image, bounding box are replaced.
"""
assert 'image' in sample, "image data not found"
im = sample['image']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
im_height, im_width = im.shape[:2]
gt_score = None
if 'gt_score' in sample:
gt_score = sample['gt_score']
sampled_bbox = []
gt_bbox = gt_bbox.tolist()
for sampler in self.batch_sampler:
found = 0
for i in range(sampler[1]):
if found >= sampler[0]:
break
sample_bbox = generate_sample_bbox(sampler)
if satisfy_sample_constraint(sampler, sample_bbox, gt_bbox,
self.satisfy_all):
sampled_bbox.append(sample_bbox)
found = found + 1
im = np.array(im)
while sampled_bbox:
idx = int(np.random.uniform(0, len(sampled_bbox)))
sample_bbox = sampled_bbox.pop(idx)
sample_bbox = clip_bbox(sample_bbox)
crop_bbox, crop_class, crop_score = \
filter_and_process(sample_bbox, gt_bbox, gt_class, scores=gt_score)
if self.avoid_no_bbox:
if len(crop_bbox) < 1:
continue
xmin = int(sample_bbox[0] * im_width)
xmax = int(sample_bbox[2] * im_width)
ymin = int(sample_bbox[1] * im_height)
ymax = int(sample_bbox[3] * im_height)
im = im[ymin:ymax, xmin:xmax]
sample['image'] = im
sample['gt_bbox'] = crop_bbox
sample['gt_class'] = crop_class
sample['gt_score'] = crop_score
return sample
return sample
@register_op
class CropWithDataAchorSampling(BaseOperator):
def __init__(self,
batch_sampler,
anchor_sampler=None,
target_size=None,
das_anchor_scales=[16, 32, 64, 128],
sampling_prob=0.5,
min_size=8.,
avoid_no_bbox=True):
"""
Args:
anchor_sampler (list): anchor_sampling sets of different
parameters for cropping.
batch_sampler (list): Multiple sets of different
parameters for cropping.
e.g.[[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]]
[[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]]
[max sample, max trial, min scale, max scale,
min aspect ratio, max aspect ratio,
min overlap, max overlap, min coverage, max coverage]
target_size (bool): target image size.
das_anchor_scales (list[float]): a list of anchor scales in data
anchor smapling.
min_size (float): minimum size of sampled bbox.
avoid_no_bbox (bool): whether to to avoid the
situation where the box does not appear.
"""
super(CropWithDataAchorSampling, self).__init__()
self.anchor_sampler = anchor_sampler
self.batch_sampler = batch_sampler
self.target_size = target_size
self.sampling_prob = sampling_prob
self.min_size = min_size
self.avoid_no_bbox = avoid_no_bbox
self.das_anchor_scales = np.array(das_anchor_scales)
def apply(self, sample, context):
"""
Crop the image and modify bounding box.
Operators:
1. Scale the image width and height.
2. Crop the image according to a radom sample.
3. Rescale the bounding box.
4. Determine if the new bbox is satisfied in the new image.
Returns:
sample: the image, bounding box are replaced.
"""
assert 'image' in sample, "image data not found"
im = sample['image']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
image_height, image_width = im.shape[:2]
gt_score = None
if 'gt_score' in sample:
gt_score = sample['gt_score']
sampled_bbox = []
gt_bbox = gt_bbox.tolist()
expanded_segms = []
for segm in segms:
if is_poly(segm):
# Polygon format
expanded_segms.append(
[_expand_poly(poly, x, y) for poly in segm])
prob = np.random.uniform(0., 1.)
if prob > self.sampling_prob: # anchor sampling
assert self.anchor_sampler
for sampler in self.anchor_sampler:
found = 0
for i in range(sampler[1]):
if found >= sampler[0]:
break
sample_bbox = data_anchor_sampling(
gt_bbox, image_width, image_height,
self.das_anchor_scales, self.target_size)
if sample_bbox == 0:
break
if satisfy_sample_constraint_coverage(sampler, sample_bbox,
gt_bbox):
sampled_bbox.append(sample_bbox)
found = found + 1
im = np.array(im)
while sampled_bbox:
idx = int(np.random.uniform(0, len(sampled_bbox)))
sample_bbox = sampled_bbox.pop(idx)
if 'gt_keypoint' in sample.keys():
keypoints = (sample['gt_keypoint'],
sample['keypoint_ignore'])
crop_bbox, crop_class, crop_score, gt_keypoints = \
filter_and_process(sample_bbox, gt_bbox, gt_class,
scores=gt_score,
keypoints=keypoints)
else:
# RLE format
import pycocotools.mask as mask_util
expanded_segms.append(
_expand_rle(segm, x, y, height, width, ratio))
return expanded_segms
crop_bbox, crop_class, crop_score = filter_and_process(
sample_bbox, gt_bbox, gt_class, scores=gt_score)
crop_bbox, crop_class, crop_score = bbox_area_sampling(
crop_bbox, crop_class, crop_score, self.target_size,
self.min_size)
def __call__(self, sample, context=None):
if np.random.uniform(0., 1.) < self.prob:
if self.avoid_no_bbox:
if len(crop_bbox) < 1:
continue
im = crop_image_sampling(im, sample_bbox, image_width,
image_height, self.target_size)
sample['image'] = im
sample['gt_bbox'] = crop_bbox
sample['gt_class'] = crop_class
sample['gt_score'] = crop_score
if 'gt_keypoint' in sample.keys():
sample['gt_keypoint'] = gt_keypoints[0]
sample['keypoint_ignore'] = gt_keypoints[1]
return sample
return sample
img = sample['image']
height = int(sample['h'])
width = int(sample['w'])
else:
for sampler in self.batch_sampler:
found = 0
for i in range(sampler[1]):
if found >= sampler[0]:
break
sample_bbox = generate_sample_bbox_square(
sampler, image_width, image_height)
if satisfy_sample_constraint_coverage(sampler, sample_bbox,
gt_bbox):
sampled_bbox.append(sample_bbox)
found = found + 1
im = np.array(im)
while sampled_bbox:
idx = int(np.random.uniform(0, len(sampled_bbox)))
sample_bbox = sampled_bbox.pop(idx)
sample_bbox = clip_bbox(sample_bbox)
expand_ratio = np.random.uniform(1., self.ratio)
h = int(height * expand_ratio)
w = int(width * expand_ratio)
if not h > height or not w > width:
return sample
y = np.random.randint(0, h - height)
x = np.random.randint(0, w - width)
canvas = np.ones((h, w, 3), dtype=np.uint8)
canvas *= np.array(self.fill_value, dtype=np.uint8)
canvas[y:y + height, x:x + width, :] = img.astype(np.uint8)
if 'gt_keypoint' in sample.keys():
keypoints = (sample['gt_keypoint'],
sample['keypoint_ignore'])
crop_bbox, crop_class, crop_score, gt_keypoints = \
filter_and_process(sample_bbox, gt_bbox, gt_class,
scores=gt_score,
keypoints=keypoints)
else:
crop_bbox, crop_class, crop_score = filter_and_process(
sample_bbox, gt_bbox, gt_class, scores=gt_score)
# sampling bbox according the bbox area
crop_bbox, crop_class, crop_score = bbox_area_sampling(
crop_bbox, crop_class, crop_score, self.target_size,
self.min_size)
sample['h'] = h
sample['w'] = w
sample['image'] = canvas
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] += np.array([x, y] * 2, dtype=np.float32)
if self.is_mask_expand and 'gt_poly' in sample and len(sample[
'gt_poly']) > 0:
sample['gt_poly'] = self.expand_segms(sample['gt_poly'], x, y,
height, width, expand_ratio)
if self.avoid_no_bbox:
if len(crop_bbox) < 1:
continue
xmin = int(sample_bbox[0] * image_width)
xmax = int(sample_bbox[2] * image_width)
ymin = int(sample_bbox[1] * image_height)
ymax = int(sample_bbox[3] * image_height)
im = im[ymin:ymax, xmin:xmax]
sample['image'] = im
sample['gt_bbox'] = crop_bbox
sample['gt_class'] = crop_class
sample['gt_score'] = crop_score
if 'gt_keypoint' in sample.keys():
sample['gt_keypoint'] = gt_keypoints[0]
sample['keypoint_ignore'] = gt_keypoints[1]
return sample
return sample
......@@ -1801,12 +1236,11 @@ class RandomCrop(BaseOperator):
crop_segms.append(_crop_rle(segm, crop, height, width))
return crop_segms
def __call__(self, sample, context=None):
def apply(self, sample, context=None):
if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0:
return sample
h = sample['h']
w = sample['w']
h, w = sample['image'].shape[:2]
gt_bbox = sample['gt_bbox']
# NOTE Original method attempts to generate one candidate for each
......@@ -1889,12 +1323,17 @@ class RandomCrop(BaseOperator):
sample['gt_poly'] = valid_polys
else:
sample['gt_poly'] = crop_polys
if 'gt_segm' in sample:
sample['gt_segm'] = self._crop_segm(sample['gt_segm'],
crop_box)
sample['gt_segm'] = np.take(
sample['gt_segm'], valid_ids, axis=0)
sample['image'] = self._crop_image(sample['image'], crop_box)
sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0)
sample['gt_class'] = np.take(
sample['gt_class'], valid_ids, axis=0)
sample['w'] = crop_box[2] - crop_box[0]
sample['h'] = crop_box[3] - crop_box[1]
if 'gt_score' in sample:
sample['gt_score'] = np.take(
sample['gt_score'], valid_ids, axis=0)
......@@ -1936,494 +1375,313 @@ class RandomCrop(BaseOperator):
x1, y1, x2, y2 = crop
return img[y1:y2, x1:x2, :]
@register_op
class PadBox(BaseOperator):
def __init__(self, num_max_boxes=50):
"""
Pad zeros to bboxes if number of bboxes is less than num_max_boxes.
Args:
num_max_boxes (int): the max number of bboxes
"""
self.num_max_boxes = num_max_boxes
super(PadBox, self).__init__()
def __call__(self, sample):
assert 'gt_bbox' in sample
bbox = sample['gt_bbox']
gt_num = min(self.num_max_boxes, len(bbox))
num_max = self.num_max_boxes
pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
if gt_num > 0:
pad_bbox[:gt_num, :] = bbox[:gt_num, :]
sample['gt_bbox'] = pad_bbox
if 'gt_class' in sample.keys():
pad_class = np.zeros((num_max), dtype=np.int32)
if gt_num > 0:
pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
sample['gt_class'] = pad_class
if 'gt_score' in sample.keys():
pad_score = np.zeros((num_max), dtype=np.float32)
if gt_num > 0:
pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
sample['gt_score'] = pad_score
# in training, for example in op ExpandImage,
# the bbox and gt_class is expandded, but the difficult is not,
# so, judging by it's length
if 'is_difficult' in sample.keys():
pad_diff = np.zeros((num_max), dtype=np.int32)
if gt_num > 0:
pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
sample['difficult'] = pad_diff
return sample
def _crop_segm(self, segm, crop):
x1, y1, x2, y2 = crop
return segm[:, y1:y2, x1:x2]
@register_op
class BboxXYXY2XYWH(BaseOperator):
"""
Convert bbox XYXY format to XYWH format.
class RandomScaledCrop(BaseOperator):
"""Resize image and bbox based on long side (with optional random scaling),
then crop or pad image to target size.
Args:
target_dim (int): target size.
scale_range (list): random scale range.
interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
"""
def __init__(self):
super(BboxXYXY2XYWH, self).__init__()
def __call__(self, sample):
assert 'gt_bbox' in sample
bbox = sample['gt_bbox']
bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2]
bbox[:, :2] = bbox[:, :2] + bbox[:, 2:4] / 2.
sample['gt_bbox'] = bbox
return sample
def __init__(self,
target_dim=512,
scale_range=[.1, 2.],
interp=cv2.INTER_LINEAR):
super(RandomScaledCrop, self).__init__()
self.target_dim = target_dim
self.scale_range = scale_range
self.interp = interp
def apply(self, sample, context=None):
img = sample['image']
h, w = img.shape[:2]
random_scale = np.random.uniform(*self.scale_range)
dim = self.target_dim
random_dim = int(dim * random_scale)
dim_max = max(h, w)
scale = random_dim / dim_max
resize_w = w * scale
resize_h = h * scale
offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))
class Lighting(BaseOperator):
"""
Lighting the imagen by eigenvalues and eigenvectors
Args:
eigval (list): eigenvalues
eigvec (list): eigenvectors
alphastd (float): random weight of lighting, 0.1 by default
"""
img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interp)
img = np.array(img)
canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
offset_y:offset_y + dim, offset_x:offset_x + dim, :]
sample['image'] = canvas
sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
scale_factor = sample['sacle_factor']
sample['scale_factor'] = np.asarray(
[scale_factor[0] * scale, scale_factor[1] * scale],
dtype=np.float32)
def __init__(self, eigval, eigvec, alphastd=0.1):
super(Lighting, self).__init__()
self.alphastd = alphastd
self.eigval = np.array(eigval).astype('float32')
self.eigvec = np.array(eigvec).astype('float32')
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
scale_array = np.array([scale, scale] * 2, dtype=np.float32)
shift_array = np.array([offset_x, offset_y] * 2, dtype=np.float32)
boxes = sample['gt_bbox'] * scale_array - shift_array
boxes = np.clip(boxes, 0, dim - 1)
# filter boxes with no area
area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
valid = (area > 1.).nonzero()[0]
sample['gt_bbox'] = boxes[valid]
sample['gt_class'] = sample['gt_class'][valid]
def __call__(self, sample):
alpha = np.random.normal(scale=self.alphastd, size=(3, ))
sample['image'] += np.dot(self.eigvec, self.eigval * alpha)
return sample
@register_op
class CornerTarget(BaseOperator):
class Cutmix(BaseOperator):
def __init__(self, alpha=1.5, beta=1.5):
"""
Generate targets for CornerNet by ground truth data.
CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://arxiv.org/abs/1905.04899
Cutmix image and gt_bbbox/gt_score
Args:
output_size (int): the size of output heatmaps.
num_classes (int): num of classes.
gaussian_bump (bool): whether to apply gaussian bump on gt targets.
True by default.
gaussian_rad (int): radius of gaussian bump. If it is set to -1, the
radius will be calculated by iou. -1 by default.
gaussian_iou (float): the threshold iou of predicted bbox to gt bbox.
If the iou is larger than threshold, the predicted bboox seems as
positive sample. 0.3 by default
max_tag_len (int): max num of gt box per image.
alpha (float): alpha parameter of beta distribute
beta (float): beta parameter of beta distribute
"""
super(Cutmix, self).__init__()
self.alpha = alpha
self.beta = beta
if self.alpha <= 0.0:
raise ValueError("alpha shold be positive in {}".format(self))
if self.beta <= 0.0:
raise ValueError("beta shold be positive in {}".format(self))
def __init__(self,
output_size,
num_classes,
gaussian_bump=True,
gaussian_rad=-1,
gaussian_iou=0.3,
max_tag_len=128):
super(CornerTarget, self).__init__()
self.num_classes = num_classes
self.output_size = output_size
self.gaussian_bump = gaussian_bump
self.gaussian_rad = gaussian_rad
self.gaussian_iou = gaussian_iou
self.max_tag_len = max_tag_len
def __call__(self, sample):
tl_heatmaps = np.zeros(
(self.num_classes, self.output_size[0], self.output_size[1]),
dtype=np.float32)
br_heatmaps = np.zeros(
(self.num_classes, self.output_size[0], self.output_size[1]),
dtype=np.float32)
def apply_image(self, img1, img2, factor):
""" _rand_bbox """
h = max(img1.shape[0], img2.shape[0])
w = max(img1.shape[1], img2.shape[1])
cut_rat = np.sqrt(1. - factor)
tl_regrs = np.zeros((self.max_tag_len, 2), dtype=np.float32)
br_regrs = np.zeros((self.max_tag_len, 2), dtype=np.float32)
tl_tags = np.zeros((self.max_tag_len), dtype=np.int64)
br_tags = np.zeros((self.max_tag_len), dtype=np.int64)
tag_masks = np.zeros((self.max_tag_len), dtype=np.uint8)
tag_lens = np.zeros((), dtype=np.int32)
tag_nums = np.zeros((1), dtype=np.int32)
cut_w = np.int(w * cut_rat)
cut_h = np.int(h * cut_rat)
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
keep_inds = ((gt_bbox[:, 2] - gt_bbox[:, 0]) > 0) & \
((gt_bbox[:, 3] - gt_bbox[:, 1]) > 0)
gt_bbox = gt_bbox[keep_inds]
gt_class = gt_class[keep_inds]
sample['gt_bbox'] = gt_bbox
sample['gt_class'] = gt_class
width_ratio = self.output_size[1] / sample['w']
height_ratio = self.output_size[0] / sample['h']
for i in range(gt_bbox.shape[0]):
width = gt_bbox[i][2] - gt_bbox[i][0]
height = gt_bbox[i][3] - gt_bbox[i][1]
xtl, ytl = gt_bbox[i][0], gt_bbox[i][1]
xbr, ybr = gt_bbox[i][2], gt_bbox[i][3]
fxtl = (xtl * width_ratio)
fytl = (ytl * height_ratio)
fxbr = (xbr * width_ratio)
fybr = (ybr * height_ratio)
xtl = int(fxtl)
ytl = int(fytl)
xbr = int(fxbr)
ybr = int(fybr)
if self.gaussian_bump:
width = math.ceil(width * width_ratio)
height = math.ceil(height * height_ratio)
if self.gaussian_rad == -1:
radius = gaussian_radius((height, width), self.gaussian_iou)
radius = max(0, int(radius))
else:
radius = self.gaussian_rad
draw_gaussian(tl_heatmaps[gt_class[i][0]], [xtl, ytl], radius)
draw_gaussian(br_heatmaps[gt_class[i][0]], [xbr, ybr], radius)
else:
tl_heatmaps[gt_class[i][0], ytl, xtl] = 1
br_heatmaps[gt_class[i][0], ybr, xbr] = 1
# uniform
cx = np.random.randint(w)
cy = np.random.randint(h)
tl_regrs[i, :] = [fxtl - xtl, fytl - ytl]
br_regrs[i, :] = [fxbr - xbr, fybr - ybr]
tl_tags[tag_lens] = ytl * self.output_size[1] + xtl
br_tags[tag_lens] = ybr * self.output_size[1] + xbr
tag_lens += 1
bbx1 = np.clip(cx - cut_w // 2, 0, w - 1)
bby1 = np.clip(cy - cut_h // 2, 0, h - 1)
bbx2 = np.clip(cx + cut_w // 2, 0, w - 1)
bby2 = np.clip(cy + cut_h // 2, 0, h - 1)
img_1 = np.zeros((h, w, img1.shape[2]), 'float32')
img_1[:img1.shape[0], :img1.shape[1], :] = \
img1.astype('float32')
img_2 = np.zeros((h, w, img2.shape[2]), 'float32')
img_2[:img2.shape[0], :img2.shape[1], :] = \
img2.astype('float32')
img_1[bby1:bby2, bbx1:bbx2, :] = img2[bby1:bby2, bbx1:bbx2, :]
return img_1
tag_masks[:tag_lens] = 1
def __call__(self, sample, context=None):
if not isinstance(sample, Sequence):
return sample
sample['tl_heatmaps'] = tl_heatmaps
sample['br_heatmaps'] = br_heatmaps
sample['tl_regrs'] = tl_regrs
sample['br_regrs'] = br_regrs
sample['tl_tags'] = tl_tags
sample['br_tags'] = br_tags
sample['tag_masks'] = tag_masks
assert len(sample) == 2, 'cutmix need two samples'
factor = np.random.beta(self.alpha, self.beta)
factor = max(0.0, min(1.0, factor))
if factor >= 1.0:
return sample[0]
if factor <= 0.0:
return sample[1]
img1 = sample[0]['image']
img2 = sample[1]['image']
img = self.apply_image(img1, img2, factor)
gt_bbox1 = sample[0]['gt_bbox']
gt_bbox2 = sample[1]['gt_bbox']
gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
gt_class1 = sample[0]['gt_class']
gt_class2 = sample[1]['gt_class']
gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
gt_score1 = sample[0]['gt_score']
gt_score2 = sample[1]['gt_score']
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
sample = sample[0]
sample['image'] = img
sample['gt_bbox'] = gt_bbox
sample['gt_score'] = gt_score
sample['gt_class'] = gt_class
return sample
@register_op
class CornerCrop(BaseOperator):
"""
Random crop for CornerNet
class Mixup(BaseOperator):
def __init__(self, alpha=1.5, beta=1.5):
""" Mixup image and gt_bbbox/gt_score
Args:
random_scales (list): scales of output_size to input_size.
border (int): border of corp center
is_train (bool): train or test
input_size (int): size of input image
alpha (float): alpha parameter of beta distribute
beta (float): beta parameter of beta distribute
"""
super(Mixup, self).__init__()
self.alpha = alpha
self.beta = beta
if self.alpha <= 0.0:
raise ValueError("alpha shold be positive in {}".format(self))
if self.beta <= 0.0:
raise ValueError("beta shold be positive in {}".format(self))
def __init__(self,
random_scales=[0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3],
border=128,
is_train=True,
input_size=511):
super(CornerCrop, self).__init__()
self.random_scales = random_scales
self.border = border
self.is_train = is_train
self.input_size = input_size
def __call__(self, sample):
im_h, im_w = int(sample['h']), int(sample['w'])
if self.is_train:
scale = np.random.choice(self.random_scales)
height = int(self.input_size * scale)
width = int(self.input_size * scale)
w_border = self._get_border(self.border, im_w)
h_border = self._get_border(self.border, im_h)
ctx = np.random.randint(low=w_border, high=im_w - w_border)
cty = np.random.randint(low=h_border, high=im_h - h_border)
else:
cty, ctx = im_h // 2, im_w // 2
height = im_h | 127
width = im_w | 127
cropped_image = np.zeros(
(height, width, 3), dtype=sample['image'].dtype)
x0, x1 = max(ctx - width // 2, 0), min(ctx + width // 2, im_w)
y0, y1 = max(cty - height // 2, 0), min(cty + height // 2, im_h)
left_w, right_w = ctx - x0, x1 - ctx
top_h, bottom_h = cty - y0, y1 - cty
# crop image
cropped_ctx, cropped_cty = width // 2, height // 2
x_slice = slice(int(cropped_ctx - left_w), int(cropped_ctx + right_w))
y_slice = slice(int(cropped_cty - top_h), int(cropped_cty + bottom_h))
cropped_image[y_slice, x_slice, :] = sample['image'][y0:y1, x0:x1, :]
sample['image'] = cropped_image
sample['h'], sample['w'] = height, width
if self.is_train:
# crop detections
gt_bbox = sample['gt_bbox']
gt_bbox[:, 0:4:2] -= x0
gt_bbox[:, 1:4:2] -= y0
gt_bbox[:, 0:4:2] += cropped_ctx - left_w
gt_bbox[:, 1:4:2] += cropped_cty - top_h
else:
sample['borders'] = np.array(
[
cropped_cty - top_h, cropped_cty + bottom_h,
cropped_ctx - left_w, cropped_ctx + right_w
],
dtype=np.float32)
def apply_image(self, img1, img2, factor):
h = max(img1.shape[0], img2.shape[0])
w = max(img1.shape[1], img2.shape[1])
img = np.zeros((h, w, img1.shape[2]), 'float32')
img[:img1.shape[0], :img1.shape[1], :] = \
img1.astype('float32') * factor
img[:img2.shape[0], :img2.shape[1], :] += \
img2.astype('float32') * (1.0 - factor)
return img.astype('uint8')
def __call__(self, sample, context=None):
if not isinstance(sample, Sequence):
return sample
def _get_border(self, border, size):
i = 1
while size - border // i <= border // i:
i *= 2
return border // i
assert len(sample) == 2, 'mixup need two samples'
factor = np.random.beta(self.alpha, self.beta)
factor = max(0.0, min(1.0, factor))
if factor >= 1.0:
return sample[0]
if factor <= 0.0:
return sample[1]
im = self.apply_image(sample[0]['image'], sample[1]['image'], factor)
result = copy.deepcopy(sample[0])
result['image'] = im
# apply bbox and score
if 'gt_bbox' in sample[0]:
gt_bbox1 = sample[0]['gt_bbox']
gt_bbox2 = sample[1]['gt_bbox']
gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
result['gt_bbox'] = gt_bbox
if 'gt_class' in sample[0]:
gt_class1 = sample[0]['gt_class']
gt_class2 = sample[1]['gt_class']
gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
result['gt_class'] = gt_class
@register_op
class CornerRatio(BaseOperator):
"""
Ratio of output size to image size
Args:
input_size (int): the size of input size
output_size (int): the size of heatmap
"""
gt_score1 = np.ones_like(sample[0]['gt_class'])
gt_score2 = np.ones_like(sample[1]['gt_class'])
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
result['gt_score'] = gt_score
if 'is_crowd' in sample[0]:
is_crowd1 = sample[0]['is_crowd']
is_crowd2 = sample[1]['is_crowd']
is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
result['is_crowd'] = is_crowd
if 'difficult' in sample[0]:
is_difficult1 = sample[0]['difficult']
is_difficult2 = sample[1]['difficult']
is_difficult = np.concatenate(
(is_difficult1, is_difficult2), axis=0)
result['difficult'] = is_difficult
def __init__(self, input_size=511, output_size=64):
super(CornerRatio, self).__init__()
self.input_size = input_size
self.output_size = output_size
return result
def __call__(self, sample):
scale = (self.input_size + 1) // self.output_size
out_height, out_width = (sample['h'] + 1) // scale, (
sample['w'] + 1) // scale
height_ratio = out_height / float(sample['h'])
width_ratio = out_width / float(sample['w'])
sample['ratios'] = np.array([height_ratio, width_ratio])
return sample
@register_op
class NormalizeBox(BaseOperator):
"""Transform the bounding box's coornidates to [0,1]."""
def __init__(self):
super(NormalizeBox, self).__init__()
@register_op
class RandomScaledCrop(BaseOperator):
"""Resize image and bbox based on long side (with optional random scaling),
then crop or pad image to target size.
Args:
target_dim (int): target size.
scale_range (list): random scale range.
interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
"""
def apply(self, sample, context):
im = sample['image']
gt_bbox = sample['gt_bbox']
height, width, _ = im.shape
for i in range(gt_bbox.shape[0]):
gt_bbox[i][0] = gt_bbox[i][0] / width
gt_bbox[i][1] = gt_bbox[i][1] / height
gt_bbox[i][2] = gt_bbox[i][2] / width
gt_bbox[i][3] = gt_bbox[i][3] / height
sample['gt_bbox'] = gt_bbox
def __init__(self,
target_dim=512,
scale_range=[.1, 2.],
interp=cv2.INTER_LINEAR):
super(RandomScaledCrop, self).__init__()
self.target_dim = target_dim
self.scale_range = scale_range
self.interp = interp
if 'gt_keypoint' in sample.keys():
gt_keypoint = sample['gt_keypoint']
def __call__(self, sample):
w = sample['w']
h = sample['h']
random_scale = np.random.uniform(*self.scale_range)
dim = self.target_dim
random_dim = int(dim * random_scale)
dim_max = max(h, w)
scale = random_dim / dim_max
resize_w = int(round(w * scale))
resize_h = int(round(h * scale))
offset_x = int(max(0, np.random.uniform(0., resize_w - dim)))
offset_y = int(max(0, np.random.uniform(0., resize_h - dim)))
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
scale_array = np.array([scale, scale] * 2, dtype=np.float32)
shift_array = np.array([offset_x, offset_y] * 2, dtype=np.float32)
boxes = sample['gt_bbox'] * scale_array - shift_array
boxes = np.clip(boxes, 0, dim - 1)
# filter boxes with no area
area = np.prod(boxes[..., 2:] - boxes[..., :2], axis=1)
valid = (area > 1.).nonzero()[0]
sample['gt_bbox'] = boxes[valid]
sample['gt_class'] = sample['gt_class'][valid]
for i in range(gt_keypoint.shape[1]):
if i % 2:
gt_keypoint[:, i] = gt_keypoint[:, i] / height
else:
gt_keypoint[:, i] = gt_keypoint[:, i] / width
sample['gt_keypoint'] = gt_keypoint
img = sample['image']
img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interp)
img = np.array(img)
canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
canvas[:min(dim, resize_h), :min(dim, resize_w), :] = img[
offset_y:offset_y + dim, offset_x:offset_x + dim, :]
sample['h'] = dim
sample['w'] = dim
sample['image'] = canvas
sample['im_info'] = [resize_h, resize_w, scale]
return sample
@register_op
class ResizeAndPad(BaseOperator):
"""Resize image and bbox, then pad image to target size.
Args:
target_dim (int): target size
interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
class BboxXYXY2XYWH(BaseOperator):
"""
Convert bbox XYXY format to XYWH format.
"""
def __init__(self, target_dim=512, interp=cv2.INTER_LINEAR):
super(ResizeAndPad, self).__init__()
self.target_dim = target_dim
self.interp = interp
def __init__(self):
super(BboxXYXY2XYWH, self).__init__()
def __call__(self, sample):
w = sample['w']
h = sample['h']
interp = self.interp
dim = self.target_dim
dim_max = max(h, w)
scale = self.target_dim / dim_max
resize_w = int(round(w * scale))
resize_h = int(round(h * scale))
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
scale_array = np.array([scale, scale] * 2, dtype=np.float32)
sample['gt_bbox'] = np.clip(sample['gt_bbox'] * scale_array, 0,
dim - 1)
img = sample['image']
img = cv2.resize(img, (resize_w, resize_h), interpolation=interp)
img = np.array(img)
canvas = np.zeros((dim, dim, 3), dtype=img.dtype)
canvas[:resize_h, :resize_w, :] = img
sample['h'] = dim
sample['w'] = dim
sample['image'] = canvas
sample['im_info'] = [resize_h, resize_w, scale]
def apply(self, sample, context=None):
assert 'gt_bbox' in sample
bbox = sample['gt_bbox']
bbox[:, 2:4] = bbox[:, 2:4] - bbox[:, :2]
bbox[:, :2] = bbox[:, :2] + bbox[:, 2:4] / 2.
sample['gt_bbox'] = bbox
return sample
@register_op
class TargetAssign(BaseOperator):
"""Assign regression target and labels.
class PadBox(BaseOperator):
def __init__(self, num_max_boxes=50):
"""
Pad zeros to bboxes if number of bboxes is less than num_max_boxes.
Args:
image_size (int or list): input image size, a single integer or list of
[h, w]. Default: 512
min_level (int): min level of the feature pyramid. Default: 3
max_level (int): max level of the feature pyramid. Default: 7
anchor_base_scale (int): base anchor scale. Default: 4
num_scales (int): number of anchor scales. Default: 3
aspect_ratios (list): aspect ratios.
Default: [(1, 1), (1.4, 0.7), (0.7, 1.4)]
match_threshold (float): threshold for foreground IoU. Default: 0.5
num_max_boxes (int): the max number of bboxes
"""
self.num_max_boxes = num_max_boxes
super(PadBox, self).__init__()
def __init__(self,
image_size=512,
min_level=3,
max_level=7,
anchor_base_scale=4,
num_scales=3,
aspect_ratios=[(1, 1), (1.4, 0.7), (0.7, 1.4)],
match_threshold=0.5):
super(TargetAssign, self).__init__()
assert image_size % 2 ** max_level == 0, \
"image size should be multiple of the max level stride"
self.image_size = image_size
self.min_level = min_level
self.max_level = max_level
self.anchor_base_scale = anchor_base_scale
self.num_scales = num_scales
self.aspect_ratios = aspect_ratios
self.match_threshold = match_threshold
@property
def anchors(self):
if not hasattr(self, '_anchors'):
anchor_grid = AnchorGrid(self.image_size, self.min_level,
self.max_level, self.anchor_base_scale,
self.num_scales, self.aspect_ratios)
self._anchors = np.concatenate(anchor_grid.generate())
return self._anchors
def iou_matrix(self, a, b):
tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
area_i = np.prod(br_i - tl_i, axis=2) * (tl_i < br_i).all(axis=2)
area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
area_o = (area_a[:, np.newaxis] + area_b - area_i)
# return area_i / (area_o + 1e-10)
return np.where(area_i == 0., np.zeros_like(area_i), area_i / area_o)
def match(self, anchors, gt_boxes):
# XXX put smaller matrix first would be a little bit faster
mat = self.iou_matrix(gt_boxes, anchors)
max_anchor_for_each_gt = mat.argmax(axis=1)
max_for_each_anchor = mat.max(axis=0)
anchor_to_gt = mat.argmax(axis=0)
anchor_to_gt[max_for_each_anchor < self.match_threshold] = -1
# XXX ensure each gt has at least one anchor assigned,
# see `force_match_for_each_row` in TF implementation
one_hot = np.zeros_like(mat)
one_hot[np.arange(mat.shape[0]), max_anchor_for_each_gt] = 1.
max_anchor_indices = one_hot.sum(axis=0).nonzero()[0]
max_gt_indices = one_hot.argmax(axis=0)[max_anchor_indices]
anchor_to_gt[max_anchor_indices] = max_gt_indices
return anchor_to_gt
def encode(self, anchors, boxes):
wha = anchors[..., 2:] - anchors[..., :2] + 1
ca = anchors[..., :2] + wha * .5
whb = boxes[..., 2:] - boxes[..., :2] + 1
cb = boxes[..., :2] + whb * .5
offsets = np.empty_like(anchors)
offsets[..., :2] = (cb - ca) / wha
offsets[..., 2:] = np.log(whb / wha)
return offsets
def __call__(self, sample):
gt_boxes = sample['gt_bbox']
gt_labels = sample['gt_class']
labels = np.full((self.anchors.shape[0], 1), 0, dtype=np.int32)
targets = np.full((self.anchors.shape[0], 4), 0., dtype=np.float32)
sample['gt_label'] = labels
sample['gt_target'] = targets
if len(gt_boxes) < 1:
sample['fg_num'] = np.array(0, dtype=np.int32)
return sample
anchor_to_gt = self.match(self.anchors, gt_boxes)
matched_indices = (anchor_to_gt >= 0).nonzero()[0]
labels[matched_indices] = gt_labels[anchor_to_gt[matched_indices]]
matched_boxes = gt_boxes[anchor_to_gt[matched_indices]]
matched_anchors = self.anchors[matched_indices]
matched_targets = self.encode(matched_anchors, matched_boxes)
targets[matched_indices] = matched_targets
sample['fg_num'] = np.array(len(matched_targets), dtype=np.int32)
def apply(self, sample, context=None):
assert 'gt_bbox' in sample
bbox = sample['gt_bbox']
gt_num = min(self.num_max_boxes, len(bbox))
num_max = self.num_max_boxes
# fields = context['fields'] if context else []
pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
if gt_num > 0:
pad_bbox[:gt_num, :] = bbox[:gt_num, :]
sample['gt_bbox'] = pad_bbox
if 'gt_class' in sample:
pad_class = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_class[:gt_num] = sample['gt_class'][:gt_num, 0]
sample['gt_class'] = pad_class
if 'gt_score' in sample:
pad_score = np.zeros((num_max, ), dtype=np.float32)
if gt_num > 0:
pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
sample['gt_score'] = pad_score
# in training, for example in op ExpandImage,
# the bbox and gt_class is expandded, but the difficult is not,
# so, judging by it's length
if 'difficult' in sample:
pad_diff = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
sample['difficult'] = pad_diff
if 'is_crowd' in sample:
pad_crowd = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
sample['is_crowd'] = pad_crowd
return sample
......@@ -2443,7 +1701,7 @@ class DebugVisibleImage(BaseOperator):
if not isinstance(self.is_normalized, bool):
raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, sample):
def apply(self, sample, context=None):
image = Image.open(sample['im_file']).convert('RGB')
out_file_name = sample['im_file'].split('/')[-1]
width = sample['w']
......@@ -2485,9 +1743,177 @@ class DebugVisibleImage(BaseOperator):
x1 = round(keypoint[2 * j]).astype(np.int32)
y1 = round(keypoint[2 * j + 1]).astype(np.int32)
draw.ellipse(
(x1, y1, x1 + 5, y1i + 5),
fill='green',
outline='green')
(x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
save_path = os.path.join(self.output_dir, out_file_name)
image.save(save_path, quality=95)
return sample
@register_op
class Pad(BaseOperator):
def __init__(self,
size=None,
size_divisor=32,
pad_mode=0,
offsets=None,
fill_value=(127.5, 127.5, 127.5)):
"""
Pad image to a specified size or multiple of size_divisor. random target_size and interpolation method
Args:
size (int, Sequence): image target size, if None, pad to multiple of size_divisor, default None
size_divisor (int): size divisor, default 32
pad_mode (int): pad mode, currently only supports four modes [-1, 0, 1, 2]. if -1, use specified offsets
if 0, only pad to right and bottom. if 1, pad according to center. if 2, only pad left and top
fill_value (bool): rgb value of pad area, default (127.5, 127.5, 127.5)
"""
super(Pad, self).__init__()
if not isinstance(size, (int, Sequence)):
raise TypeError(
"Type of target_size is invalid when random_size is True. \
Must be List, now is {}".format(type(size)))
if isinstance(size, int):
size = [size, size]
assert pad_mode in [
-1, 0, 1, 2
], 'currently only supports four modes [-1, 0, 1, 2]'
assert pad_mode == -1 and offsets, 'if pad_mode is -1, offsets should not be None'
self.size = size
self.size_divisor = size_divisor
self.pad_mode = pad_mode
self.fill_value = fill_value
self.offsets = offsets
def apply_segm(self, segms, offsets, im_size, size):
def _expand_poly(poly, x, y):
expanded_poly = np.array(poly)
expanded_poly[0::2] += x
expanded_poly[1::2] += y
return expanded_poly.tolist()
def _expand_rle(rle, x, y, height, width, h, w):
if 'counts' in rle and type(rle['counts']) == list:
rle = mask_util.frPyObjects(rle, height, width)
mask = mask_util.decode(rle)
expanded_mask = np.full((h, w), 0).astype(mask.dtype)
expanded_mask[y:y + height, x:x + width] = mask
rle = mask_util.encode(
np.array(
expanded_mask, order='F', dtype=np.uint8))
return rle
x, y = offsets
height, width = im_size
h, w = size
expanded_segms = []
for segm in segms:
if is_poly(segm):
# Polygon format
expanded_segms.append(
[_expand_poly(poly, x, y) for poly in segm])
else:
# RLE format
import pycocotools.mask as mask_util
expanded_segms.append(
_expand_rle(segm, x, y, height, width, h, w))
return expanded_segms
def apply_bbox(self, bbox, offsets):
return bbox + np.array(offsets * 2, dtype=np.float32)
def apply_keypoint(self, keypoints, offsets):
n = len(keypoints[0]) // 2
return keypoints + np.array(offsets * n, dtype=np.float32)
def apply_image(self, image, offsets, im_size, size):
x, y = offsets
im_h, im_w = im_size
h, w = size
canvas = np.ones((h, w, 3), dtype=np.float32)
canvas *= np.array(self.fill_value, dtype=np.float32)
canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
return canvas
def apply(self, sample, context=None):
im = sample['image']
im_h, im_w = im.shape[:2]
if self.size:
h, w = self.size
assert (
im_h < h and im_w < w
), '(h, w) of target size should be greater than (im_h, im_w)'
else:
h = np.ceil(im_h // self.size_divisor) * self.size_divisor
w = np.ceil(im_w / self.size_divisor) * self.size_divisor
if h == im_h and w == im_w:
return sample
if self.pad_mode == -1:
offset_x, offset_y = self.offsets
elif self.pad_mode == 0:
offset_y, offset_x = 0, 0
elif self.pad_mode == 1:
offset_y, offset_x = (h - im_h) // 2, (w - im_w) // 2
else:
offset_y, offset_x = h - im_h, w - im_w
offsets, im_size, size = [offset_x, offset_y], [im_h, im_w], [h, w]
sample['image'] = self.apply_image(im, offsets, im_size, size)
if self.pad_mode == 0:
return sample
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], offsets)
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_segm(sample['gt_poly'], offsets,
im_size, size)
if 'gt_keypoint' in sample and len(sample['gt_keypoint']) > 0:
sample['gt_keypoint'] = self.apply_keypoint(sample['gt_keypoint'],
offsets)
return sample
@register_op
class Poly2Mask(BaseOperator):
"""
gt poly to mask annotations
"""
def __init__(self):
super(Poly2Mask, self).__init__()
import pycocotools.mask as maskUtils
self.maskutils = maskUtils
def _poly2mask(self, mask_ann, img_h, img_w):
if isinstance(mask_ann, list):
# polygon -- a single object might consist of multiple parts
# we merge all parts into one mask rle code
rles = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
rle = self.maskutils.merge(rles)
elif isinstance(mask_ann['counts'], list):
# uncompressed RLE
rle = self.maskutils.frPyObjects(mask_ann, img_h, img_w)
else:
# rle
rle = mask_ann
mask = self.maskutils.decode(rle)
return mask
def apply(self, sample, context=None):
assert 'gt_poly' in sample
im_h = sample['h']
im_w = sample['w']
masks = [
self._poly2mask(gt_poly, im_h, im_w)
for gt_poly in sample['gt_poly']
]
sample['gt_segm'] = np.asarray(masks).astype(np.uint8)
return sample
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册