diff --git a/configs/mot/fairmot/_base_/fairmot_reader_1088x608.yml b/configs/mot/fairmot/_base_/fairmot_reader_1088x608.yml index 43170363d0fe550840d71a47382a9f8c4cb0f5ed..03834508c21e0709d0f102011a71d770eb194390 100644 --- a/configs/mot/fairmot/_base_/fairmot_reader_1088x608.yml +++ b/configs/mot/fairmot/_base_/fairmot_reader_1088x608.yml @@ -3,15 +3,17 @@ TrainReader: inputs_def: image_shape: [3, 608, 1088] sample_transforms: - - Decode: {to_rgb: False} - - AugmentHSV: {is_bgr: True} + - Decode: {} + - RGBReverse: {} + - AugmentHSV: {} - LetterBoxResize: {target_size: [608, 1088]} - MOTRandomAffine: {reject_outside: False} - RandomFlip: {} - BboxXYXY2XYWH: {} - NormalizeBox: {} - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1]} - - Permute: {to_rgb: True} + - RGBReverse: {} + - Permute: {} batch_transforms: - Gt2FairMOTTarget: {} batch_size: 6 @@ -23,10 +25,10 @@ EvalMOTReader: inputs_def: image_shape: [3, 608, 1088] sample_transforms: - - Decode: {to_rgb: False} + - Decode: {} - LetterBoxResize: {target_size: [608, 1088]} - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1]} - - Permute: {to_rgb: True} + - Permute: {} batch_size: 1 @@ -36,5 +38,5 @@ TestMOTReader: sample_transforms: - LetterBoxResize: {target_size: [608, 1088]} - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1]} - - Permute: {to_rgb: True} + - Permute: {} batch_size: 1 diff --git a/configs/mot/jde/_base_/jde_reader_1088x608.yml b/configs/mot/jde/_base_/jde_reader_1088x608.yml index 0d05af3aaacbe74905d4ba49c917ae3fa324cddf..41709b9bcc76cae674a0773b98245232fa4a8b88 100644 --- a/configs/mot/jde/_base_/jde_reader_1088x608.yml +++ b/configs/mot/jde/_base_/jde_reader_1088x608.yml @@ -2,6 +2,7 @@ worker_num: 2 TrainReader: sample_transforms: - Decode: {} + - RGBReverse: {} - AugmentHSV: {} - LetterBoxResize: {target_size: [608, 1088]} - MOTRandomAffine: {} @@ -9,6 +10,7 @@ TrainReader: - BboxXYXY2XYWH: {} - NormalizeBox: {} - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True} + - RGBReverse: {} - Permute: {} batch_transforms: - Gt2JDETargetThres: diff --git a/configs/mot/jde/_base_/jde_reader_576x320.yml b/configs/mot/jde/_base_/jde_reader_576x320.yml index 0a137fe04eb1b4b2400554cc3d64b70795048489..4b204f7ba80f170a00ed4c74d8960418ddaa44fa 100644 --- a/configs/mot/jde/_base_/jde_reader_576x320.yml +++ b/configs/mot/jde/_base_/jde_reader_576x320.yml @@ -2,6 +2,7 @@ worker_num: 2 TrainReader: sample_transforms: - Decode: {} + - RGBReverse: {} - AugmentHSV: {} - LetterBoxResize: {target_size: [320, 576]} - MOTRandomAffine: {} @@ -9,11 +10,12 @@ TrainReader: - BboxXYXY2XYWH: {} - NormalizeBox: {} - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True} + - RGBReverse: {} - Permute: {} batch_transforms: - Gt2JDETargetThres: anchor_masks: [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]] - anchors: [[[85,255], [120,320], [170,320], [340,320]], + anchors: [[[85,255], [120,360], [170,420], [340,420]], [[21,64], [30,90], [43,128], [60,180]], [[6,16], [8,23], [11,32], [16,45]]] downsample_ratios: [32, 16, 8] diff --git a/configs/mot/jde/_base_/jde_reader_864x480.yml b/configs/mot/jde/_base_/jde_reader_864x480.yml index 6083a99c57c464f84aeb545bede1a3e81731730c..2f6b822ac0cda560285c9631be72f858738b78d2 100644 --- a/configs/mot/jde/_base_/jde_reader_864x480.yml +++ b/configs/mot/jde/_base_/jde_reader_864x480.yml @@ -2,6 +2,7 @@ worker_num: 2 TrainReader: sample_transforms: - Decode: {} + - RGBReverse: {} - AugmentHSV: {} - LetterBoxResize: {target_size: [480, 864]} - MOTRandomAffine: {} @@ -9,6 +10,7 @@ TrainReader: - BboxXYXY2XYWH: {} - NormalizeBox: {} - NormalizeImage: {mean: [0, 0, 0], std: [1, 1, 1], is_scale: True} + - RGBReverse: {} - Permute: {} batch_transforms: - Gt2JDETargetThres: diff --git a/configs/mot/jde/jde_darknet53_30e_576x320.yml b/configs/mot/jde/jde_darknet53_30e_576x320.yml index 23ba22efb39d6b26580edaa28abe95ae4208540b..7cee1aafc867d8352d7297dcc6d3bbee59e463f0 100644 --- a/configs/mot/jde/jde_darknet53_30e_576x320.yml +++ b/configs/mot/jde/jde_darknet53_30e_576x320.yml @@ -20,7 +20,7 @@ YOLOv3: for_mot: True YOLOv3Head: - anchors: [[85,255], [120,320], [170,320], [340,320], + anchors: [[85,255], [120,360], [170,420], [340,420], [21,64], [30,90], [43,128], [60,180], [6,16], [8,23], [11,32], [16,45]] anchor_masks: [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]] diff --git a/ppdet/data/transform/mot_operators.py b/ppdet/data/transform/mot_operators.py index 4944ec6fa4b1e1652f794da7cf218d81aa103c6d..e85805743c6c5b903a2c5cd3d30a937298289ba2 100644 --- a/ppdet/data/transform/mot_operators.py +++ b/ppdet/data/transform/mot_operators.py @@ -36,11 +36,25 @@ from ppdet.utils.logger import setup_logger logger = setup_logger(__name__) __all__ = [ - 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres', + 'RGBReverse', 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres', 'Gt2JDETargetMax', 'Gt2FairMOTTarget' ] +@register_op +class RGBReverse(BaseOperator): + """RGB to BGR, or BGR to RGB, sensitive to MOTRandomAffine + """ + + def __init__(self): + super(RGBReverse, self).__init__() + + def apply(self, sample, context=None): + im = sample['image'] + sample['image'] = np.ascontiguousarray(im[:, :, ::-1]) + return sample + + @register_op class LetterBoxResize(BaseOperator): def __init__(self, target_size): diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py index f500bad8b1a15bc425c37ff9ec6fe5c212802631..ebf9f24cac76cb216a587ce4b4fa90fd7db3673a 100644 --- a/ppdet/data/transform/operators.py +++ b/ppdet/data/transform/operators.py @@ -107,12 +107,10 @@ class BaseOperator(object): @register_op class Decode(BaseOperator): - def __init__(self, to_rgb=True): + def __init__(self): """ Transform the image data to numpy format following the rgb format """ super(Decode, self).__init__() - # TODO: remove this parameter - self.to_rgb = to_rgb def apply(self, sample, context=None): """ load image if 'im_file' field is not empty but 'image' is""" @@ -126,8 +124,7 @@ class Decode(BaseOperator): im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode if 'keep_ori_im' in sample and sample['keep_ori_im']: sample['ori_image'] = im - if self.to_rgb: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) sample['image'] = im if 'h' not in sample: @@ -154,18 +151,14 @@ class Decode(BaseOperator): @register_op class Permute(BaseOperator): - def __init__(self, to_rgb=False): + def __init__(self): """ Change the channel to be (C, H, W) """ super(Permute, self).__init__() - # TODO: remove this parameter - self.to_rgb = to_rgb def apply(self, sample, context=None): im = sample['image'] - if self.to_rgb: - im = np.ascontiguousarray(im[:, :, ::-1]) im = im.transpose((2, 0, 1)) sample['image'] = im return sample