diff --git a/configs/mot/bytetrack/_base_/ppyoloe_mot_reader_640x640.yml b/configs/mot/bytetrack/_base_/ppyoloe_mot_reader_640x640.yml index c1e7ab8418956810ae6d2788cd4d67b9f2e17775..ef6342fd0e9249acf386b7795cb538b73a26f108 100644 --- a/configs/mot/bytetrack/_base_/ppyoloe_mot_reader_640x640.yml +++ b/configs/mot/bytetrack/_base_/ppyoloe_mot_reader_640x640.yml @@ -1,4 +1,8 @@ -worker_num: 8 +worker_num: 4 +eval_height: &eval_height 640 +eval_width: &eval_width 640 +eval_size: &eval_size [*eval_height, *eval_width] + TrainReader: sample_transforms: - Decode: {} @@ -20,17 +24,17 @@ TrainReader: EvalReader: sample_transforms: - Decode: {} - - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2} - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} - Permute: {} batch_size: 8 TestReader: inputs_def: - image_shape: [3, 640, 640] + image_shape: [3, *eval_height, *eval_width] sample_transforms: - Decode: {} - - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2} - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} - Permute: {} batch_size: 1 @@ -40,17 +44,17 @@ TestReader: EvalMOTReader: sample_transforms: - Decode: {} - - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2} - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} - Permute: {} batch_size: 1 TestMOTReader: inputs_def: - image_shape: [3, 640, 640] + image_shape: [3, *eval_height, *eval_width] sample_transforms: - Decode: {} - - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2} - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} - Permute: {} batch_size: 1 diff --git a/configs/mot/bytetrack/bytetrack_ppyoloe.yml b/configs/mot/bytetrack/bytetrack_ppyoloe.yml index 08b7a00d89b79ad1bd1e2753738f22fcc66e657c..5e7ffe07f0f758c641596e90ee0da4c31085fd85 100644 --- a/configs/mot/bytetrack/bytetrack_ppyoloe.yml +++ b/configs/mot/bytetrack/bytetrack_ppyoloe.yml @@ -8,7 +8,7 @@ weights: output/bytetrack_ppyoloe/model_final log_iter: 20 snapshot_epoch: 2 -metric: MOT # eval/infer mode +metric: MOT # eval/infer mode, set 'COCO' can be training mode num_classes: 1 architecture: ByteTrack @@ -33,7 +33,6 @@ PPYOLOEHead: grid_cell_offset: 0.5 static_assigner_epoch: -1 # 100 use_varifocal_loss: True - eval_input_size: [640, 640] loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5} static_assigner: name: ATSSAssigner diff --git a/configs/mot/bytetrack/bytetrack_ppyoloe_pplcnet.yml b/configs/mot/bytetrack/bytetrack_ppyoloe_pplcnet.yml index 98ea15f15299b9d550bc4de1a53fe203e7cd61fc..60f81165d5b324943a997dbc26fbe56f249f2ef6 100644 --- a/configs/mot/bytetrack/bytetrack_ppyoloe_pplcnet.yml +++ b/configs/mot/bytetrack/bytetrack_ppyoloe_pplcnet.yml @@ -33,7 +33,6 @@ PPYOLOEHead: grid_cell_offset: 0.5 static_assigner_epoch: -1 # 100 use_varifocal_loss: True - eval_input_size: [640, 640] loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5} static_assigner: name: ATSSAssigner diff --git a/configs/mot/bytetrack/detector/ppyoloe_crn_l_36e_640x640_mot17half.yml b/configs/mot/bytetrack/detector/ppyoloe_crn_l_36e_640x640_mot17half.yml index 89654f059e603eda24002dfec844f450bd73e8ff..6c770e9bf85e953a30df43faf57c401518b7f6ad 100644 --- a/configs/mot/bytetrack/detector/ppyoloe_crn_l_36e_640x640_mot17half.yml +++ b/configs/mot/bytetrack/detector/ppyoloe_crn_l_36e_640x640_mot17half.yml @@ -7,6 +7,7 @@ weights: output/ppyoloe_crn_l_36e_640x640_mot17half/model_final log_iter: 20 snapshot_epoch: 2 + # schedule configuration for fine-tuning epoch: 36 LearningRate: @@ -16,7 +17,7 @@ LearningRate: max_epochs: 43 - !LinearWarmup start_factor: 0.001 - steps: 100 + epochs: 1 OptimizerBuilder: optimizer: @@ -26,9 +27,11 @@ OptimizerBuilder: factor: 0.0005 type: L2 + TrainReader: batch_size: 8 + # detector configuration architecture: YOLOv3 norm_type: sync_bn @@ -63,7 +66,6 @@ PPYOLOEHead: grid_cell_offset: 0.5 static_assigner_epoch: -1 # 100 use_varifocal_loss: True - eval_input_size: [640, 640] loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5} static_assigner: name: ATSSAssigner diff --git a/configs/ppyoloe/_base_/optimizer_36e_xpu.yml b/configs/ppyoloe/_base_/optimizer_36e_xpu.yml new file mode 100644 index 0000000000000000000000000000000000000000..59d76f4bae98e4774c2cee9cbc8c77ac341af35d --- /dev/null +++ b/configs/ppyoloe/_base_/optimizer_36e_xpu.yml @@ -0,0 +1,18 @@ +epoch: 36 + +LearningRate: + base_lr: 0.00125 + schedulers: + - !CosineDecay + max_epochs: 43 + - !LinearWarmup + start_factor: 0.001 + steps: 2000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 diff --git a/configs/ppyoloe/ppyoloe_crn_l_36e_coco_xpu.yml b/configs/ppyoloe/ppyoloe_crn_l_36e_coco_xpu.yml new file mode 100644 index 0000000000000000000000000000000000000000..5b4d644fe923d5ea84e26e1edbddd150a14bae1c --- /dev/null +++ b/configs/ppyoloe/ppyoloe_crn_l_36e_coco_xpu.yml @@ -0,0 +1,69 @@ +_BASE_: [ + '../datasets/coco_detection.yml', + '../runtime.yml', + './_base_/optimizer_36e_xpu.yml', + './_base_/ppyoloe_reader.yml', +] + +# note: these are default values (use_gpu = true and use_xpu = false) for CI. +# set use_gpu = false and use_xpu = true for training. +use_gpu: true +use_xpu: false + +log_iter: 100 +snapshot_epoch: 1 +weights: output/ppyoloe_crn_l_36e_coco/model_final +find_unused_parameters: True + +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_l_pretrained.pdparams +depth_mult: 1.0 +width_mult: 1.0 + +TrainReader: + batch_size: 8 + +architecture: YOLOv3 +norm_type: sync_bn +use_ema: true +ema_decay: 0.9998 + +YOLOv3: + backbone: CSPResNet + neck: CustomCSPPAN + yolo_head: PPYOLOEHead + post_process: ~ + +CSPResNet: + layers: [3, 6, 6, 3] + channels: [64, 128, 256, 512, 1024] + return_idx: [1, 2, 3] + use_large_stem: True + +CustomCSPPAN: + out_channels: [768, 384, 192] + stage_num: 1 + block_num: 3 + act: 'swish' + spp: true + +PPYOLOEHead: + fpn_strides: [32, 16, 8] + grid_cell_scale: 5.0 + grid_cell_offset: 0.5 + static_assigner_epoch: 4 + use_varifocal_loss: True + loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5} + static_assigner: + name: ATSSAssigner + topk: 9 + assigner: + name: TaskAlignedAssigner + topk: 13 + alpha: 1.0 + beta: 6.0 + nms: + name: MultiClassNMS + nms_top_k: 1000 + keep_top_k: 100 + score_threshold: 0.01 + nms_threshold: 0.6