[PPYOLOE] Fix OOM during evaluation in training (#5409)

d6ffa2b5 · shangliang Xu · GitHub · 0fe525af · d6ffa2b5 · d6ffa2b5
8 changed files
--- a/configs/ppyoloe/_base_/optimizer_300e.yml
+++ b/configs/ppyoloe/_base_/optimizer_300e.yml
 epoch: 300
 LearningRate:
-  base_lr: 0.03
+  base_lr: 0.025
  schedulers:
    - !CosineDecay
      max_epochs: 360
    - !LinearWarmup
-      start_factor: 0.001
+      start_factor: 0.
-      steps: 3000
+      epochs: 5
 OptimizerBuilder:
  optimizer:

--- a/configs/ppyoloe/_base_/ppyoloe_reader.yml
+++ b/configs/ppyoloe/_base_/ppyoloe_reader.yml
-worker_num: 8
+worker_num: 4
 TrainReader:
  sample_transforms:
    - Decode: {}
@@ -11,7 +11,7 @@ TrainReader:
    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
    - Permute: {}
    - PadGT: {}
-  batch_size: 24
+  batch_size: 20
  shuffle: true
  drop_last: true
  use_shared_memory: true
@@ -23,7 +23,7 @@ EvalReader:
    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
    - Permute: {}
-  batch_size: 4
+  batch_size: 2
 TestReader:
  inputs_def:

--- a/configs/ppyoloe/ppyoloe_crn_l_300e_coco.yml
+++ b/configs/ppyoloe/ppyoloe_crn_l_300e_coco.yml
@@ -9,7 +9,6 @@ _BASE_: [
 log_iter: 100
 snapshot_epoch: 10
 weights: output/ppyoloe_crn_l_300e_coco/model_final
-find_unused_parameters: True
 pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_l_pretrained.pdparams
 depth_mult: 1.0

--- a/configs/ppyoloe/ppyoloe_crn_m_300e_coco.yml
+++ b/configs/ppyoloe/ppyoloe_crn_m_300e_coco.yml
@@ -9,20 +9,13 @@ _BASE_: [
 log_iter: 100
 snapshot_epoch: 10
 weights: output/ppyoloe_crn_m_300e_coco/model_final
-find_unused_parameters: True
 pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_m_pretrained.pdparams
 depth_mult: 0.67
 width_mult: 0.75
 TrainReader:
-  batch_size: 32
+  batch_size: 28
 LearningRate:
-  base_lr: 0.04
+  base_lr: 0.035
-  schedulers:
-    - !CosineDecay
-      max_epochs: 360
-    - !LinearWarmup
-      start_factor: 0.001
-      steps: 2300
--- a/configs/ppyoloe/ppyoloe_crn_s_300e_coco.yml
+++ b/configs/ppyoloe/ppyoloe_crn_s_300e_coco.yml
@@ -9,7 +9,6 @@ _BASE_: [
 log_iter: 100
 snapshot_epoch: 10
 weights: output/ppyoloe_crn_s_300e_coco/model_final
-find_unused_parameters: True
 pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_s_pretrained.pdparams
 depth_mult: 0.33
@@ -20,9 +19,3 @@ TrainReader:
 LearningRate:
  base_lr: 0.04
-  schedulers:
-    - !CosineDecay
-      max_epochs: 360
-    - !LinearWarmup
-      start_factor: 0.001
-      steps: 2300
--- a/configs/ppyoloe/ppyoloe_crn_x_300e_coco.yml
+++ b/configs/ppyoloe/ppyoloe_crn_x_300e_coco.yml
@@ -9,7 +9,6 @@ _BASE_: [
 log_iter: 100
 snapshot_epoch: 10
 weights: output/ppyoloe_crn_x_300e_coco/model_final
-find_unused_parameters: True
 pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/CSPResNetb_x_pretrained.pdparams
 depth_mult: 1.33
@@ -20,9 +19,3 @@ TrainReader:
 LearningRate:
  base_lr: 0.02
-  schedulers:
-    - !CosineDecay
-      max_epochs: 360
-    - !LinearWarmup
-      start_factor: 0.001
-      steps: 4600
--- a/ppdet/modeling/assigners/atss_assigner.py
+++ b/ppdet/modeling/assigners/atss_assigner.py
@@ -183,9 +183,6 @@ class ATSSAssigner(nn.Layer):
                                         mask_positive)
            mask_positive_sum = mask_positive.sum(axis=-2)
        assigned_gt_index = mask_positive.argmax(axis=-2)
-        assert mask_positive_sum.max() == 1, \
-            ("one anchor just assign one gt, but received not equals 1. "
-             "Received: %f" % mask_positive_sum.max().item())
        # assigned target
        batch_ind = paddle.arange(

--- a/ppdet/modeling/assigners/task_aligned_assigner.py
+++ b/ppdet/modeling/assigners/task_aligned_assigner.py
@@ -129,9 +129,6 @@ class TaskAlignedAssigner(nn.Layer):
                                         mask_positive)
            mask_positive_sum = mask_positive.sum(axis=-2)
        assigned_gt_index = mask_positive.argmax(axis=-2)
-        assert mask_positive_sum.max() == 1, \
-            ("one anchor just assign one gt, but received not equals 1. "
-             "Received: %f" % mask_positive_sum.max().item())
        # assigned target
        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes