diff --git a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_224.yaml b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_224.yaml index 4655e02b3afb558177b9989276c1d25166a2f4d7..a7697840ea8f1f29bed5d9e2c2226ea18f4e421d 100644 --- a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_224.yaml +++ b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_224.yaml @@ -42,11 +42,12 @@ Optimizer: no_weight_decay_name: pos_embed cls_token .bias norm one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 1.25e-4 - eta_min: 1.25e-6 + learning_rate: 2.5e-4 + eta_min: 2.5e-6 warmup_epoch: 20 - warmup_start_lr: 1.25e-7 + warmup_start_lr: 2.5e-7 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_384.yaml b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_384.yaml index 1e6b1f79f5a86be481920c659d7344d2c0ce182a..a7100289c06b94f211dd3fea6cd0b8f2548b8244 100644 --- a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_384.yaml +++ b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_base_384.yaml @@ -42,11 +42,12 @@ Optimizer: no_weight_decay_name: pos_embed cls_token .bias norm one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 6.25e-5 - eta_min: 6.25e-7 + learning_rate: 1.25e-4 + eta_min: 1.25e-6 warmup_epoch: 20 - warmup_start_lr: 6.25e-8 + warmup_start_lr: 1.25e-7 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_224.yaml b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_224.yaml index ddeacadf0cf93bb2941819a218d76fa8eeba92b2..7c96343df5d00f8c7ceeba0f1dabec8ecccdbc57 100644 --- a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_224.yaml +++ b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_224.yaml @@ -42,11 +42,12 @@ Optimizer: no_weight_decay_name: pos_embed cls_token .bias norm 
one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 1.25e-4 - eta_min: 1.25e-6 + learning_rate: 2.5e-4 + eta_min: 2.5e-6 warmup_epoch: 20 - warmup_start_lr: 1.25e-7 + warmup_start_lr: 2.5e-7 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_384.yaml b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_384.yaml index ab477ef2e7ba4613e0e292c8279c1e077e9dedee..4b682fec60569cd16e6cf81d093d016761f37615 100644 --- a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_384.yaml +++ b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_large_384.yaml @@ -42,11 +42,12 @@ Optimizer: no_weight_decay_name: pos_embed cls_token .bias norm one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 3.125e-5 - eta_min: 3.125e-7 + learning_rate: 6.25e-5 + eta_min: 6.25e-7 warmup_epoch: 20 - warmup_start_lr: 3.125e-8 + warmup_start_lr: 6.25e-8 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_small_224.yaml b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_small_224.yaml index ec3c5a1452db3b2b224bf888b1015cab10374462..a191f4160fd2922974a14c379a8010f90a698b6c 100644 --- a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_small_224.yaml +++ b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_small_224.yaml @@ -42,11 +42,12 @@ Optimizer: no_weight_decay_name: pos_embed cls_token .bias norm one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 2.5e-4 - eta_min: 2.5e-6 + learning_rate: 5e-4 + eta_min: 5e-6 warmup_epoch: 20 - warmup_start_lr: 2.5e-7 + warmup_start_lr: 5e-7 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml index 
3e3f9252578e8b3e06d7d858e303fdd39eeded25..3a2be2837891639604cbea15b7179adfb5cda0c7 100644 --- a/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml +++ b/ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml @@ -42,11 +42,12 @@ Optimizer: no_weight_decay_name: pos_embed cls_token .bias norm one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 5e-6 + learning_rate: 1e-3 + eta_min: 1e-5 warmup_epoch: 20 - warmup_start_lr: 5e-7 + warmup_start_lr: 1e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml index 979a04a381c67debd81ffc6a509664de8cd71ccd..8c3cc4c3413729e2a5530de23105619ae76e0a15 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_224.yaml @@ -40,11 +40,12 @@ Optimizer: no_weight_decay_name: norm cls_token pos_embed dist_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 1e-3 - eta_min: 1e-5 + learning_rate: 2e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval DataLoader: diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml index 859f57d72b0ee1261e3d06bb1c8dec6e601faada..0b8c2e808c94f09f9a6283a6baa605bd7ac45d85 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_base_distilled_patch16_384.yaml @@ -40,11 +40,12 @@ Optimizer: no_weight_decay_name: norm cls_token pos_embed dist_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 1e-3 - eta_min: 1e-5 + learning_rate: 2e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for 
train and eval DataLoader: diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml index 3cdd10202ee8275eeb9d1d1535e414007559fd6d..938916caa3937b2f1ef556e98c3adf9313b4c7b9 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_224.yaml @@ -40,11 +40,12 @@ Optimizer: no_weight_decay_name: norm cls_token pos_embed dist_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 1e-3 - eta_min: 1e-5 + learning_rate: 2e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval DataLoader: diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml index 88a8fbae9394d81438f96a99cc9ac9da243cfba8..4cbe6ffded134bbe52656879b5105f3676c44b64 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_base_patch16_384.yaml @@ -40,11 +40,12 @@ Optimizer: no_weight_decay_name: norm cls_token pos_embed dist_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 1e-3 - eta_min: 1e-5 + learning_rate: 2e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval DataLoader: diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml index 54d962e689b95d78c56b7c0bd8a0fd44a379364f..d5ba0cee78de68ecccb35ffcbf099ddbdad3271d 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_small_distilled_patch16_224.yaml @@ -41,10 +41,10 @@ Optimizer: one_dim_param_no_weight_decay: True lr: name: Cosine - learning_rate: 1e-3 - eta_min: 1e-5 + learning_rate: 2e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + 
warmup_start_lr: 2e-6 # data loader for train and eval DataLoader: diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml index 05c3ac1f36ad79ffa141eb86d4fceabbe28da98b..a167c896e8be7746d9896cfcd9d3d3a8e7671908 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_small_patch16_224.yaml @@ -40,11 +40,12 @@ Optimizer: no_weight_decay_name: norm cls_token pos_embed dist_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 1e-3 - eta_min: 1e-5 + learning_rate: 2e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval DataLoader: diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml index f6661761391d65c35152ee222eb7b6cf3273de47..319e17025d758eadce16001863312f773410104c 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_tiny_distilled_patch16_224.yaml @@ -40,11 +40,12 @@ Optimizer: no_weight_decay_name: norm cls_token pos_embed dist_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 1e-3 - eta_min: 1e-5 + learning_rate: 2e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval DataLoader: diff --git a/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml b/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml index 647050a77b181f0ac5ccbf473130415d6b957513..1234d79b6ba68186466edd7c2d1ea4f6bc61eba9 100644 --- a/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml +++ b/ppcls/configs/ImageNet/DeiT/DeiT_tiny_patch16_224.yaml @@ -40,11 +40,12 @@ Optimizer: no_weight_decay_name: norm cls_token pos_embed dist_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - 
learning_rate: 1e-3 - eta_min: 1e-5 + learning_rate: 2e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval DataLoader: diff --git a/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml b/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml index a7265b066e1c526fbb63f59993ff68bb4ae09d8a..b230f11cbde78e195355d00a7b042b0d9e6a4026 100644 --- a/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml +++ b/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml @@ -49,9 +49,8 @@ Loss: model_name_pairs: - ["Student", "Teacher"] Eval: - - DistillationGTCELoss: + - CELoss: weight: 1.0 - model_names: ["Student"] Optimizer: diff --git a/ppcls/configs/ImageNet/Distillation/resnet34_distill_resnet18_afd.yaml b/ppcls/configs/ImageNet/Distillation/resnet34_distill_resnet18_afd.yaml index e5b8b716222316c0fca80a69154b0c937e6c52da..000cb9add132c0231d72d47e2947d4397c380d61 100644 --- a/ppcls/configs/ImageNet/Distillation/resnet34_distill_resnet18_afd.yaml +++ b/ppcls/configs/ImageNet/Distillation/resnet34_distill_resnet18_afd.yaml @@ -88,10 +88,8 @@ Loss: s_shapes: *s_shapes t_shapes: *t_shapes Eval: - - DistillationGTCELoss: + - CELoss: weight: 1.0 - model_names: ["Student"] - Optimizer: name: Momentum diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B0.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B0.yaml index 6c0854cb4a3aed82f8e8897d26e5ad964ceb9cc9..27fc20b99961b29e9ddbcb58363b495f199b8aec 100644 --- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B0.yaml +++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B0.yaml @@ -43,11 +43,12 @@ Optimizer: no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 5e-6 + learning_rate: 1e-3 + eta_min: 1e-5 warmup_epoch: 20 - warmup_start_lr: 5e-7 + warmup_start_lr: 1e-6 # data loader for 
train and eval diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B1.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B1.yaml index 42134c74006042a81a1bd4c9181105034fa56ae0..20fa39773f66d0ffff2786a031f86156c5fc5c41 100644 --- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B1.yaml +++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B1.yaml @@ -43,11 +43,12 @@ Optimizer: no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 5e-6 + learning_rate: 1e-3 + eta_min: 1e-5 warmup_epoch: 20 - warmup_start_lr: 5e-7 + warmup_start_lr: 1e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2.yaml index 4d0d5a432dba117fcffc98b2c3f32afcbc7a0969..cda94496e34747468ac3dfe0b474478c0d30cae6 100644 --- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2.yaml +++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2.yaml @@ -43,11 +43,12 @@ Optimizer: no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 5e-6 + learning_rate: 1e-3 + eta_min: 1e-5 warmup_epoch: 20 - warmup_start_lr: 5e-7 + warmup_start_lr: 1e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml index a5feb260bfdb570a8b0ae5791b74697051546e61..2d48178f05c19bcca34da95087129893fc574bd0 100644 --- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml +++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml @@ -43,11 +43,12 @@ Optimizer: no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 5e-6 + learning_rate: 1e-3 + eta_min: 1e-5 warmup_epoch: 20 - warmup_start_lr: 5e-7 + warmup_start_lr: 1e-6 # data loader for train and eval diff 
--git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B3.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B3.yaml index be300aca61832f9886daccb6786af770cc5fcb46..581a7060549607248f967e54b60956722bcb4be0 100644 --- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B3.yaml +++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B3.yaml @@ -44,11 +44,12 @@ Optimizer: no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 5e-6 + learning_rate: 1e-3 + eta_min: 1e-5 warmup_epoch: 20 - warmup_start_lr: 5e-7 + warmup_start_lr: 1e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B4.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B4.yaml index b6a89533949e472ffea26e2b18ae1ea174288afa..92da84d1ef00430a1aab66fb33f84e8016ab2c0c 100644 --- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B4.yaml +++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B4.yaml @@ -44,11 +44,12 @@ Optimizer: no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 5e-6 + learning_rate: 1e-3 + eta_min: 1e-5 warmup_epoch: 20 - warmup_start_lr: 5e-7 + warmup_start_lr: 1e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B5.yaml b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B5.yaml index 9d36b28078ff2472913da4cc3101ad39779d6a3d..4bb2449a40bdd605b7d36359437b39324e5b1772 100644 --- a/ppcls/configs/ImageNet/PVTV2/PVT_V2_B5.yaml +++ b/ppcls/configs/ImageNet/PVTV2/PVT_V2_B5.yaml @@ -44,11 +44,12 @@ Optimizer: no_weight_decay_name: pos_embed1 pos_embed2 pos_embed3 pos_embed4 cls_token one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 5e-6 + learning_rate: 1e-3 + eta_min: 1e-5 warmup_epoch: 20 - warmup_start_lr: 5e-7 + warmup_start_lr: 1e-6 # data loader for train and eval diff --git 
a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml index 4dd0ac4cfbe3840e39ca46b67b539c3c1a9cf146..afc3fdcd263de26e4864ecafbe46db7afd575ddf 100644 --- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml +++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window12_384.yaml @@ -41,11 +41,12 @@ Optimizer: no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 1e-5 + learning_rate: 1e-3 + eta_min: 2e-5 warmup_epoch: 20 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml index a42dea1f94b2c34b2d34497861cc8daf6ec757b1..4920fae6c4bab15f16d908d112a618da42aa9b35 100644 --- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml +++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_base_patch4_window7_224.yaml @@ -41,11 +41,12 @@ Optimizer: no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 1e-5 + learning_rate: 1e-3 + eta_min: 2e-5 warmup_epoch: 20 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml index 36b5e5e38ecc2b70fc82b93dfd8a761c3e40d0b0..a6dd74267eaab84d919ff47d979d4ed863520ff8 100644 --- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml +++ 
b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window12_384.yaml @@ -41,11 +41,12 @@ Optimizer: no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 1e-5 + learning_rate: 1e-3 + eta_min: 2e-5 warmup_epoch: 20 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml index 96a9befd2026b70354dc20682d143c72b5660169..564da72f1fd4dc88b7161d00259a346a25b38c42 100644 --- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml +++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_large_patch4_window7_224.yaml @@ -41,11 +41,12 @@ Optimizer: no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 1e-5 + learning_rate: 1e-3 + eta_min: 2e-5 warmup_epoch: 20 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml index ffbbcf080b025527035fee353c4ddc6a212c294e..ba42f1efb8460581445e7b5a605971ec64bb0851 100644 --- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml +++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_small_patch4_window7_224.yaml @@ -41,11 +41,12 @@ Optimizer: no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 1e-5 + learning_rate: 1e-3 + eta_min: 2e-5 warmup_epoch: 20 - 
warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml index 066db715da45de4eb85ff4f3c2406be50be99e7e..26fa0ba61ed159ad458f9b0c21e03aa4fcd7f02e 100644 --- a/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml +++ b/ppcls/configs/ImageNet/SwinTransformer/SwinTransformer_tiny_patch4_window7_224.yaml @@ -41,11 +41,12 @@ Optimizer: no_weight_decay_name: absolute_pos_embed relative_position_bias_table .bias norm one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 1e-5 + learning_rate: 1e-3 + eta_min: 2e-5 warmup_epoch: 20 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml b/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml index 74c402ee7cdddba2234ec14f9388f83392c465e0..36e5b086dc43376fe6424ca67eb02b93ac6ce9a4 100644 --- a/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml +++ b/ppcls/configs/ImageNet/Twins/alt_gvt_base.yaml @@ -43,11 +43,12 @@ Optimizer: no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 1e-5 + learning_rate: 1e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml b/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml index ca66e9a33e81c87c7e119094069e36fed44bd608..6e19d64618fa6d1dbb5608558592760f6ad61cb7 100644 --- a/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml +++ b/ppcls/configs/ImageNet/Twins/alt_gvt_large.yaml @@ -43,11 +43,12 @@ Optimizer: no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight 
proj.2.weight proj.3.weight pos_block one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 1e-5 + learning_rate: 1e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml b/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml index 9e97c0f9994da983a5951d784850096498549ced..66235960a5c6e3fb4c255da86031214856ea761f 100644 --- a/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml +++ b/ppcls/configs/ImageNet/Twins/alt_gvt_small.yaml @@ -43,11 +43,12 @@ Optimizer: no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 1e-5 + learning_rate: 1e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml b/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml index 7831e9289570a62a60dd72945b2bdf842f9d8f09..96745495a0926bc2767a309e0c3bd71cf4201c0b 100644 --- a/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml +++ b/ppcls/configs/ImageNet/Twins/pcpvt_base.yaml @@ -43,11 +43,12 @@ Optimizer: no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 1e-5 + learning_rate: 1e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml b/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml index 8e160b3c2100a7a6c2666df89e08a67e93d589ad..ca4baf942a4c0763ee03f4631030a9d2a0752e1d 100644 --- a/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml +++ b/ppcls/configs/ImageNet/Twins/pcpvt_large.yaml @@ -43,11 +43,12 @@ Optimizer: 
no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 1e-5 + learning_rate: 1e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval diff --git a/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml b/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml index 582382d4d0fa38c89db337b0cc7acbcb79a9a544..a5e5f7e0564c9d6c96b51cbda0ff7312a899d6b0 100644 --- a/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml +++ b/ppcls/configs/ImageNet/Twins/pcpvt_small.yaml @@ -43,11 +43,12 @@ Optimizer: no_weight_decay_name: norm cls_token proj.0.weight proj.1.weight proj.2.weight proj.3.weight pos_block one_dim_param_no_weight_decay: True lr: + # for 8 cards name: Cosine - learning_rate: 5e-4 - eta_min: 1e-5 + learning_rate: 1e-3 + eta_min: 2e-5 warmup_epoch: 5 - warmup_start_lr: 1e-6 + warmup_start_lr: 2e-6 # data loader for train and eval diff --git a/ppcls/engine/engine.py b/ppcls/engine/engine.py index 05151a1bb0fcbcf2cf2ab4630fea63a97bdcb1f1..ca851c6268d892738db44d5f729bb3c15ba91de0 100644 --- a/ppcls/engine/engine.py +++ b/ppcls/engine/engine.py @@ -262,12 +262,17 @@ class Engine(object): self.model_ema = ExponentialMovingAverage( self.model, self.config['EMA'].get("decay", 0.9999)) - # for distributed + # check the gpu num world_size = dist.get_world_size() self.config["Global"]["distributed"] = world_size != 1 - if world_size != 4 and self.mode == "train": - msg = f"The training strategy in config files provided by PaddleClas is based on 4 gpus. But the number of gpus is {world_size} in current training. Please modify the stategy (learning rate, batch size and so on) if use config files in PaddleClas to train." 
- logger.warning(msg) + if self.mode == "train": + std_gpu_num = 8 if self.config["Optimizer"][ + "name"] == "AdamW" else 4 + if world_size != std_gpu_num: + msg = f"The training strategy provided by PaddleClas is based on {std_gpu_num} gpus. But the number of gpus is {world_size} in current training. Please modify the strategy (learning rate, batch size and so on) if you use this config to train." + logger.warning(msg) + + # for distributed if self.config["Global"]["distributed"]: dist.init_parallel_env() self.model = paddle.DataParallel(self.model) diff --git a/ppcls/engine/evaluation/classification.py b/ppcls/engine/evaluation/classification.py index 6e7fc1a76fe8c3bc4402d9428d372b9c2b50a17b..f4c90a393f5043575c5e49f16fd5b220c881e0fc 100644 --- a/ppcls/engine/evaluation/classification.py +++ b/ppcls/engine/evaluation/classification.py @@ -80,22 +80,17 @@ def classification_eval(engine, epoch_id=0): current_samples = batch_size * paddle.distributed.get_world_size() accum_samples += current_samples + if isinstance(out, dict) and "Student" in out: + out = out["Student"] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + # gather Tensor when distributed if paddle.distributed.get_world_size() > 1: label_list = [] paddle.distributed.all_gather(label_list, batch[1]) labels = paddle.concat(label_list, 0) - if isinstance(out, dict): - if "Student" in out: - out = out["Student"] - if isinstance(out, dict): - out = out["logits"] - elif "logits" in out: - out = out["logits"] - else: - msg = "Error: Wrong key in out!"
- raise Exception(msg) if isinstance(out, list): preds = [] for x in out: diff --git a/ppcls/loss/deephashloss.py b/ppcls/loss/deephashloss.py index 959fd11ada6bf2812c2cd028926ab1e9a469d688..7dda519a871cc0460160ea4411c286ac5c25d6ad 100644 --- a/ppcls/loss/deephashloss.py +++ b/ppcls/loss/deephashloss.py @@ -20,6 +20,7 @@ class DSHSDLoss(nn.Layer): """ # DSHSD(IEEE ACCESS 2019) # paper [Deep Supervised Hashing Based on Stable Distribution](https://ieeexplore.ieee.org/document/8648432/) + # code reference: https://github.com/swuxyj/DeepHash-pytorch/blob/master/DSHSD.py """ def __init__(self, alpha, multi_label=False): @@ -62,6 +63,7 @@ class DSHSDLoss(nn.Layer): class LCDSHLoss(nn.Layer): """ # paper [Locality-Constrained Deep Supervised Hashing for Image Retrieval](https://www.ijcai.org/Proceedings/2017/0499.pdf) + # code reference: https://github.com/swuxyj/DeepHash-pytorch/blob/master/LCDSH.py """ def __init__(self, n_class, _lambda): @@ -100,6 +102,7 @@ class DCHLoss(paddle.nn.Layer): """ # paper [Deep Cauchy Hashing for Hamming Space Retrieval] URL:(http://ise.thss.tsinghua.edu.cn/~mlong/doc/deep-cauchy-hashing-cvpr18.pdf) + # code reference: https://github.com/swuxyj/DeepHash-pytorch/blob/master/DCH.py """ def __init__(self, gamma, _lambda, n_class): diff --git a/ppcls/loss/emlloss.py b/ppcls/loss/emlloss.py index 973570389ac08e11b47449fbefbaa9e5e8e33c83..38b707fe1a4eb9ed6d130a3eb9bc4f8762d4c189 100644 --- a/ppcls/loss/emlloss.py +++ b/ppcls/loss/emlloss.py @@ -23,6 +23,11 @@ from .comfunc import rerange_index class EmlLoss(paddle.nn.Layer): + """Ensemble Metric Learning Loss + paper: [Large Scale Strongly Supervised Ensemble Metric Learning, with Applications to Face Verification and Retrieval](https://arxiv.org/pdf/1212.6094.pdf) + code reference: https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/metric_learning/losses/emlloss.py + """ + def __init__(self, batch_size=40, samples_each_class=2): super(EmlLoss, self).__init__() assert (batch_size % 
samples_each_class == 0) diff --git a/ppcls/loss/googlenetloss.py b/ppcls/loss/googlenetloss.py index c580aa61701be6f5b6be43ce52a31be363b40d95..491311831acf90e11474f0a82713ef096221eb7f 100644 --- a/ppcls/loss/googlenetloss.py +++ b/ppcls/loss/googlenetloss.py @@ -18,11 +18,13 @@ import paddle.nn.functional as F class GoogLeNetLoss(nn.Layer): """ Cross entropy loss used after googlenet + reference paper: [https://arxiv.org/pdf/1409.4842v1.pdf](Going Deeper with Convolutions) """ + def __init__(self, epsilon=None): super().__init__() - assert (epsilon is None or epsilon <= 0 or epsilon >= 1), "googlenet is not support label_smooth" - + assert (epsilon is None or epsilon <= 0 or + epsilon >= 1), "googlenet is not support label_smooth" def forward(self, inputs, label): input0, input1, input2 = inputs diff --git a/ppcls/loss/msmloss.py b/ppcls/loss/msmloss.py index 3aa0dd8bfb0cdc6f558ff9891f0e0000ef183fae..adf03ef8e03c942fd1f2635704b9929e439dc3f5 100644 --- a/ppcls/loss/msmloss.py +++ b/ppcls/loss/msmloss.py @@ -21,10 +21,12 @@ from .comfunc import rerange_index class MSMLoss(paddle.nn.Layer): """ - MSMLoss Loss, based on triplet loss. USE P * K samples. + paper : [Margin Sample Mining Loss: A Deep Learning Based Method for Person Re-identification](https://arxiv.org/pdf/1710.00478.pdf) + code reference: https://github.com/michuanhaohao/keras_reid/blob/master/reid_tripletcls.py + Margin Sample Mining Loss, based on triplet loss. USE P * K samples. the batch size is fixed. Batch_size = P * K; but the K may vary between batches. 
same label gather together - + supported_metrics = [ 'euclidean', 'sqeuclidean', @@ -41,7 +43,7 @@ class MSMLoss(paddle.nn.Layer): self.rerange_index = rerange_index(batch_size, samples_each_class) def forward(self, input, target=None): - #normalization + #normalization features = input["features"] features = self._nomalize(features) samples_each_class = self.samples_each_class @@ -53,7 +55,7 @@ class MSMLoss(paddle.nn.Layer): features, axis=0) similary_matrix = paddle.sum(paddle.square(diffs), axis=-1) - #rerange + #rerange tmp = paddle.reshape(similary_matrix, shape=[-1, 1]) tmp = paddle.gather(tmp, index=rerange_index) similary_matrix = paddle.reshape(tmp, shape=[-1, self.batch_size]) diff --git a/ppcls/loss/npairsloss.py b/ppcls/loss/npairsloss.py index d4b359e88119a735442858cb8dbe3fa255add09a..131c799a48abb9507cfe7ae16dd2aa34bf8c8f25 100644 --- a/ppcls/loss/npairsloss.py +++ b/ppcls/loss/npairsloss.py @@ -5,6 +5,11 @@ import paddle class NpairsLoss(paddle.nn.Layer): + """Npair_loss_ + paper [Improved deep metric learning with multi-class N-pair loss objective](https://dl.acm.org/doi/10.5555/3157096.3157304) + code reference: https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/contrib/losses/metric_learning/npairs_loss + """ + def __init__(self, reg_lambda=0.01): super(NpairsLoss, self).__init__() self.reg_lambda = reg_lambda diff --git a/ppcls/loss/pairwisecosface.py b/ppcls/loss/pairwisecosface.py index beb806863bd171635452fd67993cc56404efe0b6..7f146dea5454b90e08a4790b2d95d4624c76bb0d 100644 --- a/ppcls/loss/pairwisecosface.py +++ b/ppcls/loss/pairwisecosface.py @@ -23,6 +23,11 @@ import paddle.nn.functional as F class PairwiseCosface(nn.Layer): + """ + paper: Circle Loss: A Unified Perspective of Pair Similarity Optimization + code reference: https://github.com/leoluopy/circle-loss-demonstration/blob/main/circle_loss.py + """ + def __init__(self, margin, gamma): super(PairwiseCosface, self).__init__() self.margin = margin @@ -36,8 +41,10 @@ class 
PairwiseCosface(nn.Layer): dist_mat = paddle.matmul(embedding, embedding, transpose_y=True) N = dist_mat.shape[0] - is_pos = targets.reshape([N,1]).expand([N,N]).equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float') - is_neg = targets.reshape([N,1]).expand([N,N]).not_equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float') + is_pos = targets.reshape([N, 1]).expand([N, N]).equal( + paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float') + is_neg = targets.reshape([N, 1]).expand([N, N]).not_equal( + paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float') # Mask scores related to itself is_pos = is_pos - paddle.eye(N, N) @@ -46,10 +53,12 @@ class PairwiseCosface(nn.Layer): s_n = dist_mat * is_neg logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos) - logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 - is_neg) - - loss = F.softplus(paddle.logsumexp(logit_p, axis=1) + paddle.logsumexp(logit_n, axis=1)).mean() - - return {"PairwiseCosface": loss} + logit_n = self.gamma * (s_n + self.margin) + (-99999999.) 
* (1 - is_neg + ) + loss = F.softplus( + paddle.logsumexp( + logit_p, axis=1) + paddle.logsumexp( + logit_n, axis=1)).mean() + return {"PairwiseCosface": loss} diff --git a/ppcls/loss/rkdloss.py b/ppcls/loss/rkdloss.py index e6ffea273431ec7105d0cdedd0225c40648d2660..aa6ae232438ed6d5a915ea982092f3711d2901c3 100644 --- a/ppcls/loss/rkdloss.py +++ b/ppcls/loss/rkdloss.py @@ -29,6 +29,7 @@ def pdist(e, squared=False, eps=1e-12): class RKdAngle(nn.Layer): + # paper : [Relational Knowledge Distillation](https://arxiv.org/abs/1904.05068?context=cs.LG) # reference: https://github.com/lenscloth/RKD/blob/master/metric/loss.py def __init__(self, target_size=None): super().__init__() @@ -64,6 +65,7 @@ class RKdAngle(nn.Layer): class RkdDistance(nn.Layer): + # paper : [Relational Knowledge Distillation](https://arxiv.org/abs/1904.05068?context=cs.LG) # reference: https://github.com/lenscloth/RKD/blob/master/metric/loss.py def __init__(self, eps=1e-12, target_size=1): super().__init__() diff --git a/ppcls/loss/supconloss.py b/ppcls/loss/supconloss.py index 3dd33bc19e97ddb29966f55c4789b7a4ae81422b..753ceaf415b28793ebd1758c9cde84316d04e70b 100644 --- a/ppcls/loss/supconloss.py +++ b/ppcls/loss/supconloss.py @@ -4,6 +4,7 @@ from paddle import nn class SupConLoss(nn.Layer): """Supervised Contrastive Learning: https://arxiv.org/pdf/2004.11362.pdf. 
+ code reference: https://github.com/HobbitLong/SupContrast/blob/master/losses.py It also supports the unsupervised contrastive loss in SimCLR""" def __init__(self, diff --git a/ppcls/loss/trihardloss.py b/ppcls/loss/trihardloss.py index 132c604d51920786e89c331b3223884910e50fa8..96cb42cb46b018abc77286eb96ef5e44f20f67b0 100644 --- a/ppcls/loss/trihardloss.py +++ b/ppcls/loss/trihardloss.py @@ -22,10 +22,12 @@ from .comfunc import rerange_index class TriHardLoss(paddle.nn.Layer): """ + paper: In Defense of the Triplet Loss for Person Re-Identification + code reference: https://github.com/VisualComputingInstitute/triplet-reid/blob/master/loss.py TriHard Loss, based on triplet loss. USE P * K samples. the batch size is fixed. Batch_size = P * K; but the K may vary between batches. same label gather together - + supported_metrics = [ 'euclidean', 'sqeuclidean', @@ -45,7 +47,7 @@ class TriHardLoss(paddle.nn.Layer): features = input["features"] assert (self.batch_size == features.shape[0]) - #normalization + #normalization features = self._nomalize(features) samples_each_class = self.samples_each_class rerange_index = paddle.to_tensor(self.rerange_index) @@ -56,7 +58,7 @@ class TriHardLoss(paddle.nn.Layer): features, axis=0) similary_matrix = paddle.sum(paddle.square(diffs), axis=-1) - #rerange + #rerange tmp = paddle.reshape(similary_matrix, shape=[-1, 1]) tmp = paddle.gather(tmp, index=rerange_index) similary_matrix = paddle.reshape(tmp, shape=[-1, self.batch_size]) diff --git a/ppcls/loss/triplet.py b/ppcls/loss/triplet.py index d1c7eec9e6031aa7e51a1a3575094e7d1a4f90df..458ee2e27d7b550fecfe16e5208047a8919b89d0 100644 --- a/ppcls/loss/triplet.py +++ b/ppcls/loss/triplet.py @@ -1,3 +1,17 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -8,6 +22,8 @@ import paddle.nn as nn class TripletLossV2(nn.Layer): """Triplet loss with hard positive/negative mining. + paper : [Facenet: A unified embedding for face recognition and clustering](https://arxiv.org/pdf/1503.03832.pdf) + code reference: https://github.com/okzhili/Cartoon-face-recognition/blob/master/loss/triplet_loss.py Args: margin (float): margin for triplet. """ diff --git a/ppcls/optimizer/__init__.py b/ppcls/optimizer/__init__.py index d27f1100eef871db48b8da9ab86eba6af8aecee8..44d7b5ac0b33f267f6893d39bd42d27c8bac0573 100644 --- a/ppcls/optimizer/__init__.py +++ b/ppcls/optimizer/__init__.py @@ -118,8 +118,6 @@ def build_optimizer(config, epochs, step_each_epoch, model_list=None): if hasattr(model_list[i], optim_scope): optim_model.append(getattr(model_list[i], optim_scope)) - assert len(optim_model) == 1, \ - "Invalid optim model for optim scope({}), number of optim_model={}".format(optim_scope, len(optim_model)) optim = getattr(optimizer, optim_name)( learning_rate=lr, grad_clip=grad_clip, **optim_cfg)(model_list=optim_model) diff --git a/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt b/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt index 11b2f9dd9eb100c43a87f08192606310c7b555de..03f5e3eedadd140c50075e39cd3a7c0ea73e3b2b 100644 --- a/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt +++ 
b/test_tipc/config/CSWinTransformer/CSWinTransformer_tiny_224_train_infer_python.txt @@ -13,14 +13,14 @@ train_infer_img_dir:./dataset/ILSVRC2012/val null:null ## trainer:norm_train -norm_train:tools/train.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False +norm_train:tools/train.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.print_batch_step=1 pact_train:null fpgm_train:null distill_train:null null:null null:null ## -===========================eval_params=========================== +===========================eval_params=========================== eval:tools/eval.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml null:null ## diff --git a/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt b/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt index d69f26412fbd82c89bdef6095aca27d20df33903..06fda8fe661d5f21ff81fc30ef344c73ede4d4e2 100644 --- a/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt +++ b/test_tipc/config/MobileViT/MobileViT_S_train_infer_python.txt @@ -13,14 +13,14 @@ train_infer_img_dir:./dataset/ILSVRC2012/val null:null ## trainer:norm_train -norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False +norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.print_batch_step=1 
pact_train:null fpgm_train:null distill_train:null null:null null:null ## -===========================eval_params=========================== +===========================eval_params=========================== eval:tools/eval.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml null:null ## diff --git a/test_tipc/config/PVTV2/PVT_V2_B2_Linear_train_infer_python.txt b/test_tipc/config/PVTV2/PVT_V2_B2_Linear_train_infer_python.txt index b2aa7df69f36f797f26ab985f379634952fa45d3..f50107fea5309eee26b3e46f1d9f7388ff7f1b59 100644 --- a/test_tipc/config/PVTV2/PVT_V2_B2_Linear_train_infer_python.txt +++ b/test_tipc/config/PVTV2/PVT_V2_B2_Linear_train_infer_python.txt @@ -13,7 +13,7 @@ train_infer_img_dir:./dataset/ILSVRC2012/val null:null ## trainer:norm_train -norm_train:tools/train.py -c ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False +norm_train:tools/train.py -c ppcls/configs/ImageNet/PVTV2/PVT_V2_B2_Linear.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.print_batch_step=1 pact_train:null fpgm_train:null distill_train:null diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index c5be87074ddf8fd2bd7096bbd87929e492b688f8..70040dc8b28656f7fb3e1384f840f068437dcf7e 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -1,7 +1,7 @@ #!/bin/bash FILENAME=$1 -# MODE be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer', +# MODE be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer', # 'whole_infer', 'klquant_whole_infer', # 'cpp_infer', 'serving_infer', 'lite_infer'] @@ -67,9 +67,9 @@ if [ ${MODE} = "cpp_infer" ];then model_dir=${tar_name%.*} eval "tar xf ${tar_name}" eval "mv ${model_dir} ${cls_inference_model_dir}" - + eval 
"wget -nc $det_inference_url" - tar_name=$(func_get_url_file_name "$det_inference_url") + tar_name=$(func_get_url_file_name "$det_inference_url") model_dir=${tar_name%.*} eval "tar xf ${tar_name}" eval "mv ${model_dir} ${det_inference_model_dir}" @@ -120,7 +120,7 @@ if [ ${MODE} = "lite_train_lite_infer" ] || [ ${MODE} = "lite_train_whole_infer" wget -nc https://paddle-imagenet-models-name.bj.bcebos.com/data/whole_chain/whole_chain_little_train.tar tar xf whole_chain_little_train.tar ln -s whole_chain_little_train ILSVRC2012 - cd ILSVRC2012 + cd ILSVRC2012 mv train.txt train_list.txt mv val.txt val_list.txt cp -r train/* val/ @@ -132,7 +132,7 @@ elif [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ];then wget -nc https://paddle-imagenet-models-name.bj.bcebos.com/data/whole_chain/whole_chain_infer.tar tar xf whole_chain_infer.tar ln -s whole_chain_infer ILSVRC2012 - cd ILSVRC2012 + cd ILSVRC2012 mv val.txt val_list.txt ln -s val_list.txt train_list.txt cd ../../ @@ -153,7 +153,7 @@ elif [ ${MODE} = "whole_train_whole_infer" ];then wget -nc https://paddle-imagenet-models-name.bj.bcebos.com/data/whole_chain/whole_chain_CIFAR100.tar tar xf whole_chain_CIFAR100.tar ln -s whole_chain_CIFAR100 ILSVRC2012 - cd ILSVRC2012 + cd ILSVRC2012 mv train.txt train_list.txt mv test.txt val_list.txt cd ../../