diff --git a/ppcls/arch/backbone/model_zoo/foundation_vit.py b/ppcls/arch/backbone/model_zoo/foundation_vit.py index 12cc699e5489ba2fabb8daacfeb4d3ef9f50c399..588020fe073c5ee6126827fa1218d6bcda07a1bb 100644 --- a/ppcls/arch/backbone/model_zoo/foundation_vit.py +++ b/ppcls/arch/backbone/model_zoo/foundation_vit.py @@ -567,7 +567,7 @@ class VisionTransformer(nn.Layer): drop_path_rate=0., norm_layer='nn.LayerNorm', epsilon=1e-5, - head_init_scale=1, + head_init_scale=0.001, **kwargs): super().__init__() global _model_diff diff --git a/ppcls/configs/CLIP/CLIP_vit_base_patch16_224_finetune.yaml b/ppcls/configs/CLIP/CLIP_vit_base_patch16_224_finetune.yaml index c7e6e0de82d9e1c8e9ffd269add9bbfffc99f057..e2f6f55d6eee0195335b410b74c6cc38b322c6c1 100644 --- a/ppcls/configs/CLIP/CLIP_vit_base_patch16_224_finetune.yaml +++ b/ppcls/configs/CLIP/CLIP_vit_base_patch16_224_finetune.yaml @@ -27,7 +27,6 @@ Arch: class_num: 1000 return_embed: False pretrained: True - head_init_scale: 0.001 # loss function config for traing/eval process Loss: diff --git a/ppcls/configs/CLIP/CLIP_vit_large_patch16_224_finetune.yaml b/ppcls/configs/CLIP/CLIP_vit_large_patch16_224_finetune.yaml index 1380587bed9a58e130c3b6c2bd38244b1a76dca9..e17ae8184ede2439d21656a5bc1abf91e64795ae 100644 --- a/ppcls/configs/CLIP/CLIP_vit_large_patch16_224_finetune.yaml +++ b/ppcls/configs/CLIP/CLIP_vit_large_patch16_224_finetune.yaml @@ -27,7 +27,6 @@ Arch: class_num: 1000 return_embed: False pretrained: True - head_init_scale: 0.001 # loss function config for traing/eval process Loss: