未验证 提交 ed674366 编写于 作者: Z zhangyubo0722 提交者: GitHub

del head_init_scale (#2947)

上级 d7a7d3e5
...@@ -567,7 +567,7 @@ class VisionTransformer(nn.Layer): ...@@ -567,7 +567,7 @@ class VisionTransformer(nn.Layer):
drop_path_rate=0., drop_path_rate=0.,
norm_layer='nn.LayerNorm', norm_layer='nn.LayerNorm',
epsilon=1e-5, epsilon=1e-5,
head_init_scale=1, head_init_scale=0.001,
**kwargs): **kwargs):
super().__init__() super().__init__()
global _model_diff global _model_diff
......
...@@ -27,7 +27,6 @@ Arch: ...@@ -27,7 +27,6 @@ Arch:
class_num: 1000 class_num: 1000
return_embed: False return_embed: False
pretrained: True pretrained: True
head_init_scale: 0.001
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
......
...@@ -27,7 +27,6 @@ Arch: ...@@ -27,7 +27,6 @@ Arch:
class_num: 1000 class_num: 1000
return_embed: False return_embed: False
pretrained: True pretrained: True
head_init_scale: 0.001
# loss function config for training/eval process # loss function config for training/eval process
Loss: Loss:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册