未验证 提交 ed674366 编写于 作者: Z zhangyubo0722 提交者: GitHub

del head_init_scale (#2947)

上级 d7a7d3e5
......@@ -567,7 +567,7 @@ class VisionTransformer(nn.Layer):
drop_path_rate=0.,
norm_layer='nn.LayerNorm',
epsilon=1e-5,
-head_init_scale=1,
+head_init_scale=0.001,
**kwargs):
super().__init__()
global _model_diff
......
......@@ -27,7 +27,6 @@ Arch:
class_num: 1000
return_embed: False
pretrained: True
-head_init_scale: 0.001
# loss function config for training/eval process
Loss:
......
......@@ -27,7 +27,6 @@ Arch:
class_num: 1000
return_embed: False
pretrained: True
-head_init_scale: 0.001
# loss function config for training/eval process
Loss:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册