提交 001cdb09 编写于 作者: Yang Nie 提交者: Tingquan Gao

update MobileViTv3-v2 configs

上级 400de784
......@@ -48,14 +48,13 @@ Optimizer:
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 0.002
eta_min: 0.0002
-warmup_epoch: 20 # 20000 iterations
+warmup_epoch: 16 # 20000 iterations
warmup_start_lr: 1e-6
# by_epoch: True
clip_norm: 10
......@@ -107,7 +106,7 @@ DataLoader:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
-shuffle: False
+shuffle: True
loader:
num_workers: 4
use_shared_memory: True
......
......@@ -48,14 +48,13 @@ Optimizer:
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 0.002
eta_min: 0.0002
-warmup_epoch: 20 # 20000 iterations
+warmup_epoch: 16 # 20000 iterations
warmup_start_lr: 1e-6
# by_epoch: True
clip_norm: 10
......@@ -107,7 +106,7 @@ DataLoader:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
-shuffle: False
+shuffle: True
loader:
num_workers: 4
use_shared_memory: True
......
......@@ -48,14 +48,13 @@ Optimizer:
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
no_weight_decay_name: .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 0.002
eta_min: 0.0002
-warmup_epoch: 20 # 20000 iterations
+warmup_epoch: 16 # 20000 iterations
warmup_start_lr: 1e-6
# by_epoch: True
clip_norm: 10
......@@ -107,7 +106,7 @@ DataLoader:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
-shuffle: False
+shuffle: True
loader:
num_workers: 4
use_shared_memory: True
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册