提交 001cdb09 编写于 作者: Y Yang Nie 提交者: Tingquan Gao

update MobileViTv3-v2 configs

上级 400de784
...@@ -48,14 +48,13 @@ Optimizer: ...@@ -48,14 +48,13 @@ Optimizer:
beta2: 0.999 beta2: 0.999
epsilon: 1e-8 epsilon: 1e-8
weight_decay: 0.05 weight_decay: 0.05
no_weight_decay_name: .bias norm
one_dim_param_no_weight_decay: True one_dim_param_no_weight_decay: True
lr: lr:
# for 8 cards # for 8 cards
name: Cosine name: Cosine
learning_rate: 0.002 learning_rate: 0.002
eta_min: 0.0002 eta_min: 0.0002
warmup_epoch: 20 # 20000 iterations warmup_epoch: 16 # 20000 iterations
warmup_start_lr: 1e-6 warmup_start_lr: 1e-6
# by_epoch: True # by_epoch: True
clip_norm: 10 clip_norm: 10
...@@ -107,7 +106,7 @@ DataLoader: ...@@ -107,7 +106,7 @@ DataLoader:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 128 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: True
loader: loader:
num_workers: 4 num_workers: 4
use_shared_memory: True use_shared_memory: True
......
...@@ -48,14 +48,13 @@ Optimizer: ...@@ -48,14 +48,13 @@ Optimizer:
beta2: 0.999 beta2: 0.999
epsilon: 1e-8 epsilon: 1e-8
weight_decay: 0.05 weight_decay: 0.05
no_weight_decay_name: .bias norm
one_dim_param_no_weight_decay: True one_dim_param_no_weight_decay: True
lr: lr:
# for 8 cards # for 8 cards
name: Cosine name: Cosine
learning_rate: 0.002 learning_rate: 0.002
eta_min: 0.0002 eta_min: 0.0002
warmup_epoch: 20 # 20000 iterations warmup_epoch: 16 # 20000 iterations
warmup_start_lr: 1e-6 warmup_start_lr: 1e-6
# by_epoch: True # by_epoch: True
clip_norm: 10 clip_norm: 10
...@@ -107,7 +106,7 @@ DataLoader: ...@@ -107,7 +106,7 @@ DataLoader:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 128 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: True
loader: loader:
num_workers: 4 num_workers: 4
use_shared_memory: True use_shared_memory: True
......
...@@ -48,14 +48,13 @@ Optimizer: ...@@ -48,14 +48,13 @@ Optimizer:
beta2: 0.999 beta2: 0.999
epsilon: 1e-8 epsilon: 1e-8
weight_decay: 0.05 weight_decay: 0.05
no_weight_decay_name: .bias norm
one_dim_param_no_weight_decay: True one_dim_param_no_weight_decay: True
lr: lr:
# for 8 cards # for 8 cards
name: Cosine name: Cosine
learning_rate: 0.002 learning_rate: 0.002
eta_min: 0.0002 eta_min: 0.0002
warmup_epoch: 20 # 20000 iterations warmup_epoch: 16 # 20000 iterations
warmup_start_lr: 1e-6 warmup_start_lr: 1e-6
# by_epoch: True # by_epoch: True
clip_norm: 10 clip_norm: 10
...@@ -107,7 +106,7 @@ DataLoader: ...@@ -107,7 +106,7 @@ DataLoader:
name: DistributedBatchSampler name: DistributedBatchSampler
batch_size: 128 batch_size: 128
drop_last: False drop_last: False
shuffle: False shuffle: True
loader: loader:
num_workers: 4 num_workers: 4
use_shared_memory: True use_shared_memory: True
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册