# Training settings.
# NOTE(review): this mapping was previously collapsed onto a single line,
# which is not parseable YAML (`: ` inside a plain scalar). The nesting
# below is reconstructed from key order -- confirm against the consumer's
# expected schema.
TrainConfig:
  epochs: 6
  eval_iter: 2000
  # Keep the decimal point and signed exponent: YAML 1.1 loaders such as
  # PyYAML only resolve this spelling as a float, not `3e-5`.
  learning_rate: 3.0e-5
  optimizer_builder:
    optimizer:
      type: AdamW
    # NOTE(review): placed as a sibling of `optimizer`, not inside it --
    # TODO confirm.
    weight_decay: 0.01
  # NOTE(review): presumably the baseline metric of the uncompressed or
  # original model -- verify against the consumer.
  origin_metric: 0.8098