# Training settings.
# NOTE(review): the nesting below was reconstructed from a copy in which
# several `key: value` pairs shared one line and indentation was lost (not
# parseable as YAML). Confirm the layout against the consumer's schema.
TrainConfig:
  epochs: 12
  eval_iter: 750
  learning_rate: 2.0e-5
  optimizer_builder:
    optimizer:
      type: AdamW
    # NOTE(review): assumed to be a sibling of `optimizer` -- verify.
    weight_decay: 0.01
  # NOTE(review): assumed to belong to TrainConfig, not the top level -- verify.
  origin_metric: 0.6067