{
  "train_batch_size": 8,
  "gradient_accumulation_steps": 1,
  "steps_per_print": 1,
  "zero_optimization": true,
  "disable_allgather": true,
  "optimizer": {
    "type": "Adam",
    "legacy_fusion": false,
    "params": {
      "lr": 0.00015,
      "max_grad_norm": 1.0
    }
  },

  "fp16": {
    "enabled": true,
    "loss_scale": 0,
    "loss_scale_window": 1000,
    "hysteresis": 2,
    "min_loss_scale": 1
  }
}