# Hyperparameter configuration, grouped into four top-level sections:
# audio, network, vocoder and train. Each section is a flat mapping of
# scalar settings; meanings below that are inferred from key names only
# are marked as assumptions to confirm against the consuming code.

# Audio settings.
# NOTE(review): key names suggest spectrogram/feature-extraction
# parameters (sr presumably a sample rate, fmin/fmax presumably a
# frequency range) -- confirm against the consumer.
audio:
  num_mels: 80
  n_fft: 1024
  sr: 22050
  preemphasis: 0.97
  hop_length: 256
  # Same value as n_fft above.
  win_length: 1024
  power: 1.2
  fmin: 0
  fmax: 8000

# Main network settings. Encoder and decoder are configured with the
# same head count (4) and layer count (3).
network:
  hidden_size: 256
  embedding_size: 512
  encoder_num_head: 4
  encoder_n_layers: 3
  decoder_num_head: 4
  decoder_n_layers: 3
  outputs_per_step: 1
  stop_loss_weight: 8

# Vocoder settings. hidden_size here is a separate key from
# network.hidden_size, although both currently hold 256.
vocoder:
  hidden_size: 256

# Training-run settings.
# NOTE(review): the *_interval, warm_up_step and max_iteration values are
# presumably counted in training steps -- confirm against the trainer.
train:
  batch_size: 32
  learning_rate: 0.001
  warm_up_step: 4000
  grad_clip_thresh: 1.0
  checkpoint_interval: 1000
  image_interval: 2000
  max_iteration: 500000