# Settings grouped under the `audio` key.
# NOTE(review): the previous text placed several `key: value` pairs on each
# line, which YAML parsers reject. The nesting below was reconstructed from
# the key names: `audio` is assumed to end after `ref_level_db`. Confirm this
# boundary against the code that loads this file.
audio:
  num_mels: 80
  n_fft: 2048
  sr: 22050
  preemphasis: 0.97
  hop_length: 256
  win_length: 1024
  power: 1.2
  min_level_db: -100
  ref_level_db: 20

# Remaining hyperparameters, presumably top-level (see the review note above).
outputs_per_step: 1

encoder_n_layer: 6
encoder_head: 2
encoder_conv1d_filter_size: 1536
max_seq_len: 2048

decoder_n_layer: 6
decoder_head: 2
decoder_conv1d_filter_size: 1536

fs_hidden_size: 384
duration_predictor_output_size: 256
duration_predictor_filter_size: 3
fft_conv1d_filter: 3
fft_conv1d_padding: 1
dropout: 0.1
transformer_head: 4