---
# Hyperparameters for audio feature extraction and for the FastSpeech /
# TransformerTTS models, plus learning-rate and gradient-clipping settings.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose line breaks and indentation had been lost. The audio/signal-processing
# keys are grouped under `audio`; all other keys are assumed to be top-level.
# In particular, `outputs_per_step` is assumed to belong to `audio` — confirm
# against the code that consumes this config.

audio:
  num_mels: 80  # the number of mel bands when calculating mel spectrograms.
  n_fft: 2048  # the number of fft components.
  sr: 22050  # the sampling rate of the audio data file.
  preemphasis: 0.97  # the preemphasis coefficient.
  hop_length: 256  # the number of samples to advance between frames.
  win_length: 1024  # the length (width) of the window function.
  power: 1.2  # the power to raise before griffin-lim.
  min_level_db: -100  # the minimum level db.
  ref_level_db: 20  # the reference level db.
  outputs_per_step: 1  # the outputs per step.

encoder_n_layer: 6  # the number of FFT blocks in the encoder.
encoder_head: 2  # the number of attention heads in the encoder.
encoder_conv1d_filter_size: 1536  # the filter size of conv1d in the encoder.
max_seq_len: 2048  # the max length of a sequence.
decoder_n_layer: 6  # the number of FFT blocks in the decoder.
decoder_head: 2  # the number of attention heads in the decoder.
decoder_conv1d_filter_size: 1536  # the filter size of conv1d in the decoder.
fs_hidden_size: 384  # the hidden size in the fastspeech model.
duration_predictor_output_size: 256  # the output size of the duration predictor.
duration_predictor_filter_size: 3  # the filter size of conv1d in the duration predictor.
fft_conv1d_filter: 3  # the filter size of conv1d in fft.
fft_conv1d_padding: 1  # the padding size of conv1d in fft.
dropout: 0.1  # the dropout in the network.
transformer_head: 4  # the number of attention heads in transformerTTS.

embedding_size: 512  # the embedding dimension of transformerTTS.
hidden_size: 256  # the hidden size in the transformerTTS model.

warm_up_step: 4000  # the warm-up steps of the learning rate.
grad_clip_thresh: 0.1  # the threshold of grad clip.