---
# Flat hyperparameter mapping: every key below is top-level. The section
# comments are labels only and do not create nesting.
# NOTE(review): key names suggest a speech-synthesis training config;
# confirm against the code that loads this file.

# data processing
p_pronunciation: 0.99
sample_rate: 22050  # Hz
n_fft: 1024
win_length: 1024
hop_length: 256
n_mels: 80
reduction_factor: 4

# model-s2s
n_speakers: 1
speaker_dim: 16
char_dim: 256
encoder_dim: 64
kernel_size: 5
encoder_layers: 7
decoder_layers: 8
prenet_sizes: [128]
attention_dim: 128

# model-postnet
postnet_layers: 5
postnet_dim: 256

# position embedding
position_weight: 1.0
position_rate: 5.54
forward_step: 4
backward_step: 0
# NOTE(review): dropout is listed after the position-embedding keys in the
# original ordering; whether it applies only there is not visible here.
dropout: 0.05

# output-griffinlim
sharpening_factor: 1.4

# optimizer
learning_rate: 0.001
clip_value: 5.0
clip_norm: 100.0

# training
batch_size: 16
report_interval: 10000
save_interval: 10000
valid_size: 5