###########################################################
#               FEATURE EXTRACTION SETTING                #
###########################################################
# Not actually used; the sample rate actually used is 16000.
sr: 24000
n_fft: 2048
win_length: 1200
hop_length: 300
n_mels: 80

###########################################################
#                      MODEL SETTING                      #
###########################################################
generator_params:
  dim_in: 64
  style_dim: 64
  max_conv_dim: 512
  w_hpf: 0
  F0_channel: 256
mapping_network_params:
  num_domains: 20          # number of speakers in StarGANv2
  latent_dim: 16
  style_dim: 64            # same as style_dim in generator_params
  hidden_dim: 512          # same as max_conv_dim in generator_params
style_encoder_params:
  dim_in: 64               # same as dim_in in generator_params
  style_dim: 64            # same as style_dim in generator_params
  num_domains: 20          # same as num_domains in mapping_network_params
  max_conv_dim: 512        # same as max_conv_dim in generator_params
discriminator_params:
  dim_in: 64               # same as dim_in in generator_params
  num_domains: 20          # same as num_domains in mapping_network_params
  max_conv_dim: 512        # same as max_conv_dim in generator_params
  n_repeat: 4
asr_params:
  input_dim: 80
  hidden_dim: 256
  n_token: 80
  token_embedding_dim: 256

###########################################################
#                 ADVERSARIAL LOSS SETTING                #
###########################################################
loss_params:
  g_loss:
    lambda_sty: 1.
    lambda_cyc: 5.
    lambda_ds: 1.
    lambda_norm: 1.
    lambda_asr: 10.
    lambda_f0: 5.
    lambda_f0_sty: 0.1
    lambda_adv: 2.
    lambda_adv_cls: 0.5
    norm_bias: 0.5
  d_loss:
    lambda_reg: 1.
    lambda_adv_cls: 0.1
    lambda_con_reg: 10.
  adv_cls_epoch: 50
  con_reg_epoch: 30

###########################################################
#                   DATA LOADER SETTING                   #
###########################################################
batch_size: 5        # Batch size.
num_workers: 2       # Number of workers in DataLoader.

###########################################################
#              OPTIMIZER & SCHEDULER SETTING              #
###########################################################
generator_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1e-4
  epsilon: 1e-9
generator_scheduler_params:
  max_learning_rate: 2e-4
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000      # train_max_steps
  end_learning_rate: 2e-4
style_encoder_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1e-4
  epsilon: 1e-9
style_encoder_scheduler_params:
  max_learning_rate: 2e-4
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000      # train_max_steps
  end_learning_rate: 2e-4
mapping_network_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1e-4
  epsilon: 1e-9
mapping_network_scheduler_params:
  max_learning_rate: 2e-6
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000      # train_max_steps
  end_learning_rate: 2e-6
discriminator_optimizer_params:
  beta1: 0.0
  beta2: 0.99
  weight_decay: 1e-4
  epsilon: 1e-9
discriminator_scheduler_params:
  max_learning_rate: 2e-4
  phase_pct: 0.0
  divide_factor: 1
  total_steps: 200000      # train_max_steps
  end_learning_rate: 2e-4

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 150
num_snapshots: 5
seed: 1
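
###########################################################
# Usage sketch (an assumption, not part of this recipe):  #
# the file is plain YAML, so it can be inspected with     #
# PyYAML from Python, e.g.:                               #
#                                                         #
#   import yaml                                           #
#   # "starganv2_vc.yaml" is a hypothetical filename      #
#   with open("starganv2_vc.yaml") as f:                  #
#       config = yaml.safe_load(f)                        #
#   print(config["generator_params"]["style_dim"])  # 64  #
###########################################################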