chunk_confermer.yaml 3.4 KB
Newer Older
1 2 3 4 5
# https://yaml.org/type/float.html
data:
  train_manifest: data/manifest.tiny
  dev_manifest: data/manifest.tiny
  test_manifest: data/manifest.tiny
6
  min_input_len: 0.5  # second
7
  max_input_len: 30.0 # second
8 9 10 11 12 13 14
  min_output_len: 0.0 # tokens
  max_output_len: 400.0 # tokens
  min_output_input_ratio: 0.05
  max_output_input_ratio: 10.0
  
collator:
  mean_std_filepath: ""
15 16 17 18 19 20
  vocab_filepath: data/vocab.txt 
  unit_type: 'spm'
  spm_model_prefix: 'data/bpe_unigram_200'
  augmentation_config: conf/augmentation.json
  batch_size: 4
  raw_wav: True  # use raw_wav or kaldi feature
H
Hui Zhang 已提交
21
  spectrum_type: fbank #linear, mfcc, fbank
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
  feat_dim: 80
  delta_delta: False
  dither: 1.0
  target_sample_rate: 16000
  max_freq: None
  n_fft: None
  stride_ms: 10.0
  window_ms: 25.0
  use_dB_normalization: True
  target_dB: -20
  random_seed: 0
  keep_transcription_text: False
  sortagrad: True 
  shuffle_method: batch_shuffle
  num_workers: 2


# network architecture
model:
    cmvn_file: "data/mean_std.json"
    cmvn_file_type: "json"
    # encoder related
    encoder: conformer
    encoder_conf:
        output_size: 256    # dimension of attention
        attention_heads: 4
        linear_units: 2048  # the number of units of position-wise feed forward
        num_blocks: 12      # the number of encoder blocks
        dropout_rate: 0.1
        positional_dropout_rate: 0.1
        attention_dropout_rate: 0.0
        input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8
        normalize_before: True
        use_cnn_module: True
        cnn_module_kernel: 15
        activation_type: 'swish'
        pos_enc_layer_type: 'rel_pos'
        selfattention_layer_type: 'rel_selfattn'
        causal: True
        use_dynamic_chunk: True
        cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
        use_dynamic_left_chunk: false

    # decoder related
    decoder: transformer
    decoder_conf:
        attention_heads: 4
        linear_units: 2048
        num_blocks: 6
        dropout_rate: 0.1
        positional_dropout_rate: 0.1
        self_attention_dropout_rate: 0.0
        src_attention_dropout_rate: 0.0

    # hybrid CTC/attention
    model_conf:
        ctc_weight: 0.3
H
Hui Zhang 已提交
79
        ctc_dropoutrate: 0.0
H
Hui Zhang 已提交
80
        ctc_grad_norm_type: null
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
        lsm_weight: 0.1     # label smoothing option
        length_normalized_loss: false


training:
  n_epoch: 20
  accum_grad: 1
  global_grad_clip: 5.0
  optim: adam
  optim_conf:
    lr: 0.001
    weight_decay: 1e-06
  scheduler: warmuplr     # pytorch v1.1.0+ required
  scheduler_conf:
    warmup_steps: 25000
    lr_decay: 1.0
  log_interval: 1
H
Haoxin Ma 已提交
98 99 100
  checkpoint:
    kbest_n: 10
    latest_n: 1
101 102 103 104 105 106 107 108 109 110 111 112 113


decoding:
  batch_size: 64
  error_rate_type: wer
  decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
  lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
  alpha: 2.5
  beta: 0.3
  beam_size: 10
  cutoff_prob: 1.0
  cutoff_top_n: 0
  num_proc_bsearch: 8
H
Hui Zhang 已提交
114
  ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
115 116 117 118 119 120 121 122
  decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
      # <0: for decoding, use full chunk.
      # >0: for decoding, use fixed chunk size as set.
      # 0: used for training, it's prohibited here. 
  num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
  simulate_streaming: False  # simulate streaming inference. Defaults to False.