# chunk_transformer.yaml
# Float syntax reference: https://yaml.org/type/float.html
# Dataset manifests and input/output length bounds.
data:
  train_manifest: data/manifest.tiny
  dev_manifest: data/manifest.tiny
  test_manifest: data/manifest.tiny
  min_input_len: 0.5  # seconds
  max_input_len: 20.0  # seconds
  min_output_len: 0.0  # tokens
  max_output_len: 400.0  # tokens
  min_output_input_ratio: 0.05
  max_output_input_ratio: 10.0
  
# Batch collation and audio feature extraction settings.
collator:
  mean_std_filepath: ""
  vocab_filepath: data/vocab.txt
  unit_type: 'spm'
  spm_model_prefix: 'data/bpe_unigram_200'
  augmentation_config: conf/preprocess.yaml
  batch_size: 4
  raw_wav: true  # use raw_wav or kaldi feature
  spectrum_type: fbank  # linear, mfcc, fbank
  feat_dim: 80
  delta_delta: false
  dither: 1.0
  target_sample_rate: 16000
  # Explicit YAML null: a bare `None` is just the string "None" to a YAML loader.
  max_freq: null
  n_fft: null
  stride_ms: 10.0
  window_ms: 25.0
  use_dB_normalization: true
  target_dB: -20
  random_seed: 0
  keep_transcription_text: false
  sortagrad: true
  shuffle_method: batch_shuffle
  num_workers: 2


# network architecture
model:
  cmvn_file: "data/mean_std.json"
  cmvn_file_type: "json"

  # encoder related
  encoder: transformer
  encoder_conf:
    output_size: 256  # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12  # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d  # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true
    use_dynamic_chunk: true
    use_dynamic_left_chunk: false

  # decoder related
  decoder: transformer
  decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

  # hybrid CTC/attention
  model_conf:
    ctc_weight: 0.3
    ctc_dropoutrate: 0.0
    ctc_grad_norm_type: null
    lsm_weight: 0.1  # label smoothing option
    length_normalized_loss: false


# Optimisation, learning-rate schedule, logging and checkpoint retention.
training:
  n_epoch: 5
  accum_grad: 1
  global_grad_clip: 5.0
  optim: adam
  optim_conf:
    lr: 0.002
    # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a bare
    # `1e-06` as a string, not a float.
    weight_decay: 1.0e-06
  scheduler: warmuplr  # pytorch v1.1.0+ required
  scheduler_conf:
    warmup_steps: 25000
    lr_decay: 1.0
  log_interval: 1
  checkpoint:
    kbest_n: 10
    latest_n: 1


# Decoding / evaluation settings.
decoding:
  batch_size: 64
  error_rate_type: wer
  # one of: 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
  decoding_method: attention
  lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
  alpha: 2.5
  beta: 0.3
  beam_size: 10
  cutoff_prob: 1.0
  cutoff_top_n: 0
  num_proc_bsearch: 8
  ctc_weight: 0.5  # ctc weight for attention rescoring decode mode.
  # decoding chunk size. Defaults to -1.
  #   <0: for decoding, use full chunk.
  #   >0: for decoding, use fixed chunk size as set.
  #   0: used for training, it's prohibited here.
  decoding_chunk_size: -1
  num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
  simulate_streaming: false  # simulate streaming inference. Defaults to false.