# https://yaml.org/type/float.html
data:
  train_manifest: data/manifest.train.tiny
  dev_manifest: data/manifest.dev
  test_manifest: data/manifest.test
  min_input_len: 0.05  # second
  max_input_len: 30.0 # second
  min_output_len: 0.0 # tokens
  max_output_len: 400.0 # tokens
  min_output_input_ratio: 0.01
  max_output_input_ratio: 20.0
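  # Note (assumed behavior, not stated in this file): these bounds filter the manifests
  # before batching -- utterances outside [min_input_len, max_input_len] seconds,
  # transcripts outside [min_output_len, max_output_len] tokens, and examples whose
  # tokens-per-second ratio falls outside the output/input ratio range are dropped.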

collator:
  vocab_filepath: data/vocab.txt
  unit_type: 'spm'
  spm_model_prefix: data/bpe_unigram_8000
  mean_std_filepath: ""
  # augmentation_config: conf/augmentation.json
  batch_size: 10
  raw_wav: True  # use raw_wav or kaldi feature
  spectrum_type: fbank  # options: linear, mfcc, fbank
  feat_dim: 80
  delta_delta: False
  dither: 1.0
  target_sample_rate: 16000
  max_freq: None
  n_fft: None
  stride_ms: 10.0
  window_ms: 25.0
  use_dB_normalization: True
  target_dB: -20
  random_seed: 0
  keep_transcription_text: False
  sortagrad: True 
  shuffle_method: batch_shuffle
  num_workers: 2
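  # Worked example of the frame arithmetic implied above: at target_sample_rate 16000,
  # window_ms 25.0 covers 400 samples and stride_ms 10.0 advances 160 samples, so one
  # second of audio yields roughly 100 frames, each an 80-dim fbank vector (feat_dim).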


# network architecture
model:
    cmvn_file: "data/mean_std.json"
    cmvn_file_type: "json"
    # encoder related
    encoder: transformer
    encoder_conf:
        output_size: 256    # dimension of attention
        attention_heads: 4
        linear_units: 2048  # the number of units of position-wise feed forward
        num_blocks: 12      # the number of encoder blocks
        dropout_rate: 0.1
        positional_dropout_rate: 0.1
        attention_dropout_rate: 0.0
        input_layer: conv2d # encoder input type; choose from conv2d, conv2d6, or conv2d8
        normalize_before: true
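        # Note (assumed, following the usual conv2d subsampling front-ends): conv2d
        # reduces the frame rate by a factor of 4, conv2d6 by 6, and conv2d8 by 8
        # before the 12 transformer blocks.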

    # decoder related
    decoder: transformer
    decoder_conf:
        attention_heads: 4
        linear_units: 2048
        num_blocks: 6
        dropout_rate: 0.1
        positional_dropout_rate: 0.1
        self_attention_dropout_rate: 0.0
        src_attention_dropout_rate: 0.0

    # hybrid CTC/attention
    model_conf:
        asr_weight: 0.0
        ctc_weight: 0.0
        ctc_dropoutrate: 0.0
        ctc_grad_norm_type: null
        lsm_weight: 0.1     # label smoothing option
        length_normalized_loss: false
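        # Sketch of the assumed loss combination for this hybrid setup (u2_st-style
        # multitask training; the exact form lives in the model code, not here):
        #   loss = (1 - asr_weight) * st_loss
        #          + asr_weight * (ctc_weight * ctc_loss + (1 - ctc_weight) * asr_att_loss)
        # With asr_weight 0.0 and ctc_weight 0.0, only the attention-based ST loss is used.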


training:
  n_epoch: 120
  accum_grad: 2
  global_grad_clip: 5.0
  optim: adam
  optim_conf:
    lr: 0.004
    weight_decay: 1e-06
  scheduler: warmuplr     # pytorch v1.1.0+ required
  scheduler_conf:
    warmup_steps: 25000
    lr_decay: 1.0
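  # Assumed warmuplr behavior (Noam-style schedule; the formula is not defined here):
  #   lr_t = lr * warmup_steps^0.5 * min(t^-0.5, t * warmup_steps^-1.5)
  # i.e. linear ramp-up to lr over the first 25000 steps, then decay proportional to t^-0.5.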
  log_interval: 5
  checkpoint:
    kbest_n: 50
    latest_n: 5


decoding:
  batch_size: 5
  error_rate_type: char-bleu
  decoding_method: fullsentence  # 'fullsentence', 'simultaneous'
  alpha: 2.5
  beta: 0.3
  beam_size: 10
  cutoff_prob: 1.0
  cutoff_top_n: 0
  num_proc_bsearch: 8
  ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
  decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
      # <0: for decoding, use full chunk.
      # >0: for decoding, use fixed chunk size as set.
      # 0: only used during training; not valid for decoding.
  num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
  simulate_streaming: False  # simulate streaming inference. Defaults to False.
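  # Note (assumed): fullsentence decoding translates each utterance in a single pass and
  # char-bleu scores hypotheses with character-level BLEU rather than WER/CER; the
  # CTC/beam-search options above (alpha, beta, cutoff_prob, ctc_weight) presumably only
  # matter for the ASR-style decode modes.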