add librispeech scripts

c4df6bac · Hui Zhang · 497cf4e2 · c4df6bac · c4df6bac · c4df6bac
10 changed files
--- a/examples/librispeech/s1/conf/chunk_confermer.yaml
+++ b/examples/librispeech/s1/conf/chunk_confermer.yaml
@@ -79,7 +79,7 @@ model:


 training:
-  n_epoch: 20
+  n_epoch: 120
  accum_grad: 1
  global_grad_clip: 5.0
  optim: adam
@@ -90,11 +90,11 @@ training:
  scheduler_conf:
    warmup_steps: 25000
    lr_decay: 1.0
-  log_interval: 1
+  log_interval: 100


 decoding:
-  batch_size: 64
+  batch_size: 128
  error_rate_type: wer
  decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
  lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm

--- a/examples/librispeech/s1/conf/chunk_transformer.yaml
+++ b/examples/librispeech/s1/conf/chunk_transformer.yaml
@@ -8,7 +8,7 @@ data:
  spm_model_prefix: 'data/bpe_unigram_200'
  mean_std_filepath: ""
  augmentation_config: conf/augmentation.json
-  batch_size: 4
+  batch_size: 64
  min_input_len: 0.5  # second
  max_input_len: 20.0 # second
  min_output_len: 0.0 # tokens
@@ -72,18 +72,18 @@ model:


 training:
-  n_epoch: 20
+  n_epoch: 120
  accum_grad: 1
  global_grad_clip: 5.0
  optim: adam
  optim_conf:
-    lr: 0.002
+    lr: 0.001
    weight_decay: 1e-06
  scheduler: warmuplr     # pytorch v1.1.0+ required
  scheduler_conf:
    warmup_steps: 25000
    lr_decay: 1.0
-  log_interval: 1
+  log_interval: 100


 decoding:

--- a/examples/librispeech/s1/conf/conformer.yaml
+++ b/examples/librispeech/s1/conf/conformer.yaml
@@ -5,14 +5,14 @@ data:
  test_manifest: data/manifest.tiny
  vocab_filepath: data/vocab.txt 
  unit_type: 'spm'
-  spm_model_prefix: 'data/bpe_unigram_200'
+  spm_model_prefix: 'data/bpe_unigram_5000'
  mean_std_filepath: ""
  augmentation_config: conf/augmentation.json
-  batch_size: 4
-  min_input_len: 0.5
-  max_input_len: 20.0
-  min_output_len: 0.0
-  max_output_len: 400.0
+  batch_size: 64
+  min_input_len: 0.5  # seconds
+  max_input_len: 20.0 # seconds
+  min_output_len: 0.0 # tokens
+  max_output_len: 400.0 # tokens
  min_output_input_ratio: 0.05
  max_output_input_ratio: 10.0
  raw_wav: True  # use raw_wav or kaldi feature
@@ -49,7 +49,7 @@ model:
        positional_dropout_rate: 0.1
        attention_dropout_rate: 0.0
        input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8
-        normalize_before: true
+        normalize_before: True
        use_cnn_module: True
        cnn_module_kernel: 15
        activation_type: 'swish'
@@ -75,18 +75,18 @@ model:


 training:
-  n_epoch: 20
-  accum_grad: 4
+  n_epoch: 120
+  accum_grad: 2
  global_grad_clip: 5.0
  optim: adam
  optim_conf:
-    lr: 0.002
+    lr: 0.004
    weight_decay: 1e-06
  scheduler: warmuplr     # pytorch v1.1.0+ required
  scheduler_conf:
    warmup_steps: 25000
    lr_decay: 1.0
-  log_interval: 1
+  log_interval: 100


 decoding:

--- a/examples/librispeech/s1/conf/transformer.yaml
+++ b/examples/librispeech/s1/conf/transformer.yaml
@@ -8,7 +8,7 @@ data:
  spm_model_prefix: 'data/bpe_unigram_200'
  mean_std_filepath: ""
  augmentation_config: conf/augmentation.json
-  batch_size: 4
+  batch_size: 64
  min_input_len: 0.5  # second
  max_input_len: 20.0 # second
  min_output_len: 0.0 # tokens
@@ -70,18 +70,18 @@ model:


 training:
-  n_epoch: 20
-  accum_grad: 1
+  n_epoch: 120
+  accum_grad: 2
  global_grad_clip: 5.0
  optim: adam
  optim_conf:
-    lr: 0.002
+    lr: 0.004
    weight_decay: 1e-06
  scheduler: warmuplr     # pytorch v1.1.0+ required
  scheduler_conf:
    warmup_steps: 25000
    lr_decay: 1.0
-  log_interval: 1
+  log_interval: 100


 decoding:

--- a/examples/librispeech/s1/local/avg.sh
+++ b/examples/librispeech/s1/local/avg.sh
 #! /usr/bin/env bash

-if [ $# != 2 ];then
+if [ $# != 2 ]; then
    echo "usage: ${0} ckpt_dir avg_num"
    exit -1
 fi
@@ -14,10 +14,10 @@ python3 -u ${MAIN_ROOT}/utils/avg_model.py \
 --ckpt_dir ${ckpt_dir}  \
 --num ${average_num} \
 --val_best
-            
+
 if [ $? -ne 0 ]; then
    echo "Failed in avg ckpt!"
    exit 1
 fi

-exit 0
\ No newline at end of file
+exit 0
--- a/examples/librispeech/s1/local/data.sh
+++ b/examples/librispeech/s1/local/data.sh
@@ -4,7 +4,7 @@ stage=-1
 stop_stage=100

 # bpemode (unigram or bpe)
-nbpe=200
+nbpe=5000
 bpemode=unigram
 bpeprefix="data/bpe_${bpemode}_${nbpe}"

@@ -20,14 +20,16 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    python3 ${TARGET_DIR}/librispeech/librispeech.py \
    --manifest_prefix="data/manifest" \
    --target_dir="${TARGET_DIR}/librispeech" \
-    --full_download="False"
-    
+    --full_download="True"
+
    if [ $? -ne 0 ]; then
        echo "Prepare LibriSpeech failed. Terminated."
        exit 1
    fi
-    
-    head -n 64 data/manifest.dev-clean  > data/manifest.tiny.raw
+
+    for set in train-clean-100 train-clean-360 train-other-500; do
+        cat "data/manifest.${set}"
+    done > data/manifest.train.raw  # overwrite rather than append, so re-running this stage does not duplicate entries
 fi

 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
@@ -38,8 +40,8 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    --spm_mode ${bpemode} \
    --spm_model_prefix ${bpeprefix} \
    --vocab_path="data/vocab.txt" \
-    --manifest_paths="data/manifest.tiny.raw"
-    
+    --manifest_paths="data/manifest.train.raw"
+
    if [ $? -ne 0 ]; then
        echo "Build vocabulary failed. Terminated."
        exit 1
@@ -49,18 +51,19 @@ fi

 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # compute mean and stddev for normalizer
+    num_workers=$(nproc)
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
-    --manifest_path="data/manifest.tiny.raw" \
-    --num_samples=64 \
+    --manifest_path="data/manifest.train.raw" \
+    --num_samples=-1 \
    --specgram_type="fbank" \
    --feat_dim=80 \
    --delta_delta=false \
    --sample_rate=16000 \
    --stride_ms=10.0 \
    --window_ms=25.0 \
-    --num_workers=2 \
+    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"
-    
+
    if [ $? -ne 0 ]; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
@@ -76,10 +79,10 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    --unit_type "spm" \
    --spm_model_prefix ${bpeprefix} \
    --vocab_path="data/vocab.txt" \
-    --manifest_path="data/manifest.tiny.raw" \
-    --output_path="data/manifest.tiny"
-    
-    
+    --manifest_path="data/manifest.train.raw" \
+    --output_path="data/manifest.train"
+
+
    if [ $? -ne 0 ]; then
        echo "Formt mnaifest failed. Terminated."
        exit 1

--- a/examples/librispeech/s1/local/export.sh
+++ b/examples/librispeech/s1/local/export.sh
@@ -5,14 +5,24 @@ if [ $# != 3 ];then
    exit -1
 fi

+ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
+echo "using $ngpu gpus..."
+
 config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3

+device=gpu  # default; switched to cpu below when no GPU is visible
+if [ "${ngpu}" -eq 0 ]; then  # ngpu is the device count parsed from CUDA_VISIBLE_DEVICES above
+    device=cpu
+fi
+
 python3 -u ${BIN_DIR}/export.py \
+--device ${device} \
+--nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \
--export_path ${jit_model_export_path} 
+--export_path ${jit_model_export_path}


 if [ $? -ne 0 ]; then

--- a/examples/librispeech/s1/local/train.sh
+++ b/examples/librispeech/s1/local/train.sh
@@ -10,6 +10,7 @@ echo "using $ngpu gpus..."

 config_path=$1
 ckpt_name=$2
+
 device=gpu
 if [ ngpu != 0 ];then
    device=cpu

--- a/examples/librispeech/s1/run.sh
+++ b/examples/librispeech/s1/run.sh
 #!/bin/bash
 set -e
-
 source path.sh
-source ${MAIN_ROOT}/utils/parse_options.sh

-# prepare data
-bash ./local/data.sh
+stage=0
+stop_stage=100
+ckpt=conformer
+avg_num=30
+avg_ckpt=avg_${avg_num}
+
+source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
+
+if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+    # prepare data
+    bash ./local/data.sh || exit -1
+fi

-# train model, all `ckpt` under `exp` dir
-CUDA_VISIBLE_DEVICES=0 ./local/train.sh conf/conformer.yaml test
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    # train model, all `ckpt` under `exp` dir
+    CUDA_VISIBLE_DEVICES=4,5,6,7 ./local/train.sh conf/conformer.yaml  ${ckpt}
+fi

-# test ckpt 1
-CUDA_VISIBLE_DEVICES=0 ./local/test.sh conf/conformer.yaml exp/test/checkpoints/1
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+    # avg n best model
+    ./local/avg.sh exp/${ckpt}/checkpoints ${avg_num}
+fi

-# avg 1 best model
-./local/avg.sh exp/test/checkpoints 1
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    # test ckpt avg_n
+    CUDA_VISIBLE_DEVICES=7 ./local/test.sh conf/conformer.yaml exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
+fi

-# export ckpt 1
-./local/export.sh conf/conformer.yaml exp/test/checkpoints/1 exp/test/checkpoints/1.jit.model
\ No newline at end of file
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    # export ckpt avg_n
+    CUDA_VISIBLE_DEVICES= ./local/export.sh conf/conformer.yaml exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
+fi
--- a/examples/tiny/s1/run.sh
+++ b/examples/tiny/s1/run.sh
 #!/bin/bash
 set -e
-
 source path.sh
+
+stage=0
+stop_stage=100
+ckpt=conformer
+avg_num=1
+avg_ckpt=avg_${avg_num}
+
 source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

-# prepare data
-bash ./local/data.sh || exit -1
+if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+    # prepare data
+    bash ./local/data.sh || exit -1
+fi

-# train model, all `ckpt` under `exp` dir
-CUDA_VISIBLE_DEVICES=0 ./local/train.sh conf/conformer.yaml test || exit -1
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    # train model, all `ckpt` under `exp` dir
+    CUDA_VISIBLE_DEVICES=0 ./local/train.sh conf/conformer.yaml  ${ckpt}
+fi

-# avg 1 best model
-./local/avg.sh exp/test/checkpoints 1
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+    # avg n best model
+    ./local/avg.sh exp/${ckpt}/checkpoints ${avg_num}
+fi

-# test ckpt 1
-CUDA_VISIBLE_DEVICES=0 ./local/test.sh conf/conformer.yaml exp/test/checkpoints/avg_1 || exit -1
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    # test ckpt avg_n
+    CUDA_VISIBLE_DEVICES=0 ./local/test.sh conf/conformer.yaml exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
+fi

-# export ckpt 1
-CUDA_VISIBLE_DEVICES= ./local/export.sh conf/conformer.yaml exp/test/checkpoints/avg_1 exp/test/checkpoints/avg_1.jit.model
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    # export ckpt avg_n
+    CUDA_VISIBLE_DEVICES= ./local/export.sh conf/conformer.yaml exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
+fi