diff --git a/examples/librispeech/s1/conf/chunk_confermer.yaml b/examples/librispeech/s1/conf/chunk_confermer.yaml
index bd4279e2be5adc71937afb524c5739c689bae074..43e7802b655d06084ef3ad97d32c1557fe9a3651 100644
--- a/examples/librispeech/s1/conf/chunk_confermer.yaml
+++ b/examples/librispeech/s1/conf/chunk_confermer.yaml
@@ -79,7 +79,7 @@ model:
 
 
 training:
-  n_epoch: 20
+  n_epoch: 120
   accum_grad: 1
   global_grad_clip: 5.0
   optim: adam
@@ -90,11 +90,11 @@ training:
   scheduler_conf:
     warmup_steps: 25000
     lr_decay: 1.0
-  log_interval: 1
+  log_interval: 100
 
 
 decoding:
-  batch_size: 64
+  batch_size: 128
   error_rate_type: wer
   decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
   lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
diff --git a/examples/librispeech/s1/conf/chunk_transformer.yaml b/examples/librispeech/s1/conf/chunk_transformer.yaml
index ba60c273564540a627e7da3e9f4502ed4f2bb28a..721cc3f3740701ddf94992f1ef486171cb67b5fd 100644
--- a/examples/librispeech/s1/conf/chunk_transformer.yaml
+++ b/examples/librispeech/s1/conf/chunk_transformer.yaml
@@ -8,7 +8,7 @@ data:
   spm_model_prefix: 'data/bpe_unigram_200'
   mean_std_filepath: ""
   augmentation_config: conf/augmentation.json
-  batch_size: 4
+  batch_size: 64
   min_input_len: 0.5 # second
   max_input_len: 20.0 # second
   min_output_len: 0.0 # tokens
@@ -72,18 +72,18 @@ model:
 
 
 training:
-  n_epoch: 20
+  n_epoch: 120
   accum_grad: 1
   global_grad_clip: 5.0
   optim: adam
   optim_conf:
-    lr: 0.002
+    lr: 0.001
     weight_decay: 1e-06
   scheduler: warmuplr # pytorch v1.1.0+ required
   scheduler_conf:
     warmup_steps: 25000
     lr_decay: 1.0
-  log_interval: 1
+  log_interval: 100
 
 
 decoding:
diff --git a/examples/librispeech/s1/conf/conformer.yaml b/examples/librispeech/s1/conf/conformer.yaml
index 83f4f5af46100735f5fc78761c4cc21901fe15e7..576d2ca0c15812b6add73346555777131cc03161 100644
--- a/examples/librispeech/s1/conf/conformer.yaml
+++ b/examples/librispeech/s1/conf/conformer.yaml
@@ -5,14 +5,14 @@ data:
   test_manifest: data/manifest.tiny
   vocab_filepath: data/vocab.txt
   unit_type: 'spm'
-  spm_model_prefix: 'data/bpe_unigram_200'
+  spm_model_prefix: 'data/bpe_unigram_5000'
   mean_std_filepath: ""
   augmentation_config: conf/augmentation.json
-  batch_size: 4
-  min_input_len: 0.5
-  max_input_len: 20.0
-  min_output_len: 0.0
-  max_output_len: 400.0
+  batch_size: 64
+  min_input_len: 0.5 # seconds
+  max_input_len: 20.0 # seconds
+  min_output_len: 0.0 # tokens
+  max_output_len: 400.0 # tokens
   min_output_input_ratio: 0.05
   max_output_input_ratio: 10.0
   raw_wav: True # use raw_wav or kaldi feature
@@ -49,7 +49,7 @@ model:
     positional_dropout_rate: 0.1
     attention_dropout_rate: 0.0
     input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8
-    normalize_before: true
+    normalize_before: True
    use_cnn_module: True
    cnn_module_kernel: 15
    activation_type: 'swish'
@@ -75,18 +75,18 @@ model:
 
 
 training:
-  n_epoch: 20
-  accum_grad: 4
+  n_epoch: 120
+  accum_grad: 2
   global_grad_clip: 5.0
   optim: adam
   optim_conf:
-    lr: 0.002
+    lr: 0.004
     weight_decay: 1e-06
   scheduler: warmuplr # pytorch v1.1.0+ required
   scheduler_conf:
     warmup_steps: 25000
     lr_decay: 1.0
-  log_interval: 1
+  log_interval: 100
 
 
 decoding:
diff --git a/examples/librispeech/s1/conf/transformer.yaml b/examples/librispeech/s1/conf/transformer.yaml
index 3f3170bdfab088a85b7d365834e8f659c42f31bd..8113a791f6aac438a822465025ac6b4a7dbf5820 100644
--- a/examples/librispeech/s1/conf/transformer.yaml
+++ b/examples/librispeech/s1/conf/transformer.yaml
@@ -8,7 +8,7 @@ data:
   spm_model_prefix: 'data/bpe_unigram_200'
   mean_std_filepath: ""
   augmentation_config: conf/augmentation.json
-  batch_size: 4
+  batch_size: 64
   min_input_len: 0.5 # second
   max_input_len: 20.0 # second
   min_output_len: 0.0 # tokens
@@ -70,18 +70,18 @@ model:
 
 
 training:
-  n_epoch: 20
-  accum_grad: 1
+  n_epoch: 120
+  accum_grad: 2
   global_grad_clip: 5.0
   optim: adam
   optim_conf:
-    lr: 0.002
+    lr: 0.004
     weight_decay: 1e-06
   scheduler: warmuplr # pytorch v1.1.0+ required
   scheduler_conf:
     warmup_steps: 25000
     lr_decay: 1.0
-  log_interval: 1
+  log_interval: 100
 
 
 decoding:
diff --git a/examples/librispeech/s1/local/avg.sh b/examples/librispeech/s1/local/avg.sh
index 8589e35308c97fdcb1abbb13bcf35ca4042c3ae2..3d15ce2372917ac135ebeaedd8d2ea98c0289969 100755
--- a/examples/librispeech/s1/local/avg.sh
+++ b/examples/librispeech/s1/local/avg.sh
@@ -1,6 +1,6 @@
 #! /usr/bin/env bash
 
-if [ $# != 2 ];then
+if [ $# != 2 ]; then
     echo "usage: ${0} ckpt_dir avg_num"
     exit -1
 fi
@@ -14,10 +14,10 @@
 python3 -u ${MAIN_ROOT}/utils/avg_model.py \
 --ckpt_dir ${ckpt_dir} \
 --num ${average_num} \
 --val_best
-
+
 if [ $? -ne 0 ]; then
     echo "Failed in avg ckpt!"
     exit 1
 fi
-exit 0
\ No newline at end of file
+exit 0
diff --git a/examples/librispeech/s1/local/data.sh b/examples/librispeech/s1/local/data.sh
index bf3f1b85ccb18f671e75dfd2697a1877b8bb7605..3c5cfc6fdec4c01d8ab213422219009708d862e5 100755
--- a/examples/librispeech/s1/local/data.sh
+++ b/examples/librispeech/s1/local/data.sh
@@ -4,7 +4,7 @@ stage=-1
 stop_stage=100
 
 # bpemode (unigram or bpe)
-nbpe=200
+nbpe=5000
 bpemode=unigram
 bpeprefix="data/bpe_${bpemode}_${nbpe}"
 
@@ -20,14 +20,16 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
     python3 ${TARGET_DIR}/librispeech/librispeech.py \
     --manifest_prefix="data/manifest" \
     --target_dir="${TARGET_DIR}/librispeech" \
-    --full_download="False"
-
+    --full_download="True"
+
     if [ $? -ne 0 ]; then
         echo "Prepare LibriSpeech failed. Terminated."
         exit 1
     fi
-
-    head -n 64 data/manifest.dev-clean > data/manifest.tiny.raw
+
+    for set in train-clean-100 train-clean-360 train-other-500; do
+        cat data/manifest.${set} >> data/manifest.train.raw
+    done
 fi
 
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
@@ -38,8 +40,8 @@
     --spm_mode ${bpemode} \
     --spm_model_prefix ${bpeprefix} \
     --vocab_path="data/vocab.txt" \
-    --manifest_paths="data/manifest.tiny.raw"
-
+    --manifest_paths="data/manifest.train.raw"
+
     if [ $? -ne 0 ]; then
         echo "Build vocabulary failed. Terminated."
         exit 1
@@ -49,18 +51,19 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # compute mean and stddev for normalizer
+    num_workers=$(nproc)
     python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
-    --manifest_path="data/manifest.tiny.raw" \
-    --num_samples=64 \
+    --manifest_path="data/manifest.train.raw" \
+    --num_samples=-1 \
     --specgram_type="fbank" \
     --feat_dim=80 \
     --delta_delta=false \
     --sample_rate=16000 \
     --stride_ms=10.0 \
     --window_ms=25.0 \
-    --num_workers=2 \
+    --num_workers=${num_workers} \
     --output_path="data/mean_std.json"
-
+
     if [ $? -ne 0 ]; then
         echo "Compute mean and stddev failed. Terminated."
         exit 1
@@ -76,10 +79,10 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     --unit_type "spm" \
     --spm_model_prefix ${bpeprefix} \
     --vocab_path="data/vocab.txt" \
-    --manifest_path="data/manifest.tiny.raw" \
-    --output_path="data/manifest.tiny"
-
-
+    --manifest_path="data/manifest.train.raw" \
+    --output_path="data/manifest.train"
+
+
     if [ $? -ne 0 ]; then
         echo "Formt mnaifest failed. Terminated."
        exit 1
diff --git a/examples/librispeech/s1/local/export.sh b/examples/librispeech/s1/local/export.sh
index b83a13a980d63473448d06cf39e6002fa0c55881..fb0c3cfae305e7a065698005e8918506aec2c9bb 100755
--- a/examples/librispeech/s1/local/export.sh
+++ b/examples/librispeech/s1/local/export.sh
@@ -5,14 +5,24 @@ if [ $# != 3 ];then
     exit -1
 fi
 
+ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
+echo "using $ngpu gpus..."
+
 config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3
 
+device=gpu
+if [ ${ngpu} == 0 ]; then
+    device=cpu
+fi
+
 python3 -u ${BIN_DIR}/export.py \
+--device ${device} \
+--nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \
---export_path ${jit_model_export_path}
+--export_path ${jit_model_export_path}
 
 if [ $? -ne 0 ]; then
diff --git a/examples/librispeech/s1/local/train.sh b/examples/librispeech/s1/local/train.sh
index 3ed5338088ca550092c7c01b4a4b6ea7d7f47549..47645d4b5ca0e53e6fbce8ddb14cf577215a6399 100755
--- a/examples/librispeech/s1/local/train.sh
+++ b/examples/librispeech/s1/local/train.sh
@@ -10,6 +10,7 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
 
+
 device=gpu
 if [ ngpu != 0 ];then
     device=cpu
diff --git a/examples/librispeech/s1/run.sh b/examples/librispeech/s1/run.sh
old mode 100644
new mode 100755
index 3ee16e3fc8234d4d5599cfc9e356adc34a429e11..c166baecb3df1aa0237adc15c679001e3e9c9da0
--- a/examples/librispeech/s1/run.sh
+++ b/examples/librispeech/s1/run.sh
@@ -1,20 +1,36 @@
 #!/bin/bash
 set -e
-
 source path.sh
-source ${MAIN_ROOT}/utils/parse_options.sh
-
-# prepare data
-bash ./local/data.sh
+
+stage=0
+stop_stage=100
+ckpt=conformer
+avg_num=30
+avg_ckpt=avg_${avg_num}
+
+source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
+
+if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+    # prepare data
+    bash ./local/data.sh || exit -1
+fi
 
-# train model, all `ckpt` under `exp` dir
-CUDA_VISIBLE_DEVICES=0 ./local/train.sh conf/conformer.yaml test
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    # train model, all `ckpt` under `exp` dir
+    CUDA_VISIBLE_DEVICES=4,5,6,7 ./local/train.sh conf/conformer.yaml ${ckpt}
+fi
 
-# test ckpt 1
-CUDA_VISIBLE_DEVICES=0 ./local/test.sh conf/conformer.yaml exp/test/checkpoints/1
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+    # avg n best model
+    ./local/avg.sh exp/${ckpt}/checkpoints ${avg_num}
+fi
 
-# avg 1 best model
-./local/avg.sh exp/test/checkpoints 1
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    # test ckpt avg_n
+    CUDA_VISIBLE_DEVICES=7 ./local/test.sh conf/conformer.yaml exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
+fi
 
-# export ckpt 1
-./local/export.sh conf/conformer.yaml exp/test/checkpoints/1 exp/test/checkpoints/1.jit.model
\ No newline at end of file
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    # export ckpt avg_n
+    CUDA_VISIBLE_DEVICES= ./local/export.sh conf/conformer.yaml exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
+fi
diff --git a/examples/tiny/s1/run.sh b/examples/tiny/s1/run.sh
index 71cd453490b8ca9848670569f60c44b24211594d..d70863ed64aa76822844361b327d77094865f54e 100644
--- a/examples/tiny/s1/run.sh
+++ b/examples/tiny/s1/run.sh
@@ -1,20 +1,36 @@
 #!/bin/bash
 set -e
-
 source path.sh
+
+stage=0
+stop_stage=100
+ckpt=conformer
+avg_num=1
+avg_ckpt=avg_${avg_num}
+
 source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
 
-# prepare data
-bash ./local/data.sh || exit -1
+if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+    # prepare data
+    bash ./local/data.sh || exit -1
+fi
 
-# train model, all `ckpt` under `exp` dir
-CUDA_VISIBLE_DEVICES=0 ./local/train.sh conf/conformer.yaml test || exit -1
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    # train model, all `ckpt` under `exp` dir
+    CUDA_VISIBLE_DEVICES=0 ./local/train.sh conf/conformer.yaml ${ckpt}
+fi
 
-# avg 1 best model
-./local/avg.sh exp/test/checkpoints 1
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+    # avg n best model
+    ./local/avg.sh exp/${ckpt}/checkpoints ${avg_num}
+fi
 
-# test ckpt 1
-CUDA_VISIBLE_DEVICES=0 ./local/test.sh conf/conformer.yaml exp/test/checkpoints/avg_1 || exit -1
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    # test ckpt avg_n
+    CUDA_VISIBLE_DEVICES=0 ./local/test.sh conf/conformer.yaml exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
+fi
 
-# export ckpt 1
-CUDA_VISIBLE_DEVICES= ./local/export.sh conf/conformer.yaml exp/test/checkpoints/avg_1 exp/test/checkpoints/avg_1.jit.model
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    # export ckpt avg_n
+    CUDA_VISIBLE_DEVICES= ./local/export.sh conf/conformer.yaml exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
+fi
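
Note: with the staged run.sh layout above, utils/parse_options.sh (the standard Kaldi-style option parser) lets any variable declared before it is sourced (stage, stop_stage, ckpt, avg_num) be overridden from the command line, so individual stages can be re-run without editing the script. A minimal usage sketch, assuming the scripts are invoked from the example root:

    # re-run only checkpoint averaging and decoding (stages 2-3)
    bash run.sh --stage 2 --stop_stage 3 --avg_num 30

    # decode an existing averaged checkpoint, nothing else
    bash run.sh --stage 3 --stop_stage 3 --ckpt conformer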