diff --git a/examples/aishell/asr0/local/export.sh b/examples/aishell/asr0/local/export.sh index 426a72fe54cfae8af4c564611e3b794f988f5468..ce7e6d6423c2cd479ed8050bd18ff4f2411b371f 100755 --- a/examples/aishell/asr0/local/export.sh +++ b/examples/aishell/asr0/local/export.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [ $# != 4 ];then - echo "usage: $0 config_path ckpt_prefix jit_model_path model_type" +if [ $# != 3 ];then + echo "usage: $0 config_path ckpt_prefix jit_model_path" exit -1 fi @@ -11,14 +11,12 @@ echo "using $ngpu gpus..." config_path=$1 ckpt_path_prefix=$2 jit_model_export_path=$3 -model_type=$4 python3 -u ${BIN_DIR}/export.py \ --ngpu ${ngpu} \ --config ${config_path} \ --checkpoint_path ${ckpt_path_prefix} \ ---export_path ${jit_model_export_path} \ ---model_type ${model_type} +--export_path ${jit_model_export_path} if [ $? -ne 0 ]; then echo "Failed in export!" diff --git a/examples/aishell/asr0/local/test.sh b/examples/aishell/asr0/local/test.sh index 363dbf0abb7b6f8ae71efd06408c5f780f813d1d..778c7142ef690edcaebb75095709620f974c4cd8 100755 --- a/examples/aishell/asr0/local/test.sh +++ b/examples/aishell/asr0/local/test.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [ $# != 4 ];then - echo "usage: ${0} config_path decode_config_path ckpt_path_prefix model_type" +if [ $# != 3 ];then + echo "usage: ${0} config_path decode_config_path ckpt_path_prefix" exit -1 fi @@ -13,7 +13,6 @@ echo "using $ngpu gpus..." config_path=$1 decode_config_path=$2 ckpt_prefix=$3 -model_type=$4 # download language model bash local/download_lm_ch.sh @@ -23,7 +22,7 @@ fi if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # format the reference test file - python utils/format_rsl.py \ + python3 utils/format_rsl.py \ --origin_ref data/manifest.test.raw \ --trans_ref data/manifest.test.text @@ -32,8 +31,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --config ${config_path} \ --decode_cfg ${decode_config_path} \ --result_file ${ckpt_prefix}.rsl \ - --checkpoint_path ${ckpt_prefix} \ - --model_type ${model_type} + --checkpoint_path ${ckpt_prefix} if [ $? -ne 0 ]; then echo "Failed in evaluation!" @@ -41,25 +39,25 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then fi # format the hyp file - python utils/format_rsl.py \ + python3 utils/format_rsl.py \ --origin_hyp ${ckpt_prefix}.rsl \ --trans_hyp ${ckpt_prefix}.rsl.text - python utils/compute-wer.py --char=1 --v=1 \ - data/manifest.test.text ${ckpt_prefix}.rsl.text > ${ckpt_prefix}.error + python3 utils/compute-wer.py --char=1 --v=1 \ + data/manifest.test.text ${ckpt_prefix}.rsl.text > ${ckpt_prefix}.error fi if [ ${stage} -le 101 ] && [ ${stop_stage} -ge 101 ]; then - python utils/format_rsl.py \ + python3 utils/format_rsl.py \ --origin_ref data/manifest.test.raw \ --trans_ref_sclite data/manifest.test.text.sclite - python utils/format_rsl.py \ - --origin_hyp ${ckpt_prefix}.rsl \ - --trans_hyp_sclite ${ckpt_prefix}.rsl.text.sclite + python3 utils/format_rsl.py \ + --origin_hyp ${ckpt_prefix}.rsl \ + --trans_hyp_sclite ${ckpt_prefix}.rsl.text.sclite - mkdir -p ${ckpt_prefix}_sclite - sclite -i wsj -r data/manifest.test.text.sclite -h ${ckpt_prefix}.rsl.text.sclite -e utf-8 -o all -O ${ckpt_prefix}_sclite -c NOASCII + mkdir -p ${ckpt_prefix}_sclite + sclite -i wsj -r data/manifest.test.text.sclite -h ${ckpt_prefix}.rsl.text.sclite -e utf-8 -o all -O ${ckpt_prefix}_sclite -c NOASCII fi exit 0 diff --git a/examples/aishell/asr0/local/test_export.sh b/examples/aishell/asr0/local/test_export.sh index 7a4b87f8c926608a8b97ff181704171cba0a6e4f..a46a0d8762d5103addefedac48e245568b63ebed 100755 --- a/examples/aishell/asr0/local/test_export.sh +++ b/examples/aishell/asr0/local/test_export.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [ $# != 4 ];then - echo "usage: ${0} config_path decode_config_path ckpt_path_prefix model_type" +if [ $# != 3 ];then + echo "usage: ${0} config_path decode_config_path ckpt_path_prefix" exit -1 fi @@ -11,7 +11,6 @@ echo "using $ngpu gpus..." config_path=$1 decode_config_path=$2 jit_model_export_path=$3 -model_type=$4 # download language model bash local/download_lm_ch.sh > /dev/null 2>&1 @@ -24,8 +23,7 @@ python3 -u ${BIN_DIR}/test_export.py \ --config ${config_path} \ --decode_cfg ${decode_config_path} \ --result_file ${jit_model_export_path}.rsl \ ---export_path ${jit_model_export_path} \ ---model_type ${model_type} +--export_path ${jit_model_export_path} if [ $? -ne 0 ]; then echo "Failed in evaluation!" diff --git a/examples/aishell/asr0/local/test_wav.sh b/examples/aishell/asr0/local/test_wav.sh index 62b005a6a0bda1061978780274477bc442448e19..a228dda5a0c4c11fb46eaca8b2f2d28cd6ff657e 100755 --- a/examples/aishell/asr0/local/test_wav.sh +++ b/examples/aishell/asr0/local/test_wav.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [ $# != 5 ];then - echo "usage: ${0} config_path decode_config_path ckpt_path_prefix model_type audio_file" +if [ $# != 4 ];then + echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file" exit -1 fi @@ -11,8 +11,7 @@ echo "using $ngpu gpus..." config_path=$1 decode_config_path=$2 ckpt_prefix=$3 -model_type=$4 -audio_file=$5 +audio_file=$4 mkdir -p data wget -nc https://paddlespeech.bj.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P data/ @@ -37,7 +36,6 @@ python3 -u ${BIN_DIR}/test_wav.py \ --decode_cfg ${decode_config_path} \ --result_file ${ckpt_prefix}.rsl \ --checkpoint_path ${ckpt_prefix} \ ---model_type ${model_type} \ --audio_file ${audio_file} if [ $? -ne 0 ]; then diff --git a/examples/aishell/asr0/local/train.sh b/examples/aishell/asr0/local/train.sh index 102c051c164e9ca10c26e6baa4e7d40ca64a9292..76b696d9bb1807c4a4a168b7fb6a948d451446b8 100755 --- a/examples/aishell/asr0/local/train.sh +++ b/examples/aishell/asr0/local/train.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [ $# != 3 ];then - echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name model_type" +if [ $# != 2 ];then + echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name" exit -1 fi @@ -10,7 +10,6 @@ echo "using $ngpu gpus..." config_path=$1 ckpt_name=$2 -model_type=$3 mkdir -p exp @@ -25,14 +24,12 @@ python3 -u ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ ---model_type ${model_type} \ --seed ${seed} else python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ ---model_type ${model_type} \ --seed ${seed} fi diff --git a/examples/aishell/asr0/run.sh b/examples/aishell/asr0/run.sh index 0358b821d03f2fc8dbb011e7b534cef8dc740d8a..3bf9aa12efae0e65e18259992389baffb347c2ab 100755 --- a/examples/aishell/asr0/run.sh +++ b/examples/aishell/asr0/run.sh @@ -24,7 +24,7 @@ fi if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then # train model, all `ckpt` under `exp` dir - CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${model_type} + CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then @@ -34,21 +34,21 @@ fi if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then # test ckpt avg_n - CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${model_type}|| exit -1 + CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}|| exit -1 fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then # export ckpt avg_n - CUDA_VISIBLE_DEVICES=0 ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type} + CUDA_VISIBLE_DEVICES=0 ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit fi if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then # test export ckpt avg_n - CUDA_VISIBLE_DEVICES=0 ./local/test_export.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type}|| exit -1 + CUDA_VISIBLE_DEVICES=0 ./local/test_export.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}.jit|| exit -1 fi # Optionally, you can add LM and test it with runtime. if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then # test a single .wav file - CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${model_type} ${audio_file} || exit -1 + CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1 fi diff --git a/examples/librispeech/asr0/local/export.sh b/examples/librispeech/asr0/local/export.sh index 426a72fe54cfae8af4c564611e3b794f988f5468..ce7e6d6423c2cd479ed8050bd18ff4f2411b371f 100755 --- a/examples/librispeech/asr0/local/export.sh +++ b/examples/librispeech/asr0/local/export.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [ $# != 4 ];then - echo "usage: $0 config_path ckpt_prefix jit_model_path model_type" +if [ $# != 3 ];then + echo "usage: $0 config_path ckpt_prefix jit_model_path" exit -1 fi @@ -11,14 +11,12 @@ echo "using $ngpu gpus..." config_path=$1 ckpt_path_prefix=$2 jit_model_export_path=$3 -model_type=$4 python3 -u ${BIN_DIR}/export.py \ --ngpu ${ngpu} \ --config ${config_path} \ --checkpoint_path ${ckpt_path_prefix} \ ---export_path ${jit_model_export_path} \ ---model_type ${model_type} +--export_path ${jit_model_export_path} if [ $? -ne 0 ]; then echo "Failed in export!" diff --git a/examples/librispeech/asr0/local/test.sh b/examples/librispeech/asr0/local/test.sh index 5654a8794a40ece56dd3287b51e8431125988cc0..728569d1f91248bd59a34e9ab87a951149fbf772 100755 --- a/examples/librispeech/asr0/local/test.sh +++ b/examples/librispeech/asr0/local/test.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [ $# != 4 ];then - echo "usage: ${0} config_path decode_config_path ckpt_path_prefix model_type" +if [ $# != 3 ];then + echo "usage: ${0} config_path decode_config_path ckpt_path_prefix" exit -1 fi stage=0 @@ -13,7 +13,6 @@ echo "using $ngpu gpus..." config_path=$1 decode_config_path=$2 ckpt_prefix=$3 -model_type=$4 # download language model bash local/download_lm_en.sh @@ -23,7 +22,7 @@ fi if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # format the reference test file - python utils/format_rsl.py \ + python3 utils/format_rsl.py \ --origin_ref data/manifest.test-clean.raw \ --trans_ref data/manifest.test-clean.text @@ -32,33 +31,32 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --config ${config_path} \ --decode_cfg ${decode_config_path} \ --result_file ${ckpt_prefix}.rsl \ - --checkpoint_path ${ckpt_prefix} \ - --model_type ${model_type} + --checkpoint_path ${ckpt_prefix} if [ $? -ne 0 ]; then echo "Failed in evaluation!" exit 1 fi - python utils/format_rsl.py \ + python3 utils/format_rsl.py \ --origin_hyp ${ckpt_prefix}.rsl \ --trans_hyp ${ckpt_prefix}.rsl.text - python utils/compute-wer.py --char=1 --v=1 \ + python3 utils/compute-wer.py --char=1 --v=1 \ data/manifest.test-clean.text ${ckpt_prefix}.rsl.text > ${ckpt_prefix}.error fi if [ ${stage} -le 101 ] && [ ${stop_stage} -ge 101 ]; then - python utils/format_rsl.py \ + python3 utils/format_rsl.py \ --origin_ref data/manifest.test-clean.raw \ --trans_ref_sclite data/manifest.test.text-clean.sclite - python utils/format_rsl.py \ - --origin_hyp ${ckpt_prefix}.rsl \ - --trans_hyp_sclite ${ckpt_prefix}.rsl.text.sclite + python3 utils/format_rsl.py \ + --origin_hyp ${ckpt_prefix}.rsl \ + --trans_hyp_sclite ${ckpt_prefix}.rsl.text.sclite - mkdir -p ${ckpt_prefix}_sclite - sclite -i wsj -r data/manifest.test-clean.text.sclite -h ${ckpt_prefix}.rsl.text.sclite -e utf-8 -o all -O ${ckpt_prefix}_sclite -c NOASCII + mkdir -p ${ckpt_prefix}_sclite + sclite -i wsj -r data/manifest.test-clean.text.sclite -h ${ckpt_prefix}.rsl.text.sclite -e utf-8 -o all -O ${ckpt_prefix}_sclite -c NOASCII fi diff --git a/examples/librispeech/asr0/local/test_wav.sh b/examples/librispeech/asr0/local/test_wav.sh index 25cfc45e3ad532ecb51f6116def2dac5899824d0..a5712b608dc59dbd7254bde5f8708cfdff0b9523 100755 --- a/examples/librispeech/asr0/local/test_wav.sh +++ b/examples/librispeech/asr0/local/test_wav.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [ $# != 5 ];then - echo "usage: ${0} config_path decode_config_path ckpt_path_prefix model_type audio_file" +if [ $# != 4 ];then + echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file" exit -1 fi @@ -11,8 +11,7 @@ echo "using $ngpu gpus..." config_path=$1 decode_config_path=$2 ckpt_prefix=$3 -model_type=$4 -audio_file=$5 +audio_file=$4 mkdir -p data wget -nc https://paddlespeech.bj.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/ @@ -37,7 +36,6 @@ python3 -u ${BIN_DIR}/test_wav.py \ --decode_cfg ${decode_config_path} \ --result_file ${ckpt_prefix}.rsl \ --checkpoint_path ${ckpt_prefix} \ ---model_type ${model_type} \ --audio_file ${audio_file} if [ $? -ne 0 ]; then diff --git a/examples/librispeech/asr0/local/train.sh b/examples/librispeech/asr0/local/train.sh index 50d1d1922b54005a80d3b90b17e1ea6def5a438a..ad00653b70b366addcab126f1e63fc64aec1b37c 100755 --- a/examples/librispeech/asr0/local/train.sh +++ b/examples/librispeech/asr0/local/train.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [ $# != 3 ];then - echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name model_type" +if [ $# != 2 ];then + echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name" exit -1 fi @@ -10,7 +10,6 @@ echo "using $ngpu gpus..." config_path=$1 ckpt_name=$2 -model_type=$3 mkdir -p exp @@ -25,14 +24,12 @@ python3 -u ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ ---model_type ${model_type} \ --seed ${seed} else python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ ---model_type ${model_type} \ --seed ${seed} fi diff --git a/examples/librispeech/asr0/run.sh b/examples/librispeech/asr0/run.sh index d96f658232fa696b704e0a00c0464d40c4140408..6b1ff6c6d9ef96509e5d25ecd7ad83ff7808d221 100755 --- a/examples/librispeech/asr0/run.sh +++ b/examples/librispeech/asr0/run.sh @@ -23,7 +23,7 @@ fi if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then # train model, all `ckpt` under `exp` dir - CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${model_type} + CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then @@ -33,20 +33,20 @@ fi if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then # test ckpt avg_n - CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${model_type} || exit -1 + CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}|| exit -1 fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then # export ckpt avg_n - CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type} + CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit fi if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then # test export ckpt avg_n - CUDA_VISIBLE_DEVICES=0 ./local/test_export.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type}|| exit -1 + CUDA_VISIBLE_DEVICES=0 ./local/test_export.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}.jit|| exit -1 fi if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then # test a single .wav file - CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${model_type} ${audio_file} || exit -1 + CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1 fi diff --git a/examples/tiny/asr0/conf/augmentation.json b/examples/tiny/asr0/conf/augmentation.json deleted file mode 100644 index 4480307b98e3e7c5492e5e419be529617cc2bb24..0000000000000000000000000000000000000000 --- a/examples/tiny/asr0/conf/augmentation.json +++ /dev/null @@ -1,36 +0,0 @@ -[ - { - "type": "speed", - "params": { - "min_speed_rate": 0.9, - "max_speed_rate": 1.1, - "num_rates": 3 - }, - "prob": 0.0 - }, - { - "type": "shift", - "params": { - "min_shift_ms": -5, - "max_shift_ms": 5 - }, - "prob": 1.0 - }, - { - "type": "specaug", - "params": { - "W": 5, - "warp_mode": "PIL", - "F": 30, - "n_freq_masks": 2, - "T": 40, - "n_time_masks": 2, - "p": 1.0, - "adaptive_number_ratio": 0, - "adaptive_size_ratio": 0, - "max_n_time_masks": 20, - "replace_with_zero": true - }, - "prob": 1.0 - } -] diff --git a/examples/tiny/asr0/conf/deepspeech2.yaml b/examples/tiny/asr0/conf/deepspeech2.yaml index 64d432e2603c87eb9c77a197dc1be8b25378b5dd..a94143b95fcf16f47082fd43e6c3aa0af68a16e4 100644 --- a/examples/tiny/asr0/conf/deepspeech2.yaml +++ b/examples/tiny/asr0/conf/deepspeech2.yaml @@ -16,28 +16,26 @@ max_output_input_ratio: 10.0 ########################################### # Dataloader # ########################################### -mean_std_filepath: data/mean_std.json -unit_type: char -vocab_filepath: data/lang_char/vocab.txt -augmentation_config: conf/augmentation.json -random_seed: 0 -spm_model_prefix: -spectrum_type: linear +vocab_filepath: data/lang_char/vocab.txt +spm_model_prefix: '' +unit_type: 'char' +preprocess_config: conf/preprocess.yaml feat_dim: 161 -delta_delta: False stride_ms: 10.0 -window_ms: 20.0 -n_fft: None -max_freq: None -target_sample_rate: 16000 -use_dB_normalization: True -target_dB: -20 -dither: 1.0 -keep_transcription_text: False -sortagrad: True -shuffle_method: batch_shuffle -num_workers: 2 +window_ms: 25.0 +sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs batch_size: 4 +maxlen_in: 512 # if input length > maxlen-in, batchsize is automatically reduced +maxlen_out: 150 # if output length > maxlen-out, batchsize is automatically reduced +minibatches: 0 # for debug +batch_count: auto +batch_bins: 0 +batch_frames_in: 0 +batch_frames_out: 0 +batch_frames_inout: 0 +num_workers: 8 +subsampling_factor: 1 +num_encs: 1 ############################################ # Network Architecture # @@ -45,8 +43,10 @@ batch_size: 4 num_conv_layers: 2 num_rnn_layers: 3 rnn_layer_size: 2048 +rnn_direction: bidirect # [forward, bidirect] +num_fc_layers: 0 +fc_layers_size_list: -1, use_gru: False -share_rnn_weights: True blank_id: 0 @@ -59,6 +59,7 @@ lr: 1.0e-5 lr_decay: 0.8 weight_decay: 1.0e-6 global_grad_clip: 5.0 +dist_sampler: False log_interval: 1 checkpoint: kbest_n: 3 diff --git a/examples/tiny/asr0/conf/deepspeech2_online.yaml b/examples/tiny/asr0/conf/deepspeech2_online.yaml index 74a4dc814d5f3183d0dc1b99994564850b801885..1bd8da19cf8fe416f53c76e547b5e96717a893d6 100644 --- a/examples/tiny/asr0/conf/deepspeech2_online.yaml +++ b/examples/tiny/asr0/conf/deepspeech2_online.yaml @@ -16,29 +16,27 @@ max_output_input_ratio: 10.0 ########################################### # Dataloader # ########################################### -mean_std_filepath: data/mean_std.json -unit_type: char -vocab_filepath: data/lang_char/vocab.txt -augmentation_config: conf/augmentation.json -random_seed: 0 -spm_model_prefix: -spectrum_type: linear +vocab_filepath: data/lang_char/vocab.txt +spm_model_prefix: '' +unit_type: 'char' +preprocess_config: conf/preprocess.yaml feat_dim: 161 -delta_delta: False stride_ms: 10.0 -window_ms: 20.0 -n_fft: None -max_freq: None -target_sample_rate: 16000 -use_dB_normalization: True -target_dB: -20 -dither: 1.0 -keep_transcription_text: False -sortagrad: True -shuffle_method: batch_shuffle -num_workers: 0 +window_ms: 25.0 +sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs batch_size: 4 - +maxlen_in: 512 # if input length > maxlen-in, batchsize is automatically reduced +maxlen_out: 150 # if output length > maxlen-out, batchsize is automatically reduced +minibatches: 0 # for debug +batch_count: auto +batch_bins: 0 +batch_frames_in: 0 +batch_frames_out: 0 +batch_frames_inout: 0 +num_workers: 8 +subsampling_factor: 1 +num_encs: 1 + ############################################ # Network Architecture # ############################################ @@ -61,6 +59,7 @@ lr: 1.0e-5 lr_decay: 1.0 weight_decay: 1.0e-6 global_grad_clip: 5.0 +dist_sampler: False log_interval: 1 checkpoint: kbest_n: 3 diff --git a/examples/tiny/asr0/local/export.sh b/examples/tiny/asr0/local/export.sh index 426a72fe54cfae8af4c564611e3b794f988f5468..ce7e6d6423c2cd479ed8050bd18ff4f2411b371f 100755 --- a/examples/tiny/asr0/local/export.sh +++ b/examples/tiny/asr0/local/export.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [ $# != 4 ];then - echo "usage: $0 config_path ckpt_prefix jit_model_path model_type" +if [ $# != 3 ];then + echo "usage: $0 config_path ckpt_prefix jit_model_path" exit -1 fi @@ -11,14 +11,12 @@ echo "using $ngpu gpus..." config_path=$1 ckpt_path_prefix=$2 jit_model_export_path=$3 -model_type=$4 python3 -u ${BIN_DIR}/export.py \ --ngpu ${ngpu} \ --config ${config_path} \ --checkpoint_path ${ckpt_path_prefix} \ ---export_path ${jit_model_export_path} \ ---model_type ${model_type} +--export_path ${jit_model_export_path} if [ $? -ne 0 ]; then echo "Failed in export!" diff --git a/examples/tiny/asr0/local/test.sh b/examples/tiny/asr0/local/test.sh index ea40046b10997ee425d4e654b89fedc732c8b3fe..55f97d2ec8467c503dc7c507605fb666a7c34241 100755 --- a/examples/tiny/asr0/local/test.sh +++ b/examples/tiny/asr0/local/test.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [ $# != 4 ];then - echo "usage: ${0} config_path decode_config_path ckpt_path_prefix model_type" +if [ $# != 3 ];then + echo "usage: ${0} config_path decode_config_path ckpt_path_prefix" exit -1 fi @@ -11,7 +11,6 @@ echo "using $ngpu gpus..." config_path=$1 decode_config_path=$2 ckpt_prefix=$3 -model_type=$4 # download language model bash local/download_lm_en.sh @@ -24,8 +23,7 @@ python3 -u ${BIN_DIR}/test.py \ --config ${config_path} \ --decode_cfg ${decode_config_path} \ --result_file ${ckpt_prefix}.rsl \ ---checkpoint_path ${ckpt_prefix} \ ---model_type ${model_type} +--checkpoint_path ${ckpt_prefix} if [ $? -ne 0 ]; then echo "Failed in evaluation!" diff --git a/examples/tiny/asr0/local/train.sh b/examples/tiny/asr0/local/train.sh index 9060be674e5804005de6b23c2575bd4888efef00..2f2cff77a8ba8ee740e32046e9115c91689b0493 100755 --- a/examples/tiny/asr0/local/train.sh +++ b/examples/tiny/asr0/local/train.sh @@ -15,14 +15,13 @@ if [ ${seed} != 0 ]; then echo "using seed $seed & FLAGS_cudnn_deterministic=True ..." fi -if [ $# != 3 ];then - echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name model_type" +if [ $# != 2 ];then + echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name" exit -1 fi config_path=$1 ckpt_name=$2 -model_type=$3 mkdir -p exp @@ -31,7 +30,6 @@ python3 -u ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ ---model_type ${model_type} \ --profiler-options "${profiler_options}" \ --seed ${seed} else @@ -39,7 +37,6 @@ python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/t --ngpu ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ ---model_type ${model_type} \ --profiler-options "${profiler_options}" \ --seed ${seed} fi diff --git a/examples/tiny/asr0/run.sh b/examples/tiny/asr0/run.sh index 25f04624526f2ea480492fad2ab53ee3eced0654..16f4313038ef875d66a47f9175687f0aa92f752d 100755 --- a/examples/tiny/asr0/run.sh +++ b/examples/tiny/asr0/run.sh @@ -8,8 +8,6 @@ stop_stage=100 conf_path=conf/deepspeech2.yaml decode_conf_path=conf/tuning/decode.yaml avg_num=1 -model_type=offline - source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; avg_ckpt=avg_${avg_num} @@ -23,7 +21,7 @@ fi if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then # train model, all `ckpt` under `exp` dir - CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${model_type} + CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then @@ -33,10 +31,10 @@ fi if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then # test ckpt avg_n - CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${model_type} || exit -1 + CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}|| exit -1 fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then # export ckpt avg_n - CUDA_VISIBLE_DEVICES=${gpus} ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type} + CUDA_VISIBLE_DEVICES=${gpus} ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit fi diff --git a/paddlespeech/s2t/exps/deepspeech2/model.py b/paddlespeech/s2t/exps/deepspeech2/model.py index 5aa8e37438e6b8cdf08191b6f10b562f4c4be535..2cd3c04ac354910f401b6b5366a6f8fecac8e30f 100644 --- a/paddlespeech/s2t/exps/deepspeech2/model.py +++ b/paddlespeech/s2t/exps/deepspeech2/model.py @@ -248,7 +248,6 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): for text, n in zip(texts, texts_len): n = n.numpy().item() ids = text[:n] - #trans.append(''.join([chr(i) for i in ids])) trans.append(self._text_featurizer.defeaturize(ids.numpy().tolist())) return trans