diff --git a/deepspeech/__init__.py b/deepspeech/__init__.py index 493f10a6f5f0abc5679b0fcd76bfe73b170998ab..f0cd1bae2bc47e9767c70372519120b6eaf952e8 100644 --- a/deepspeech/__init__.py +++ b/deepspeech/__init__.py @@ -362,11 +362,19 @@ def ctc_loss(logits, label_lengths, blank=0, reduction='mean', - norm_by_times=True): + norm_by_times=False, + norm_by_batchsize=True, + norm_by_total_logits_len=False): #logger.info("my ctc loss with norm by times") ## https://github.com/PaddlePaddle/Paddle/blob/f5ca2db2cc/paddle/fluid/operators/warpctc_op.h#L403 - loss_out = paddle.fluid.layers.warpctc(logits, labels, blank, norm_by_times, - input_lengths, label_lengths) + loss_out = paddle.fluid.layers.warpctc( + logits, + labels, + blank, + norm_by_times, + input_lengths, + label_lengths, + norm_by_batchsize, ) loss_out = paddle.fluid.layers.squeeze(loss_out, [-1]) assert reduction in ['mean', 'sum', 'none'] diff --git a/examples/librispeech/s2/README.md b/examples/librispeech/s2/README.md index e4022f014a40888afdbe4d2f84c88cc4b9c198dc..1f7c69194691b37f9b31d09ce2d10a4b8b7468cc 100644 --- a/examples/librispeech/s2/README.md +++ b/examples/librispeech/s2/README.md @@ -1,41 +1,13 @@ # LibriSpeech -## Data -| Data Subset | Duration in Seconds | -| data/manifest.train | 0.83s ~ 29.735s | -| data/manifest.dev | 1.065 ~ 35.155s | -| data/manifest.test-clean | 1.285s ~ 34.955s | +| Model | Params | Config | Augmentation| Loss | +| --- | --- | --- | --- | --- | +| transformer | 32.52 M | conf/transformer.yaml | spec_aug | 6.3197922706604 | -## Conformer -| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER | -| --- | --- | --- | --- | --- | --- | --- | --- | -| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | attention | - | - | -| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search | | | -| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_prefix_beam_search | | | -| 
conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | attention_rescoring | | | -### Test w/o length filter -| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER | -| --- | --- | --- | --- | --- | --- | --- | --- | -| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean-all | attention | | | - - -## Chunk Conformer - -| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size & Left Chunks | Loss | WER | -| --- | --- | --- | --- | --- | --- | --- | --- | --- | -| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | attention | 16, -1 | | | -| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search | 16, -1 | | | -| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | ctc_prefix_beam_search | 16, -1 | | - | -| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | attention_rescoring | 16, -1 | | - | - - -## Transformer -| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER | -| --- | --- | --- | --- | --- | --- | --- | --- | -| transformer | 32.52 M | conf/transformer.yaml | spec_aug + shift | test-clean | attention | | | - -### Test w/o length filter -| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER | -| --- | --- | --- | --- | --- | --- | --- | --- | -| transformer | 32.52 M | conf/transformer.yaml | spec_aug + shift | test-clean-all | attention | | | +| Test Set | Decode Method | #Snt | #Wrd | Corr | Sub | Del | Ins | Err | S.Err | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +| test-clean | attention | 2620 | 52576 | 96.4 | 2.5 | 1.1 | 0.4 | 4.0 | 34.7 | +| test-clean | ctc_greedy_search | 2620 | 52576 | 95.9 | 3.7 | 0.4 | 0.5 | 4.6 | 48.0 | +| test-clean | ctc_prefix_beam_search | 2620 | 52576 | 95.9 | 3.7 | 0.4 | 0.5 | 4.6 | 47.6 | +| test-clean | attention_rescoring | 
2620 | 52576 | 96.8 | 2.9 | 0.3 | 0.4 | 3.7 | 38.0 | diff --git a/examples/librispeech/s2/local/test.sh b/examples/librispeech/s2/local/test.sh index 5eeb2d6126e801801b4b8be13fa66d0781859d9a..379a3787e61d172e905902267429877ea8465d6d 100755 --- a/examples/librispeech/s2/local/test.sh +++ b/examples/librispeech/s2/local/test.sh @@ -6,7 +6,7 @@ expdir=exp datadir=data nj=32 -lmtag= +lmtag='nolm' recog_set="test-clean test-other dev-clean dev-other" recog_set="test-clean" @@ -29,11 +29,18 @@ config_path=$1 dict=$2 ckpt_prefix=$3 + +ckpt_dir=$(dirname `dirname ${ckpt_prefix}`) +echo "ckpt dir: ${ckpt_dir}" + +ckpt_tag=$(basename ${ckpt_prefix}) +echo "ckpt tag: ${ckpt_tag}" + chunk_mode=false if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then chunk_mode=true fi -echo "chunk mode ${chunk_mode}" +echo "chunk mode: ${chunk_mode}" # download language model @@ -46,11 +53,13 @@ pids=() # initialize pids for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do ( + echo "decode method: ${dmethd}" for rtask in ${recog_set}; do ( - decode_dir=decode_${rtask}_${dmethd}_$(basename ${config_path%.*})_${lmtag} + echo "dataset: ${rtask}" + decode_dir=${ckpt_dir}/decode/decode_${rtask/-/_}_${dmethd}_$(basename ${config_path%.*})_${lmtag}_${ckpt_tag} feat_recog_dir=${datadir} - mkdir -p ${expdir}/${decode_dir} + mkdir -p ${decode_dir} mkdir -p ${feat_recog_dir} # split data @@ -61,7 +70,7 @@ for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_resco # set batchsize 0 to disable batch decoding batch_size=1 - ${decode_cmd} JOB=1:${nj} ${expdir}/${decode_dir}/log/decode.JOB.log \ + ${decode_cmd} JOB=1:${nj} ${decode_dir}/log/decode.JOB.log \ python3 -u ${BIN_DIR}/test.py \ --model-name u2_kaldi \ --run-mode test \ @@ -69,7 +78,7 @@ for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_resco --dict-path ${dict} \ --config ${config_path} \ --checkpoint_path ${ckpt_prefix} \ - --result-file 
${expdir}/${decode_dir}/data.JOB.json \ + --result-file ${decode_dir}/data.JOB.json \ --opts decoding.decoding_method ${dmethd} \ --opts decoding.batch_size ${batch_size} \ --opts data.test_manifest ${feat_recog_dir}/split${nj}/JOB/manifest.${rtask} diff --git a/examples/librispeech/s2/run.sh b/examples/librispeech/s2/run.sh index 46b6ac1b406bdb344ae266365524d4ffaa95af41..1ffe3e5c5c3e7ba3e4d4095a6202f8a385bbe9bf 100755 --- a/examples/librispeech/s2/run.sh +++ b/examples/librispeech/s2/run.sh @@ -1,4 +1,5 @@ #!/bin/bash + set -e . ./path.sh || exit 1; @@ -7,8 +8,9 @@ set -e stage=0 stop_stage=100 conf_path=conf/transformer.yaml -dict_path=data/train_960_unigram5000_units.txt +dict_path=data/bpe_unigram_5000_units.txt avg_num=10 + source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; avg_ckpt=avg_${avg_num}