提交 8539689b 编写于 作者: H Hui Zhang

u2 kaldi wer4p0

上级 3c32a292
...@@ -362,11 +362,19 @@ def ctc_loss(logits, ...@@ -362,11 +362,19 @@ def ctc_loss(logits,
label_lengths, label_lengths,
blank=0, blank=0,
reduction='mean', reduction='mean',
norm_by_times=True): norm_by_times=False,
norm_by_batchsize=True,
norm_by_total_logits_len=False):
#logger.info("my ctc loss with norm by times") #logger.info("my ctc loss with norm by times")
## https://github.com/PaddlePaddle/Paddle/blob/f5ca2db2cc/paddle/fluid/operators/warpctc_op.h#L403 ## https://github.com/PaddlePaddle/Paddle/blob/f5ca2db2cc/paddle/fluid/operators/warpctc_op.h#L403
loss_out = paddle.fluid.layers.warpctc(logits, labels, blank, norm_by_times, loss_out = paddle.fluid.layers.warpctc(
input_lengths, label_lengths) logits,
labels,
blank,
norm_by_times,
input_lengths,
label_lengths,
norm_by_batchsize, )
loss_out = paddle.fluid.layers.squeeze(loss_out, [-1]) loss_out = paddle.fluid.layers.squeeze(loss_out, [-1])
assert reduction in ['mean', 'sum', 'none'] assert reduction in ['mean', 'sum', 'none']
......
# LibriSpeech # LibriSpeech
## Data | Model | Params | Config | Augmentation| Loss |
| Data Subset | Duration in Seconds | | --- | --- | --- | --- |
| data/manifest.train | 0.83s ~ 29.735s | | transformer | 32.52 M | conf/transformer.yaml | spec_aug | 6.3197922706604 |
| data/manifest.dev | 1.065 ~ 35.155s |
| data/manifest.test-clean | 1.285s ~ 34.955s |
## Conformer
| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |
| --- | --- | --- | --- | --- | --- | --- | --- |
| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | attention | - | - |
| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search | | |
| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | ctc_prefix_beam_search | | |
| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean | attention_rescoring | | |
### Test w/o length filter | Test Set | Decode Method | #Snt | #Wrd | Corr | Sub | Del | Ins | Err | S.Err |
| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| --- | --- | --- | --- | --- | --- | --- | --- | | test-clean | attention | 2620 | 52576 | 96.4 | 2.5 | 1.1 | 0.4 | 4.0 | 34.7 |
| conformer | 47.63 M | conf/conformer.yaml | spec_aug + shift | test-clean-all | attention | | | | test-clean | ctc_greedy_search | 2620 | 52576 | 95.9 | 3.7 | 0.4 | 0.5 | 4.6 | 48.0 |
| test-clean | ctc_prefix_beam_search | 2620 | 52576 | 95.9 | 3.7 | 0.4 | 0.5 | 4.6 | 47.6 |
| test-clean | attention_rescoring | 2620 | 52576 | 96.8 | 2.9 | 0.3 | 0.4 | 3.7 | 38.0 |
## Chunk Conformer
| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size & Left Chunks | Loss | WER |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | attention | 16, -1 | | |
| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search | 16, -1 | | |
| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | ctc_prefix_beam_search | 16, -1 | | - |
| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | attention_rescoring | 16, -1 | | - |
## Transformer
| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |
| --- | --- | --- | --- | --- | --- | --- | --- |
| transformer | 32.52 M | conf/transformer.yaml | spec_aug + shift | test-clean | attention | | |
### Test w/o length filter
| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |
| --- | --- | --- | --- | --- | --- | --- | --- |
| transformer | 32.52 M | conf/transformer.yaml | spec_aug + shift | test-clean-all | attention | | |
...@@ -6,7 +6,7 @@ expdir=exp ...@@ -6,7 +6,7 @@ expdir=exp
datadir=data datadir=data
nj=32 nj=32
lmtag= lmtag='nolm'
recog_set="test-clean test-other dev-clean dev-other" recog_set="test-clean test-other dev-clean dev-other"
recog_set="test-clean" recog_set="test-clean"
...@@ -29,11 +29,18 @@ config_path=$1 ...@@ -29,11 +29,18 @@ config_path=$1
dict=$2 dict=$2
ckpt_prefix=$3 ckpt_prefix=$3
ckpt_dir=$(dirname `dirname ${ckpt_prefix}`)
echo "ckpt dir: ${ckpt_dir}"
ckpt_tag=$(basename ${ckpt_prefix})
echo "ckpt tag: ${ckpt_tag}"
chunk_mode=false chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
chunk_mode=true chunk_mode=true
fi fi
echo "chunk mode ${chunk_mode}" echo "chunk mode: ${chunk_mode}"
# download language model # download language model
...@@ -46,11 +53,13 @@ pids=() # initialize pids ...@@ -46,11 +53,13 @@ pids=() # initialize pids
for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do
( (
echo "decode method: ${dmethd}"
for rtask in ${recog_set}; do for rtask in ${recog_set}; do
( (
decode_dir=decode_${rtask}_${dmethd}_$(basename ${config_path%.*})_${lmtag} echo "dataset: ${rtask}"
decode_dir=${ckpt_dir}/decode/decode_${rtask/-/_}_${dmethd}_$(basename ${config_path%.*})_${lmtag}_${ckpt_tag}
feat_recog_dir=${datadir} feat_recog_dir=${datadir}
mkdir -p ${expdir}/${decode_dir} mkdir -p ${decode_dir}
mkdir -p ${feat_recog_dir} mkdir -p ${feat_recog_dir}
# split data # split data
...@@ -61,7 +70,7 @@ for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_resco ...@@ -61,7 +70,7 @@ for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_resco
# set batchsize 0 to disable batch decoding # set batchsize 0 to disable batch decoding
batch_size=1 batch_size=1
${decode_cmd} JOB=1:${nj} ${expdir}/${decode_dir}/log/decode.JOB.log \ ${decode_cmd} JOB=1:${nj} ${decode_dir}/log/decode.JOB.log \
python3 -u ${BIN_DIR}/test.py \ python3 -u ${BIN_DIR}/test.py \
--model-name u2_kaldi \ --model-name u2_kaldi \
--run-mode test \ --run-mode test \
...@@ -69,7 +78,7 @@ for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_resco ...@@ -69,7 +78,7 @@ for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_resco
--dict-path ${dict} \ --dict-path ${dict} \
--config ${config_path} \ --config ${config_path} \
--checkpoint_path ${ckpt_prefix} \ --checkpoint_path ${ckpt_prefix} \
--result-file ${expdir}/${decode_dir}/data.JOB.json \ --result-file ${decode_dir}/data.JOB.json \
--opts decoding.decoding_method ${dmethd} \ --opts decoding.decoding_method ${dmethd} \
--opts decoding.batch_size ${batch_size} \ --opts decoding.batch_size ${batch_size} \
--opts data.test_manifest ${feat_recog_dir}/split${nj}/JOB/manifest.${rtask} --opts data.test_manifest ${feat_recog_dir}/split${nj}/JOB/manifest.${rtask}
......
#!/bin/bash #!/bin/bash
set -e set -e
. ./path.sh || exit 1; . ./path.sh || exit 1;
...@@ -7,8 +8,9 @@ set -e ...@@ -7,8 +8,9 @@ set -e
stage=0 stage=0
stop_stage=100 stop_stage=100
conf_path=conf/transformer.yaml conf_path=conf/transformer.yaml
dict_path=data/train_960_unigram5000_units.txt dict_path=data/bpe_unigram_5000_units.txt
avg_num=10 avg_num=10
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
avg_ckpt=avg_${avg_num} avg_ckpt=avg_${avg_num}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册