#!/bin/bash
# Staged driver script. Sources path.sh for the environment, declares the
# default options, then sources utils/parse_options.sh.
set +x
set -e

. path.sh

# Defaults, declared before utils/parse_options.sh is sourced.
# NOTE(review): presumably overridable on the command line as
# --nj/--stage/--stop_stage (Kaldi-style parse_options) — confirm.
nj=40            # number of parallel jobs handed to split_data.sh and run.pl
stage=0          # first stage to run
stop_stage=100   # last stage to run

. utils/parse_options.sh

# 1. compile
# Build speechx when the examples directory does not exist yet.
# The expansions are quoted so that an unset/empty SPEECHX_EXAMPLES is treated
# as "directory missing" instead of collapsing the test to `[ ! -d ]`, which
# is always false and would silently skip the build.
if [ ! -d "${SPEECHX_EXAMPLES:-}" ]; then
    # Abort with a clear message rather than building in an unintended directory.
    pushd "${SPEECHX_ROOT:?SPEECHX_ROOT is not set}"
    bash build.sh
    popd
fi

# input
# All downloaded inputs live under ./data (absolute path kept in $data).
mkdir -p data
data=$PWD/data

ckpt_dir=$data/model
model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
vocb_dir=$ckpt_dir/data/lang_char/

# output
# Results and scoring reports are written under ./exp (absolute path in $exp).
mkdir -p exp
exp=$PWD/exp

# File name of the wav list: created in $data by stage 0 and reused as the
# per-split list name by the later stages.
aishell_wav_scp=aishell_test.scp

# stage 0: download the test audio, the acoustic model checkpoint and the
# language model. Each download is skipped when its target already exists.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    if [ ! -d "$data/test" ]; then
        pushd "$data"
        wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
        unzip  aishell_test.zip
        popd

        # Build "<utt_id> <absolute wav path>" lines; the utterance id is the
        # wav file name up to its first dot.
        realpath "$data"/test/*/*.wav > "$data/wavlist"
        awk -F '/' '{ print $(NF) }' "$data/wavlist" | awk -F '.' '{ print $1 }' > "$data/utt_id"
        paste "$data/utt_id" "$data/wavlist" > "$data/$aishell_wav_scp"
    fi

    # mean_std.json is used as the marker that the checkpoint has been unpacked.
    if [ ! -f "$ckpt_dir/data/mean_std.json" ]; then
        mkdir -p "$ckpt_dir"
        pushd "$ckpt_dir"
        wget -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
        tar xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
        popd
    fi

    lm=$data/zh_giga.no_cna_cmn.prune01244.klm
    if [ ! -f "$lm" ]; then
        pushd "$data"
        wget -c https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm
        popd
    fi
fi

# Settings shared by the feature-extraction and decoding stages.
text=$data/test/text            # reference text passed to utils/compute-wer.py
label_file=./aishell_result     # decoding output, relative to $exp
wer=./aishell_wer               # compute-wer.py report, relative to $exp

export GLOG_logtostderr=1

cmvn=$data/cmvn.ark             # written by cmvn-json2kaldi in stage 1

# stage 1: write the CMVN file, split the wav list into $nj parts and extract
# features for each part.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # Reads the checkpoint's mean_std.json and writes $cmvn.
    cmvn-json2kaldi --json_file="$ckpt_dir/data/mean_std.json" --cmvn_write_path="$cmvn"

    # NOTE(review): expected to create $data/split${nj}/<n>/$aishell_wav_scp,
    # which the run.pl commands read — confirm against local/split_data.sh.
    ./local/split_data.sh "$data" "$data/$aishell_wav_scp" "$aishell_wav_scp" "$nj"

    # NOTE(review): Kaldi-style run.pl presumably replaces JOB with each index
    # in 1..$nj and logs every job to its own feat.log — confirm.
    utils/run.pl JOB=1:"$nj" "$data/split${nj}/JOB/feat.log" \
    linear-spectrogram-wo-db-norm-ol \
        --wav_rspecifier="scp:$data/split${nj}/JOB/${aishell_wav_scp}" \
        --feature_wspecifier="ark,scp:$data/split${nj}/JOB/feat.ark,$data/split${nj}/JOB/feat.scp" \
        --cmvn_file="$cmvn" \
        --streaming_chunk=0.36
    echo "feature make have finished!!!"
fi

# stage 2: CTC prefix beam search decoding without a language model, then
# scoring against $text.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    #  recognizer
    utils/run.pl JOB=1:"$nj" "$data/split${nj}/JOB/recog.wolm.log" \
    ctc-prefix-beam-search-decoder-ol \
        --feature_rspecifier="scp:$data/split${nj}/JOB/feat.scp" \
        --model_path="$model_dir/avg_1.jit.pdmodel" \
        --param_path="$model_dir/avg_1.jit.pdiparams" \
        --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
        --dict_file="$vocb_dir/vocab.txt" \
        --result_wspecifier="ark,t:$data/split${nj}/JOB/result"

    # Merge the per-job results into one file and score it.
    cat "$data/split${nj}"/*/result > "$exp/${label_file}"
    utils/compute-wer.py --char=1 --v=1 "$text" "$exp/${label_file}" > "$exp/${wer}"
    echo "ctc-prefix-beam-search-decoder-ol without lm has finished!!!"
    echo "please checkout in ${exp}/${wer}"
fi

# Language model used by stage 3. It is (re)defined here, with the same value
# stage 0 uses, so that starting the script at --stage 3 does not pass an
# empty --lm_path to the decoder.
lm=$data/zh_giga.no_cna_cmn.prune01244.klm

# stage 3: CTC prefix beam search decoding with the language model, then
# scoring against $text.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # Fail early with a clear message instead of decoding with a missing LM.
    if [ ! -f "$lm" ]; then
        echo "language model not found: $lm (run stage 0 to download it)" >&2
        exit 1
    fi

    #  decode with lm
    utils/run.pl JOB=1:"$nj" "$data/split${nj}/JOB/recog.lm.log" \
    ctc-prefix-beam-search-decoder-ol \
        --feature_rspecifier="scp:$data/split${nj}/JOB/feat.scp" \
        --model_path="$model_dir/avg_1.jit.pdmodel" \
        --param_path="$model_dir/avg_1.jit.pdiparams" \
        --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
        --dict_file="$vocb_dir/vocab.txt" \
        --lm_path="$lm" \
        --result_wspecifier="ark,t:$data/split${nj}/JOB/result_lm"

    # Merge the per-job results into one file and score it.
    cat "$data/split${nj}"/*/result_lm > "$exp/${label_file}_lm"
    utils/compute-wer.py --char=1 --v=1 "$text" "$exp/${label_file}_lm" > "$exp/${wer}.lm"
    echo "ctc-prefix-beam-search-decoder-ol with lm test has finished!!!"
    echo "please checkout in ${exp}/${wer}.lm"
fi

# Directory holding the decoding graph (TLG.fst) and words.txt used by the
# WFST decoder and the recognizer test.
wfst=$data/wfst/

# stage 4 (part 1): download and unpack the decoding graph. The zip file is
# used as the marker that the download has already been done.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    mkdir -p "$wfst"
    if [ ! -f "$wfst/aishell_graph.zip" ]; then
        pushd "$wfst"
        wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip
        unzip aishell_graph.zip
        # Move the archive's contents up so they sit directly in $wfst.
        mv aishell_graph/* "$wfst"
        popd
    fi
fi

# stage 4 (part 2): decode the stage 1 features with the TLG graph, then score
# against $text.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    #  TLG decoder
    utils/run.pl JOB=1:"$nj" "$data/split${nj}/JOB/recog.wfst.log" \
    wfst-decoder-ol \
        --feature_rspecifier="scp:$data/split${nj}/JOB/feat.scp" \
        --model_path="$model_dir/avg_1.jit.pdmodel" \
        --param_path="$model_dir/avg_1.jit.pdiparams" \
        --word_symbol_table="$wfst/words.txt" \
        --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
        --graph_path="$wfst/TLG.fst" --max_active=7500 \
        --acoustic_scale=1.2 \
        --result_wspecifier="ark,t:$data/split${nj}/JOB/result_tlg"

    # Merge the per-job results into one file and score it.
    cat "$data/split${nj}"/*/result_tlg > "$exp/${label_file}_tlg"
    utils/compute-wer.py --char=1 --v=1 "$text" "$exp/${label_file}_tlg" > "$exp/${wer}.tlg"
    echo "wfst-decoder-ol have finished!!!"
    echo "please checkout in ${exp}/${wer}.tlg"
fi

# stage 5: recognizer test. Unlike stages 2-4 it is given the wav lists and
# the CMVN file directly (not the feat.scp from stage 1), together with the
# TLG graph; the results are then scored against $text.
# It relies on $cmvn (written in stage 1) and the files under $wfst
# (downloaded in stage 4) already being present.
if [ "${stage}" -le 5 ] && [ "${stop_stage}" -ge 5 ]; then
    #  recognizer test (wav input, TLG graph)
    utils/run.pl JOB=1:"$nj" "$data/split${nj}/JOB/recognizer.log" \
    recognizer_test_main \
        --wav_rspecifier="scp:$data/split${nj}/JOB/${aishell_wav_scp}" \
        --cmvn_file="$cmvn" \
        --model_path="$model_dir/avg_1.jit.pdmodel" \
        --streaming_chunk=30 \
        --param_path="$model_dir/avg_1.jit.pdiparams" \
        --word_symbol_table="$wfst/words.txt" \
        --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
        --graph_path="$wfst/TLG.fst" --max_active=7500 \
        --acoustic_scale=1.2 \
        --result_wspecifier="ark,t:$data/split${nj}/JOB/result_recognizer"

    # Merge the per-job results into one file and score it.
    cat "$data/split${nj}"/*/result_recognizer > "$exp/${label_file}_recognizer"
    utils/compute-wer.py --char=1 --v=1 "$text" "$exp/${label_file}_recognizer" > "$exp/${wer}.recognizer"
    echo "recognizer test have finished!!!"
    echo "please checkout in ${exp}/${wer}.recognizer"
fi