PaddlePaddle / DeepSpeech
Commit 1628e19b (unverified)
Merge branch 'develop' into onnx
Authored by Hui Zhang, committed via GitHub on Jun 15, 2022
Parents: b472a148, a37a5266
27 changed files with 240 additions and 97 deletions (+240 -97)
Changed files:

docs/source/released_model.md                        +2  -2
examples/aishell/asr0/local/train.sh                +10  -3
examples/aishell/asr0/run.sh                         +2  -1
examples/aishell/asr1/local/train.sh                +11  -3
examples/aishell/asr1/run.sh                         +2  -1
examples/callcenter/asr1/local/train.sh             +10  -3
examples/callcenter/asr1/run.sh                      +2  -1
examples/librispeech/asr0/local/train.sh            +10  -3
examples/librispeech/asr0/run.sh                     +2  -1
examples/librispeech/asr1/local/test.sh             +85 -44
examples/librispeech/asr1/local/train.sh            +10  -3
examples/librispeech/asr1/run.sh                     +2  -1
examples/librispeech/asr2/local/train.sh            +10  -3
examples/librispeech/asr2/run.sh                     +2  -1
examples/mustc/st1/local/train.sh                   +18  -2
examples/mustc/st1/run.sh                            +3  -2
examples/ted_en_zh/st0/local/train.sh               +10  -3
examples/ted_en_zh/st0/run.sh                        +2  -1
examples/ted_en_zh/st1/local/train.sh               +12  -3
examples/ted_en_zh/st1/run.sh                        +2  -1
examples/tiny/asr0/local/train.sh                   +10  -3
examples/tiny/asr0/run.sh                            +3  -2
examples/tiny/asr1/local/train.sh                   +10  -3
examples/tiny/asr1/run.sh                            +3  -2
paddlespeech/cli/asr/infer.py                        +3  -2
paddlespeech/resource/pretrained_models.py           +2  -2
paddlespeech/server/engine/asr/online/asr_engine.py  +2  -1
docs/source/released_model.md

@@ -6,7 +6,7 @@
 ### Speech Recognition Model
 Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER | Hours of speech | Example Link
 :-------------:| :------------:| :-----: | -----: | :-----: |:-----:| :-----: | :-----: | :-----:
-[Ds2 Online Wenetspeech ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.0a.model.tar.gz) | Wenetspeech Dataset | Char-based | 1.2 GB | 2 Conv + 5 LSTM layers | 0.152 (test\_net, w/o LM)<br>0.2417 (test\_meeting, w/o LM)<br>0.053 (aishell, w/ LM) |-| 10000 h |-
+[Ds2 Online Wenetspeech ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.2.model.tar.gz) | Wenetspeech Dataset | Char-based | 1.2 GB | 2 Conv + 5 LSTM layers | 0.152 (test\_net, w/o LM)<br>0.2417 (test\_meeting, w/o LM)<br>0.053 (aishell, w/ LM) |-| 10000 h |-
 [Ds2 Online Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_0.2.1.model.tar.gz) | Aishell Dataset | Char-based | 491 MB | 2 Conv + 5 LSTM layers | 0.0666 |-| 151 h | [D2 Online Aishell ASR0](../../examples/aishell/asr0)
 [Ds2 Offline Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_offline_aishell_ckpt_1.0.1.model.tar.gz) | Aishell Dataset | Char-based | 1.4 GB | 2 Conv + 5 bidirectional LSTM layers| 0.0554 |-| 151 h | [Ds2 Offline Aishell ASR0](../../examples/aishell/asr0)
 [Conformer Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz) | WenetSpeech Dataset | Char-based | 457 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.11 (test\_net) 0.1879 (test\_meeting) |-| 10000 h |-

@@ -14,7 +14,7 @@ Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER |
 [Conformer Offline Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_conformer_aishell_ckpt_0.1.2.model.tar.gz) | Aishell Dataset | Char-based | 189 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0464 |-| 151 h | [Conformer Offline Aishell ASR1](../../examples/aishell/asr1)
 [Transformer Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz) | Aishell Dataset | Char-based | 128 MB | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0523 || 151 h | [Transformer Aishell ASR1](../../examples/aishell/asr1)
 [Ds2 Offline Librispeech ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr0/asr0_deepspeech2_offline_librispeech_ckpt_1.0.1.model.tar.gz) | Librispeech Dataset | Char-based | 1.3 GB | 2 Conv + 5 bidirectional LSTM layers| - |0.0467| 960 h | [Ds2 Offline Librispeech ASR0](../../examples/librispeech/asr0)
-[Conformer Librispeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr1/asr1_conformer_librispeech_ckpt_0.1.1.model.tar.gz) | Librispeech Dataset | subword-based | 191 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring |-| 0.0337 | 960 h | [Conformer Librispeech ASR1](../../examples/librispeech/asr1)
+[Conformer Librispeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr1/asr1_conformer_librispeech_ckpt_0.1.1.model.tar.gz) | Librispeech Dataset | subword-based | 191 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring |-| 0.0338 | 960 h | [Conformer Librispeech ASR1](../../examples/librispeech/asr1)
 [Transformer Librispeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr1/asr1_transformer_librispeech_ckpt_0.1.1.model.tar.gz) | Librispeech Dataset | subword-based | 131 MB | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring |-| 0.0381 | 960 h | [Transformer Librispeech ASR1](../../examples/librispeech/asr1)
 [Transformer Librispeech ASR2 Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr2/asr2_transformer_librispeech_ckpt_0.1.1.model.tar.gz) | Librispeech Dataset | subword-based | 131 MB | Encoder:Transformer, Decoder:Transformer, Decoding method: JoinCTC w/ LM |-| 0.0240 | 960 h | [Transformer Librispeech ASR2](../../examples/librispeech/asr2)
examples/aishell/asr0/local/train.sh

 #!/bin/bash

-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] && [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi

@@ -10,6 +10,13 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+  ips_config=
+else
+  ips_config="--ips="${ips}
+fi

 mkdir -p exp

@@ -26,7 +33,7 @@ python3 -u ${BIN_DIR}/train.py \
     --output exp/${ckpt_name} \
     --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
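The hunk above (repeated across every recipe in this commit) widens the argument check and builds an optional `--ips` flag. Note that the new guard `[ $# -lt 2 ] && [ $# -gt 3 ]` can never be true, so it accepts any argument count; a hedged sketch of the presumably intended validation and flag construction (function names are illustrative, not from the repo):

```shell
#!/bin/bash
# Sketch of the optional trailing-argument pattern used by these train.sh
# scripts. The committed guard uses `&&`, which is always false ($# cannot be
# both < 2 and > 3); the intended range check needs `||`.
validate_args() {
    if [ $# -lt 2 ] || [ $# -gt 3 ]; then
        echo "usage: CUDA_VISIBLE_DEVICES=0 $0 config_path ckpt_name ips(optional)"
        return 1
    fi
    return 0
}

# Build the optional --ips flag as the diff does, but with a quoted emptiness
# test (-z) instead of the unquoted `[ ! $ips ]`, which breaks under `set -u`
# or values containing spaces.
build_ips_config() {
    local ips=$1
    if [ -z "${ips}" ]; then
        echo ""
    else
        echo "--ips=${ips}"
    fi
}

validate_args conf/deepspeech2.yaml ckpt0 || exit 1
build_ips_config "10.0.0.1,10.0.0.2"   # prints --ips=10.0.0.1,10.0.0.2
```

The same shape would apply to the three-argument st1 recipes with the bounds shifted to 3 and 4.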
examples/aishell/asr0/run.sh

@@ -6,6 +6,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=100
 conf_path=conf/deepspeech2.yaml #conf/deepspeech2.yaml or conf/deepspeech2_online.yaml
+ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=10
 audio_file=data/demo_01_03.wav

@@ -24,7 +25,7 @@ fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi

 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
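The run.sh hunks pass `${ips}` unquoted as a trailing argument. That is what keeps single-node runs working: when `ips` is empty, the unquoted expansion disappears from the argument list entirely, so train.sh still sees exactly two arguments. A small sketch of that expansion behavior (file names are illustrative):

```shell
#!/bin/bash
# When an unquoted variable is empty, word splitting removes it from the
# argument list; quoted, it survives as an empty-string argument.
count_args() { echo $#; }

ips=""
count_args conf/deepspeech2.yaml ckpt0 ${ips}     # empty, unquoted: vanishes -> 2
count_args conf/deepspeech2.yaml ckpt0 "${ips}"   # empty, quoted: stays      -> 3

ips="10.0.0.1,10.0.0.2"
count_args conf/deepspeech2.yaml ckpt0 ${ips}     # non-empty -> 3
```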
examples/aishell/asr1/local/train.sh

@@ -17,13 +17,21 @@ if [ ${seed} != 0 ]; then
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
 fi

-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] && [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi

 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+  ips_config=
+else
+  ips_config="--ips="${ips}
+fi
+echo ${ips_config}

 mkdir -p exp

@@ -37,7 +45,7 @@ python3 -u ${BIN_DIR}/train.py \
     --benchmark-batch-size ${benchmark_batch_size} \
     --benchmark-max-step ${benchmark_max_step}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --seed ${seed} \
 --config ${config_path} \
examples/aishell/asr1/run.sh

@@ -6,6 +6,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=50
 conf_path=conf/conformer.yaml
+ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=30
 audio_file=data/demo_01_03.wav

@@ -23,7 +24,7 @@ fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi

 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
examples/callcenter/asr1/local/train.sh

 #! /usr/bin/env bash

-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] && [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi

@@ -10,6 +10,13 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+  ips_config=
+else
+  ips_config="--ips="${ips}
+fi

 echo "using ${device}..."

@@ -28,7 +35,7 @@ python3 -u ${BIN_DIR}/train.py \
     --output exp/${ckpt_name} \
     --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
examples/callcenter/asr1/run.sh

@@ -6,6 +6,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=50
 conf_path=conf/conformer.yaml
+ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=20

@@ -22,7 +23,7 @@ fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi

 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
examples/librispeech/asr0/local/train.sh

 #!/bin/bash

-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] && [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi

@@ -10,6 +10,13 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+  ips_config=
+else
+  ips_config="--ips="${ips}
+fi

 mkdir -p exp

@@ -26,7 +33,7 @@ python3 -u ${BIN_DIR}/train.py \
     --output exp/${ckpt_name} \
     --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
examples/librispeech/asr0/run.sh

@@ -6,6 +6,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=100
 conf_path=conf/deepspeech2.yaml
+ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=5
 audio_file=data/demo_002_en.wav

@@ -23,7 +24,7 @@ fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi

 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
examples/librispeech/asr1/local/test.sh

@@ -42,6 +42,11 @@ echo "chunk mode ${chunk_mode}"
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+    # format the reference test file
+    python3 utils/format_rsl.py \
+        --origin_ref data/manifest.test-clean.raw \
+        --trans_ref data/manifest.test-clean.text
+
     for type in attention; do
         echo "decoding ${type}"
         if [ ${chunk_mode} == true ]; then

@@ -63,54 +68,90 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
             echo "Failed in evaluation!"
             exit 1
         fi
+        python3 utils/format_rsl.py \
+            --origin_hyp ${ckpt_prefix}.${type}.rsl \
+            --trans_hyp ${ckpt_prefix}.${type}.rsl.text
+        python3 utils/compute-wer.py --char=1 --v=1 \
+            data/manifest.test-clean.text ${ckpt_prefix}.${type}.rsl.text > ${ckpt_prefix}.${type}.error
         echo "decoding ${type} done."
     done

     for type in ctc_greedy_search; do
         echo "decoding ${type}"
         if [ ${chunk_mode} == true ]; then
             # stream decoding only support batchsize=1
             batch_size=1
         else
             batch_size=64
         fi
         python3 -u ${BIN_DIR}/test.py \
             --ngpu ${ngpu} \
             --config ${config_path} \
             --decode_cfg ${decode_config_path} \
             --result_file ${ckpt_prefix}.${type}.rsl \
             --checkpoint_path ${ckpt_prefix} \
             --opts decode.decoding_method ${type} \
             --opts decode.decode_batch_size ${batch_size}
         if [ $? -ne 0 ]; then
             echo "Failed in evaluation!"
             exit 1
         fi
+        python3 utils/format_rsl.py \
+            --origin_hyp ${ckpt_prefix}.${type}.rsl \
+            --trans_hyp ${ckpt_prefix}.${type}.rsl.text
+        python3 utils/compute-wer.py --char=1 --v=1 \
+            data/manifest.test-clean.text ${ckpt_prefix}.${type}.rsl.text > ${ckpt_prefix}.${type}.error
         echo "decoding ${type} done."
     done

     for type in ctc_prefix_beam_search attention_rescoring; do
         echo "decoding ${type}"
         batch_size=1
         python3 -u ${BIN_DIR}/test.py \
             --ngpu ${ngpu} \
             --config ${config_path} \
             --decode_cfg ${decode_config_path} \
             --result_file ${ckpt_prefix}.${type}.rsl \
             --checkpoint_path ${ckpt_prefix} \
             --opts decode.decoding_method ${type} \
             --opts decode.decode_batch_size ${batch_size}
         if [ $? -ne 0 ]; then
             echo "Failed in evaluation!"
             exit 1
         fi
+        python3 utils/format_rsl.py \
+            --origin_hyp ${ckpt_prefix}.${type}.rsl \
+            --trans_hyp ${ckpt_prefix}.${type}.rsl.text
+        python3 utils/compute-wer.py --char=1 --v=1 \
+            data/manifest.test-clean.text ${ckpt_prefix}.${type}.rsl.text > ${ckpt_prefix}.${type}.error
         echo "decoding ${type} done."
     done
 fi

+if [ ${stage} -le 101 ] && [ ${stop_stage} -ge 101 ]; then
+    python3 utils/format_rsl.py \
+        --origin_ref data/manifest.test-clean.raw \
+        --trans_ref_sclite data/manifest.test.text-clean.sclite
+
+    output_dir=${ckpt_prefix}
+    for type in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do
+        python utils/format_rsl.py \
+            --origin_hyp ${output_dir}/${type}.rsl \
+            --trans_hyp_sclite ${output_dir}/${type}.rsl.text.sclite
+        mkdir -p ${output_dir}/${type}_sclite
+        sclite -i wsj -r data/manifest.test-clean.text.sclite -h ${output_dir}/${type}.rsl.text.sclite -e utf-8 -o all -O ${output_dir}/${type}_sclite -c NOASCII
+    done
+fi
+
 echo "Finished"
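Each decoding loop in test.sh repeats the same chunk-mode batch-size choice: streaming (chunk) decoding only supports a batch size of 1, offline decoding batches 64 utterances, and the rescoring methods always run with batch size 1. A condensed sketch of that selection logic (the decoding-method names are from the script above; the function name is illustrative):

```shell
#!/bin/bash
# Pick the decode batch size the way test.sh does: stream (chunk) decoding
# only supports batch_size=1; ctc_prefix_beam_search and attention_rescoring
# are always run with batch_size=1 regardless of chunk mode.
pick_batch_size() {
    local chunk_mode=$1 type=$2
    case ${type} in
        ctc_prefix_beam_search|attention_rescoring) echo 1 ;;
        *)
            if [ "${chunk_mode}" = "true" ]; then echo 1; else echo 64; fi
            ;;
    esac
}

for type in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do
    echo "${type}: batch_size=$(pick_batch_size false ${type})"
done
```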
examples/librispeech/asr1/local/train.sh

 #!/bin/bash

-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] && [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi

@@ -10,6 +10,13 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+  ips_config=
+else
+  ips_config="--ips="${ips}
+fi

 mkdir -p exp

@@ -29,7 +36,7 @@ python3 -u ${BIN_DIR}/train.py \
     --output exp/${ckpt_name} \
     --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
examples/librispeech/asr1/run.sh

@@ -8,6 +8,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=50
 conf_path=conf/transformer.yaml
+ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=30
 audio_file=data/demo_002_en.wav

@@ -25,7 +26,7 @@ fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi

 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
examples/librispeech/asr2/local/train.sh

 #!/bin/bash

-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] && [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi

@@ -10,6 +10,13 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+  ips_config=
+else
+  ips_config="--ips="${ips}
+fi

 mkdir -p exp

@@ -27,7 +34,7 @@ python3 -u ${BIN_DIR}/train.py \
     --output exp/${ckpt_name} \
     --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --model-name u2_kaldi \
 --config ${config_path} \
examples/librispeech/asr2/run.sh

@@ -9,6 +9,7 @@ gpus=0,1,2,3,4,5,6,7
 stage=0
 stop_stage=50
 conf_path=conf/transformer.yaml
+ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/decode/decode_base.yaml
 dict_path=data/lang_char/train_960_unigram5000_units.txt
 avg_num=10

@@ -26,7 +27,7 @@ fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi

 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
examples/mustc/st1/local/train.sh

 #!/bin/bash

-if [ $# != 3 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ckpt_path"
+if [ $# -lt 3 ] && [ $# -gt 4 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ckpt_path ips(optional)"
     exit -1
 fi

@@ -11,6 +11,13 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
 ckpt_path=$3
+ips=$3
+
+if [ ! $ips ];then
+  ips_config=
+else
+  ips_config="--ips="${ips}
+fi

 mkdir -p exp

@@ -21,12 +28,21 @@ if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
 fi

+if [ ${ngpu} == 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --checkpoint_path "${ckpt_path}" \
 --seed ${seed}
+else
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--checkpoint_path "${ckpt_path}" \
+--seed ${seed}
+fi

 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic
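Note that this hunk assigns both `ckpt_path=$3` and `ips=$3`, so in this recipe the IP list would have to occupy the same slot as the checkpoint path. A hedged sketch of what the four-argument layout presumably intends, taking the optional list from `$4` instead (function name and sample values are illustrative):

```shell
#!/bin/bash
# Illustrative argument layout for a train.sh that already consumes three
# positional arguments: the optional IP list moves to $4 so it does not clash
# with ckpt_path=$3 as in the hunk above.
parse_train_args() {
    config_path=$1
    ckpt_name=$2
    ckpt_path=$3
    ips=$4
    if [ -z "${ips}" ]; then
        ips_config=
    else
        ips_config="--ips=${ips}"
    fi
}

parse_train_args conf/transformer_es.yaml ckpt0 "paddle.98" "10.0.0.1,10.0.0.2"
echo "${ips_config}"   # prints --ips=10.0.0.1,10.0.0.2
```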
examples/mustc/st1/run.sh

@@ -7,6 +7,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=3
 conf_path=conf/transformer_es.yaml
+ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 must_c_path=
 lang=es

@@ -25,7 +26,7 @@ fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${ckpt_path}"
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${ckpt_path}" ${ips}
 fi

 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then

@@ -36,4 +37,4 @@ fi
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${lang} || exit -1
-fi
\ No newline at end of file
+fi
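The final hunk of this file changes nothing but the line ending: the old file ended in `fi` with no trailing newline, and the commit adds one. A quick sketch of how to detect that condition:

```shell
#!/bin/bash
# A file ends with a newline iff its last byte is \n. `tail -c1` prints that
# byte; command substitution strips trailing newlines, so the result is empty
# exactly when the final newline is present.
ends_with_newline() {
    [ -z "$(tail -c1 "$1")" ]
}

printf 'fi'   > /tmp/no_newline.sh
printf 'fi\n' > /tmp/with_newline.sh
ends_with_newline /tmp/no_newline.sh   || echo "/tmp/no_newline.sh: missing final newline"
ends_with_newline /tmp/with_newline.sh && echo "/tmp/with_newline.sh: ok"
```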
examples/ted_en_zh/st0/local/train.sh

 #!/bin/bash

-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] && [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi

@@ -10,6 +10,13 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+  ips_config=
+else
+  ips_config="--ips="${ips}
+fi

 mkdir -p exp

@@ -26,7 +33,7 @@ python3 -u ${BIN_DIR}/train.py \
     --output exp/${ckpt_name} \
     --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
examples/ted_en_zh/st0/run.sh

@@ -6,6 +6,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=50
 conf_path=conf/transformer_mtl_noam.yaml
+ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=5
 data_path=./TED_EnZh # path to unzipped data

@@ -23,7 +24,7 @@ fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi

 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
examples/ted_en_zh/st1/local/train.sh

 #!/bin/bash

-if [ $# != 3 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ckpt_path"
+if [ $# -lt 3 ] && [ $# -gt 4 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi

@@ -11,6 +11,15 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
 ckpt_path=$3
+ips=$3
+
+if [ ! $ips ];then
+  ips_config=
+else
+  ips_config="--ips="${ips}
+fi
+
 mkdir -p exp

@@ -28,7 +37,7 @@ python3 -u ${BIN_DIR}/train.py \
     --checkpoint_path "${ckpt_path}" \
     --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
examples/ted_en_zh/st1/run.sh

@@ -7,6 +7,7 @@ gpus=0,1,2,3
 stage=1
 stop_stage=4
 conf_path=conf/transformer_mtl_noam.yaml
+ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 ckpt_path= # paddle.98 # (finetune from FAT-ST pretrained model)
 avg_num=5

@@ -29,7 +30,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         echo "Finetune from Pretrained Model" ${ckpt_path}
         ./local/download_pretrain.sh || exit -1
     fi
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${ckpt_path}"
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${ckpt_path}" ${ips}
 fi

 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
examples/tiny/asr0/local/train.sh

@@ -15,13 +15,20 @@ if [ ${seed} != 0 ]; then
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
 fi

-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] && [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi

 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+  ips_config=
+else
+  ips_config="--ips="${ips}
+fi

 mkdir -p exp

@@ -33,7 +40,7 @@ python3 -u ${BIN_DIR}/train.py \
     --profiler-options "${profiler_options}" \
     --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
examples/tiny/asr0/run.sh

@@ -2,10 +2,11 @@
 set -e
 source path.sh

-gpus=0
+gpus=4
 stage=0
 stop_stage=100
 conf_path=conf/deepspeech2.yaml
+ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=1
 source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

@@ -21,7 +22,7 @@ fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi

 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
examples/tiny/asr1/local/train.sh

@@ -17,13 +17,20 @@ if [ ${seed} != 0 ]; then
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
 fi

-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] && [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi

 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+  ips_config=
+else
+  ips_config="--ips="${ips}
+fi

 mkdir -p exp

@@ -37,7 +44,7 @@ python3 -u ${BIN_DIR}/train.py \
     --benchmark-batch-size ${benchmark_batch_size} \
     --benchmark-max-step ${benchmark_max_step}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --seed ${seed} \
 --config ${config_path} \
examples/tiny/asr1/run.sh

@@ -2,10 +2,11 @@
 set -e
 source path.sh

-gpus=0
+gpus=4
 stage=0
 stop_stage=50
 conf_path=conf/transformer.yaml
+ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=1

@@ -22,7 +23,7 @@ fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi

 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
paddlespeech/cli/asr/infer.py

@@ -136,7 +136,6 @@ class ASRExecutor(BaseExecutor):
         logger.info("start to init the model")
         # default max_len: unit:second
         self.max_len = 50
-        assert num_decoding_left_chunks == -1 or num_decoding_left_chunks >= 0
         if hasattr(self, 'model'):
             logger.info('Model had been initialized.')
             return

@@ -187,7 +186,9 @@ class ASRExecutor(BaseExecutor):
         elif "conformer" in model_type or "transformer" in model_type:
             self.config.decode.decoding_method = decode_method
-            self.config.num_decoding_left_chunks = num_decoding_left_chunks
+            if num_decoding_left_chunks:
+                assert num_decoding_left_chunks == -1 or num_decoding_left_chunks >= 0, f"num_decoding_left_chunks should be -1 or >=0"
+                self.config.num_decoding_left_chunks = num_decoding_left_chunks
         else:
             raise Exception("wrong type")
paddlespeech/resource/pretrained_models.py

@@ -136,9 +136,9 @@ asr_dynamic_pretrained_models = {
     "deepspeech2online_wenetspeech-zh-16k": {
         '1.0': {
             'url':
-            'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.1.model.tar.gz',
+            'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.2.model.tar.gz',
             'md5':
-            'd1be86a3e786042ab64f05161b5fae62',
+            'b0c77e7f8881e0a27b82127d1abb8d5f',
             'cfg_path':
             'model.yaml',
             'ckpt_path':
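This hunk bumps both the checkpoint URL and its registered md5. A hedged sketch of verifying a downloaded tarball against such a checksum, as one might before unpacking (the file and hash below are placeholders, not the real model):

```shell
#!/bin/bash
# Verify a file against an expected md5, e.g. the 'md5' field from the
# pretrained-model registry above. File name and hash here are illustrative
# placeholders, not the actual checkpoint.
verify_md5() {
    local file=$1 expected=$2
    local actual
    actual=$(md5sum "${file}" | awk '{print $1}')
    [ "${actual}" = "${expected}" ]
}

printf 'hello' > /tmp/demo_model.tar.gz
if verify_md5 /tmp/demo_model.tar.gz 5d41402abc4b2a76b9719d911017c592; then
    echo "checksum ok"
else
    echo "checksum mismatch"
fi
```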
paddlespeech/server/engine/asr/online/asr_engine.py

@@ -793,8 +793,9 @@ class ASRServerExecutor(ASRExecutor):
         self.config.decode.decoding_method = decode_method
         # update num_decoding_left_chunks
         if num_decoding_left_chunks:
+            assert num_decoding_left_chunks == -1 or num_decoding_left_chunks >= 0, f"num_decoding_left_chunks should be -1 or >=0"
             self.config.decode.num_decoding_left_chunks = num_decoding_left_chunks
         assert self.config.decode.num_decoding_left_chunks == -1 or self.config.decode.num_decoding_left_chunks >= 0, "num_decoding_left_chunks should be -1 or >=0"
         # we only support ctc_prefix_beam_search and attention_rescoring dedoding method
         # Generally we set the decoding_method to attention_rescoring
         if self.config.decode.decoding_method not in [