From 9aa868d14db86b7e5ec94a8d503fecfc7e18284b Mon Sep 17 00:00:00 2001
From: huangyuxin
Date: Fri, 10 Jun 2022 03:47:32 +0000
Subject: [PATCH] support distributed training

---
 examples/aishell/asr0/local/train.sh     | 13 ++++++++++---
 examples/aishell/asr0/run.sh             |  3 ++-
 examples/aishell/asr1/local/train.sh     | 14 +++++++++++---
 examples/aishell/asr1/run.sh             |  3 ++-
 examples/callcenter/asr1/local/train.sh  | 13 ++++++++++---
 examples/callcenter/asr1/run.sh          |  3 ++-
 examples/librispeech/asr0/local/train.sh | 13 ++++++++++---
 examples/librispeech/asr0/run.sh         |  3 ++-
 examples/librispeech/asr1/local/train.sh | 13 ++++++++++---
 examples/librispeech/asr1/run.sh         |  3 ++-
 examples/librispeech/asr2/local/train.sh | 13 ++++++++++---
 examples/librispeech/asr2/run.sh         |  3 ++-
 examples/mustc/st1/local/train.sh        | 20 ++++++++++++++++++--
 examples/mustc/st1/run.sh                |  5 +++--
 examples/ted_en_zh/st0/local/train.sh    | 13 ++++++++++---
 examples/ted_en_zh/st0/run.sh            |  3 ++-
 examples/ted_en_zh/st1/local/train.sh    | 15 ++++++++++++---
 examples/ted_en_zh/st1/run.sh            |  3 ++-
 examples/tiny/asr0/local/train.sh        | 13 ++++++++++---
 examples/tiny/asr0/run.sh                |  5 +++--
 examples/tiny/asr1/local/train.sh        | 13 ++++++++++---
 examples/tiny/asr1/run.sh                |  5 +++--
 22 files changed, 146 insertions(+), 46 deletions(-)

diff --git a/examples/aishell/asr0/local/train.sh b/examples/aishell/asr0/local/train.sh
index 76b696d9..256b30d2 100755
--- a/examples/aishell/asr0/local/train.sh
+++ b/examples/aishell/asr0/local/train.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] || [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi
 
@@ -10,6 +10,13 @@ echo "using $ngpu gpus..."
 
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+    ips_config=
+else
+    ips_config="--ips="${ips}
+fi
 
 mkdir -p exp
 
@@ -26,7 +33,7 @@ python3 -u ${BIN_DIR}/train.py \
 --output exp/${ckpt_name} \
 --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/aishell/asr0/run.sh b/examples/aishell/asr0/run.sh
index 3bf9aa12..530c013a 100755
--- a/examples/aishell/asr0/run.sh
+++ b/examples/aishell/asr0/run.sh
@@ -6,6 +6,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=100
 conf_path=conf/deepspeech2.yaml #conf/deepspeech2.yaml or conf/deepspeech2_online.yaml
+ips= #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=10
 audio_file=data/demo_01_03.wav
@@ -24,7 +25,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
diff --git a/examples/aishell/asr1/local/train.sh b/examples/aishell/asr1/local/train.sh
index 5617f7ef..f514de30 100755
--- a/examples/aishell/asr1/local/train.sh
+++ b/examples/aishell/asr1/local/train.sh
@@ -17,13 +17,21 @@ if [ ${seed} != 0 ]; then
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
 fi
 
-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] || [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi
 
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+    ips_config=
+else
+    ips_config="--ips="${ips}
+fi
+echo ${ips_config}
 
 mkdir -p exp
 
@@ -37,7 +45,7 @@ python3 -u ${BIN_DIR}/train.py \
 --benchmark-batch-size ${benchmark_batch_size} \
 --benchmark-max-step ${benchmark_max_step}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --seed ${seed} \
 --config ${config_path} \
diff --git a/examples/aishell/asr1/run.sh b/examples/aishell/asr1/run.sh
index cb781b20..bd4f50e3 100644
--- a/examples/aishell/asr1/run.sh
+++ b/examples/aishell/asr1/run.sh
@@ -6,6 +6,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=50
 conf_path=conf/conformer.yaml
+ips= #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=30
 audio_file=data/demo_01_03.wav
@@ -23,7 +24,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
diff --git a/examples/callcenter/asr1/local/train.sh b/examples/callcenter/asr1/local/train.sh
index 03b4588e..41da89e2 100755
--- a/examples/callcenter/asr1/local/train.sh
+++ b/examples/callcenter/asr1/local/train.sh
@@ -1,7 +1,7 @@
 #! /usr/bin/env bash
 
-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] || [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi
 
@@ -10,6 +10,13 @@ echo "using $ngpu gpus..."
 
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+    ips_config=
+else
+    ips_config="--ips="${ips}
+fi
 
 echo "using ${device}..."
 
@@ -28,7 +35,7 @@ python3 -u ${BIN_DIR}/train.py \
 --output exp/${ckpt_name} \
 --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/callcenter/asr1/run.sh b/examples/callcenter/asr1/run.sh
index 0c7ffc1e..7e3b912a 100644
--- a/examples/callcenter/asr1/run.sh
+++ b/examples/callcenter/asr1/run.sh
@@ -6,6 +6,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=50
 conf_path=conf/conformer.yaml
+ips= #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=20
 
@@ -22,7 +23,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
diff --git a/examples/librispeech/asr0/local/train.sh b/examples/librispeech/asr0/local/train.sh
index ad00653b..71659e28 100755
--- a/examples/librispeech/asr0/local/train.sh
+++ b/examples/librispeech/asr0/local/train.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] || [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi
 
@@ -10,6 +10,13 @@ echo "using $ngpu gpus..."
 
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+    ips_config=
+else
+    ips_config="--ips="${ips}
+fi
 
 mkdir -p exp
 
@@ -26,7 +33,7 @@ python3 -u ${BIN_DIR}/train.py \
 --output exp/${ckpt_name} \
 --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/librispeech/asr0/run.sh b/examples/librispeech/asr0/run.sh
index 6b1ff6c6..38112398 100755
--- a/examples/librispeech/asr0/run.sh
+++ b/examples/librispeech/asr0/run.sh
@@ -6,6 +6,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=100
 conf_path=conf/deepspeech2.yaml
+ips= #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=5
 audio_file=data/demo_002_en.wav
@@ -23,7 +24,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
diff --git a/examples/librispeech/asr1/local/train.sh b/examples/librispeech/asr1/local/train.sh
index 3860d85c..f729ed22 100755
--- a/examples/librispeech/asr1/local/train.sh
+++ b/examples/librispeech/asr1/local/train.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] || [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi
 
@@ -10,6 +10,13 @@ echo "using $ngpu gpus..."
 
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+    ips_config=
+else
+    ips_config="--ips="${ips}
+fi
 
 mkdir -p exp
 
@@ -29,7 +36,7 @@ python3 -u ${BIN_DIR}/train.py \
 --output exp/${ckpt_name} \
 --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/librispeech/asr1/run.sh b/examples/librispeech/asr1/run.sh
index 116dae12..a14240ee 100755
--- a/examples/librispeech/asr1/run.sh
+++ b/examples/librispeech/asr1/run.sh
@@ -8,6 +8,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=50
 conf_path=conf/transformer.yaml
+ips= #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=30
 audio_file=data/demo_002_en.wav
@@ -25,7 +26,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
diff --git a/examples/librispeech/asr2/local/train.sh b/examples/librispeech/asr2/local/train.sh
index 560424ea..1f414ad4 100755
--- a/examples/librispeech/asr2/local/train.sh
+++ b/examples/librispeech/asr2/local/train.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] || [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi
 
@@ -10,6 +10,13 @@ echo "using $ngpu gpus..."
 
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+    ips_config=
+else
+    ips_config="--ips="${ips}
+fi
 
 mkdir -p exp
 
@@ -27,7 +34,7 @@ python3 -u ${BIN_DIR}/train.py \
 --output exp/${ckpt_name} \
 --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --model-name u2_kaldi \
 --config ${config_path} \
diff --git a/examples/librispeech/asr2/run.sh b/examples/librispeech/asr2/run.sh
index c9a794e3..d156159f 100755
--- a/examples/librispeech/asr2/run.sh
+++ b/examples/librispeech/asr2/run.sh
@@ -9,6 +9,7 @@ gpus=0,1,2,3,4,5,6,7
 stage=0
 stop_stage=50
 conf_path=conf/transformer.yaml
+ips= #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/decode/decode_base.yaml
 dict_path=data/lang_char/train_960_unigram5000_units.txt
 avg_num=10
@@ -26,7 +27,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
diff --git a/examples/mustc/st1/local/train.sh b/examples/mustc/st1/local/train.sh
index 456c9416..db2a575a 100755
--- a/examples/mustc/st1/local/train.sh
+++ b/examples/mustc/st1/local/train.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-if [ $# != 3 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ckpt_path"
+if [ $# -lt 3 ] || [ $# -gt 4 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ckpt_path ips(optional)"
     exit -1
 fi
 
@@ -11,6 +11,13 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
 ckpt_path=$3
+ips=$4
+
+if [ ! $ips ];then
+    ips_config=
+else
+    ips_config="--ips="${ips}
+fi
 
 mkdir -p exp
 
@@ -21,12 +28,21 @@ if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
 fi
 
+if [ ${ngpu} == 0 ]; then
 python3 -u ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --checkpoint_path "${ckpt_path}" \
 --seed ${seed}
+else
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
+--ngpu ${ngpu} \
+--config ${config_path} \
+--output exp/${ckpt_name} \
+--checkpoint_path "${ckpt_path}" \
+--seed ${seed}
+fi
 
 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic
diff --git a/examples/mustc/st1/run.sh b/examples/mustc/st1/run.sh
index 6ceae3b8..99ee2295 100755
--- a/examples/mustc/st1/run.sh
+++ b/examples/mustc/st1/run.sh
@@ -7,6 +7,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=3
 conf_path=conf/transformer_es.yaml
+ips= #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 must_c_path=
 lang=es
@@ -25,7 +26,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${ckpt_path}"
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${ckpt_path}" ${ips}
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
@@ -36,4 +37,4 @@ fi
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${lang} || exit -1
-fi
\ No newline at end of file
+fi
diff --git a/examples/ted_en_zh/st0/local/train.sh b/examples/ted_en_zh/st0/local/train.sh
index ad00653b..71659e28 100755
--- a/examples/ted_en_zh/st0/local/train.sh
+++ b/examples/ted_en_zh/st0/local/train.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] || [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi
 
@@ -10,6 +10,13 @@ echo "using $ngpu gpus..."
 
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+    ips_config=
+else
+    ips_config="--ips="${ips}
+fi
 
 mkdir -p exp
 
@@ -26,7 +33,7 @@ python3 -u ${BIN_DIR}/train.py \
 --output exp/${ckpt_name} \
 --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/ted_en_zh/st0/run.sh b/examples/ted_en_zh/st0/run.sh
index 1746c025..c5a59f65 100755
--- a/examples/ted_en_zh/st0/run.sh
+++ b/examples/ted_en_zh/st0/run.sh
@@ -6,6 +6,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=50
 conf_path=conf/transformer_mtl_noam.yaml
+ips= #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=5
 data_path=./TED_EnZh # path to unzipped data
@@ -23,7 +24,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
diff --git a/examples/ted_en_zh/st1/local/train.sh b/examples/ted_en_zh/st1/local/train.sh
index 5da64e99..3e9295e5 100755
--- a/examples/ted_en_zh/st1/local/train.sh
+++ b/examples/ted_en_zh/st1/local/train.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-if [ $# != 3 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ckpt_path"
+if [ $# -lt 3 ] || [ $# -gt 4 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ckpt_path ips(optional)"
     exit -1
 fi
 
@@ -11,6 +11,15 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
 ckpt_path=$3
+ips=$4
+
+if [ ! $ips ];then
+    ips_config=
+else
+    ips_config="--ips="${ips}
+fi
+
+mkdir -p exp
 
 mkdir -p exp
 
@@ -28,7 +37,7 @@ python3 -u ${BIN_DIR}/train.py \
 --checkpoint_path "${ckpt_path}" \
 --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/ted_en_zh/st1/run.sh b/examples/ted_en_zh/st1/run.sh
index 1808e37b..06a407d4 100755
--- a/examples/ted_en_zh/st1/run.sh
+++ b/examples/ted_en_zh/st1/run.sh
@@ -7,6 +7,7 @@ gpus=0,1,2,3
 stage=1
 stop_stage=4
 conf_path=conf/transformer_mtl_noam.yaml
+ips= #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 ckpt_path= # paddle.98 # (finetune from FAT-ST pretrained model)
 avg_num=5
@@ -29,7 +30,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         echo "Finetune from Pretrained Model" ${ckpt_path}
         ./local/download_pretrain.sh || exit -1
     fi
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${ckpt_path}"
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${ckpt_path}" ${ips}
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
diff --git a/examples/tiny/asr0/local/train.sh b/examples/tiny/asr0/local/train.sh
index 2f2cff77..8b67902f 100755
--- a/examples/tiny/asr0/local/train.sh
+++ b/examples/tiny/asr0/local/train.sh
@@ -15,13 +15,20 @@ if [ ${seed} != 0 ]; then
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
 fi
 
-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] || [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi
 
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+    ips_config=
+else
+    ips_config="--ips="${ips}
+fi
 
 mkdir -p exp
 
@@ -33,7 +40,7 @@ python3 -u ${BIN_DIR}/train.py \
 --profiler-options "${profiler_options}" \
 --seed ${seed}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/tiny/asr0/run.sh b/examples/tiny/asr0/run.sh
index 16f43130..3e84d422 100755
--- a/examples/tiny/asr0/run.sh
+++ b/examples/tiny/asr0/run.sh
@@ -2,10 +2,11 @@
 set -e
 source path.sh
 
-gpus=0
+gpus=4
 stage=0
 stop_stage=100
 conf_path=conf/deepspeech2.yaml
+ips= #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=1
 source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
@@ -21,7 +22,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
diff --git a/examples/tiny/asr1/local/train.sh b/examples/tiny/asr1/local/train.sh
index 5617f7ef..459f2e21 100755
--- a/examples/tiny/asr1/local/train.sh
+++ b/examples/tiny/asr1/local/train.sh
@@ -17,13 +17,20 @@ if [ ${seed} != 0 ]; then
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
 fi
 
-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+if [ $# -lt 2 ] || [ $# -gt 3 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
     exit -1
 fi
 
 config_path=$1
 ckpt_name=$2
+ips=$3
+
+if [ ! $ips ];then
+    ips_config=
+else
+    ips_config="--ips="${ips}
+fi
 
 mkdir -p exp
 
@@ -37,7 +44,7 @@ python3 -u ${BIN_DIR}/train.py \
 --benchmark-batch-size ${benchmark_batch_size} \
 --benchmark-max-step ${benchmark_max_step}
 else
-python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
+python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
 --ngpu ${ngpu} \
 --seed ${seed} \
 --config ${config_path} \
diff --git a/examples/tiny/asr1/run.sh b/examples/tiny/asr1/run.sh
index 1651c034..ca0a7a01 100755
--- a/examples/tiny/asr1/run.sh
+++ b/examples/tiny/asr1/run.sh
@@ -2,10 +2,11 @@
 set -e
 source path.sh
 
-gpus=0
+gpus=4
 stage=0
 stop_stage=50
 conf_path=conf/transformer.yaml
+ips= #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=1
 
@@ -22,7 +23,7 @@ fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
--
GitLab
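Usage sketch for the new option: each local/train.sh now takes an optional trailing argument with a comma-separated list of node IPs, which it forwards to paddle.distributed.launch as --ips=...; leaving it empty keeps the previous single-node behaviour. The config path, checkpoint name, and IP addresses below are placeholders for illustration, not values taken from the patch.

# single node, 4 GPUs (ips left empty, behaviour unchanged)
CUDA_VISIBLE_DEVICES=0,1,2,3 ./local/train.sh conf/conformer.yaml my_ckpt

# two nodes, 4 GPUs each: run the same command on every node, listing all node IPs (placeholder addresses)
CUDA_VISIBLE_DEVICES=0,1,2,3 ./local/train.sh conf/conformer.yaml my_ckpt 192.168.1.10,192.168.1.11

# or via run.sh, assuming utils/parse_options.sh follows the usual Kaldi convention
# of mapping "--ips <value>" onto the ips= variable declared before it is sourced
bash run.sh --stage 1 --stop_stage 1 --ips 192.168.1.10,192.168.1.11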