From 913b2300c30b4816429abcd03e790d5a81d0610e Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Wed, 22 Sep 2021 10:04:35 +0000
Subject: [PATCH] nprocs 0 for cpu, other for gpu

---
 deepspeech/exps/deepspeech2/bin/train.py          |  2 +-
 deepspeech/exps/deepspeech2/model.py              |  4 ++--
 deepspeech/exps/u2/bin/train.py                   |  2 +-
 deepspeech/exps/u2/model.py                       |  2 +-
 deepspeech/exps/u2_kaldi/bin/train.py             |  2 +-
 deepspeech/exps/u2_kaldi/model.py                 |  2 +-
 deepspeech/exps/u2_st/bin/train.py                |  2 +-
 deepspeech/exps/u2_st/model.py                    |  2 +-
 deepspeech/training/cli.py                        |  8 +-------
 deepspeech/training/trainer.py                    |  6 +++---
 examples/aishell/s0/local/export.sh               |  6 ------
 examples/aishell/s0/local/test.sh                 |  7 +------
 examples/aishell/s0/local/test_export.sh          |  7 +------
 examples/aishell/s0/local/train.sh                |  6 ------
 examples/aishell/s1/local/align.sh                |  7 +------
 examples/aishell/s1/local/export.sh               |  6 ------
 examples/aishell/s1/local/test.sh                 | 11 ++---------
 examples/aishell/s1/local/train.sh                |  6 ------
 examples/callcenter/s1/local/align.sh             |  8 +-------
 examples/callcenter/s1/local/export.sh            |  6 ------
 examples/callcenter/s1/local/test.sh              | 10 ++--------
 examples/callcenter/s1/local/train.sh             |  5 -----
 examples/librispeech/s0/local/export.sh           |  6 ------
 examples/librispeech/s0/local/test.sh             |  7 +------
 examples/librispeech/s0/local/train.sh            |  7 -------
 examples/librispeech/s1/local/align.sh            |  7 +------
 examples/librispeech/s1/local/export.sh           |  6 ------
 examples/librispeech/s1/local/test.sh             | 11 ++---------
 examples/librispeech/s1/local/train.sh            | 10 +++-------
 examples/librispeech/s2/local/align.sh            |  7 +------
 examples/librispeech/s2/local/export.sh           |  6 ------
 examples/librispeech/s2/local/train.sh            |  7 -------
 examples/ted_en_zh/t0/local/test.sh               |  7 +------
 examples/ted_en_zh/t0/local/train.sh              |  7 -------
 examples/timit/s1/local/align.sh                  |  7 +------
 examples/timit/s1/local/export.sh                 |  6 ------
 examples/timit/s1/local/test.sh                   | 11 ++---------
 examples/timit/s1/local/train.sh                  |  7 -------
 examples/tiny/s0/local/export.sh                  |  6 ------
 examples/tiny/s0/local/test.sh                    |  7 +------
 examples/tiny/s0/local/train.sh                   |  7 -------
 examples/tiny/s1/local/align.sh                   |  7 +------
 examples/tiny/s1/local/export.sh                  |  6 ------
 examples/tiny/s1/local/test.sh                    | 10 ++--------
 examples/tiny/s1/local/train.sh                   |  6 ------
 examples/v18_to_v2x/deepspeech2x/model.py         |  2 +-
 examples/v18_to_v2x/exp_aishell/local/test.sh     |  7 +------
 examples/v18_to_v2x/exp_baidu_en8k/local/test.sh  |  7 +------
 examples/v18_to_v2x/exp_librispeech/local/test.sh |  7 +------
 tests/chains/ds2_params_lite_train_infer.txt      |  4 ++--
 50 files changed, 43 insertions(+), 269 deletions(-)

diff --git a/deepspeech/exps/deepspeech2/bin/train.py b/deepspeech/exps/deepspeech2/bin/train.py
index 69ff043a..6740f288 100644
--- a/deepspeech/exps/deepspeech2/bin/train.py
+++ b/deepspeech/exps/deepspeech2/bin/train.py
@@ -27,7 +27,7 @@ def main_sp(config, args):
 
 
 def main(config, args):
-    if args.device == "gpu" and args.nprocs > 1:
+    if args.nprocs > 0:
         dist.spawn(main_sp, args=(config, args), nprocs=args.nprocs)
     else:
         main_sp(config, args)
diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py
index 7bf02930..8af2b02a 100644
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -403,7 +403,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
 
         self.setup_output_dir()
         self.setup_checkpointer()
@@ -635,7 +635,7 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
 
         self.setup_output_dir()
 
diff --git a/deepspeech/exps/u2/bin/train.py b/deepspeech/exps/u2/bin/train.py
index b664401a..17fb08a6 100644
--- a/deepspeech/exps/u2/bin/train.py
+++ b/deepspeech/exps/u2/bin/train.py
@@ -32,7 +32,7 @@ def main_sp(config, args):
 
 
 def main(config, args):
-    if args.device == "gpu" and args.nprocs > 1:
+    if args.nprocs > 0:
         dist.spawn(main_sp, args=(config, args), nprocs=args.nprocs)
     else:
         main_sp(config, args)
diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py
index 6bf01900..bc46a104 100644
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -653,7 +653,7 @@ class U2Tester(U2Trainer):
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
 
         self.setup_output_dir()
         self.setup_checkpointer()
diff --git a/deepspeech/exps/u2_kaldi/bin/train.py b/deepspeech/exps/u2_kaldi/bin/train.py
index 1dcd154d..d909727f 100644
--- a/deepspeech/exps/u2_kaldi/bin/train.py
+++ b/deepspeech/exps/u2_kaldi/bin/train.py
@@ -36,7 +36,7 @@ def main_sp(config, args):
 
 
 def main(config, args):
-    if args.device == "gpu" and args.nprocs > 1:
+    if args.nprocs > 0:
         dist.spawn(main_sp, args=(config, args), nprocs=args.nprocs)
     else:
         main_sp(config, args)
diff --git a/deepspeech/exps/u2_kaldi/model.py b/deepspeech/exps/u2_kaldi/model.py
index bc7cd4fd..be89c3d6 100644
--- a/deepspeech/exps/u2_kaldi/model.py
+++ b/deepspeech/exps/u2_kaldi/model.py
@@ -637,7 +637,7 @@ class U2Tester(U2Trainer):
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
 
         self.setup_output_dir()
         self.setup_checkpointer()
diff --git a/deepspeech/exps/u2_st/bin/train.py b/deepspeech/exps/u2_st/bin/train.py
index 86a0f000..1e6a746b 100644
--- a/deepspeech/exps/u2_st/bin/train.py
+++ b/deepspeech/exps/u2_st/bin/train.py
@@ -30,7 +30,7 @@ def main_sp(config, args):
 
 
 def main(config, args):
-    if args.device == "gpu" and args.nprocs > 1:
+    if args.nprocs > 0:
         dist.spawn(main_sp, args=(config, args), nprocs=args.nprocs)
     else:
         main_sp(config, args)
diff --git a/deepspeech/exps/u2_st/model.py b/deepspeech/exps/u2_st/model.py
index 4f95bc42..55dadee8 100644
--- a/deepspeech/exps/u2_st/model.py
+++ b/deepspeech/exps/u2_st/model.py
@@ -661,7 +661,7 @@ class U2STTester(U2STTrainer):
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
 
         self.setup_output_dir()
         self.setup_checkpointer()
diff --git a/deepspeech/training/cli.py b/deepspeech/training/cli.py
index 07c213db..aa263a06 100644
--- a/deepspeech/training/cli.py
+++ b/deepspeech/training/cli.py
@@ -30,7 +30,7 @@ def default_argument_parser():
 
     The ``--checkpoint_path`` specifies the checkpoint to load from.
 
-    The ``--device`` and ``--nprocs`` specifies how to run the training.
+    The ``--nprocs`` specifies how to run the training.
 
 
     See Also
@@ -51,12 +51,6 @@ def default_argument_parser():
         default=None,
         help="seed to use for paddle, np and random. None or 0 for random, else set seed."
     )
-    train_group.add_argument(
-        "--device",
-        type=str,
-        default='gpu',
-        choices=["cpu", "gpu"],
-        help="device cpu and gpu are supported.")
     train_group.add_argument(
         "--nprocs",
         type=int,
diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py
index 8b1adcd0..b180f489 100644
--- a/deepspeech/training/trainer.py
+++ b/deepspeech/training/trainer.py
@@ -86,7 +86,7 @@ class Trainer():
     >>>     config.merge_from_list(args.opts)
     >>> config.freeze()
     >>>
-    >>> if args.nprocs > 1 and args.device == "gpu":
+    >>> if args.nprocs > 0:
     >>>     dist.spawn(main_sp, args=(config, args), nprocs=args.nprocs)
     >>> else:
     >>>     main_sp(config, args)
@@ -119,7 +119,7 @@ class Trainer():
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
         if self.parallel:
             self.init_parallel()
 
@@ -139,7 +139,7 @@ class Trainer():
         """A flag indicating whether the experiment should run with
         multiprocessing.
         """
-        return self.args.device == "gpu" and self.args.nprocs > 1
+        return self.args.nprocs > 0
 
     def init_parallel(self):
         """Init environment for multiprocess training.
diff --git a/examples/aishell/s0/local/export.sh b/examples/aishell/s0/local/export.sh
index 2e09e5f5..a5e62c28 100755
--- a/examples/aishell/s0/local/export.sh
+++ b/examples/aishell/s0/local/export.sh
@@ -13,13 +13,7 @@ ckpt_path_prefix=$2
 jit_model_export_path=$3
 model_type=$4
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \
diff --git a/examples/aishell/s0/local/test.sh b/examples/aishell/s0/local/test.sh
index 9fd0bc8d..2ae0740b 100755
--- a/examples/aishell/s0/local/test.sh
+++ b/examples/aishell/s0/local/test.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi
 
 python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${ckpt_prefix}.rsl \
 --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/aishell/s0/local/test_export.sh b/examples/aishell/s0/local/test_export.sh
index b6d58097..a9a6b122 100755
--- a/examples/aishell/s0/local/test_export.sh
+++ b/examples/aishell/s0/local/test_export.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 jit_model_export_path=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi
 
 python3 -u ${BIN_DIR}/test_export.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${jit_model_export_path}.rsl \
 --export_path ${jit_model_export_path} \
diff --git a/examples/aishell/s0/local/train.sh b/examples/aishell/s0/local/train.sh
index 668ad0ea..edbf3383 100755
--- a/examples/aishell/s0/local/train.sh
+++ b/examples/aishell/s0/local/train.sh
@@ -12,11 +12,6 @@ config_path=$1
 ckpt_name=$2
 model_type=$3
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 mkdir -p exp
 
 # seed may break model convergence
@@ -26,7 +21,6 @@ if [ ${seed} != 0 ]; then
 fi
 
 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/aishell/s1/local/align.sh b/examples/aishell/s1/local/align.sh
index ad6c84bc..279461aa 100755
--- a/examples/aishell/s1/local/align.sh
+++ b/examples/aishell/s1/local/align.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 
@@ -22,8 +18,7 @@ mkdir -p ${output_dir}
 # align dump in `result_file`
 # .tier, .TextGrid dump in `dir of result_file`
 python3 -u ${BIN_DIR}/alignment.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/aishell/s1/local/export.sh b/examples/aishell/s1/local/export.sh
index f99a15ba..b562218e 100755
--- a/examples/aishell/s1/local/export.sh
+++ b/examples/aishell/s1/local/export.sh
@@ -12,13 +12,7 @@ config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \
diff --git a/examples/aishell/s1/local/test.sh b/examples/aishell/s1/local/test.sh
index f7e99ad7..c87412c9 100755
--- a/examples/aishell/s1/local/test.sh
+++ b/examples/aishell/s1/local/test.sh
@@ -8,11 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 config_path=$1
 ckpt_prefix=$2
 
@@ -39,8 +34,7 @@ for type in attention ctc_greedy_search; do
     output_dir=${ckpt_prefix}
     mkdir -p ${output_dir}
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${output_dir}/${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
@@ -58,8 +52,7 @@ for type in ctc_prefix_beam_search attention_rescoring; do
     output_dir=${ckpt_prefix}
     mkdir -p ${output_dir}
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${output_dir}/${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/aishell/s1/local/train.sh b/examples/aishell/s1/local/train.sh
index 5097d4d0..71af3a00 100755
--- a/examples/aishell/s1/local/train.sh
+++ b/examples/aishell/s1/local/train.sh
@@ -12,11 +12,6 @@ source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 if [ ${seed} != 0  ]; then
     export FLAGS_cudnn_deterministic=True
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
@@ -34,7 +29,6 @@ mkdir -p exp
 
 python3 -u ${BIN_DIR}/train.py \
 --seed ${seed} \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/callcenter/s1/local/align.sh b/examples/callcenter/s1/local/align.sh
index f2c878c2..b679e2ea 100755
--- a/examples/callcenter/s1/local/align.sh
+++ b/examples/callcenter/s1/local/align.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 
@@ -20,7 +16,6 @@ ckpt_name=$(basename ${ckpt_prefxi})
 mkdir -p exp
 
 
-
 batch_size=1
 output_dir=${ckpt_prefix}
 mkdir -p ${output_dir}
@@ -28,8 +23,7 @@ mkdir -p ${output_dir}
 # align dump in `result_file`
 # .tier, .TextGrid dump in `dir of result_file`
 python3 -u ${BIN_DIR}/alignment.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/callcenter/s1/local/export.sh b/examples/callcenter/s1/local/export.sh
index d171899c..d5f912e9 100755
--- a/examples/callcenter/s1/local/export.sh
+++ b/examples/callcenter/s1/local/export.sh
@@ -12,13 +12,7 @@ config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \
diff --git a/examples/callcenter/s1/local/test.sh b/examples/callcenter/s1/local/test.sh
index 7a5b1cdb..dca3137d 100755
--- a/examples/callcenter/s1/local/test.sh
+++ b/examples/callcenter/s1/local/test.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 
@@ -32,8 +28,7 @@ for type in attention ctc_greedy_search; do
     output_dir=${ckpt_prefix}
     mkdir -p ${output_dir}
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${output_dir}/${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
@@ -51,8 +46,7 @@ for type in ctc_prefix_beam_search attention_rescoring; do
     output_dir=${ckpt_prefix}
     mkdir -p ${output_dir}
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${output_dir}/${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/callcenter/s1/local/train.sh b/examples/callcenter/s1/local/train.sh
index d5dc15b0..eb8f8662 100755
--- a/examples/callcenter/s1/local/train.sh
+++ b/examples/callcenter/s1/local/train.sh
@@ -11,10 +11,4 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-echo "using ${device}..."
-
 mkdir -p exp
@@ -26,7 +20,6 @@ if [ ${seed} != 0 ]; then
 fi
 
 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/librispeech/s0/local/export.sh b/examples/librispeech/s0/local/export.sh
index 2e09e5f5..a5e62c28 100755
--- a/examples/librispeech/s0/local/export.sh
+++ b/examples/librispeech/s0/local/export.sh
@@ -13,13 +13,7 @@ ckpt_path_prefix=$2
 jit_model_export_path=$3
 model_type=$4
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \
diff --git a/examples/librispeech/s0/local/test.sh b/examples/librispeech/s0/local/test.sh
index b5b68c59..4d00f30b 100755
--- a/examples/librispeech/s0/local/test.sh
+++ b/examples/librispeech/s0/local/test.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi
 
 python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${ckpt_prefix}.rsl \
 --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/librispeech/s0/local/train.sh b/examples/librispeech/s0/local/train.sh
index 6aee372a..519df7fe 100755
--- a/examples/librispeech/s0/local/train.sh
+++ b/examples/librispeech/s0/local/train.sh
@@ -12,12 +12,6 @@ config_path=$1
 ckpt_name=$2
 model_type=$3
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-echo "using ${device}..."
-
 mkdir -p exp
 
 # seed may break model convergence
@@ -27,7 +21,6 @@ if [ ${seed} != 0 ]; then
 fi
 
 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/librispeech/s1/local/align.sh b/examples/librispeech/s1/local/align.sh
index ad6c84bc..279461aa 100755
--- a/examples/librispeech/s1/local/align.sh
+++ b/examples/librispeech/s1/local/align.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 
@@ -22,8 +18,7 @@ mkdir -p ${output_dir}
 # align dump in `result_file`
 # .tier, .TextGrid dump in `dir of result_file`
 python3 -u ${BIN_DIR}/alignment.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/librispeech/s1/local/export.sh b/examples/librispeech/s1/local/export.sh
index f99a15ba..b562218e 100755
--- a/examples/librispeech/s1/local/export.sh
+++ b/examples/librispeech/s1/local/export.sh
@@ -12,13 +12,7 @@ config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \
diff --git a/examples/librispeech/s1/local/test.sh b/examples/librispeech/s1/local/test.sh
index 3bd3f0bb..f7ec34ab 100755
--- a/examples/librispeech/s1/local/test.sh
+++ b/examples/librispeech/s1/local/test.sh
@@ -8,11 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 config_path=$1
 ckpt_prefix=$2
 
@@ -38,8 +33,7 @@ for type in attention ctc_greedy_search; do
         batch_size=64
     fi
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
@@ -55,8 +49,7 @@ for type in ctc_prefix_beam_search attention_rescoring; do
     echo "decoding ${type}"
     batch_size=1
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/librispeech/s1/local/train.sh b/examples/librispeech/s1/local/train.sh
index f905b766..8f92c646 100755
--- a/examples/librispeech/s1/local/train.sh
+++ b/examples/librispeech/s1/local/train.sh
@@ -11,12 +11,6 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-echo "using ${device}..."
-
 mkdir -p exp
 
 # seed may break model convergence
@@ -25,8 +19,10 @@ if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
 fi
 
+# export FLAGS_cudnn_exhaustive_search=true
+# export FLAGS_conv_workspace_size_limit=4000
+
 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/librispeech/s2/local/align.sh b/examples/librispeech/s2/local/align.sh
index b3d8fa5f..b45f4a0f 100755
--- a/examples/librispeech/s2/local/align.sh
+++ b/examples/librispeech/s2/local/align.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 dict_path=$2
 ckpt_prefix=$3
@@ -26,8 +22,7 @@ python3 -u ${BIN_DIR}/test.py \
 --model-name 'u2_kaldi' \
 --run-mode 'align' \
 --dict-path ${dict_path} \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result-file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/librispeech/s2/local/export.sh b/examples/librispeech/s2/local/export.sh
index efa70a2b..9c66dc62 100755
--- a/examples/librispeech/s2/local/export.sh
+++ b/examples/librispeech/s2/local/export.sh
@@ -12,15 +12,9 @@ config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 python3 -u ${BIN_DIR}/test.py \
 --model-name 'u2_kaldi' \
 --run-mode 'export' \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \
diff --git a/examples/librispeech/s2/local/train.sh b/examples/librispeech/s2/local/train.sh
index 66754201..33b46c20 100755
--- a/examples/librispeech/s2/local/train.sh
+++ b/examples/librispeech/s2/local/train.sh
@@ -11,12 +11,6 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-echo "using ${device}..."
-
 mkdir -p exp
 
 # seed may break model convergence
@@ -27,7 +21,6 @@ fi
 
 python3 -u ${BIN_DIR}/train.py \
 --model-name u2_kaldi \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/ted_en_zh/t0/local/test.sh b/examples/ted_en_zh/t0/local/test.sh
index 642328e8..34475085 100755
--- a/examples/ted_en_zh/t0/local/test.sh
+++ b/examples/ted_en_zh/t0/local/test.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 
@@ -19,8 +15,7 @@ for type in fullsentence; do
     echo "decoding ${type}"
     batch_size=32
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/ted_en_zh/t0/local/train.sh b/examples/ted_en_zh/t0/local/train.sh
index f905b766..e5fd19dd 100755
--- a/examples/ted_en_zh/t0/local/train.sh
+++ b/examples/ted_en_zh/t0/local/train.sh
@@ -11,12 +11,6 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-echo "using ${device}..."
-
 mkdir -p exp
 
 # seed may break model convergence
@@ -26,7 +20,6 @@ if [ ${seed} != 0 ]; then
 fi
 
 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/timit/s1/local/align.sh b/examples/timit/s1/local/align.sh
index ad6c84bc..279461aa 100755
--- a/examples/timit/s1/local/align.sh
+++ b/examples/timit/s1/local/align.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 
@@ -22,8 +18,7 @@ mkdir -p ${output_dir}
 # align dump in `result_file`
 # .tier, .TextGrid dump in `dir of result_file`
 python3 -u ${BIN_DIR}/alignment.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/timit/s1/local/export.sh b/examples/timit/s1/local/export.sh
index f99a15ba..b562218e 100755
--- a/examples/timit/s1/local/export.sh
+++ b/examples/timit/s1/local/export.sh
@@ -12,13 +12,7 @@ config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \
diff --git a/examples/timit/s1/local/test.sh b/examples/timit/s1/local/test.sh
index a137924e..868c8fda 100755
--- a/examples/timit/s1/local/test.sh
+++ b/examples/timit/s1/local/test.sh
@@ -8,11 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 config_path=$1
 ckpt_prefix=$2
 
@@ -37,8 +32,7 @@ for type in attention ctc_greedy_search; do
         batch_size=64
     fi
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
@@ -54,8 +48,7 @@ for type in ctc_prefix_beam_search attention_rescoring; do
     echo "decoding ${type}"
     batch_size=1
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/timit/s1/local/train.sh b/examples/timit/s1/local/train.sh
index 180d8b5a..89a64327 100755
--- a/examples/timit/s1/local/train.sh
+++ b/examples/timit/s1/local/train.sh
@@ -11,12 +11,6 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-echo "using ${device}..."
-
 mkdir -p exp
 
 # seed may break model convergence
@@ -26,7 +20,6 @@ if [ ${seed} != 0  ]; then
 fi
 
 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/tiny/s0/local/export.sh b/examples/tiny/s0/local/export.sh
index 2e09e5f5..a5e62c28 100755
--- a/examples/tiny/s0/local/export.sh
+++ b/examples/tiny/s0/local/export.sh
@@ -13,13 +13,7 @@ ckpt_path_prefix=$2
 jit_model_export_path=$3
 model_type=$4
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \
diff --git a/examples/tiny/s0/local/test.sh b/examples/tiny/s0/local/test.sh
index b5b68c59..4d00f30b 100755
--- a/examples/tiny/s0/local/test.sh
+++ b/examples/tiny/s0/local/test.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi
 
 python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${ckpt_prefix}.rsl \
 --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/tiny/s0/local/train.sh b/examples/tiny/s0/local/train.sh
index 9a76c7ad..5b87780a 100755
--- a/examples/tiny/s0/local/train.sh
+++ b/examples/tiny/s0/local/train.sh
@@ -10,17 +10,11 @@ source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 if [ ${seed} != 0  ]; then
     export FLAGS_cudnn_deterministic=True
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
 fi
 
-
 if [ $# != 3 ];then
     echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name model_type"
     exit -1
@@ -33,7 +27,6 @@ model_type=$3
 mkdir -p exp
 
 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/tiny/s1/local/align.sh b/examples/tiny/s1/local/align.sh
index ad6c84bc..279461aa 100755
--- a/examples/tiny/s1/local/align.sh
+++ b/examples/tiny/s1/local/align.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 
@@ -22,8 +18,7 @@ mkdir -p ${output_dir}
 # align dump in `result_file`
 # .tier, .TextGrid dump in `dir of result_file`
 python3 -u ${BIN_DIR}/alignment.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/tiny/s1/local/export.sh b/examples/tiny/s1/local/export.sh
index f99a15ba..b562218e 100755
--- a/examples/tiny/s1/local/export.sh
+++ b/examples/tiny/s1/local/export.sh
@@ -12,13 +12,7 @@ config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \
diff --git a/examples/tiny/s1/local/test.sh b/examples/tiny/s1/local/test.sh
index 4d3ed081..19872bb3 100755
--- a/examples/tiny/s1/local/test.sh
+++ b/examples/tiny/s1/local/test.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 
@@ -35,8 +31,7 @@ for type in attention ctc_greedy_search; do
         batch_size=64
     fi
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
@@ -52,8 +47,7 @@ for type in ctc_prefix_beam_search attention_rescoring; do
     echo "decoding ${type}"
     batch_size=1
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/tiny/s1/local/train.sh b/examples/tiny/s1/local/train.sh
index 5097d4d0..71af3a00 100755
--- a/examples/tiny/s1/local/train.sh
+++ b/examples/tiny/s1/local/train.sh
@@ -12,11 +12,6 @@ source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 if [ ${seed} != 0  ]; then
     export FLAGS_cudnn_deterministic=True
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
@@ -34,7 +29,6 @@ mkdir -p exp
 
 python3 -u ${BIN_DIR}/train.py \
 --seed ${seed} \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
diff --git a/examples/v18_to_v2x/deepspeech2x/model.py b/examples/v18_to_v2x/deepspeech2x/model.py
index 1fe1e2d6..cbbc502d 100644
--- a/examples/v18_to_v2x/deepspeech2x/model.py
+++ b/examples/v18_to_v2x/deepspeech2x/model.py
@@ -401,7 +401,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
 
         self.setup_output_dir()
         self.setup_checkpointer()
diff --git a/examples/v18_to_v2x/exp_aishell/local/test.sh b/examples/v18_to_v2x/exp_aishell/local/test.sh
index 9fd0bc8d..2ae0740b 100755
--- a/examples/v18_to_v2x/exp_aishell/local/test.sh
+++ b/examples/v18_to_v2x/exp_aishell/local/test.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi
 
 python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${ckpt_prefix}.rsl \
 --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/v18_to_v2x/exp_baidu_en8k/local/test.sh b/examples/v18_to_v2x/exp_baidu_en8k/local/test.sh
index b5b68c59..4d00f30b 100755
--- a/examples/v18_to_v2x/exp_baidu_en8k/local/test.sh
+++ b/examples/v18_to_v2x/exp_baidu_en8k/local/test.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi
 
 python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${ckpt_prefix}.rsl \
 --checkpoint_path ${ckpt_prefix} \
diff --git a/examples/v18_to_v2x/exp_librispeech/local/test.sh b/examples/v18_to_v2x/exp_librispeech/local/test.sh
index b5b68c59..4d00f30b 100755
--- a/examples/v18_to_v2x/exp_librispeech/local/test.sh
+++ b/examples/v18_to_v2x/exp_librispeech/local/test.sh
@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi
 
 python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${ckpt_prefix}.rsl \
 --checkpoint_path ${ckpt_prefix} \
diff --git a/tests/chains/ds2_params_lite_train_infer.txt b/tests/chains/ds2_params_lite_train_infer.txt
index 82a9da9a..47f7db95 100644
--- a/tests/chains/ds2_params_lite_train_infer.txt
+++ b/tests/chains/ds2_params_lite_train_infer.txt
@@ -13,7 +13,7 @@ null:null
 null:null
 ##
 trainer:norm_train
-norm_train: ../../../deepspeech/exps/deepspeech2/bin/train.py --nproc 1 --config conf/deepspeech2.yaml --model_type offline --device gpu
+norm_train: ../../../deepspeech/exps/deepspeech2/bin/train.py --nproc 1 --config conf/deepspeech2.yaml --model_type offline
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -21,7 +21,7 @@ null:null
 null:null
 ##
 ===========================eval_params===========================
-eval: ../../../deepspeech/exps/deepspeech2/bin/test.py --nproc 1 --config conf/deepspeech2.yaml --result_file tests/9.rsl  --model_type offline --device gpu
+eval: ../../../deepspeech/exps/deepspeech2/bin/test.py --nproc 1 --config conf/deepspeech2.yaml --result_file tests/9.rsl  --model_type offline
 null:null
 ##
 ===========================infer_params===========================
-- 
GitLab