diff --git a/demos/audio_tagging/README.md b/demos/audio_tagging/README.md
index d954ddfb6339cc7bedd1b4f80cc1636ea8e3fc55..5073393d4ce4c7bb7eff9aaa3ee1362ec83497db 100644
--- a/demos/audio_tagging/README.md
+++ b/demos/audio_tagging/README.md
@@ -3,7 +3,7 @@
 ## Introduction
 Audio tagging is the task of labelling an audio clip with one or more labels or tags, including music tagging, acoustic scene classification, audio event classification, etc.
 
-This demo is an implementation to tag an audio file with 527 [AudioSet](https://research.google.com/audioset/) labels. It can be done by a single command line or a few lines in python using `PaddleSpeech`.
+This demo is an implementation to tag an audio file with 527 [AudioSet](https://research.google.com/audioset/) labels. It can be done by a single command or a few lines in Python using `PaddleSpeech`.
 
 ## Usage
 ### 1. Installation
@@ -86,7 +86,7 @@ wget https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav https://paddlespeech
 
 ### 4. Pretrained Models
 
-Here is a list of pretrained models released by PaddleSpeech and can be used by command and python api:
+Here is a list of pretrained models released by PaddleSpeech that can be used by the command line and Python API:
 
 | Model | Sample Rate
 | :--- | :---:
diff --git a/demos/speech_recognition/README.md b/demos/speech_recognition/README.md
index 891c7b9f62bffe5e2bc8744f0447d8b70fde7968..60ee8e4d4faddc55aadcbc66b7e7740d6d226713 100644
--- a/demos/speech_recognition/README.md
+++ b/demos/speech_recognition/README.md
@@ -3,7 +3,7 @@
 ## Introduction
 ASR, or Automatic Speech Recognition, refers to the problem of getting a program to automatically transcribe spoken language (speech-to-text).
 
-This demo is an implementation to recognize text from a specific audio file. It can be done by a single command line or a few lines in python using `PaddleSpeech`.
+This demo is an implementation to recognize text from a specific audio file. It can be done by a single command or a few lines in Python using `PaddleSpeech`.
 
 ## Usage
 ### 1. Installation
@@ -32,7 +32,7 @@ wget https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.
   - `input` (required): Audio file to recognize.
   - `model`: Model type of asr task. Default: `conformer_wenetspeech`.
   - `lang`: Model language. Default: `zh`.
-  - `sr`: Sample rate of the model. Default: `16000`.
+  - `sample_rate`: Sample rate of the model. Default: `16000`.
   - `config`: Config of asr task. Use pretrained model when it is None. Default: `None`.
   - `ckpt_path`: Model checkpoint. Use pretrained model when it is None. Default: `None`.
   - `device`: Choose device to execute model inference. Default: default device of paddlepaddle in the current environment.
@@ -68,7 +68,7 @@ wget https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.
 
 ### 4. Pretrained Models
 
-Here is a list of pretrained models released by PaddleSpeech and can be used by command and python api:
+Here is a list of pretrained models released by PaddleSpeech that can be used by the command line and Python API:
 
 | Model | Language | Sample Rate
 | :--- | :---: | :---:
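The `--sr` to `--sample_rate` rename above also surfaces in the Python layer. By analogy with the ST example added later in this patch, here is a minimal Python sketch of the equivalent call; note that the keyword signature of `ASRExecutor.__call__` is an assumption inferred from the parser arguments in `paddlespeech/cli/asr/infer.py`, since this diff does not show it directly.

```python
import paddle
from paddlespeech.cli import ASRExecutor

# A minimal sketch, assuming ASRExecutor mirrors STExecutor's keyword API;
# the parameter names follow the CLI arguments documented above.
asr_executor = ASRExecutor()
text = asr_executor(
    model='conformer_wenetspeech',
    lang='zh',
    sample_rate=16000,  # the argument renamed from `sr` in this change
    config=None,  # None means: use the pretrained model's config
    ckpt_path=None,
    audio_file='./zh.wav',
    device=paddle.get_device())
print('ASR Result: \n{}'.format(text))
```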
diff --git a/demos/speech_translation/README.md b/demos/speech_translation/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b2f29168acc0b45adf21a97d019c3a359b5f1672
--- /dev/null
+++ b/demos/speech_translation/README.md
@@ -0,0 +1,77 @@
+# Speech Translation
+
+## Introduction
+Speech translation is the process by which conversational spoken phrases are instantly translated and spoken aloud in a second language.
+
+This demo is an implementation to recognize text from a specific audio file and translate it into the target language. It can be done by a single command or a few lines in Python using `PaddleSpeech`.
+
+## Usage
+### 1. Installation
+```bash
+pip install paddlespeech
+```
+
+### 2. Prepare Input File
+Input of this demo should be a WAV file (`.wav`).
+
+Here are sample files for this demo that can be downloaded:
+```bash
+wget https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
+```
+
+### 3. Usage
+- Command Line (Recommended)
+  ```bash
+  paddlespeech st --input ~/en.wav
+  ```
+  Usage:
+  ```bash
+  paddlespeech st --help
+  ```
+  Arguments:
+  - `input` (required): Audio file to recognize and translate.
+  - `model`: Model type of st task. Default: `fat_st_ted`.
+  - `src_lang`: Source language. Default: `en`.
+  - `tgt_lang`: Target language. Default: `zh`.
+  - `sample_rate`: Sample rate of the model. Default: `16000`.
+  - `config`: Config of st task. Use pretrained model when it is None. Default: `None`.
+  - `ckpt_path`: Model checkpoint. Use pretrained model when it is None. Default: `None`.
+  - `device`: Choose device to execute model inference. Default: default device of paddlepaddle in the current environment.
+
+  Output:
+  ```bash
+  [2021-12-09 11:13:03,178] [ INFO] [utils.py] [L225] - ST Result: ['我 在 这栋 建筑 的 古老 门上 敲门 。']
+  ```
+
+- Python API
+  ```python
+  import paddle
+  from paddlespeech.cli import STExecutor
+
+  st_executor = STExecutor()
+  text = st_executor(
+      model='fat_st_ted',
+      src_lang='en',
+      tgt_lang='zh',
+      sample_rate=16000,
+      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
+      ckpt_path=None,
+      audio_file='./en.wav',
+      device=paddle.get_device())
+  print('ST Result: \n{}'.format(text))
+  ```
+
+  Output:
+  ```bash
+  ST Result:
+  ['我 在 这栋 建筑 的 古老 门上 敲门 。']
+  ```
+
+
+### 4. Pretrained Models
+
+Here is a list of pretrained models released by PaddleSpeech that can be used by the command line and Python API:
+
+| Model | Source Language | Target Language |
+| :--- | :---: | :---: |
+| fat_st_ted | en | zh |
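The model table above pairs with the pretrained-model key renamed later in this patch (`fat_st_ted_en-zh` becomes `fat_st_ted-en-zh`). A small self-contained sketch of how the CLI now composes that key, mirroring the updated `tag` line in `paddlespeech/cli/st/infer.py` below:

```python
# Mirrors the updated lookup in st/infer.py: the pretrained-model key is
# "{model}-{src_lang}-{tgt_lang}", with dashes as the only separators.
model, src_lang, tgt_lang = 'fat_st_ted', 'en', 'zh'
tag = model + '-' + src_lang + '-' + tgt_lang
assert tag == 'fat_st_ted-en-zh'  # the renamed key in pretrained_models
```

Switching the first separator from an underscore to a dash is what lets the new `choices` comprehension (see the sketch at the end of this patch) recover the model name unambiguously.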
diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py
index 1d235201d06080c2268033d78792ef4ccebd5152..1e59f015ab888892315a5777e1adadb4d0631c24 100644
--- a/paddlespeech/cli/asr/infer.py
+++ b/paddlespeech/cli/asr/infer.py
@@ -88,6 +88,7 @@ class ASRExecutor(BaseExecutor):
             '--model',
             type=str,
             default='conformer_wenetspeech',
+            choices=[tag[:tag.index('-')] for tag in pretrained_models.keys()],
             help='Choose model type of asr task.')
         self.parser.add_argument(
             '--lang',
@@ -95,7 +96,7 @@
             default='zh',
             help='Choose model language. zh or en')
         self.parser.add_argument(
-            "--sr",
+            "--sample_rate",
             type=int,
             default=16000,
             choices=[8000, 16000],
@@ -200,8 +201,8 @@
             raise Exception("wrong type")
 
         # Enter the path of model root
-        model_name = ''.join(
-            model_type.split('_')[:-1])  # model_type: {model_name}_{dataset}
+        model_name = model_type[:model_type.rindex(
+            '_')]  # model_type: {model_name}_{dataset}
         model_class = dynamic_import(model_name, model_alias)
         model_conf = self.config.model
         logger.info(model_conf)
@@ -314,7 +315,7 @@
                 num_processes=cfg.num_proc_bsearch)
             self._outputs["result"] = result_transcripts[0]
 
-        elif "conformer" in model_type or "transformer" in model_type or "wenetspeech" in model_type:
+        elif "conformer" in model_type or "transformer" in model_type:
             result_transcripts = self.model.decode(
                 audio,
                 audio_len,
@@ -419,7 +420,7 @@
 
         model = parser_args.model
         lang = parser_args.lang
-        sample_rate = parser_args.sr
+        sample_rate = parser_args.sample_rate
         config = parser_args.config
         ckpt_path = parser_args.ckpt_path
         audio_file = parser_args.input
diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py
index 0b4982d157b8768160d47d4c4eb0e39533a9884d..b73d1667946e1dd9acc7d9ff7b61fb18e1ea2bbf 100644
--- a/paddlespeech/cli/cls/infer.py
+++ b/paddlespeech/cli/cls/infer.py
@@ -81,6 +81,7 @@ class CLSExecutor(BaseExecutor):
             '--model',
             type=str,
             default='panns_cnn14',
+            choices=[tag[:tag.index('-')] for tag in pretrained_models.keys()],
             help='Choose model type of cls task.')
         self.parser.add_argument(
             '--config',
@@ -250,7 +251,6 @@
         Python API to call an executor.
         """
         audio_file = os.path.abspath(audio_file)
-        # self._check(audio_file, sample_rate)
         paddle.set_device(device)
         self._init_from_path(model, config, ckpt_path, label_file)
         self.preprocess(audio_file)
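The `rindex`-based slice introduced in `asr/infer.py` above fixes a subtle bug: the old `''.join(model_type.split('_')[:-1])` form silently dropped the internal underscores of multi-part model names. A self-contained illustration, using a tag chosen only as an example of the `{model_name}_{dataset}` pattern:

```python
# model_type follows "{model_name}_{dataset}"; model_name itself may
# contain underscores, which the old join-based form destroyed.
model_type = 'deepspeech2_online_aishell'  # illustrative tag only
old = ''.join(model_type.split('_')[:-1])  # 'deepspeech2online' (wrong)
new = model_type[:model_type.rindex('_')]  # 'deepspeech2_online' (correct)
assert new != old
```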
diff --git a/paddlespeech/cli/st/infer.py b/paddlespeech/cli/st/infer.py
index 534b9e3b9458f75fd89710ee20dd4b06feb38c4d..d7b53a0720be5df704dae6d9db76dece3595e05f 100644
--- a/paddlespeech/cli/st/infer.py
+++ b/paddlespeech/cli/st/infer.py
@@ -23,9 +23,6 @@ import numpy as np
 import paddle
 import soundfile
 from kaldiio import WriteHelper
-from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
-from paddlespeech.s2t.utils.dynamic_import import dynamic_import
-from paddlespeech.s2t.utils.utility import UpdateConfig
 from yacs.config import CfgNode
 
 from ..executor import BaseExecutor
@@ -33,11 +30,14 @@ from ..utils import cli_register
 from ..utils import download_and_decompress
 from ..utils import logger
 from ..utils import MODEL_HOME
+from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
+from paddlespeech.s2t.utils.dynamic_import import dynamic_import
+from paddlespeech.s2t.utils.utility import UpdateConfig
 
 __all__ = ["STExecutor"]
 
 pretrained_models = {
-    "fat_st_ted_en-zh": {
+    "fat_st_ted-en-zh": {
         "url":
         "https://paddlespeech.bj.bcebos.com/s2t/ted_en_zh/st1/fat_st_ted-en-zh.tar.gz",
         "md5":
@@ -49,7 +49,7 @@ pretrained_models = {
     }
 }
 
-model_alias = {"fat_st_ted": "paddlespeech.s2t.models.u2_st:U2STModel"}
+model_alias = {"fat_st": "paddlespeech.s2t.models.u2_st:U2STModel"}
 
 kaldi_bins = {
     "url":
@@ -70,9 +70,10 @@
         self.parser.add_argument(
             "--input", type=str, required=True, help="Audio file to translate.")
         self.parser.add_argument(
-            "--model_type",
+            "--model",
             type=str,
             default="fat_st_ted",
+            choices=[tag[:tag.index('-')] for tag in pretrained_models.keys()],
             help="Choose model type of st task.")
         self.parser.add_argument(
             "--src_lang",
@@ -91,7 +92,7 @@
             choices=[16000],
             help='Choose the audio sample rate of the model. 8000 or 16000')
         self.parser.add_argument(
-            "--cfg_path",
+            "--config",
             type=str,
             default=None,
             help="Config of st task. Use default config when it is None.")
@@ -150,7 +151,7 @@
             return
 
         if cfg_path is None or ckpt_path is None:
-            tag = model_type + "_" + src_lang + "-" + tgt_lang
+            tag = model_type + "-" + src_lang + "-" + tgt_lang
             res_path = self._get_pretrained_path(tag)
             self.cfg_path = os.path.join(res_path,
                                          pretrained_models[tag]["cfg_path"])
@@ -186,7 +187,9 @@
         model_conf = self.config.model
         logger.info(model_conf)
 
-        model_class = dynamic_import(model_type, model_alias)
+        model_name = model_type[:model_type.rindex(
+            '_')]  # model_type: {model_name}_{dataset}
+        model_class = dynamic_import(model_name, model_alias)
         self.model = model_class.from_config(model_conf)
         self.model.eval()
@@ -213,7 +216,7 @@
 
         audio_file = os.path.abspath(wav_file)
         logger.info("Preprocess audio_file:" + audio_file)
 
-        if model_type == "fat_st_ted":
+        if "fat_st" in model_type:
             cmvn = self.config.collator.cmvn_path
             utt_name = "_tmp"
@@ -321,25 +324,25 @@
         """
         parser_args = self.parser.parse_args(argv)
 
-        model_type = parser_args.model_type
+        model = parser_args.model
         src_lang = parser_args.src_lang
         tgt_lang = parser_args.tgt_lang
         sample_rate = parser_args.sample_rate
-        cfg_path = parser_args.cfg_path
+        config = parser_args.config
         ckpt_path = parser_args.ckpt_path
         audio_file = parser_args.input
         device = parser_args.device
 
         try:
-            res = self(model_type, src_lang, tgt_lang, sample_rate, cfg_path,
+            res = self(model, src_lang, tgt_lang, sample_rate, config,
                        ckpt_path, audio_file, device)
             logger.info("ST Result: {}".format(res))
             return True
         except Exception as e:
-            print(e)
+            logger.exception(e)
             return False
 
-    def __call__(self, model_type, src_lang, tgt_lang, sample_rate, cfg_path,
+    def __call__(self, model, src_lang, tgt_lang, sample_rate, config,
                  ckpt_path, audio_file, device):
         """
         Python API to call an executor.
@@ -347,10 +350,9 @@
 
         audio_file = os.path.abspath(audio_file)
         self._check(audio_file, sample_rate)
         paddle.set_device(device)
-        self._init_from_path(model_type, src_lang, tgt_lang, cfg_path,
-                             ckpt_path)
-        self.preprocess(audio_file, model_type)
-        self.infer(model_type)
-        res = self.postprocess(model_type)
+        self._init_from_path(model, src_lang, tgt_lang, config, ckpt_path)
+        self.preprocess(audio_file, model)
+        self.infer(model)
+        res = self.postprocess(model)
         return res
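Finally, the `choices=[tag[:tag.index('-')] for tag in pretrained_models.keys()]` comprehension added to the asr, cls, and st parsers derives the legal `--model` values from the pretrained-model keys, which is also why the ST key had to move its first underscore to a dash. A sketch with a trimmed stand-in dict (the second key is illustrative, not taken from this diff):

```python
# Trimmed stand-in for the real pretrained_models dicts; each key is
# "{model}-{suffix}", so everything before the first dash is the model name.
pretrained_models = {
    'fat_st_ted-en-zh': {},  # the ST key renamed in this patch
    'conformer_wenetspeech-zh-16k': {},  # illustrative ASR-style key
}
choices = [tag[:tag.index('-')] for tag in pretrained_models.keys()]
assert choices == ['fat_st_ted', 'conformer_wenetspeech']
```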