Merge pull request #1836 from Honei/punc

[asr][server]join streaming asr and punc server

Merge pull request #1836 from Honei/punc
[asr][server]join streaming asr and punc server
d99e99ce · Hui Zhang · GitHub · 435e86b3 · 0e2372ed · d99e99ce
9 changed files
--- a/demos/streaming_asr_server/README.md
+++ b/demos/streaming_asr_server/README.md
--- a/demos/streaming_asr_server/README_cn.md
+++ b/demos/streaming_asr_server/README_cn.md
--- a/demos/streaming_asr_server/conf/punc_application.yaml
+++ b/demos/streaming_asr_server/conf/punc_application.yaml
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+#                             SERVER SETTING                                    #
+#################################################################################
+host: 0.0.0.0
+port: 8190
+
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['text_python']
+# protocol = ['http'] (only one can be selected).
+# http only supports the offline engine type.
+protocol: 'http'
+engine_list: ['text_python']
+
+
+#################################################################################
+#                                ENGINE CONFIG                                  #
+#################################################################################
+
+################################### Text #########################################
+################### text task: punc; engine_type: python #######################
+text_python:
+    task: punc
+    model_type: 'ernie_linear_p3_wudao'
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path: # [optional]
+    ckpt_path: # [optional]
+    vocab_file: # [optional]
+    device: 'cpu' # set 'gpu:id' or 'cpu'
+
+
+
+
--- a/demos/streaming_asr_server/conf/ws_conformer_application.yaml
+++ b/demos/streaming_asr_server/conf/ws_conformer_application.yaml
@@ -4,7 +4,7 @@
 #                             SERVER SETTING                                    #
 #################################################################################
 host: 0.0.0.0
-port: 8090
+port: 8290

 # The task format in the engin_list is: <speech task>_<engine type>
 # task choices = ['asr_online']
@@ -29,7 +29,7 @@ asr_online:
    cfg_path: 
    decode_method: 
    force_yes: True
-    device: # cpu or gpu:id
+    device: 'cpu' # cpu or gpu:id
    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True

--- a/demos/streaming_asr_server/punc_server.py
+++ b/demos/streaming_asr_server/punc_server.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+
+from paddlespeech.cli.log import logger
+from paddlespeech.server.bin.paddlespeech_server import ServerExecutor
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        prog='paddlespeech_server.start', add_help=True)
+    parser.add_argument(
+        "--config_file",
+        action="store",
+        help="yaml file of the app",
+        default=None,
+        required=True)
+
+    parser.add_argument(
+        "--log_file",
+        action="store",
+        help="log file",
+        default="./log/paddlespeech.log")
+    logger.info("start to parse the args")
+    args = parser.parse_args()
+
+    logger.info("start to launch the punctuation server")
+    punc_server = ServerExecutor()
+    punc_server(config_file=args.config_file, log_file=args.log_file)
--- a/demos/streaming_asr_server/server.sh
+++ b/demos/streaming_asr_server/server.sh
+export CUDA_VISIBLE_DEVICE=0,1,2,3
+
+nohup python3 punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
+
+nohup python3 streaming_asr_server.py --config_file conf/ws_conformer_application.yaml > streaming_asr.log 2>&1 &
--- a/demos/streaming_asr_server/streaming_asr_server.py
+++ b/demos/streaming_asr_server/streaming_asr_server.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+
+from paddlespeech.cli.log import logger
+from paddlespeech.server.bin.paddlespeech_server import ServerExecutor
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        prog='paddlespeech_server.start', add_help=True)
+    parser.add_argument(
+        "--config_file",
+        action="store",
+        help="yaml file of the app",
+        default=None,
+        required=True)
+
+    parser.add_argument(
+        "--log_file",
+        action="store",
+        help="log file",
+        default="./log/paddlespeech.log")
+    logger.info("start to parse the args")
+    args = parser.parse_args()
+
+    logger.info("start to launch the streaming asr server")
+    streaming_asr_server = ServerExecutor()
+    streaming_asr_server(config_file=args.config_file, log_file=args.log_file)
--- a/demos/streaming_asr_server/test.sh
+++ b/demos/streaming_asr_server/test.sh
 # download the test wav
 wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav 

-# read the wav and pass it to service
-python3 websocket_client.py --wavfile ./zh.wav
+# read the wav and pass it to only streaming asr service
+python3 websocket_client.py --server_ip 127.0.0.1 --port 8290 --wavfile ./zh.wav
+
+# read the wav and call streaming and punc service
+python3 websocket_client.py --server_ip 127.0.0.1 --port 8290 --punc.server_ip 127.0.0.1 --punc.port 8190 --wavfile ./zh.wav
--- a/paddlespeech/server/bin/paddlespeech_client.py
+++ b/paddlespeech/server/bin/paddlespeech_client.py
@@ -411,6 +411,18 @@ class ASROnlineClientExecutor(BaseExecutor):
            '--lang', type=str, default="zh_cn", help='language')
        self.parser.add_argument(
            '--audio_format', type=str, default="wav", help='audio format')
+        self.parser.add_argument(
+            '--punc.server_ip',
+            type=str,
+            default=None,
+            dest="punc_server_ip",
+            help='Punctuation server ip')
+        self.parser.add_argument(
+            '--punc.port',
+            type=int,
+            default=8190,
+            dest="punc_server_port",
+            help='Punctuation server port')

    def execute(self, argv: List[str]) -> bool:
        args = self.parser.parse_args(argv)
@@ -428,7 +440,9 @@ class ASROnlineClientExecutor(BaseExecutor):
                port=port,
                sample_rate=sample_rate,
                lang=lang,
-                audio_format=audio_format)
+                audio_format=audio_format,
+                punc_server_ip=args.punc_server_ip,
+                punc_server_port=args.punc_server_port)
            time_end = time.time()
            logger.info(res)
            logger.info("Response time %f s." % (time_end - time_start))
@@ -445,12 +459,30 @@ class ASROnlineClientExecutor(BaseExecutor):
                 port: int=8091,
                 sample_rate: int=16000,
                 lang: str="zh_cn",
-                 audio_format: str="wav"):
-        """
-        Python API to call an executor.
+                 audio_format: str="wav",
+                 punc_server_ip: str=None,
+                 punc_server_port: str=None):
+        """Python API to call asr online executor.
+
+        Args:
+            input (str): the audio file to be sent to the streaming asr service.
+            server_ip (str, optional): streaming asr server ip. Defaults to "127.0.0.1".
+            port (int, optional): streaming asr server port. Defaults to 8091.
+            sample_rate (int, optional): audio sample rate. Defaults to 16000.
+            lang (str, optional): audio language type. Defaults to "zh_cn".
+            audio_format (str, optional): audio format. Defaults to "wav".
+            punc_server_ip (str, optional): punctuation server ip. Defaults to None.
+            punc_server_port (str, optional): punctuation server port. Defaults to None.
+
+        Returns:
+            str: the audio text
        """
        logger.info("asr websocket client start")
-        handler = ASRWsAudioHandler(server_ip, port)
+        handler = ASRWsAudioHandler(
+            server_ip,
+            port,
+            punc_server_ip=punc_server_ip,
+            punc_server_port=punc_server_port)
        loop = asyncio.get_event_loop()
        res = loop.run_until_complete(handler.run(input))
        logger.info("asr websocket client finished")