From 2ec8d608bf1ad5b0be9c36dc4339702271c27a6e Mon Sep 17 00:00:00 2001
From: WilliamZhang06
Date: Thu, 31 Mar 2022 16:06:16 +0800
Subject: [PATCH] fixed comments, test=doc

---
 paddlespeech/server/conf/application.yaml    | 13 ++--
 paddlespeech/server/conf/ws_application.yaml | 51 ++++++++++++++++
 .../tests/asr/online/microphone_client.py    | 61 +++++++++++--------
 .../tests/asr/online/websocket_client.py     | 56 ++++++++---------
 paddlespeech/server/utils/vad.py             |  7 +--
 paddlespeech/server/ws/asr_socket.py         | 48 +++++++--------
 6 files changed, 142 insertions(+), 94 deletions(-)
 create mode 100644 paddlespeech/server/conf/ws_application.yaml

diff --git a/paddlespeech/server/conf/application.yaml b/paddlespeech/server/conf/application.yaml
index 40de8e3b..849349c2 100644
--- a/paddlespeech/server/conf/application.yaml
+++ b/paddlespeech/server/conf/application.yaml
@@ -3,18 +3,15 @@
 #################################################################################
 #                             SERVER SETTING                                    #
 #################################################################################
-host: 0.0.0.0
+host: 127.0.0.1
 port: 8090
 
 # The task format in the engin_list is: <speech task>_<engine type>
 # task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference']
-# protocol: 'http'
-# engine_list: ['asr_python', 'tts_python', 'cls_python']
-
-
-# websocket, http (only choose one). websocket only support online engine type.
-protocol: 'websocket'
-engine_list: ['asr_online']
+# protocol = ['websocket', 'http'] (only one can be selected).
+# http only support offline engine type.
+protocol: 'http'
+engine_list: ['asr_python', 'tts_python', 'cls_python']
 
 
 #################################################################################
diff --git a/paddlespeech/server/conf/ws_application.yaml b/paddlespeech/server/conf/ws_application.yaml
new file mode 100644
index 00000000..ef23593e
--- /dev/null
+++ b/paddlespeech/server/conf/ws_application.yaml
@@ -0,0 +1,51 @@
+# This is the parameter configuration file for PaddleSpeech Serving.
+
+#################################################################################
+#                             SERVER SETTING                                    #
+#################################################################################
+host: 0.0.0.0
+port: 8091
+
+# The task format in the engin_list is: <speech task>_<engine type>
+# task choices = ['asr_online', 'tts_online']
+# protocol = ['websocket', 'http'] (only one can be selected).
+# websocket only support online engine type.
+protocol: 'websocket'
+engine_list: ['asr_online']
+
+
+#################################################################################
+#                              ENGINE CONFIG                                    #
+#################################################################################
+
+################################### ASR #########################################
+################### speech task: asr; engine_type: online #######################
+asr_online:
+    model_type: 'deepspeech2online_aishell'
+    am_model:   # the pdmodel file of am static model [optional]
+    am_params:  # the pdiparams file of am static model [optional]
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path:
+    decode_method:
+    force_yes: True
+
+    am_predictor_conf:
+        device:  # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False  # True -> print glog
+        summary: True  # False -> do not show predictor config
+
+    chunk_buffer_conf:
+        frame_duration_ms: 80
+        shift_ms: 40
+        sample_rate: 16000
+        sample_width: 2
+
+    vad_conf:
+        aggressiveness: 2
+        sample_rate: 16000
+        frame_duration_ms: 20
+        sample_width: 2
+        padding_ms: 200
+        padding_ratio: 0.9
diff --git a/paddlespeech/server/tests/asr/online/microphone_client.py b/paddlespeech/server/tests/asr/online/microphone_client.py
index 74d457c5..2ceaf6d0 100644
--- a/paddlespeech/server/tests/asr/online/microphone_client.py
+++ b/paddlespeech/server/tests/asr/online/microphone_client.py
@@ -11,25 +11,23 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 record wave from the mic
 """
-
+import asyncio
+import json
+import logging
 import threading
-import pyaudio
 import wave
-import logging
-import asyncio
+from signal import SIGINT
+from signal import SIGTERM
+
+import pyaudio
 import websockets
-import json
-from signal import SIGINT, SIGTERM
 
 
 class ASRAudioHandler(threading.Thread):
-    def __init__(self,
-                 url="127.0.0.1",
-                 port=8090):
+    def __init__(self, url="127.0.0.1", port=8091):
         threading.Thread.__init__(self)
         self.url = url
         self.port = port
@@ -56,12 +54,13 @@ class ASRAudioHandler(threading.Thread):
         self._running = True
         self._frames = []
         p = pyaudio.PyAudio()
-        stream = p.open(format=self.format,
-                        channels=self.channels,
-                        rate=self.rate,
-                        input=True,
-                        frames_per_buffer=self.chunk)
-        while(self._running):
+        stream = p.open(
+            format=self.format,
+            channels=self.channels,
+            rate=self.rate,
+            input=True,
+            frames_per_buffer=self.chunk)
+        while (self._running):
             data = stream.read(self.chunk)
             self._frames.append(data)
             self.data_backup.append(data)
@@ -97,11 +96,15 @@ class ASRAudioHandler(threading.Thread):
 
         async with websockets.connect(self.url) as ws:
             # send the start signal
-            audio_info = json.dumps({
-                "name": "test.wav",
-                "signal": "start",
-                "nbest": 5
-            }, sort_keys=True, indent=4, separators=(',', ': '))
+            audio_info = json.dumps(
+                {
+                    "name": "test.wav",
+                    "signal": "start",
+                    "nbest": 5
+                },
+                sort_keys=True,
+                indent=4,
+                separators=(',', ': '))
             await ws.send(audio_info)
             msg = await ws.recv()
             logging.info("receive msg={}".format(msg))
@@ -117,11 +120,15 @@ class ASRAudioHandler(threading.Thread):
             except asyncio.CancelledError:
                 # quit
                 # send finished
-                audio_info = json.dumps({
-                    "name": "test.wav",
-                    "signal": "end",
-                    "nbest": 5
-                }, sort_keys=True, indent=4, separators=(',', ': '))
+                audio_info = json.dumps(
+                    {
+                        "name": "test.wav",
+                        "signal": "end",
+                        "nbest": 5
+                    },
+                    sort_keys=True,
+                    indent=4,
+                    separators=(',', ': '))
                 await ws.send(audio_info)
                 msg = await ws.recv()
                logging.info("receive msg={}".format(msg))
@@ -141,7 +148,7 @@ if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)
     logging.info("asr websocket client start")
 
-    handler = ASRAudioHandler("127.0.0.1", 8090)
+    handler = ASRAudioHandler("127.0.0.1", 8091)
     loop = asyncio.get_event_loop()
     main_task = asyncio.ensure_future(handler.run())
     for signal in [SIGINT, SIGTERM]:
diff --git a/paddlespeech/server/tests/asr/online/websocket_client.py b/paddlespeech/server/tests/asr/online/websocket_client.py
index d849ffea..58b1a452 100644
--- a/paddlespeech/server/tests/asr/online/websocket_client.py
+++ b/paddlespeech/server/tests/asr/online/websocket_client.py
@@ -11,26 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 #!/usr/bin/python
 # -*- coding: UTF-8 -*-
-
 import argparse
-import logging
-import time
-import os
+import asyncio
 import json
-import wave
+import logging
+
 import numpy as np
-import asyncio
-import websockets
 import soundfile
+import websockets
 
 
 class ASRAudioHandler:
-    def __init__(self,
-                 url="127.0.0.1",
-                 port=8090):
+    def __init__(self, url="127.0.0.1", port=8090):
         self.url = url
         self.port = port
         self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr"
@@ -38,17 +32,15 @@ class ASRAudioHandler:
     def read_wave(self, wavfile_path: str):
         samples, sample_rate = soundfile.read(wavfile_path, dtype='int16')
         x_len = len(samples)
-        chunk_stride = 40 * 16 #40ms, sample_rate = 16kHz
-        chunk_size = 80 * 16 #80ms, sample_rate = 16kHz
+        chunk_stride = 40 * 16  #40ms, sample_rate = 16kHz
+        chunk_size = 80 * 16  #80ms, sample_rate = 16kHz
 
         if (x_len - chunk_size) % chunk_stride != 0:
-            padding_len_x = chunk_stride - (x_len - chunk_size
-                                            ) % chunk_stride
+            padding_len_x = chunk_stride - (x_len - chunk_size) % chunk_stride
         else:
             padding_len_x = 0
 
-        padding = np.zeros(
-            (padding_len_x), dtype=samples.dtype)
+        padding = np.zeros((padding_len_x), dtype=samples.dtype)
 
         padded_x = np.concatenate([samples, padding], axis=0)
         num_chunk = (x_len + padding_len_x - chunk_size) / chunk_stride + 1
@@ -68,11 +60,15 @@ class ASRAudioHandler:
 
         async with websockets.connect(self.url) as ws:
             # the server has already received the handshake header
            # send the start signal
-            audio_info = json.dumps({
-                "name": "test.wav",
-                "signal": "start",
-                "nbest": 5
-            }, sort_keys=True, indent=4, separators=(',', ': '))
+            audio_info = json.dumps(
+                {
+                    "name": "test.wav",
+                    "signal": "start",
+                    "nbest": 5
+                },
+                sort_keys=True,
+                indent=4,
+                separators=(',', ': '))
             await ws.send(audio_info)
             msg = await ws.recv()
             logging.info("receive msg={}".format(msg))
@@ -84,11 +80,15 @@ class ASRAudioHandler:
                 logging.info("receive msg={}".format(msg))
 
             # finished
-            audio_info = json.dumps({
-                "name": "test.wav",
-                "signal": "end",
-                "nbest": 5
-            }, sort_keys=True, indent=4, separators=(',', ': '))
+            audio_info = json.dumps(
+                {
+                    "name": "test.wav",
+                    "signal": "end",
+                    "nbest": 5
+                },
+                sort_keys=True,
+                indent=4,
+                separators=(',', ': '))
             await ws.send(audio_info)
             msg = await ws.recv()
             logging.info("receive msg={}".format(msg))
@@ -97,7 +97,7 @@ class ASRAudioHandler:
 def main(args):
     logging.basicConfig(level=logging.INFO)
     logging.info("asr websocket client start")
-    handler = ASRAudioHandler("127.0.0.1", 8090)
+    handler = ASRAudioHandler("127.0.0.1", 8091)
     loop = asyncio.get_event_loop()
     loop.run_until_complete(handler.run(args.wavfile))
     logging.info("asr websocket client finished")
diff --git a/paddlespeech/server/utils/vad.py b/paddlespeech/server/utils/vad.py
index e9b55717..a2dcf68b 100644
--- a/paddlespeech/server/utils/vad.py
+++ b/paddlespeech/server/utils/vad.py
@@ -12,16 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import collections
-import logging
 
 import webrtcvad
 
 
 class VADAudio():
     def __init__(self,
-                 aggressiveness,
-                 rate,
-                 frame_duration_ms,
+                 aggressiveness=2,
+                 rate=16000,
+                 frame_duration_ms=20,
                  sample_width=2,
                  padding_ms=200,
                  padding_ratio=0.9):
diff --git a/paddlespeech/server/ws/asr_socket.py b/paddlespeech/server/ws/asr_socket.py
index 5cc9472c..ea19816b 100644
--- a/paddlespeech/server/ws/asr_socket.py
+++ b/paddlespeech/server/ws/asr_socket.py
@@ -11,35 +11,39 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import base64
-import traceback
-from typing import Union
-import random
-import numpy as np
 import json
 
+import numpy as np
 from fastapi import APIRouter
 from fastapi import WebSocket
 from fastapi import WebSocketDisconnect
 from starlette.websockets import WebSocketState as WebSocketState
 
-from paddlespeech.server.engine.asr.online.asr_engine import ASREngine
 from paddlespeech.server.engine.engine_pool import get_engine_pool
 from paddlespeech.server.utils.buffer import ChunkBuffer
 from paddlespeech.server.utils.vad import VADAudio
 
 
-
 router = APIRouter()
 
+
 @router.websocket('/ws/asr')
 async def websocket_endpoint(websocket: WebSocket):
     await websocket.accept()
 
+    engine_pool = get_engine_pool()
+    asr_engine = engine_pool['asr']
     # init buffer
-    chunk_buffer = ChunkBuffer(sample_width=2)
+    chunk_buffer_conf = asr_engine.config.chunk_buffer_conf
+    chunk_buffer = ChunkBuffer(
+        sample_rate=chunk_buffer_conf['sample_rate'],
+        sample_width=chunk_buffer_conf['sample_width'])
     # init vad
-    vad = VADAudio(2, 16000, 20)
+    vad_conf = asr_engine.config.vad_conf
+    vad = VADAudio(
+        aggressiveness=vad_conf['aggressiveness'],
+        rate=vad_conf['sample_rate'],
+        frame_duration_ms=vad_conf['frame_duration_ms'])
 
     try:
         while True:
@@ -50,17 +54,11 @@ async def websocket_endpoint(websocket: WebSocket):
             if "text" in message:
                 message = json.loads(message["text"])
                 if 'signal' not in message:
-                    resp = {
-                        "status": "ok",
-                        "message": "no valid json data"
-                    }
+                    resp = {"status": "ok", "message": "no valid json data"}
                     await websocket.send_json(resp)
 
                 if message['signal'] == 'start':
-                    resp = {
-                        "status": "ok",
-                        "signal": "server_ready"
-                    }
+                    resp = {"status": "ok", "signal": "server_ready"}
                     # do something at begining here
                     await websocket.send_json(resp)
                 elif message['signal'] == 'end':
@@ -68,24 +66,19 @@ async def websocket_endpoint(websocket: WebSocket):
                     asr_engine = engine_pool['asr']
                     # reset single engine for an new connection
                     asr_engine.reset()
-                    resp = {
-                        "status": "ok",
-                        "signal": "finished"
-                    }
+                    resp = {"status": "ok", "signal": "finished"}
                     await websocket.send_json(resp)
                     break
                 else:
-                    resp = {
-                        "status": "ok",
-                        "message": "no valid json data"
-                    }
+                    resp = {"status": "ok", "message": "no valid json data"}
                     await websocket.send_json(resp)
 
             elif "bytes" in message:
                 message = message["bytes"]
                 # vad for input bytes audio
                 vad.add_audio(message)
-                message = b''.join(f for f in vad.vad_collector() if f is not None)
+                message = b''.join(f for f in vad.vad_collector()
+                                   if f is not None)
 
                 engine_pool = get_engine_pool()
                 asr_engine = engine_pool['asr']
@@ -94,7 +87,8 @@ async def websocket_endpoint(websocket: WebSocket):
                 for frame in frames:
                     samples = np.frombuffer(frame.bytes, dtype=np.int16)
                     sample_rate = asr_engine.config.sample_rate
-                    x_chunk, x_chunk_lens = asr_engine.preprocess(samples, sample_rate)
+                    x_chunk, x_chunk_lens = asr_engine.preprocess(samples,
+                                                                  sample_rate)
                     asr_engine.run(x_chunk, x_chunk_lens)
                     asr_results = asr_engine.postprocess()
 
-- 
GitLab
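
Note: the sketch below is a minimal, standalone client for the /ws/asr handshake exercised by this patch. It assumes a server is already running with the new ws_application.yaml (websocket protocol on port 8091) and that a local 16 kHz, 16-bit mono test.wav exists. Unlike the bundled websocket_client.py, which slides an 80 ms window with a 40 ms stride, it sends non-overlapping 80 ms chunks to keep the example short.

import asyncio
import json

import numpy as np
import soundfile
import websockets


async def recognize(wavfile_path="test.wav",
                    url="ws://127.0.0.1:8091/ws/asr"):
    # test.wav is assumed to be a 16 kHz, 16-bit mono recording
    samples, sample_rate = soundfile.read(wavfile_path, dtype='int16')
    chunk_size = 80 * 16  # 80 ms at 16 kHz, matching chunk_buffer_conf

    async with websockets.connect(url) as ws:
        # tell the server a new utterance starts
        await ws.send(
            json.dumps({
                "name": wavfile_path,
                "signal": "start",
                "nbest": 5
            }))
        print(await ws.recv())  # expect a "server_ready" response

        # stream the audio as raw little-endian int16 bytes
        for start in range(0, len(samples), chunk_size):
            chunk = np.asarray(samples[start:start + chunk_size], dtype=np.int16)
            await ws.send(chunk.tobytes())
            print(await ws.recv())  # partial recognition result so far

        # close the utterance and fetch the final result
        await ws.send(
            json.dumps({
                "name": wavfile_path,
                "signal": "end",
                "nbest": 5
            }))
        print(await ws.recv())


if __name__ == "__main__":
    asyncio.get_event_loop().run_until_complete(recognize())

The control messages mirror what asr_socket.py checks for: a JSON text frame carrying "signal": "start" or "end", with binary frames of raw PCM audio in between.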