import argparse
import asyncio
import codecs
import json
import logging
import os

import numpy as np
import soundfile
import websockets

from paddlespeech.cli.log import logger


class ASRAudioHandler:
    """Websocket client that streams a wav file to an ASR server in chunks.

    The handler connects to ``ws://<url>:<port>/ws/asr``, sends a JSON
    "start" message, then the audio as raw int16 chunks, and finally a JSON
    "end" message. One reply is read from the server after every message.
    """

    def __init__(self, url: str = "127.0.0.1", port: int = 8090):
        """Store the port and build the full websocket endpoint.

        Args:
            url: Host name or IP address of the ASR server (no scheme).
            port: TCP port the ASR server listens on.
        """
        self.port = port
        # ``self.url`` holds the complete endpoint, not just the host.
        self.url = "ws://" + url + ":" + str(port) + "/ws/asr"

    @staticmethod
    def _signal_message(signal: str) -> str:
        """Serialize the JSON control message carrying the given signal."""
        return json.dumps(
            {
                "name": "test.wav",
                "signal": signal,
                "nbest": 5
            },
            sort_keys=True,
            indent=4,
            separators=(',', ': '))

    def read_wave(self, wavfile_path: str):
        """Yield the audio in ``wavfile_path`` as equally sized int16 chunks.

        The samples are zero-padded at the end so that the total length is a
        multiple of the chunk size; every yielded chunk therefore has exactly
        ``85 * 16`` samples. An empty file yields nothing.

        Args:
            wavfile_path: Path of the audio file to read.

        Yields:
            Consecutive slices of the zero-padded sample array.
        """
        # NOTE(review): the sample rate reported by the file is not checked,
        # and the padding below is one-dimensional, so this presumably
        # expects mono audio -- confirm against callers.
        samples, _sample_rate = soundfile.read(wavfile_path, dtype='int16')
        x_len = len(samples)

        # 85 * 16 = 1360 samples, i.e. 85 ms per chunk if the audio is 16 kHz.
        chunk_size = 85 * 16

        # Number of zeros needed to reach the next multiple of chunk_size
        # (0 when the length is already a multiple).
        padding_len = (-x_len) % chunk_size
        padding = np.zeros(padding_len, dtype=samples.dtype)
        padded = np.concatenate([samples, padding], axis=0)

        for start in range(0, x_len + padding_len, chunk_size):
            yield padded[start:start + chunk_size]

    async def run(self, wavfile_path: str):
        """Stream one wav file to the server and return its final reply.

        Args:
            wavfile_path: Path of the audio file to send.

        Returns:
            The server's reply to the "end" message, decoded from JSON.
        """
        logger.info("send a message to the server")
        # Entering the context performs the websocket handshake.
        async with websockets.connect(self.url) as ws:
            # Announce the start of a new utterance.
            await ws.send(self._signal_message("start"))
            msg = await ws.recv()
            logger.info("receive msg={}".format(msg))

            # Send the audio chunk by chunk; the server answers each chunk
            # with a JSON message.
            for chunk_data in self.read_wave(wavfile_path):
                await ws.send(chunk_data.tobytes())
                msg = json.loads(await ws.recv())
                logger.info("receive msg={}".format(msg))

            # Tell the server the utterance is finished and collect the
            # final reply.
            await ws.send(self._signal_message("end"))
            msg = json.loads(await ws.recv())
            logger.info("final receive msg={}".format(msg))
            return msg