未验证 提交 08e0cf2b 编写于 作者: H Hui Zhang 提交者: GitHub

Merge pull request #1737 from Honei/server

[asr][websocket]add streaming asr demo
此差异已折叠。
此差异已折叠。
# This is the parameter configuration file for PaddleSpeech Serving.
#################################################################################
# SERVER SETTING #
#################################################################################
host: 0.0.0.0
port: 8090
# The task format in the engin_list is: <speech task>_<engine type>
# task choices = ['asr_online', 'tts_online']
# protocol = ['websocket', 'http'] (only one can be selected).
# websocket only support online engine type.
protocol: 'websocket'
engine_list: ['asr_online']
#################################################################################
# ENGINE CONFIG #
#################################################################################
################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
model_type: 'deepspeech2online_aishell'
am_model: # the pdmodel file of am static model [optional]
am_params: # the pdiparams file of am static model [optional]
lang: 'zh'
sample_rate: 16000
cfg_path:
decode_method:
force_yes: True
am_predictor_conf:
device: # set 'gpu:id' or 'cpu'
switch_ir_optim: True
glog_info: False # True -> print glog
summary: True # False -> do not show predictor config
chunk_buffer_conf:
frame_duration_ms: 80
shift_ms: 40
sample_rate: 16000
sample_width: 2
window_n: 7 # frame
shift_n: 4 # frame
window_ms: 20 # ms
shift_ms: 10 # ms
# This is the parameter configuration file for PaddleSpeech Serving.
#################################################################################
# SERVER SETTING #
#################################################################################
host: 0.0.0.0
port: 8090
# The task format in the engin_list is: <speech task>_<engine type>
# task choices = ['asr_online', 'tts_online']
# protocol = ['websocket', 'http'] (only one can be selected).
# websocket only support online engine type.
protocol: 'websocket'
engine_list: ['asr_online']
#################################################################################
# ENGINE CONFIG #
#################################################################################
################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
model_type: 'conformer_online_multicn'
am_model: # the pdmodel file of am static model [optional]
am_params: # the pdiparams file of am static model [optional]
lang: 'zh'
sample_rate: 16000
cfg_path:
decode_method:
force_yes: True
device: # cpu or gpu:id
am_predictor_conf:
device: # set 'gpu:id' or 'cpu'
switch_ir_optim: True
glog_info: False # True -> print glog
summary: True # False -> do not show predictor config
chunk_buffer_conf:
window_n: 7 # frame
shift_n: 4 # frame
window_ms: 25 # ms
shift_ms: 10 # ms
sample_rate: 16000
sample_width: 2
\ No newline at end of file
# start the streaming asr service
paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml
\ No newline at end of file
# download the test wav
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
# read the wav and pass it to service
python3 websocket_client.py --wavfile ./zh.wav
......@@ -16,102 +16,24 @@
import argparse
import asyncio
import codecs
import json
import logging
import os
import numpy as np
import soundfile
import websockets
class ASRAudioHandler:
def __init__(self, url="127.0.0.1", port=8090):
self.url = url
self.port = port
self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr"
def read_wave(self, wavfile_path: str):
samples, sample_rate = soundfile.read(wavfile_path, dtype='int16')
x_len = len(samples)
chunk_size = 85 * 16 #80ms, sample_rate = 16kHz
if x_len % chunk_size!= 0:
padding_len_x = chunk_size - x_len % chunk_size
else:
padding_len_x = 0
padding = np.zeros((padding_len_x), dtype=samples.dtype)
padded_x = np.concatenate([samples, padding], axis=0)
assert (x_len + padding_len_x) % chunk_size == 0
num_chunk = (x_len + padding_len_x) / chunk_size
num_chunk = int(num_chunk)
for i in range(0, num_chunk):
start = i * chunk_size
end = start + chunk_size
x_chunk = padded_x[start:end]
yield x_chunk
async def run(self, wavfile_path: str):
logging.info("send a message to the server")
# self.read_wave()
# send websocket handshake protocal
async with websockets.connect(self.url) as ws:
# server has already received handshake protocal
# client start to send the command
audio_info = json.dumps(
{
"name": "test.wav",
"signal": "start",
"nbest": 5
},
sort_keys=True,
indent=4,
separators=(',', ': '))
await ws.send(audio_info)
msg = await ws.recv()
logging.info("receive msg={}".format(msg))
# send chunk audio data to engine
for chunk_data in self.read_wave(wavfile_path):
await ws.send(chunk_data.tobytes())
msg = await ws.recv()
msg = json.loads(msg)
logging.info("receive msg={}".format(msg))
# finished
audio_info = json.dumps(
{
"name": "test.wav",
"signal": "end",
"nbest": 5
},
sort_keys=True,
indent=4,
separators=(',', ': '))
await ws.send(audio_info)
msg = await ws.recv()
# decode the bytes to str
msg = json.loads(msg)
logging.info("final receive msg={}".format(msg))
result = msg
return result
from paddlespeech.cli.log import logger
from paddlespeech.server.utils.audio_handler import ASRAudioHandler
def main(args):
logging.basicConfig(level=logging.INFO)
logging.info("asr websocket client start")
logger.info("asr websocket client start")
handler = ASRAudioHandler("127.0.0.1", 8090)
loop = asyncio.get_event_loop()
# support to process single audio file
if args.wavfile and os.path.exists(args.wavfile):
logging.info(f"start to process the wavscp: {args.wavfile}")
logger.info(f"start to process the wavscp: {args.wavfile}")
result = loop.run_until_complete(handler.run(args.wavfile))
result = result["asr_results"]
logging.info(f"asr websocket client finished : {result}")
logger.info(f"asr websocket client finished : {result}")
# support to process batch audios from wav.scp
if args.wavscp and os.path.exists(args.wavscp):
......@@ -126,6 +48,7 @@ def main(args):
if __name__ == "__main__":
logger.info("Start to do streaming asr client")
parser = argparse.ArgumentParser()
parser.add_argument(
"--wavfile",
......
......@@ -30,11 +30,14 @@ from ..executor import BaseExecutor
from ..util import cli_client_register
from ..util import stats_wrapper
from paddlespeech.cli.log import logger
from paddlespeech.server.tests.asr.online.websocket_client import ASRAudioHandler
from paddlespeech.server.utils.audio_handler import ASRAudioHandler
from paddlespeech.server.utils.audio_process import wav2pcm
from paddlespeech.server.utils.util import wav2base64
__all__ = ['TTSClientExecutor', 'ASRClientExecutor', 'CLSClientExecutor']
__all__ = [
'TTSClientExecutor', 'ASRClientExecutor', 'ASROnlineClientExecutor',
'CLSClientExecutor'
]
@cli_client_register(
......@@ -236,11 +239,11 @@ class ASRClientExecutor(BaseExecutor):
@cli_client_register(
name='paddlespeech_client.asr_online',
description='visit asr online service')
class ASRClientExecutor(BaseExecutor):
class ASROnlineClientExecutor(BaseExecutor):
def __init__(self):
super(ASRClientExecutor, self).__init__()
super(ASROnlineClientExecutor, self).__init__()
self.parser = argparse.ArgumentParser(
prog='paddlespeech_client.asr', add_help=True)
prog='paddlespeech_client.asr_online', add_help=True)
self.parser.add_argument(
'--server_ip', type=str, default='127.0.0.1', help='server ip')
self.parser.add_argument(
......@@ -305,6 +308,7 @@ class ASRClientExecutor(BaseExecutor):
return res['asr_results']
@cli_client_register(
name='paddlespeech_client.cls', description='visit cls service')
class CLSClientExecutor(BaseExecutor):
......
......@@ -29,7 +29,7 @@ asr_online:
cfg_path:
decode_method:
force_yes: True
device: # cpu or gpu:id
am_predictor_conf:
device: # set 'gpu:id' or 'cpu'
switch_ir_optim: True
......
......@@ -1028,6 +1028,17 @@ class ASREngine(BaseEngine):
self.output = ""
self.executor = ASRServerExecutor()
self.config = config
try:
if self.config.get("device", None):
self.device = self.config.device
else:
self.device = paddle.get_device()
logger.info(f"paddlespeech_server set the device: {self.device}")
paddle.set_device(self.device)
except BaseException:
logger.error(
"Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
)
self.executor._init_from_path(
model_type=self.config.model_type,
......
([简体中文](./README_cn.md)|English)
# Speech Service
## Introduction
This document introduces a client for streaming asr service: microphone
## Usage
### 1. Install
Refer [Install](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
**paddlepaddle 2.2.1** 或以上版本。
It is recommended to use **paddlepaddle 2.2.1** or above.
You can choose one way from meduim and hard to install paddlespeech.
### 2. Prepare config File
The input of ASR client demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
Here are sample files for thisASR client demo that can be downloaded:
```bash
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
```
### 2. Streaming ASR Client Usage
- microphone
```
python microphone_client.py
```
([简体中文](./README_cn.md)|English)
([English](./README.md)|中文)
# 语音服务
## 介绍
本文档介绍如何使用流式ASR的三种不同客户端:网页、麦克风、Python模拟流式服务
本文档介绍如何使用流式ASR的一种不同客户端:麦克风
## 使用方法
......@@ -20,7 +20,7 @@
可以下载此 ASR client的示例音频:
```bash
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
```
### 2. 流式 ASR 客户端使用方法
......@@ -40,10 +40,3 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
python microphone_client.py
```
- 网页
```
# 进入web目录后参考相关readme.md
```
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册