Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
3ce43016
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3ce43016
编写于
4月 16, 2022
作者:
X
xiongxinlei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add asr websocket server note, test=doc
上级
efc269b7
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
43 addition
and
27 deletion
+43
-27
demos/speech_recognition/run.sh
demos/speech_recognition/run.sh
+10
-0
paddlespeech/server/tests/asr/online/web/app.py
paddlespeech/server/tests/asr/online/web/app.py
+4
-3
paddlespeech/server/tests/asr/online/websocket_client.py
paddlespeech/server/tests/asr/online/websocket_client.py
+6
-11
paddlespeech/server/utils/buffer.py
paddlespeech/server/utils/buffer.py
+22
-8
paddlespeech/server/ws/asr_socket.py
paddlespeech/server/ws/asr_socket.py
+1
-5
未找到文件。
demos/speech_recognition/run.sh
0 → 100644
浏览文件 @
3ce43016
#!/bin/bash
wget
-c
https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
# asr
paddlespeech asr
--input
./zh.wav
# asr + punc
paddlespeech asr
--input
./zh.wav | paddlespeech text
--task
punc
paddlespeech/server/tests/asr/online/web/app.py
浏览文件 @
3ce43016
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2021 Mobvoi Inc. All Rights Reserved.
# Author: zhendong.peng@mobvoi.com (Zhendong Peng)
import
argparse
from
flask
import
Flask
,
render_template
from
flask
import
Flask
from
flask
import
render_template
parser
=
argparse
.
ArgumentParser
(
description
=
'training your network'
)
parser
.
add_argument
(
'--port'
,
default
=
19999
,
type
=
int
,
help
=
'port id'
)
...
...
@@ -14,9 +13,11 @@ args = parser.parse_args()
app
=
Flask
(
__name__
)
@
app
.
route
(
'/'
)
def
index
():
return
render_template
(
'index.html'
)
if
__name__
==
'__main__'
:
app
.
run
(
host
=
'0.0.0.0'
,
port
=
args
.
port
,
debug
=
True
)
paddlespeech/server/tests/asr/online/websocket_client.py
浏览文件 @
3ce43016
...
...
@@ -15,10 +15,11 @@
# -*- coding: UTF-8 -*-
import
argparse
import
asyncio
import
codecs
import
json
import
logging
import
os
import
codecs
import
numpy
as
np
import
soundfile
import
websockets
...
...
@@ -35,17 +36,17 @@ class ASRAudioHandler:
x_len
=
len
(
samples
)
# chunk_stride = 40 * 16 #40ms, sample_rate = 16kHz
chunk_size
=
80
*
16
#80ms, sample_rate = 16kHz
if
x_len
%
chunk_size
!=
0
:
padding_len_x
=
chunk_size
-
x_len
%
chunk_size
padding_len_x
=
chunk_size
-
x_len
%
chunk_size
else
:
padding_len_x
=
0
padding
=
np
.
zeros
((
padding_len_x
),
dtype
=
samples
.
dtype
)
padded_x
=
np
.
concatenate
([
samples
,
padding
],
axis
=
0
)
assert
(
x_len
+
padding_len_x
)
%
chunk_size
==
0
num_chunk
=
(
x_len
+
padding_len_x
)
/
chunk_size
assert
(
x_len
+
padding_len_x
)
%
chunk_size
==
0
num_chunk
=
(
x_len
+
padding_len_x
)
/
chunk_size
num_chunk
=
int
(
num_chunk
)
for
i
in
range
(
0
,
num_chunk
):
...
...
@@ -56,12 +57,7 @@ class ASRAudioHandler:
async
def
run
(
self
,
wavfile_path
:
str
):
logging
.
info
(
"send a message to the server"
)
# 读取音频
# self.read_wave()
# 发送 websocket 的 handshake 协议头
async
with
websockets
.
connect
(
self
.
url
)
as
ws
:
# server 端已经接收到 handshake 协议头
# 发送开始指令
audio_info
=
json
.
dumps
(
{
"name"
:
"test.wav"
,
...
...
@@ -97,7 +93,6 @@ class ASRAudioHandler:
msg
=
await
ws
.
recv
()
msg
=
json
.
loads
(
msg
)
logging
.
info
(
"receive msg={}"
.
format
(
msg
))
return
result
...
...
paddlespeech/server/utils/buffer.py
浏览文件 @
3ce43016
...
...
@@ -24,12 +24,22 @@ class Frame(object):
class
ChunkBuffer
(
object
):
def
__init__
(
self
,
window_n
=
7
,
# frame
shift_n
=
4
,
# frame
window_ms
=
20
,
# ms
shift_ms
=
10
,
# ms
window_n
=
7
,
shift_n
=
4
,
window_ms
=
20
,
shift_ms
=
10
,
sample_rate
=
16000
,
sample_width
=
2
):
"""audio sample data point buffer
Args:
window_n (int, optional): decode window frame length. Defaults to 7 frame.
shift_n (int, optional): decode shift frame length. Defaults to 4 frame.
window_ms (int, optional): frame length, ms. Defaults to 20 ms.
shift_ms (int, optional): shift length, ms. Defaults to 10 ms.
sample_rate (int, optional): audio sample rate. Defaults to 16000.
sample_width (int, optional): sample point bytes. Defaults to 2 bytes.
"""
self
.
window_n
=
window_n
self
.
shift_n
=
shift_n
self
.
window_ms
=
window_ms
...
...
@@ -38,11 +48,14 @@ class ChunkBuffer(object):
self
.
sample_width
=
sample_width
# int16 = 2; float32 = 4
self
.
remained_audio
=
b
''
self
.
window_sec
=
float
((
self
.
window_n
-
1
)
*
self
.
shift_ms
+
self
.
window_ms
)
/
1000.0
self
.
window_sec
=
float
((
self
.
window_n
-
1
)
*
self
.
shift_ms
+
self
.
window_ms
)
/
1000.0
self
.
shift_sec
=
float
(
self
.
shift_n
*
self
.
shift_ms
/
1000.0
)
self
.
window_bytes
=
int
(
self
.
window_sec
*
self
.
sample_rate
*
self
.
sample_width
)
self
.
shift_bytes
=
int
(
self
.
shift_sec
*
self
.
sample_rate
*
self
.
sample_width
)
self
.
window_bytes
=
int
(
self
.
window_sec
*
self
.
sample_rate
*
self
.
sample_width
)
self
.
shift_bytes
=
int
(
self
.
shift_sec
*
self
.
sample_rate
*
self
.
sample_width
)
def
frame_generator
(
self
,
audio
):
"""Generates audio frames from PCM audio data.
...
...
@@ -57,7 +70,8 @@ class ChunkBuffer(object):
timestamp
=
0.0
while
offset
+
self
.
window_bytes
<=
len
(
audio
):
yield
Frame
(
audio
[
offset
:
offset
+
self
.
window_bytes
],
timestamp
,
self
.
window_sec
)
yield
Frame
(
audio
[
offset
:
offset
+
self
.
window_bytes
],
timestamp
,
self
.
window_sec
)
timestamp
+=
self
.
shift_sec
offset
+=
self
.
shift_bytes
...
...
paddlespeech/server/ws/asr_socket.py
浏览文件 @
3ce43016
...
...
@@ -79,11 +79,6 @@ async def websocket_endpoint(websocket: WebSocket):
elif
"bytes"
in
message
:
message
=
message
[
"bytes"
]
# vad for input bytes audio
# vad.add_audio(message)
# message = b''.join(f for f in vad.vad_collector()
# if f is not None)
engine_pool
=
get_engine_pool
()
asr_engine
=
engine_pool
[
'asr'
]
asr_results
=
""
...
...
@@ -95,6 +90,7 @@ async def websocket_endpoint(websocket: WebSocket):
sample_rate
)
asr_engine
.
run
(
x_chunk
,
x_chunk_lens
)
asr_results
=
asr_engine
.
postprocess
()
asr_results
=
asr_engine
.
postprocess
()
resp
=
{
'asr_results'
:
asr_results
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录