Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
c7b987c5
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c7b987c5
编写于
4月 15, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
format
上级
cad09b49
变更
36
显示空白变更内容
内联
并排
Showing
36 changed file
with
46674 addition
and
34995 deletion
+46674
-34995
demos/audio_searching/src/audio_search.py
demos/audio_searching/src/audio_search.py
+1
-2
demos/audio_searching/src/encode.py
demos/audio_searching/src/encode.py
+1
-1
demos/audio_searching/src/mysql_helpers.py
demos/audio_searching/src/mysql_helpers.py
+0
-1
demos/audio_searching/src/operations/count.py
demos/audio_searching/src/operations/count.py
+0
-1
demos/audio_searching/src/operations/drop.py
demos/audio_searching/src/operations/drop.py
+0
-1
demos/audio_searching/src/operations/load.py
demos/audio_searching/src/operations/load.py
+2
-4
demos/audio_searching/src/operations/search.py
demos/audio_searching/src/operations/search.py
+0
-1
demos/audio_searching/src/vpr_search.py
demos/audio_searching/src/vpr_search.py
+1
-2
paddleaudio/setup.py
paddleaudio/setup.py
+1
-1
paddlespeech/server/bin/paddlespeech_client.py
paddlespeech/server/bin/paddlespeech_client.py
+5
-4
paddlespeech/server/tests/asr/online/web/app.py
paddlespeech/server/tests/asr/online/web/app.py
+4
-3
paddlespeech/server/tests/asr/online/web/readme.md
paddlespeech/server/tests/asr/online/web/readme.md
+1
-1
paddlespeech/vector/io/dataset.py
paddlespeech/vector/io/dataset.py
+1
-0
paddlespeech/vector/io/dataset_from_json.py
paddlespeech/vector/io/dataset_from_json.py
+1
-1
speechx/examples/ds2_ol/README.md
speechx/examples/ds2_ol/README.md
+1
-1
speechx/examples/ds2_ol/decoder/README.md
speechx/examples/ds2_ol/decoder/README.md
+1
-1
speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
...mples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
+5
-4
speechx/examples/ds2_ol/feat/README.md
speechx/examples/ds2_ol/feat/README.md
+0
-1
speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
+7
-5
speechx/examples/ds2_ol/nnet/README.md
speechx/examples/ds2_ol/nnet/README.md
+1
-1
speechx/examples/ds2_ol/nnet/ds2-model-ol-test.cc
speechx/examples/ds2_ol/nnet/ds2-model-ol-test.cc
+4
-4
speechx/examples/ngram/README.md
speechx/examples/ngram/README.md
+0
-2
speechx/speechx/decoder/ctc_beam_search_decoder.cc
speechx/speechx/decoder/ctc_beam_search_decoder.cc
+1
-2
speechx/speechx/frontend/audio/linear_spectrogram.h
speechx/speechx/frontend/audio/linear_spectrogram.h
+2
-2
speechx/speechx/nnet/decodable.cc
speechx/speechx/nnet/decodable.cc
+7
-7
speechx/speechx/nnet/paddle_nnet.h
speechx/speechx/nnet/paddle_nnet.h
+2
-1
speechx/speechx/utils/file_utils.cc
speechx/speechx/utils/file_utils.cc
+2
-3
speechx/speechx/utils/file_utils.h
speechx/speechx/utils/file_utils.h
+0
-1
speechx/speechx/utils/simdjson.cpp
speechx/speechx/utils/simdjson.cpp
+13057
-9749
speechx/speechx/utils/simdjson.h
speechx/speechx/utils/simdjson.h
+33079
-24806
speechx/tools/fstbin/fstaddselfloops.cc
speechx/tools/fstbin/fstaddselfloops.cc
+76
-60
speechx/tools/fstbin/fstdeterminizestar.cc
speechx/tools/fstbin/fstdeterminizestar.cc
+62
-46
speechx/tools/fstbin/fstisstochastic.cc
speechx/tools/fstbin/fstisstochastic.cc
+53
-38
speechx/tools/fstbin/fstminimizeencoded.cc
speechx/tools/fstbin/fstminimizeencoded.cc
+44
-29
speechx/tools/fstbin/fsttablecompose.cc
speechx/tools/fstbin/fsttablecompose.cc
+113
-92
speechx/tools/lmbin/arpa2fst.cc
speechx/tools/lmbin/arpa2fst.cc
+139
-117
未找到文件。
demos/audio_searching/src/audio_search.py
浏览文件 @
c7b987c5
...
@@ -20,6 +20,7 @@ from diskcache import Cache
...
@@ -20,6 +20,7 @@ from diskcache import Cache
from
fastapi
import
FastAPI
from
fastapi
import
FastAPI
from
fastapi
import
File
from
fastapi
import
File
from
fastapi
import
UploadFile
from
fastapi
import
UploadFile
from
logs
import
LOGGER
from
milvus_helpers
import
MilvusHelper
from
milvus_helpers
import
MilvusHelper
from
mysql_helpers
import
MySQLHelper
from
mysql_helpers
import
MySQLHelper
from
operations.count
import
do_count
from
operations.count
import
do_count
...
@@ -31,8 +32,6 @@ from starlette.middleware.cors import CORSMiddleware
...
@@ -31,8 +32,6 @@ from starlette.middleware.cors import CORSMiddleware
from
starlette.requests
import
Request
from
starlette.requests
import
Request
from
starlette.responses
import
FileResponse
from
starlette.responses
import
FileResponse
from
logs
import
LOGGER
app
=
FastAPI
()
app
=
FastAPI
()
app
.
add_middleware
(
app
.
add_middleware
(
CORSMiddleware
,
CORSMiddleware
,
...
...
demos/audio_searching/src/encode.py
浏览文件 @
c7b987c5
...
@@ -12,8 +12,8 @@
...
@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
numpy
as
np
import
numpy
as
np
from
logs
import
LOGGER
from
logs
import
LOGGER
from
paddlespeech.cli
import
VectorExecutor
from
paddlespeech.cli
import
VectorExecutor
vector_executor
=
VectorExecutor
()
vector_executor
=
VectorExecutor
()
...
...
demos/audio_searching/src/mysql_helpers.py
浏览文件 @
c7b987c5
...
@@ -20,7 +20,6 @@ from config import MYSQL_HOST
...
@@ -20,7 +20,6 @@ from config import MYSQL_HOST
from
config
import
MYSQL_PORT
from
config
import
MYSQL_PORT
from
config
import
MYSQL_PWD
from
config
import
MYSQL_PWD
from
config
import
MYSQL_USER
from
config
import
MYSQL_USER
from
logs
import
LOGGER
from
logs
import
LOGGER
...
...
demos/audio_searching/src/operations/count.py
浏览文件 @
c7b987c5
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
import
sys
import
sys
from
config
import
DEFAULT_TABLE
from
config
import
DEFAULT_TABLE
from
logs
import
LOGGER
from
logs
import
LOGGER
...
...
demos/audio_searching/src/operations/drop.py
浏览文件 @
c7b987c5
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
import
sys
import
sys
from
config
import
DEFAULT_TABLE
from
config
import
DEFAULT_TABLE
from
logs
import
LOGGER
from
logs
import
LOGGER
...
...
demos/audio_searching/src/operations/load.py
浏览文件 @
c7b987c5
...
@@ -17,7 +17,6 @@ import sys
...
@@ -17,7 +17,6 @@ import sys
from
config
import
DEFAULT_TABLE
from
config
import
DEFAULT_TABLE
from
diskcache
import
Cache
from
diskcache
import
Cache
from
encode
import
get_audio_embedding
from
encode
import
get_audio_embedding
from
logs
import
LOGGER
from
logs
import
LOGGER
...
@@ -27,8 +26,7 @@ def get_audios(path):
...
@@ -27,8 +26,7 @@ def get_audios(path):
"""
"""
supported_formats
=
[
".wav"
,
".mp3"
,
".ogg"
,
".flac"
,
".m4a"
]
supported_formats
=
[
".wav"
,
".mp3"
,
".ogg"
,
".flac"
,
".m4a"
]
return
[
return
[
item
item
for
sublist
in
[[
os
.
path
.
join
(
dir
,
file
)
for
file
in
files
]
for
sublist
in
[[
os
.
path
.
join
(
dir
,
file
)
for
file
in
files
]
for
dir
,
_
,
files
in
list
(
os
.
walk
(
path
))]
for
dir
,
_
,
files
in
list
(
os
.
walk
(
path
))]
for
item
in
sublist
if
os
.
path
.
splitext
(
item
)[
1
]
in
supported_formats
for
item
in
sublist
if
os
.
path
.
splitext
(
item
)[
1
]
in
supported_formats
]
]
...
...
demos/audio_searching/src/operations/search.py
浏览文件 @
c7b987c5
...
@@ -17,7 +17,6 @@ import numpy
...
@@ -17,7 +17,6 @@ import numpy
from
config
import
DEFAULT_TABLE
from
config
import
DEFAULT_TABLE
from
config
import
TOP_K
from
config
import
TOP_K
from
encode
import
get_audio_embedding
from
encode
import
get_audio_embedding
from
logs
import
LOGGER
from
logs
import
LOGGER
...
...
demos/audio_searching/src/vpr_search.py
浏览文件 @
c7b987c5
...
@@ -18,6 +18,7 @@ from config import UPLOAD_PATH
...
@@ -18,6 +18,7 @@ from config import UPLOAD_PATH
from
fastapi
import
FastAPI
from
fastapi
import
FastAPI
from
fastapi
import
File
from
fastapi
import
File
from
fastapi
import
UploadFile
from
fastapi
import
UploadFile
from
logs
import
LOGGER
from
mysql_helpers
import
MySQLHelper
from
mysql_helpers
import
MySQLHelper
from
operations.count
import
do_count_vpr
from
operations.count
import
do_count_vpr
from
operations.count
import
do_get
from
operations.count
import
do_get
...
@@ -30,8 +31,6 @@ from starlette.middleware.cors import CORSMiddleware
...
@@ -30,8 +31,6 @@ from starlette.middleware.cors import CORSMiddleware
from
starlette.requests
import
Request
from
starlette.requests
import
Request
from
starlette.responses
import
FileResponse
from
starlette.responses
import
FileResponse
from
logs
import
LOGGER
app
=
FastAPI
()
app
=
FastAPI
()
app
.
add_middleware
(
app
.
add_middleware
(
CORSMiddleware
,
CORSMiddleware
,
...
...
paddleaudio/setup.py
浏览文件 @
c7b987c5
paddlespeech/server/bin/paddlespeech_client.py
浏览文件 @
c7b987c5
...
@@ -12,15 +12,15 @@
...
@@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
argparse
import
argparse
import
asyncio
import
base64
import
base64
import
io
import
io
import
json
import
json
import
logging
import
os
import
os
import
random
import
random
import
time
import
time
from
typing
import
List
from
typing
import
List
import
logging
import
asyncio
import
numpy
as
np
import
numpy
as
np
import
requests
import
requests
...
@@ -30,9 +30,9 @@ from ..executor import BaseExecutor
...
@@ -30,9 +30,9 @@ from ..executor import BaseExecutor
from
..util
import
cli_client_register
from
..util
import
cli_client_register
from
..util
import
stats_wrapper
from
..util
import
stats_wrapper
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.log
import
logger
from
paddlespeech.server.tests.asr.online.websocket_client
import
ASRAudioHandler
from
paddlespeech.server.utils.audio_process
import
wav2pcm
from
paddlespeech.server.utils.audio_process
import
wav2pcm
from
paddlespeech.server.utils.util
import
wav2base64
from
paddlespeech.server.utils.util
import
wav2base64
from
paddlespeech.server.tests.asr.online.websocket_client
import
ASRAudioHandler
__all__
=
[
'TTSClientExecutor'
,
'ASRClientExecutor'
,
'CLSClientExecutor'
]
__all__
=
[
'TTSClientExecutor'
,
'ASRClientExecutor'
,
'CLSClientExecutor'
]
...
@@ -234,7 +234,8 @@ class ASRClientExecutor(BaseExecutor):
...
@@ -234,7 +234,8 @@ class ASRClientExecutor(BaseExecutor):
@
cli_client_register
(
@
cli_client_register
(
name
=
'paddlespeech_client.asr_online'
,
description
=
'visit asr online service'
)
name
=
'paddlespeech_client.asr_online'
,
description
=
'visit asr online service'
)
class
ASRClientExecutor
(
BaseExecutor
):
class
ASRClientExecutor
(
BaseExecutor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
(
ASRClientExecutor
,
self
).
__init__
()
super
(
ASRClientExecutor
,
self
).
__init__
()
...
...
paddlespeech/server/tests/asr/online/web/app.py
浏览文件 @
c7b987c5
#!/usr/bin/env python3
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
# Copyright 2021 Mobvoi Inc. All Rights Reserved.
# Copyright 2021 Mobvoi Inc. All Rights Reserved.
# Author: zhendong.peng@mobvoi.com (Zhendong Peng)
# Author: zhendong.peng@mobvoi.com (Zhendong Peng)
import
argparse
import
argparse
from
flask
import
Flask
,
render_template
from
flask
import
Flask
from
flask
import
render_template
parser
=
argparse
.
ArgumentParser
(
description
=
'training your network'
)
parser
=
argparse
.
ArgumentParser
(
description
=
'training your network'
)
parser
.
add_argument
(
'--port'
,
default
=
19999
,
type
=
int
,
help
=
'port id'
)
parser
.
add_argument
(
'--port'
,
default
=
19999
,
type
=
int
,
help
=
'port id'
)
...
@@ -14,9 +13,11 @@ args = parser.parse_args()
...
@@ -14,9 +13,11 @@ args = parser.parse_args()
app
=
Flask
(
__name__
)
app
=
Flask
(
__name__
)
@
app
.
route
(
'/'
)
@
app
.
route
(
'/'
)
def
index
():
def
index
():
return
render_template
(
'index.html'
)
return
render_template
(
'index.html'
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
app
.
run
(
host
=
'0.0.0.0'
,
port
=
args
.
port
,
debug
=
True
)
app
.
run
(
host
=
'0.0.0.0'
,
port
=
args
.
port
,
debug
=
True
)
paddlespeech/server/tests/asr/online/web/readme.md
浏览文件 @
c7b987c5
paddlespeech/vector/io/dataset.py
浏览文件 @
c7b987c5
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
# limitations under the License.
# limitations under the License.
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
dataclasses
import
fields
from
dataclasses
import
fields
from
paddle.io
import
Dataset
from
paddle.io
import
Dataset
from
paddleaudio
import
load
as
load_audio
from
paddleaudio
import
load
as
load_audio
...
...
paddlespeech/vector/io/dataset_from_json.py
浏览文件 @
c7b987c5
...
@@ -12,9 +12,9 @@
...
@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
json
import
json
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
dataclasses
import
fields
from
dataclasses
import
fields
from
paddle.io
import
Dataset
from
paddle.io
import
Dataset
from
paddleaudio
import
load
as
load_audio
from
paddleaudio
import
load
as
load_audio
...
...
speechx/examples/ds2_ol/README.md
浏览文件 @
c7b987c5
speechx/examples/ds2_ol/decoder/README.md
浏览文件 @
c7b987c5
speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
浏览文件 @
c7b987c5
...
@@ -34,7 +34,8 @@ DEFINE_int32(receptive_field_length,
...
@@ -34,7 +34,8 @@ DEFINE_int32(receptive_field_length,
DEFINE_int32
(
downsampling_rate
,
DEFINE_int32
(
downsampling_rate
,
4
,
4
,
"two CNN(kernel=5) module downsampling rate."
);
"two CNN(kernel=5) module downsampling rate."
);
DEFINE_string
(
model_input_names
,
DEFINE_string
(
model_input_names
,
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box"
,
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box"
,
"model input names"
);
"model input names"
);
DEFINE_string
(
model_output_names
,
DEFINE_string
(
model_output_names
,
...
...
speechx/examples/ds2_ol/feat/README.md
浏览文件 @
c7b987c5
...
@@ -5,4 +5,3 @@ ASR audio feature test bins. We using theses bins to test linaer/fbank/mfcc asr
...
@@ -5,4 +5,3 @@ ASR audio feature test bins. We using theses bins to test linaer/fbank/mfcc asr
*
linear_spectrogram_without_db_norm_main.cc
*
linear_spectrogram_without_db_norm_main.cc
compute linear spectrogram w/o db norm in streaming manner.
compute linear spectrogram w/o db norm in streaming manner.
speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
浏览文件 @
c7b987c5
...
@@ -31,7 +31,7 @@ int main(int argc, char* argv[]) {
...
@@ -31,7 +31,7 @@ int main(int argc, char* argv[]) {
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
google
::
InitGoogleLogging
(
argv
[
0
]);
google
::
InitGoogleLogging
(
argv
[
0
]);
LOG
(
INFO
)
<<
"cmvn josn path: "
<<
FLAGS_json_file
;
LOG
(
INFO
)
<<
"cmvn josn path: "
<<
FLAGS_json_file
;
padded_string
json
=
padded_string
::
load
(
FLAGS_json_file
);
padded_string
json
=
padded_string
::
load
(
FLAGS_json_file
);
ondemand
::
parser
parser
;
ondemand
::
parser
parser
;
...
@@ -43,7 +43,9 @@ int main(int argc, char* argv[]) {
...
@@ -43,7 +43,9 @@ int main(int argc, char* argv[]) {
for
(
double
x
:
mean_stat
)
{
for
(
double
x
:
mean_stat
)
{
mean_stat_vec
.
push_back
(
x
);
mean_stat_vec
.
push_back
(
x
);
}
}
// LOG(INFO) << mean_stat; this line will casue simdjson::simdjson_error("Objects and arrays can only be iterated when they are first encountered")
// LOG(INFO) << mean_stat; this line will casue
// simdjson::simdjson_error("Objects and arrays can only be iterated when
// they are first encountered")
ondemand
::
array
var_stat
=
val
[
"var_stat"
];
ondemand
::
array
var_stat
=
val
[
"var_stat"
];
std
::
vector
<
kaldi
::
BaseFloat
>
var_stat_vec
;
std
::
vector
<
kaldi
::
BaseFloat
>
var_stat_vec
;
...
...
speechx/examples/ds2_ol/nnet/README.md
浏览文件 @
c7b987c5
speechx/examples/ds2_ol/nnet/ds2-model-ol-test.cc
浏览文件 @
c7b987c5
...
@@ -14,8 +14,6 @@
...
@@ -14,8 +14,6 @@
// deepspeech2 online model info
// deepspeech2 online model info
#include "base/flags.h"
#include "base/log.h"
#include <algorithm>
#include <algorithm>
#include <fstream>
#include <fstream>
#include <functional>
#include <functional>
...
@@ -23,6 +21,8 @@
...
@@ -23,6 +21,8 @@
#include <iterator>
#include <iterator>
#include <numeric>
#include <numeric>
#include <thread>
#include <thread>
#include "base/flags.h"
#include "base/log.h"
#include "paddle_inference_api.h"
#include "paddle_inference_api.h"
using
std
::
cout
;
using
std
::
cout
;
...
...
speechx/examples/ngram/README.md
浏览文件 @
c7b987c5
# NGram Train
# NGram Train
speechx/speechx/decoder/ctc_beam_search_decoder.cc
浏览文件 @
c7b987c5
...
@@ -92,8 +92,7 @@ void CTCBeamSearch::AdvanceDecode(
...
@@ -92,8 +92,7 @@ void CTCBeamSearch::AdvanceDecode(
while
(
1
)
{
while
(
1
)
{
vector
<
vector
<
BaseFloat
>>
likelihood
;
vector
<
vector
<
BaseFloat
>>
likelihood
;
vector
<
BaseFloat
>
frame_prob
;
vector
<
BaseFloat
>
frame_prob
;
bool
flag
=
bool
flag
=
decodable
->
FrameLikelihood
(
num_frame_decoded_
,
&
frame_prob
);
decodable
->
FrameLikelihood
(
num_frame_decoded_
,
&
frame_prob
);
if
(
flag
==
false
)
break
;
if
(
flag
==
false
)
break
;
likelihood
.
push_back
(
frame_prob
);
likelihood
.
push_back
(
frame_prob
);
AdvanceDecoding
(
likelihood
);
AdvanceDecoding
(
likelihood
);
...
...
speechx/speechx/frontend/audio/linear_spectrogram.h
浏览文件 @
c7b987c5
speechx/speechx/nnet/decodable.cc
浏览文件 @
c7b987c5
...
@@ -49,10 +49,9 @@ bool Decodable::IsLastFrame(int32 frame) {
...
@@ -49,10 +49,9 @@ bool Decodable::IsLastFrame(int32 frame) {
int32
Decodable
::
NumIndices
()
const
{
return
0
;
}
int32
Decodable
::
NumIndices
()
const
{
return
0
;
}
// the ilable(TokenId) of wfst(TLG) insert <eps>(id = 0) in front of Nnet prob id.
// the ilable(TokenId) of wfst(TLG) insert <eps>(id = 0) in front of Nnet prob
int32
Decodable
::
TokenId2NnetId
(
int32
token_id
)
{
// id.
return
token_id
-
1
;
int32
Decodable
::
TokenId2NnetId
(
int32
token_id
)
{
return
token_id
-
1
;
}
}
BaseFloat
Decodable
::
LogLikelihood
(
int32
frame
,
int32
index
)
{
BaseFloat
Decodable
::
LogLikelihood
(
int32
frame
,
int32
index
)
{
CHECK_LE
(
index
,
nnet_cache_
.
NumCols
());
CHECK_LE
(
index
,
nnet_cache_
.
NumCols
());
...
@@ -60,7 +59,8 @@ BaseFloat Decodable::LogLikelihood(int32 frame, int32 index) {
...
@@ -60,7 +59,8 @@ BaseFloat Decodable::LogLikelihood(int32 frame, int32 index) {
int32
frame_idx
=
frame
-
frame_offset_
;
int32
frame_idx
=
frame
-
frame_offset_
;
// the nnet output is prob ranther than log prob
// the nnet output is prob ranther than log prob
// the index - 1, because the ilabel
// the index - 1, because the ilabel
return
acoustic_scale_
*
std
::
log
(
nnet_cache_
(
frame_idx
,
TokenId2NnetId
(
index
))
+
return
acoustic_scale_
*
std
::
log
(
nnet_cache_
(
frame_idx
,
TokenId2NnetId
(
index
))
+
std
::
numeric_limits
<
float
>::
min
());
std
::
numeric_limits
<
float
>::
min
());
}
}
...
...
speechx/speechx/nnet/paddle_nnet.h
浏览文件 @
c7b987c5
...
@@ -45,7 +45,8 @@ struct ModelOptions {
...
@@ -45,7 +45,8 @@ struct ModelOptions {
thread_num
(
2
),
thread_num
(
2
),
use_gpu
(
false
),
use_gpu
(
false
),
input_names
(
input_names
(
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box"
),
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_"
"box"
),
output_names
(
output_names
(
"save_infer_model/scale_0.tmp_1,save_infer_model/"
"save_infer_model/scale_0.tmp_1,save_infer_model/"
"scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/"
"scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/"
...
...
speechx/speechx/utils/file_utils.cc
浏览文件 @
c7b987c5
...
@@ -40,5 +40,4 @@ std::string ReadFile2String(const std::string& path) {
...
@@ -40,5 +40,4 @@ std::string ReadFile2String(const std::string& path) {
return
std
::
string
((
std
::
istreambuf_iterator
<
char
>
(
input_file
)),
return
std
::
string
((
std
::
istreambuf_iterator
<
char
>
(
input_file
)),
std
::
istreambuf_iterator
<
char
>
());
std
::
istreambuf_iterator
<
char
>
());
}
}
}
}
speechx/speechx/utils/file_utils.h
浏览文件 @
c7b987c5
...
@@ -20,5 +20,4 @@ bool ReadFileToVector(const std::string& filename,
...
@@ -20,5 +20,4 @@ bool ReadFileToVector(const std::string& filename,
std
::
vector
<
std
::
string
>*
data
);
std
::
vector
<
std
::
string
>*
data
);
std
::
string
ReadFile2String
(
const
std
::
string
&
path
);
std
::
string
ReadFile2String
(
const
std
::
string
&
path
);
}
}
speechx/speechx/utils/simdjson.cpp
浏览文件 @
c7b987c5
因为 它太大了无法显示 source diff 。你可以改为
查看blob
。
speechx/speechx/utils/simdjson.h
浏览文件 @
c7b987c5
因为 它太大了无法显示 source diff 。你可以改为
查看blob
。
speechx/tools/fstbin/fstaddselfloops.cc
浏览文件 @
c7b987c5
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fstaddselfloops.cc
// fstbin/fstaddselfloops.cc
// Copyright 2009-2011 Microsoft Corporation
// Copyright 2009-2011 Microsoft Corporation
...
@@ -43,12 +57,14 @@ int main(int argc, char *argv[]) {
...
@@ -43,12 +57,14 @@ int main(int argc, char *argv[]) {
const
char
*
usage
=
const
char
*
usage
=
"Adds self-loops to states of an FST to propagate disambiguation "
"Adds self-loops to states of an FST to propagate disambiguation "
"symbols through it
\n
"
"symbols through it
\n
"
"They are added on each final state and each state with non-epsilon "
"They are added on each final state and each state with "
"non-epsilon "
"output symbols
\n
"
"output symbols
\n
"
"on at least one arc out of the state. Useful in conjunction with "
"on at least one arc out of the state. Useful in conjunction with "
"predeterminize
\n
"
"predeterminize
\n
"
"
\n
"
"
\n
"
"Usage: fstaddselfloops in-disambig-list out-disambig-list [in.fst "
"Usage: fstaddselfloops in-disambig-list out-disambig-list "
"[in.fst "
"[out.fst] ]
\n
"
"[out.fst] ]
\n
"
"E.g: fstaddselfloops in.list out.list < in.fst > withloops.fst
\n
"
"E.g: fstaddselfloops in.list out.list < in.fst > withloops.fst
\n
"
"in.list and out.list are lists of integers, one per line, of the
\n
"
"in.list and out.list are lists of integers, one per line, of the
\n
"
...
@@ -71,19 +87,19 @@ int main(int argc, char *argv[]) {
...
@@ -71,19 +87,19 @@ int main(int argc, char *argv[]) {
std
::
vector
<
int32
>
disambig_in
;
std
::
vector
<
int32
>
disambig_in
;
if
(
!
ReadIntegerVectorSimple
(
disambig_in_rxfilename
,
&
disambig_in
))
if
(
!
ReadIntegerVectorSimple
(
disambig_in_rxfilename
,
&
disambig_in
))
KALDI_ERR
KALDI_ERR
<<
"fstaddselfloops: Could not read disambiguation "
<<
"fstaddselfloops: Could not read disambiguation
symbols from "
"
symbols from "
<<
kaldi
::
PrintableRxfilename
(
disambig_in_rxfilename
);
<<
kaldi
::
PrintableRxfilename
(
disambig_in_rxfilename
);
std
::
vector
<
int32
>
disambig_out
;
std
::
vector
<
int32
>
disambig_out
;
if
(
!
ReadIntegerVectorSimple
(
disambig_out_rxfilename
,
&
disambig_out
))
if
(
!
ReadIntegerVectorSimple
(
disambig_out_rxfilename
,
&
disambig_out
))
KALDI_ERR
KALDI_ERR
<<
"fstaddselfloops: Could not read disambiguation "
<<
"fstaddselfloops: Could not read disambiguation
symbols from "
"
symbols from "
<<
kaldi
::
PrintableRxfilename
(
disambig_out_rxfilename
);
<<
kaldi
::
PrintableRxfilename
(
disambig_out_rxfilename
);
if
(
disambig_in
.
size
()
!=
disambig_out
.
size
())
if
(
disambig_in
.
size
()
!=
disambig_out
.
size
())
KALDI_ERR
KALDI_ERR
<<
"fstaddselfloops: mismatch in size of disambiguation "
<<
"fstaddselfloops: mismatch in size of disambiguation
symbols"
;
"
symbols"
;
AddSelfLoops
(
fst
,
disambig_in
,
disambig_out
);
AddSelfLoops
(
fst
,
disambig_in
,
disambig_out
);
...
...
speechx/tools/fstbin/fstdeterminizestar.cc
浏览文件 @
c7b987c5
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fstdeterminizestar.cc
// fstbin/fstdeterminizestar.cc
// Copyright 2009-2011 Microsoft Corporation
// Copyright 2009-2011 Microsoft Corporation
...
@@ -73,11 +87,13 @@ int main(int argc, char *argv[]) {
...
@@ -73,11 +87,13 @@ int main(int argc, char *argv[]) {
bool
use_log
=
false
;
bool
use_log
=
false
;
ParseOptions
po
(
usage
);
ParseOptions
po
(
usage
);
po
.
Register
(
"use-log"
,
&
use_log
,
"Determinize in log semiring."
);
po
.
Register
(
"use-log"
,
&
use_log
,
"Determinize in log semiring."
);
po
.
Register
(
"delta"
,
&
delta
,
po
.
Register
(
"delta"
,
&
delta
,
"Delta value used to determine equivalence of weights."
);
"Delta value used to determine equivalence of weights."
);
po
.
Register
(
po
.
Register
(
"max-states"
,
"max-states"
,
&
max_states
,
&
max_states
,
"Maximum number of states in determinized FST before it will abort."
);
"Maximum number of states in determinized FST before it "
"will abort."
);
po
.
Read
(
argc
,
argv
);
po
.
Read
(
argc
,
argv
);
if
(
po
.
NumArgs
()
>
2
)
{
if
(
po
.
NumArgs
()
>
2
)
{
...
@@ -87,8 +103,8 @@ int main(int argc, char *argv[]) {
...
@@ -87,8 +103,8 @@ int main(int argc, char *argv[]) {
std
::
string
fst_in_str
=
po
.
GetOptArg
(
1
),
fst_out_str
=
po
.
GetOptArg
(
2
);
std
::
string
fst_in_str
=
po
.
GetOptArg
(
1
),
fst_out_str
=
po
.
GetOptArg
(
2
);
// This enables us to get traceback info from determinization that is
// This enables us to get traceback info from determinization that is
// not seeming to terminate.
// not seeming to terminate.
#if !defined(_MSC_VER) && !defined(__APPLE__)
#if !defined(_MSC_VER) && !defined(__APPLE__)
signal
(
SIGUSR1
,
signal_handler
);
signal
(
SIGUSR1
,
signal_handler
);
#endif
#endif
...
...
speechx/tools/fstbin/fstisstochastic.cc
浏览文件 @
c7b987c5
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fstisstochastic.cc
// fstbin/fstisstochastic.cc
// Copyright 2009-2011 Microsoft Corporation
// Copyright 2009-2011 Microsoft Corporation
...
@@ -48,7 +62,8 @@ int main(int argc, char *argv[]) {
...
@@ -48,7 +62,8 @@ int main(int argc, char *argv[]) {
using
kaldi
::
int32
;
using
kaldi
::
int32
;
const
char
*
usage
=
const
char
*
usage
=
"Checks whether an FST is stochastic and exits with success if so.
\n
"
"Checks whether an FST is stochastic and exits with success if "
"so.
\n
"
"Prints out maximum error (in log units).
\n
"
"Prints out maximum error (in log units).
\n
"
"
\n
"
"
\n
"
"Usage: fstisstochastic [ in.fst ]
\n
"
;
"Usage: fstisstochastic [ in.fst ]
\n
"
;
...
@@ -58,8 +73,8 @@ int main(int argc, char *argv[]) {
...
@@ -58,8 +73,8 @@ int main(int argc, char *argv[]) {
ParseOptions
po
(
usage
);
ParseOptions
po
(
usage
);
po
.
Register
(
"delta"
,
&
delta
,
"Maximum error to accept."
);
po
.
Register
(
"delta"
,
&
delta
,
"Maximum error to accept."
);
po
.
Register
(
"test-in-log"
,
&
test_in_log
,
po
.
Register
(
"Test stochasticity in log semiring."
);
"test-in-log"
,
&
test_in_log
,
"Test stochasticity in log semiring."
);
po
.
Read
(
argc
,
argv
);
po
.
Read
(
argc
,
argv
);
if
(
po
.
NumArgs
()
>
1
)
{
if
(
po
.
NumArgs
()
>
1
)
{
...
...
speechx/tools/fstbin/fstminimizeencoded.cc
浏览文件 @
c7b987c5
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fstminimizeencoded.cc
// fstbin/fstminimizeencoded.cc
// Copyright 2009-2011 Microsoft Corporation
// Copyright 2009-2011 Microsoft Corporation
...
@@ -46,7 +60,8 @@ int main(int argc, char *argv[]) {
...
@@ -46,7 +60,8 @@ int main(int argc, char *argv[]) {
float
delta
=
kDelta
;
float
delta
=
kDelta
;
ParseOptions
po
(
usage
);
ParseOptions
po
(
usage
);
po
.
Register
(
"delta"
,
&
delta
,
po
.
Register
(
"delta"
,
&
delta
,
"Delta likelihood used for quantization of weights"
);
"Delta likelihood used for quantization of weights"
);
po
.
Read
(
argc
,
argv
);
po
.
Read
(
argc
,
argv
);
...
...
speechx/tools/fstbin/fsttablecompose.cc
浏览文件 @
c7b987c5
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fsttablecompose.cc
// fstbin/fsttablecompose.cc
// Copyright 2009-2011 Microsoft Corporation
// Copyright 2009-2011 Microsoft Corporation
...
@@ -54,12 +68,15 @@ int main(int argc, char *argv[]) {
...
@@ -54,12 +68,15 @@ int main(int argc, char *argv[]) {
const
char
*
usage
=
const
char
*
usage
=
"Composition algorithm [between two FSTs of standard type, in "
"Composition algorithm [between two FSTs of standard type, in "
"tropical
\n
"
"tropical
\n
"
"semiring] that is more efficient for certain cases-- in particular,
\n
"
"semiring] that is more efficient for certain cases-- in "
"where one of the FSTs (the left one, if --match-side=left) has large
\n
"
"particular,
\n
"
"where one of the FSTs (the left one, if --match-side=left) has "
"large
\n
"
"out-degree
\n
"
"out-degree
\n
"
"
\n
"
"
\n
"
"Usage: fsttablecompose (fst1-rxfilename|fst1-rspecifier) "
"Usage: fsttablecompose (fst1-rxfilename|fst1-rspecifier) "
"(fst2-rxfilename|fst2-rspecifier) [(out-rxfilename|out-rspecifier)]
\n
"
;
"(fst2-rxfilename|fst2-rspecifier) "
"[(out-rxfilename|out-rspecifier)]
\n
"
;
ParseOptions
po
(
usage
);
ParseOptions
po
(
usage
);
...
@@ -67,11 +84,15 @@ int main(int argc, char *argv[]) {
...
@@ -67,11 +84,15 @@ int main(int argc, char *argv[]) {
std
::
string
match_side
=
"left"
;
std
::
string
match_side
=
"left"
;
std
::
string
compose_filter
=
"sequence"
;
std
::
string
compose_filter
=
"sequence"
;
po
.
Register
(
"connect"
,
&
opts
.
connect
,
"If true, trim FST before output."
);
po
.
Register
(
po
.
Register
(
"match-side"
,
&
match_side
,
"connect"
,
&
opts
.
connect
,
"If true, trim FST before output."
);
po
.
Register
(
"match-side"
,
&
match_side
,
"Side of composition to do table "
"Side of composition to do table "
"match, one of:
\"
left
\"
or
\"
right
\"
."
);
"match, one of:
\"
left
\"
or
\"
right
\"
."
);
po
.
Register
(
"compose-filter"
,
&
compose_filter
,
po
.
Register
(
"compose-filter"
,
&
compose_filter
,
"Composition filter to use, "
"Composition filter to use, "
"one of:
\"
alt_sequence
\"
,
\"
auto
\"
,
\"
match
\"
,
\"
sequence
\"
"
);
"one of:
\"
alt_sequence
\"
,
\"
auto
\"
,
\"
match
\"
,
\"
sequence
\"
"
);
...
...
speechx/tools/lmbin/arpa2fst.cc
浏览文件 @
c7b987c5
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// bin/arpa2fst.cc
// bin/arpa2fst.cc
//
//
// Copyright 2009-2011 Gilles Boulianne.
// Copyright 2009-2011 Gilles Boulianne.
...
@@ -31,8 +45,10 @@ int main(int argc, char *argv[]) {
...
@@ -31,8 +45,10 @@ int main(int argc, char *argv[]) {
"Usage: arpa2fst [opts] <input-arpa> <output-fst>
\n
"
"Usage: arpa2fst [opts] <input-arpa> <output-fst>
\n
"
" e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table="
" e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table="
"data/lang/words.txt lm/input.arpa G.fst
\n\n
"
"data/lang/words.txt lm/input.arpa G.fst
\n\n
"
"Note: When called without switches, the output G.fst will contain
\n
"
"Note: When called without switches, the output G.fst will "
"an embedded symbol table. This is compatible with the way a previous
\n
"
"contain
\n
"
"an embedded symbol table. This is compatible with the way a "
"previous
\n
"
"version of arpa2fst worked.
\n
"
;
"version of arpa2fst worked.
\n
"
;
ParseOptions
po
(
usage
);
ParseOptions
po
(
usage
);
...
@@ -51,14 +67,20 @@ int main(int argc, char *argv[]) {
...
@@ -51,14 +67,20 @@ int main(int argc, char *argv[]) {
po
.
Register
(
"bos-symbol"
,
&
bos_symbol
,
"Beginning of sentence symbol"
);
po
.
Register
(
"bos-symbol"
,
&
bos_symbol
,
"Beginning of sentence symbol"
);
po
.
Register
(
"eos-symbol"
,
&
eos_symbol
,
"End of sentence symbol"
);
po
.
Register
(
"eos-symbol"
,
&
eos_symbol
,
"End of sentence symbol"
);
po
.
Register
(
"disambig-symbol"
,
&
disambig_symbol
,
po
.
Register
(
"disambig-symbol"
,
&
disambig_symbol
,
"Disambiguator. If provided (e. g. #0), used on input side of "
"Disambiguator. If provided (e. g. #0), used on input side of "
"backoff links, and <s> and </s> are replaced with epsilons"
);
"backoff links, and <s> and </s> are replaced with epsilons"
);
po
.
Register
(
"read-symbol-table"
,
&
read_syms_filename
,
po
.
Register
(
"read-symbol-table"
,
&
read_syms_filename
,
"Use existing symbol table"
);
"Use existing symbol table"
);
po
.
Register
(
"write-symbol-table"
,
&
write_syms_filename
,
po
.
Register
(
"write-symbol-table"
,
&
write_syms_filename
,
"Write generated symbol table to a file"
);
"Write generated symbol table to a file"
);
po
.
Register
(
"keep-symbols"
,
&
keep_symbols
,
po
.
Register
(
"keep-symbols"
,
&
keep_symbols
,
"Store symbol table with FST. Symbols always saved to FST if "
"Store symbol table with FST. Symbols always saved to FST if "
"symbol tables are neither read or written (otherwise symbols "
"symbol tables are neither read or written (otherwise symbols "
"would be lost entirely)"
);
"would be lost entirely)"
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录