Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
b75268c5
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
11 个月 前同步成功
通知
203
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
b75268c5
编写于
3月 29, 2022
作者:
Y
YangZhou
提交者:
GitHub
3月 29, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1616 from zh794390558/spx
[speechx] more comment of code
上级
2ea578e8
84d712d4
变更
22
隐藏空白更改
内联
并排
Showing
22 changed file
with
226 addition
and
33 deletion
+226
-33
dataset/librispeech/librispeech.py
dataset/librispeech/librispeech.py
+1
-1
demos/audio_searching/src/encode.py
demos/audio_searching/src/encode.py
+1
-1
demos/audio_searching/src/operations/load.py
demos/audio_searching/src/operations/load.py
+2
-3
examples/ami/sd0/local/ami_prepare.py
examples/ami/sd0/local/ami_prepare.py
+1
-1
paddlespeech/s2t/decoders/recog_bin.py
paddlespeech/s2t/decoders/recog_bin.py
+1
-1
paddlespeech/s2t/utils/cli_utils.py
paddlespeech/s2t/utils/cli_utils.py
+1
-1
paddlespeech/s2t/utils/utility.py
paddlespeech/s2t/utils/utility.py
+1
-1
paddlespeech/vector/cluster/diarization.py
paddlespeech/vector/cluster/diarization.py
+1
-1
speechx/README.md
speechx/README.md
+3
-1
speechx/build.sh
speechx/build.sh
+2
-3
speechx/cmake/FindGFortranLibs.cmake
speechx/cmake/FindGFortranLibs.cmake
+145
-0
speechx/cmake/external/openblas.cmake
speechx/cmake/external/openblas.cmake
+21
-0
speechx/cmake/external/openfst.cmake
speechx/cmake/external/openfst.cmake
+5
-4
speechx/speechx/frontend/feature_cache.cc
speechx/speechx/frontend/feature_cache.cc
+4
-0
speechx/speechx/frontend/feature_cache.h
speechx/speechx/frontend/feature_cache.h
+11
-2
speechx/speechx/frontend/feature_extractor_interface.h
speechx/speechx/frontend/feature_extractor_interface.h
+15
-6
speechx/speechx/frontend/linear_spectrogram.h
speechx/speechx/frontend/linear_spectrogram.h
+5
-3
utils/DER.py
utils/DER.py
+1
-1
utils/addjson.py
utils/addjson.py
+1
-1
utils/apply-cmvn.py
utils/apply-cmvn.py
+1
-1
utils/copy-feats.py
utils/copy-feats.py
+1
-0
utils/merge_scp2json.py
utils/merge_scp2json.py
+2
-1
未找到文件。
dataset/librispeech/librispeech.py
浏览文件 @
b75268c5
...
...
@@ -20,12 +20,12 @@ of each audio file in the data set.
"""
import
argparse
import
codecs
import
distutils.util
import
io
import
json
import
os
from
multiprocessing.pool
import
Pool
import
distutils.util
import
soundfile
from
utils.utility
import
download
...
...
demos/audio_searching/src/encode.py
浏览文件 @
b75268c5
...
...
@@ -16,8 +16,8 @@ import os
import
librosa
import
numpy
as
np
from
config
import
DEFAULT_TABLE
from
logs
import
LOGGER
from
paddlespeech.cli
import
VectorExecutor
vector_executor
=
VectorExecutor
()
...
...
demos/audio_searching/src/operations/load.py
浏览文件 @
b75268c5
...
...
@@ -26,9 +26,8 @@ def get_audios(path):
"""
supported_formats
=
[
".wav"
,
".mp3"
,
".ogg"
,
".flac"
,
".m4a"
]
return
[
item
for
sublist
in
[[
os
.
path
.
join
(
dir
,
file
)
for
file
in
files
]
for
dir
,
_
,
files
in
list
(
os
.
walk
(
path
))]
item
for
sublist
in
[[
os
.
path
.
join
(
dir
,
file
)
for
file
in
files
]
for
dir
,
_
,
files
in
list
(
os
.
walk
(
path
))]
for
item
in
sublist
if
os
.
path
.
splitext
(
item
)[
1
]
in
supported_formats
]
...
...
examples/ami/sd0/local/ami_prepare.py
浏览文件 @
b75268c5
...
...
@@ -24,11 +24,11 @@ import json
import
logging
import
os
import
xml.etree.ElementTree
as
et
from
distutils.util
import
strtobool
from
ami_splits
import
get_AMI_split
from
dataio
import
load_pkl
from
dataio
import
save_pkl
from
distutils.util
import
strtobool
logger
=
logging
.
getLogger
(
__name__
)
SAMPLERATE
=
16000
...
...
paddlespeech/s2t/decoders/recog_bin.py
浏览文件 @
b75268c5
...
...
@@ -17,10 +17,10 @@ import logging
import
os
import
random
import
sys
from
distutils.util
import
strtobool
import
configargparse
import
numpy
as
np
from
distutils.util
import
strtobool
def
get_parser
():
...
...
paddlespeech/s2t/utils/cli_utils.py
浏览文件 @
b75268c5
...
...
@@ -14,9 +14,9 @@
# Modified from espnet(https://github.com/espnet/espnet)
import
sys
from
collections.abc
import
Sequence
from
distutils.util
import
strtobool
as
dist_strtobool
import
numpy
from
distutils.util
import
strtobool
as
dist_strtobool
def
strtobool
(
x
):
...
...
paddlespeech/s2t/utils/utility.py
浏览文件 @
b75268c5
...
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common utility functions."""
import
distutils.util
import
math
import
os
import
random
...
...
@@ -21,6 +20,7 @@ from contextlib import contextmanager
from
pprint
import
pformat
from
typing
import
List
import
distutils.util
import
numpy
as
np
import
paddle
import
soundfile
...
...
paddlespeech/vector/cluster/diarization.py
浏览文件 @
b75268c5
...
...
@@ -18,11 +18,11 @@ A few sklearn functions are modified in this script as per requirement.
"""
import
argparse
import
warnings
from
distutils.util
import
strtobool
import
numpy
as
np
import
scipy
import
sklearn
from
distutils.util
import
strtobool
from
scipy
import
sparse
from
scipy.sparse.csgraph
import
connected_components
from
scipy.sparse.csgraph
import
laplacian
as
csgraph_laplacian
...
...
speechx/README.md
浏览文件 @
b75268c5
...
...
@@ -5,7 +5,7 @@
We develop under:
*
docker - registry.baidubce.com/paddlepaddle/paddle:2.1.1-gpu-cuda10.2-cudnn7
*
os - Ubuntu 16.04.7 LTS
*
gcc/g++ - 8.2.0
*
** gcc/g++/gfortran - 8.2.0 **
*
cmake - 3.16.0
> We make sure all things work fun under docker, and recommend using it to develop and deploy.
...
...
@@ -29,6 +29,8 @@ nvidia-docker run --privileged --net=host --ipc=host -it --rm -v $PWD:/workspac
2.
Build
`speechx`
and
`examples`
.
> Do not source venv.
```
pushd /path/to/speechx
./build.sh
...
...
speechx/build.sh
浏览文件 @
b75268c5
...
...
@@ -2,8 +2,7 @@
# the build script had verified in the paddlepaddle docker image.
# please follow the instruction below to install PaddlePaddle image.
# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html
# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html
boost_SOURCE_DIR
=
$PWD
/fc_patch/boost-src
if
[
!
-d
${
boost_SOURCE_DIR
}
]
;
then
wget
-c
https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.tar.gz
tar
xzfv boost_1_75_0.tar.gz
...
...
@@ -23,6 +22,6 @@ cd build
cmake ..
-DBOOST_ROOT
:STRING
=
${
boost_SOURCE_DIR
}
#cmake ..
make
-j1
make
-j1
0
cd
-
speechx/cmake/FindGFortranLibs.cmake
0 → 100644
浏览文件 @
b75268c5
#.rst:
# FindGFortranLibs
# --------
# https://github.com/Argonne-National-Laboratory/PIPS/blob/master/cmake/Modules/FindGFortranLibs.cmake
# https://enccs.github.io/cmake-workshop/cxx-fortran/
#
# Find gcc Fortran compiler & library paths
#
# The module defines the following variables:
#
# ::
#
#
# GFORTRANLIBS_FOUND - true if system has gfortran
# LIBGFORTRAN_LIBRARIES - path to libgfortran
# LIBQUADMATH_LIBRARIES - path to libquadmath
# GFORTRAN_LIBARIES_DIR - directory containing libgfortran, libquadmath
# GFORTRAN_INCLUDE_DIR - directory containing gfortran/gcc headers
# LIBGOMP_LIBRARIES - path to libgomp
# LIBGOMP_INCLUDE_DIR - directory containing omp.h header
# GFORTRAN_VERSION_STRING - version of gfortran found
#
set
(
CMAKE_REQUIRED_QUIET
${
LIBIOMP_FIND_QUIETLY
}
)
if
(
NOT CMAKE_REQUIRED_QUIET
)
message
(
STATUS
"Looking for gfortran related libraries..."
)
endif
()
enable_language
(
Fortran
)
if
(
CMAKE_Fortran_COMPILER_ID MATCHES
"GNU"
)
# Basically, call "gfortran -v" to dump compiler info to the string
# GFORTRAN_VERBOSE_STR, which will be used to get necessary paths
message
(
STATUS
"Extracting library and header information by calling 'gfortran -v'..."
)
execute_process
(
COMMAND
"
${
CMAKE_Fortran_COMPILER
}
"
"-v"
ERROR_VARIABLE
GFORTRAN_VERBOSE_STR RESULT_VARIABLE FLAG
)
# For debugging
message
(
STATUS
"'gfortran -v' returned:"
)
message
(
STATUS
"
${
GFORTRAN_VERBOSE_STR
}
"
)
# Detect gfortran version
string
(
REGEX MATCH
"gcc version [^
\t\n
]+"
GFORTRAN_VER_STR
"
${
GFORTRAN_VERBOSE_STR
}
"
)
string
(
REGEX REPLACE
"gcc version ([^
\t\n
]+)"
"
\\
1"
GFORTRAN_VERSION_STRING
"
${
GFORTRAN_VER_STR
}
"
)
message
(
STATUS
"Detected gfortran version
${
GFORTRAN_VERSION_STRING
}
"
)
unset
(
GFORTRAN_VER_STR
)
set
(
MATCH_REGEX
"[^
\t\n
]+[
\t\n
]+"
)
set
(
REPLACE_REGEX
"([^
\t\n
]+)"
)
# Find architecture for compiler
string
(
REGEX MATCH
"Target: [^
\t\n
]+"
GFORTRAN_ARCH_STR
"
${
GFORTRAN_VERBOSE_STR
}
"
)
message
(
STATUS
"Architecture string:
${
GFORTRAN_ARCH_STR
}
"
)
string
(
REGEX REPLACE
"Target: ([^
\t\n
]+)"
"
\\
1"
GFORTRAN_ARCH
"
${
GFORTRAN_ARCH_STR
}
"
)
message
(
STATUS
"Detected gfortran architecture:
${
GFORTRAN_ARCH
}
"
)
unset
(
GFORTRAN_ARCH_STR
)
# Find install prefix, if it exists; if not, use default
string
(
REGEX MATCH
"--prefix=[^
\t\n
]+[
\t\n
]+"
GFORTRAN_PREFIX_STR
"
${
GFORTRAN_VERBOSE_STR
}
"
)
if
(
NOT GFORTRAN_PREFIX_STR
)
message
(
STATUS
"Detected default gfortran prefix"
)
set
(
GFORTRAN_PREFIX_DIR
"/usr/local"
)
# default prefix for gcc install
else
()
string
(
REGEX REPLACE
"--prefix=([^
\t\n
]+)"
"
\\
1"
GFORTRAN_PREFIX_DIR
"
${
GFORTRAN_PREFIX_STR
}
"
)
endif
()
message
(
STATUS
"Detected gfortran prefix:
${
GFORTRAN_PREFIX_DIR
}
"
)
unset
(
GFORTRAN_PREFIX_STR
)
# Find install exec-prefix, if it exists; if not, use default
string
(
REGEX MATCH
"--exec-prefix=[^
\t\n
]+[
\t\n
]+"
"
\\
1"
GFORTRAN_EXEC_PREFIX_STR
"
${
GFORTRAN_VERBOSE_STR
}
"
)
if
(
NOT GFORTRAN_EXEC_PREFIX_STR
)
message
(
STATUS
"Detected default gfortran exec-prefix"
)
set
(
GFORTRAN_EXEC_PREFIX_DIR
"
${
GFORTRAN_PREFIX_DIR
}
"
)
else
()
string
(
REGEX REPLACE
"--exec-prefix=([^
\t\n
]+)"
"
\\
1"
GFORTRAN_EXEC_PREFIX_DIR
"
${
GFORTRAN_EXEC_PREFIX_STR
}
"
)
endif
()
message
(
STATUS
"Detected gfortran exec-prefix:
${
GFORTRAN_EXEC_PREFIX_DIR
}
"
)
UNSET
(
GFORTRAN_EXEC_PREFIX_STR
)
# Find library directory and include directory, if library directory specified
string
(
REGEX MATCH
"--libdir=[^
\t\n
]+"
GFORTRAN_LIB_DIR_STR
"
${
GFORTRAN_VERBOSE_STR
}
"
)
if
(
NOT GFORTRAN_LIB_DIR_STR
)
message
(
STATUS
"Found --libdir flag -- not found"
)
message
(
STATUS
"Using default gfortran library & include directory paths"
)
set
(
GFORTRAN_LIBRARIES_DIR
"
${
GFORTRAN_EXEC_PREFIX_DIR
}
/lib/gcc/
${
GFORTRAN_ARCH
}
/
${
GFORTRAN_VERSION_STRING
}
"
)
string
(
CONCAT GFORTRAN_INCLUDE_DIR
"
${
GFORTRAN_LIBRARIES_DIR
}
"
"/include"
)
else
()
message
(
STATUS
"Found --libdir flag -- yes"
)
string
(
REGEX REPLACE
"--libdir=([^
\t\n
]+)"
"
\\
1"
GFORTRAN_LIBRARIES_DIR
"
${
GFORTRAN_LIB_DIR_STR
}
"
)
string
(
CONCAT GFORTRAN_INCLUDE_DIR
"
${
GFORTRAN_LIBRARIES_DIR
}
"
"/gcc/"
"
${
GFORTRAN_ARCH
}
"
"/"
"
${
GFORTRAN_VERSION_STRING
}
"
"/include"
)
endif
()
message
(
STATUS
"gfortran libraries path:
${
GFORTRAN_LIBRARIES_DIR
}
"
)
message
(
STATUS
"gfortran include path dir:
${
GFORTRAN_INCLUDE_DIR
}
"
)
unset
(
GFORTRAN_LIB_DIR_STR
)
# There are lots of other build options for gcc & gfortran. For now, the
# options implemented above should cover a lot of common use cases.
# Clean up be deleting the output string from "gfortran -v"
unset
(
GFORTRAN_VERBOSE_STR
)
# Find paths for libgfortran, libquadmath, libgomp
# libgomp needed for OpenMP support without Clang
find_library
(
LIBGFORTRAN_LIBRARIES NAMES gfortran libgfortran
HINTS
${
GFORTRAN_LIBRARIES_DIR
}
)
find_library
(
LIBQUADMATH_LIBRARIES NAMES quadmath libquadmath
HINTS
${
GFORTRAN_LIBRARIES_DIR
}
)
find_library
(
LIBGOMP_LIBRARIES NAMES gomp libgomp
HINTS
${
GFORTRAN_LIBRARIES_DIR
}
)
# Find OpenMP headers
find_path
(
LIBGOMP_INCLUDE_DIR NAMES omp.h HINTS
${
GFORTRAN_INCLUDE_DIR
}
)
else
()
message
(
STATUS
"CMAKE_Fortran_COMPILER_ID does not match 'GNU'!"
)
endif
()
include
(
FindPackageHandleStandardArgs
)
# Required: libgfortran, libquadmath, path for gfortran libraries
# Optional: libgomp, path for OpenMP headers, path for gcc/gfortran headers
find_package_handle_standard_args
(
GFortranLibs
REQUIRED_VARS LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES GFORTRAN_LIBRARIES_DIR
VERSION_VAR GFORTRAN_VERSION_STRING
)
if
(
GFORTRANLIBS_FOUND
)
message
(
STATUS
"Looking for gfortran libraries -- found"
)
message
(
STATUS
"gfortran version:
${
GFORTRAN_VERSION_STRING
}
"
)
else
()
message
(
STATUS
"Looking for gfortran libraries -- not found"
)
endif
()
mark_as_advanced
(
LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES
LIBGOMP_LIBRARIES LIBGOMP_INCLUDE_DIR
GFORTRAN_LIBRARIES_DIR GFORTRAN_INCLUDE_DIR
)
# FindGFortranLIBS.cmake ends here
\ No newline at end of file
speechx/cmake/external/openblas.cmake
浏览文件 @
b75268c5
...
...
@@ -7,6 +7,27 @@ set(OpenBLAS_PREFIX ${fc_patch}/OpenBLAS-prefix)
# OPENBLAS https://github.com/lattice/quda/blob/develop/CMakeLists.txt#L575
# ######################################################################################################################
enable_language
(
Fortran
)
include
(
FortranCInterface
)
# # Clang doesn't have a Fortran compiler in its suite (yet),
# # so detect libraries for gfortran; we need equivalents to
# # libgfortran and libquadmath, which are implicitly
# # linked by flags in CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES
# include(FindGFortranLibs REQUIRED)
# # Add directory containing libgfortran and libquadmath to
# # linker. Should also contain libgomp, if not using
# # Intel OpenMP runtime
# link_directories(${GFORTRAN_LIBRARIES_DIR})
# # gfortan dir in the docker.
# link_directories(/usr/local/gcc-8.2/lib64)
# # if you are working with C and Fortran
# FortranCInterface_VERIFY()
# # if you are working with C++ and Fortran
# FortranCInterface_VERIFY(CXX)
#TODO: switch to CPM
include
(
GNUInstallDirs
)
ExternalProject_Add
(
...
...
speechx/cmake/external/openfst.cmake
浏览文件 @
b75268c5
include
(
FetchContent
)
set
(
openfst_PREFIX_DIR
${
fc_patch
}
/openfst
)
set
(
openfst_SOURCE_DIR
${
fc_patch
}
/openfst-src
)
set
(
openfst_BINARY_DIR
${
fc_patch
}
/openfst-build
)
ExternalProject_Add
(
openfst
URL https://github.com/mjansche/openfst/archive/refs/tags/1.7.2.zip
URL_HASH SHA256=ffc56931025579a8af3515741c0f3b0fc3a854c023421472c07ca0c6389c75e6
# #
PREFIX ${openfst_PREFIX_DIR}
#
SOURCE_DIR ${openfst_SOURCE_DIR}
#
BINARY_DIR ${openfst_BINARY_DIR}
PREFIX
${
openfst_PREFIX_DIR
}
SOURCE_DIR
${
openfst_SOURCE_DIR
}
BINARY_DIR
${
openfst_BINARY_DIR
}
CONFIGURE_COMMAND
${
openfst_SOURCE_DIR
}
/configure --prefix=
${
openfst_PREFIX_DIR
}
"CPPFLAGS=-I
${
gflags_BINARY_DIR
}
/include -I
${
glog_SOURCE_DIR
}
/src -I
${
glog_BINARY_DIR
}
"
"LDFLAGS=-L
${
gflags_BINARY_DIR
}
-L
${
glog_BINARY_DIR
}
"
...
...
@@ -16,4 +17,4 @@ ExternalProject_Add(openfst
BUILD_COMMAND make -j 4
)
link_directories
(
${
openfst_PREFIX_DIR
}
/lib
)
include_directories
(
${
openfst_PREFIX_DIR
}
/include
)
include_directories
(
${
openfst_PREFIX_DIR
}
/include
)
\ No newline at end of file
speechx/speechx/frontend/feature_cache.cc
浏览文件 @
b75268c5
...
...
@@ -41,6 +41,7 @@ void FeatureCache::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
// pop feature chunk
bool
FeatureCache
::
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
)
{
kaldi
::
Timer
timer
;
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
cache_
.
empty
()
&&
base_extractor_
->
IsFinished
()
==
false
)
{
ready_read_condition_
.
wait
(
lock
);
...
...
@@ -64,10 +65,13 @@ bool FeatureCache::Compute() {
// compute and feed
Vector
<
BaseFloat
>
feature_chunk
;
bool
result
=
base_extractor_
->
Read
(
&
feature_chunk
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
cache_
.
size
()
>=
max_size_
)
{
ready_feed_condition_
.
wait
(
lock
);
}
// feed cache
if
(
feature_chunk
.
Dim
()
!=
0
)
{
cache_
.
push
(
feature_chunk
);
}
...
...
speechx/speechx/frontend/feature_cache.h
浏览文件 @
b75268c5
...
...
@@ -24,17 +24,24 @@ class FeatureCache : public FeatureExtractorInterface {
explicit
FeatureCache
(
int32
max_size
=
kint16max
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
=
NULL
);
// Feed feats or waves
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
// feats dim = num_frames * feature_dim
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
// feature cache only cache feature which from base extractor
virtual
size_t
Dim
()
const
{
return
base_extractor_
->
Dim
();
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
// read the last chunk data
Compute
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
void
Reset
()
{
base_extractor_
->
Reset
();
while
(
!
cache_
.
empty
())
{
...
...
@@ -45,12 +52,14 @@ class FeatureCache : public FeatureExtractorInterface {
private:
bool
Compute
();
std
::
mutex
mutex_
;
size_t
max_size_
;
std
::
queue
<
kaldi
::
Vector
<
BaseFloat
>>
cache_
;
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
std
::
mutex
mutex_
;
std
::
queue
<
kaldi
::
Vector
<
BaseFloat
>>
cache_
;
std
::
condition_variable
ready_feed_condition_
;
std
::
condition_variable
ready_read_condition_
;
// DISALLOW_COPY_AND_ASSGIN(FeatureCache);
};
...
...
speechx/speechx/frontend/feature_extractor_interface.h
浏览文件 @
b75268c5
...
...
@@ -21,17 +21,26 @@ namespace ppspeech {
class
FeatureExtractorInterface
{
public:
// accept input data, accept feature or raw waves which decided
// by the base_extractor
// Feed inputs: features(2D saved in 1D) or waveforms(1D).
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
=
0
;
// get the processed result
// the length of output = feature_row * feature_dim,
// the Matrix is squashed into Vector
// Fetch processed data: features or waveforms.
// For features(2D saved in 1D), the Matrix is squashed into Vector,
// the length of output = feature_row * feature_dim.
// For waveforms(1D), samples saved in vector.
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
outputs
)
=
0
;
// the Dim is the feature dim
// Dim is the feature dim. For waveforms(1D), Dim is zero; else is specific,
// e.g 80 for fbank.
virtual
size_t
Dim
()
const
=
0
;
// End Flag for Streaming Data.
virtual
void
SetFinished
()
=
0
;
// whether is end of Streaming Data.
virtual
bool
IsFinished
()
const
=
0
;
// Reset to start state.
virtual
void
Reset
()
=
0
;
};
...
...
speechx/speechx/frontend/linear_spectrogram.h
浏览文件 @
b75268c5
...
...
@@ -23,12 +23,14 @@ namespace ppspeech {
struct
LinearSpectrogramOptions
{
kaldi
::
FrameExtractionOptions
frame_opts
;
kaldi
::
BaseFloat
streaming_chunk
;
kaldi
::
BaseFloat
streaming_chunk
;
// second
LinearSpectrogramOptions
()
:
streaming_chunk
(
0.36
),
frame_opts
()
{}
void
Register
(
kaldi
::
OptionsItf
*
opts
)
{
opts
->
Register
(
"streaming-chunk"
,
&
streaming_chunk
,
"streaming chunk size"
);
opts
->
Register
(
"streaming-chunk"
,
&
streaming_chunk
,
"streaming chunk size, default: 0.36 sec"
);
frame_opts
.
Register
(
opts
);
}
};
...
...
utils/DER.py
浏览文件 @
b75268c5
...
...
@@ -26,9 +26,9 @@ import argparse
import
os
import
re
import
subprocess
from
distutils.util
import
strtobool
import
numpy
as
np
from
distutils.util
import
strtobool
FILE_IDS
=
re
.
compile
(
r
"(?<=Speaker Diarization for).+(?=\*\*\*)"
)
SCORED_SPEAKER_TIME
=
re
.
compile
(
r
"(?<=SCORED SPEAKER TIME =)[\d.]+"
)
...
...
utils/addjson.py
浏览文件 @
b75268c5
...
...
@@ -10,8 +10,8 @@ import codecs
import
json
import
logging
import
sys
from
distutils.util
import
strtobool
from
distutils.util
import
strtobool
from
espnet.utils.cli_utils
import
get_commandline_args
is_python2
=
sys
.
version_info
[
0
]
==
2
...
...
utils/apply-cmvn.py
浏览文件 @
b75268c5
#!/usr/bin/env python3
import
argparse
import
logging
from
distutils.util
import
strtobool
import
kaldiio
import
numpy
from
distutils.util
import
strtobool
from
paddlespeech.s2t.transform.cmvn
import
CMVN
from
paddlespeech.s2t.utils.cli_readers
import
file_reader_helper
...
...
utils/copy-feats.py
浏览文件 @
b75268c5
#!/usr/bin/env python3
import
argparse
import
logging
from
distutils.util
import
strtobool
from
paddlespeech.s2t.transform.transformation
import
Transformation
...
...
utils/merge_scp2json.py
浏览文件 @
b75268c5
...
...
@@ -5,9 +5,10 @@ import codecs
import
json
import
logging
import
sys
from
distutils.util
import
strtobool
from
io
import
open
from
distutils.util
import
strtobool
from
paddlespeech.s2t.utils.cli_utils
import
get_commandline_args
PY2
=
sys
.
version_info
[
0
]
==
2
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录