Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
a3911ab5
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
a3911ab5
编写于
6月 28, 2022
作者:
H
Hui Zhang
提交者:
GitHub
6月 28, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2089 from zh794390558/cpplint
[audio] format code
上级
fb7cbb34
fb1300f9
变更
32
展开全部
显示空白变更内容
内联
并排
Showing
32 changed file
with
2077 addition
and
1967 deletion
+2077
-1967
.pre-commit-config.yaml
.pre-commit-config.yaml
+2
-2
audio/audio/csrc/pybind/pybind.cpp
audio/audio/csrc/pybind/pybind.cpp
+6
-4
audio/audio/csrc/pybind/sox/io.cpp
audio/audio/csrc/pybind/sox/io.cpp
+45
-42
audio/audio/csrc/pybind/sox/io.h
audio/audio/csrc/pybind/sox/io.h
+2
-2
audio/audio/csrc/pybind/sox/utils.cpp
audio/audio/csrc/pybind/sox/utils.cpp
+66
-65
audio/audio/csrc/pybind/sox/utils.h
audio/audio/csrc/pybind/sox/utils.h
+15
-15
paddlespeech/__init__.py
paddlespeech/__init__.py
+0
-2
paddlespeech/audio/README.md
paddlespeech/audio/README.md
+1
-1
paddlespeech/audio/__init__.py
paddlespeech/audio/__init__.py
+1
-1
paddlespeech/audio/_class.py
paddlespeech/audio/_class.py
+4
-1
paddlespeech/audio/_extension.py
paddlespeech/audio/_extension.py
+3
-2
paddlespeech/audio/_internal/module_utils.py
paddlespeech/audio/_internal/module_utils.py
+18
-12
paddlespeech/audio/_ops.py
paddlespeech/audio/_ops.py
+5
-4
paddlespeech/audio/src/optional/optional.hpp
paddlespeech/audio/src/optional/optional.hpp
+1451
-1333
paddlespeech/audio/src/pybind/kaldi_frontend/feature_common.h
...lespeech/audio/src/pybind/kaldi_frontend/feature_common.h
+6
-11
paddlespeech/audio/src/pybind/kaldi_frontend/feature_common_inl.h
...eech/audio/src/pybind/kaldi_frontend/feature_common_inl.h
+7
-8
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.cc
...lespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.cc
+71
-70
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.h
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.h
+32
-33
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.cc
.../audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.cc
+24
-22
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h
...h/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h
+3
-7
paddlespeech/audio/src/pybind/pybind.cpp
paddlespeech/audio/src/pybind/pybind.cpp
+8
-6
paddlespeech/audio/src/pybind/sox/io.cpp
paddlespeech/audio/src/pybind/sox/io.cpp
+47
-44
paddlespeech/audio/src/pybind/sox/io.h
paddlespeech/audio/src/pybind/sox/io.h
+4
-4
paddlespeech/audio/src/pybind/sox/utils.cpp
paddlespeech/audio/src/pybind/sox/utils.cpp
+68
-67
paddlespeech/audio/src/pybind/sox/utils.h
paddlespeech/audio/src/pybind/sox/utils.h
+17
-17
paddlespeech/audio/src/sox/io.cpp
paddlespeech/audio/src/sox/io.cpp
+105
-112
paddlespeech/audio/src/sox/io.h
paddlespeech/audio/src/sox/io.h
+16
-19
paddlespeech/audio/src/utils.cpp
paddlespeech/audio/src/utils.cpp
+8
-8
setup.py
setup.py
+15
-42
speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py
speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py
+5
-0
tools/setup_helpers/__init__.py
tools/setup_helpers/__init__.py
+1
-1
tools/setup_helpers/extension.py
tools/setup_helpers/extension.py
+21
-10
未找到文件。
.pre-commit-config.yaml
浏览文件 @
a3911ab5
...
...
@@ -76,4 +76,4 @@ repos:
entry
:
bash .pre-commit-hooks/cpplint.hook
language
:
system
files
:
\.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
exclude
:
(?=speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin).*(\.cpp|\.cc|\.h|\.py)$
\ No newline at end of file
exclude
:
(?=speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|paddlespeech/audio/src/optional).*(\.cpp|\.cc|\.h|\.hpp)$
\ No newline at end of file
audio/audio/csrc/pybind/pybind.cpp
浏览文件 @
a3911ab5
#include "pybind/sox/io.h"
PYBIND11_MODULE
(
_paddleaudio
,
m
)
{
m
.
def
(
"get_info_file"
,
&
paddleaudio
::
sox_io
::
get_info_file
,
m
.
def
(
"get_info_file"
,
&
paddleaudio
::
sox_io
::
get_info_file
,
"Get metadata of audio file."
);
m
.
def
(
"get_info_fileobj"
,
&
paddleaudio
::
sox_io
::
get_info_fileobj
,
m
.
def
(
"get_info_fileobj"
,
&
paddleaudio
::
sox_io
::
get_info_fileobj
,
"Get metadata of audio in file object."
);
}
\ No newline at end of file
audio/audio/csrc/pybind/sox/io.cpp
浏览文件 @
a3911ab5
...
...
@@ -8,7 +8,8 @@ namespace sox_io {
auto
get_info_file
(
const
std
::
string
&
path
,
const
std
::
string
&
format
)
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
{
SoxFormat
sf
(
sox_open_read
(
path
.
data
(),
SoxFormat
sf
(
sox_open_read
(
path
.
data
(),
/*signal=*/
nullptr
,
/*encoding=*/
nullptr
,
/*filetype=*/
format
.
empty
()
?
nullptr
:
format
.
data
()));
...
...
@@ -38,7 +39,9 @@ auto get_info_fileobj(py::object fileobj, const std::string &format)
// If the file is shorter than 256, then libsox cannot read the header.
auto
buf_size
=
(
num_read
>
256
)
?
num_read
:
256
;
SoxFormat
sf
(
sox_open_mem_read
(
buf
,
buf_size
,
SoxFormat
sf
(
sox_open_mem_read
(
buf
,
buf_size
,
/*signal=*/
nullptr
,
/*encoding=*/
nullptr
,
/*filetype=*/
format
.
empty
()
?
nullptr
:
format
.
data
()));
...
...
audio/audio/csrc/pybind/sox/io.h
浏览文件 @
a3911ab5
audio/audio/csrc/pybind/sox/utils.cpp
浏览文件 @
a3911ab5
...
...
@@ -31,7 +31,8 @@ auto read_fileobj(py::object *fileobj, const uint64_t size, char *buffer)
}
if
(
chunk_len
>
request
)
{
std
::
ostringstream
message
;
message
<<
"Requested up to "
<<
request
<<
" bytes but, "
message
<<
"Requested up to "
<<
request
<<
" bytes but, "
<<
"received "
<<
chunk_len
<<
" bytes. "
<<
"The given object does not confirm to read protocol of file "
"object."
;
...
...
@@ -48,8 +49,8 @@ int64_t get_buffer_size() { return sox_get_globals()->bufsiz; }
void
validate_input_file
(
const
SoxFormat
&
sf
,
const
std
::
string
&
path
)
{
if
(
static_cast
<
sox_format_t
*>
(
sf
)
==
nullptr
)
{
throw
std
::
runtime_error
(
"Error loading audio file: failed to open file "
+
path
);
throw
std
::
runtime_error
(
"Error loading audio file: failed to open file "
+
path
);
}
if
(
sf
->
encoding
.
encoding
==
SOX_ENCODING_UNKNOWN
)
{
throw
std
::
runtime_error
(
"Error loading audio file: unknown encoding."
);
...
...
audio/audio/csrc/pybind/sox/utils.h
浏览文件 @
a3911ab5
...
...
@@ -22,7 +22,7 @@ struct SoxFormat {
void
close
();
private:
private:
sox_format_t
*
fd_
;
};
...
...
paddlespeech/__init__.py
浏览文件 @
a3911ab5
...
...
@@ -14,5 +14,3 @@
import
_locale
_locale
.
_getdefaultlocale
=
(
lambda
*
args
:
[
'en_US'
,
'utf8'
])
paddlespeech/audio/README.md
浏览文件 @
a3911ab5
paddlespeech/audio/__init__.py
浏览文件 @
a3911ab5
paddlespeech/audio/_class.py
浏览文件 @
a3911ab5
import
types
class
_ClassNamespace
(
types
.
ModuleType
):
def
__init__
(
self
,
name
):
super
(
_ClassNamespace
,
self
).
__init__
(
'paddlespeech.classes'
+
name
)
...
...
@@ -11,6 +12,7 @@ class _ClassNamespace(types.ModuleType):
raise
RuntimeError
(
f
'Class
{
self
.
name
}
.
{
attr
}
not registered!'
)
return
proxy
class
_Classes
(
types
.
ModuleType
):
__file__
=
'_classes.py'
...
...
@@ -43,5 +45,6 @@ class _Classes(types.ModuleType):
"""
paddlespeech
.
ops
.
load_library
(
path
)
# The classes "namespace"
classes
=
_Classes
()
paddlespeech/audio/_extension.py
浏览文件 @
a3911ab5
...
...
@@ -64,7 +64,8 @@ def _init_ffmpeg():
try
:
_load_lib
(
"libpaddlleaudio_ffmpeg"
)
except
OSError
as
err
:
raise
ImportError
(
"FFmpeg libraries are not found. Please install FFmpeg."
)
from
err
raise
ImportError
(
"FFmpeg libraries are not found. Please install FFmpeg."
)
from
err
import
paddllespeech
.
_paddlleaudio_ffmpeg
# noqa
...
...
paddlespeech/audio/_internal/module_utils.py
浏览文件 @
a3911ab5
...
...
@@ -3,6 +3,7 @@ import warnings
from
functools
import
wraps
from
typing
import
Optional
def
is_module_available
(
*
modules
:
str
)
->
bool
:
r
"""Returns if a top-level module with :attr:`name` exists *without**
importing it. This is generally safer than try-catch block around a
...
...
@@ -26,19 +27,21 @@ def requires_module(*modules: str):
return
func
else
:
req
=
f
"module:
{
missing
[
0
]
}
"
if
len
(
missing
)
==
1
else
f
"modules:
{
missing
}
"
req
=
f
"module:
{
missing
[
0
]
}
"
if
len
(
missing
)
==
1
else
f
"modules:
{
missing
}
"
def
decorator
(
func
):
@
wraps
(
func
)
def
wrapped
(
*
args
,
**
kwargs
):
raise
RuntimeError
(
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
requires
{
req
}
"
)
raise
RuntimeError
(
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
requires
{
req
}
"
)
return
wrapped
return
decorator
def
deprecated
(
direction
:
str
,
version
:
Optional
[
str
]
=
None
):
def
deprecated
(
direction
:
str
,
version
:
Optional
[
str
]
=
None
):
"""Decorator to add deprecation message
Args:
direction (str): Migration steps to be given to users.
...
...
@@ -51,8 +54,7 @@ def deprecated(direction: str, version: Optional[str] = None):
message
=
(
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
has been deprecated "
f
'and will be removed from
{
"future"
if
version
is
None
else
version
}
release. '
f
"
{
direction
}
"
)
f
"
{
direction
}
"
)
warnings
.
warn
(
message
,
stacklevel
=
2
)
return
func
(
*
args
,
**
kwargs
)
...
...
@@ -62,7 +64,7 @@ def deprecated(direction: str, version: Optional[str] = None):
def
is_kaldi_available
():
return
is_module_available
(
"paddlespeech
"
.
_paddleaudio
") and paddlespeech.ops.paddleaudio.is_kaldi_available(
)
return
is_module_available
(
"paddlespeech
.audio._paddleaudio"
)
def
requires_kaldi
():
...
...
@@ -76,7 +78,8 @@ def requires_kaldi():
def
decorator
(
func
):
@
wraps
(
func
)
def
wrapped
(
*
args
,
**
kwargs
):
raise RuntimeError(f"
{
func
.
__module__
}.{
func
.
__name__
}
requires
kaldi
")
raise
RuntimeError
(
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
requires kaldi"
)
return
wrapped
...
...
@@ -91,7 +94,8 @@ def _check_soundfile_importable():
return
True
except
Exception
:
warnings.warn("
Failed
to
import
soundfile.
'soundfile'
backend
is
not
available
.
")
warnings
.
warn
(
"Failed to import soundfile. 'soundfile' backend is not available."
)
return
False
...
...
@@ -113,7 +117,8 @@ def requires_soundfile():
def
decorator
(
func
):
@
wraps
(
func
)
def
wrapped
(
*
args
,
**
kwargs
):
raise RuntimeError(f"
{
func
.
__module__
}.{
func
.
__name__
}
requires
soundfile
")
raise
RuntimeError
(
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
requires soundfile"
)
return
wrapped
...
...
@@ -121,7 +126,7 @@ def requires_soundfile():
def
is_sox_available
():
return is_module_available("
paddlespeech
.
_paddleaudio
") and paddlespeech.ops.paddleaudio.is_sox_available(
)
return
is_module_available
(
"paddlespeech.
audio._paddleaudio"
)
def
requires_sox
():
...
...
@@ -135,7 +140,8 @@ def requires_sox():
def
decorator
(
func
):
@
wraps
(
func
)
def
wrapped
(
*
args
,
**
kwargs
):
raise RuntimeError(f"
{
func
.
__module__
}.{
func
.
__name__
}
requires
sox
")
raise
RuntimeError
(
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
requires sox"
)
return
wrapped
...
...
paddlespeech/audio/_ops.py
浏览文件 @
a3911ab5
import
contextlib
import
ctypes
import
sys
import
os
import
sys
import
types
# Query `hasattr` only once.
_SET_GLOBAL_FLAGS
=
hasattr
(
sys
,
'getdlopenflags'
)
and
hasattr
(
sys
,
'setdlopenflags'
)
_SET_GLOBAL_FLAGS
=
hasattr
(
sys
,
'getdlopenflags'
)
and
hasattr
(
sys
,
'setdlopenflags'
)
@
contextlib
.
contextmanager
...
...
paddlespeech/audio/src/optional/optional.hpp
浏览文件 @
a3911ab5
此差异已折叠。
点击以展开。
paddlespeech/audio/src/pybind/kaldi_frontend/feature_common.h
浏览文件 @
a3911ab5
...
...
@@ -14,9 +14,9 @@
#pragma once
#include "feat/feature-window.h"
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include "feat/feature-window.h"
namespace
paddleaudio
{
...
...
@@ -29,13 +29,9 @@ class StreamingFeatureTpl {
StreamingFeatureTpl
(
const
Options
&
opts
);
bool
ComputeFeature
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
wav
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
void
Reset
()
{
remained_wav_
.
Resize
(
0
);
}
void
Reset
()
{
remained_wav_
.
Resize
(
0
);
}
int
Dim
()
{
return
computer_
.
Dim
();
}
int
Dim
()
{
return
computer_
.
Dim
();
}
private:
bool
Compute
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
waves
,
...
...
@@ -49,4 +45,3 @@ class StreamingFeatureTpl {
}
// namespace ppspeech
#include "feature_common_inl.h"
paddlespeech/audio/src/pybind/kaldi_frontend/feature_common_inl.h
浏览文件 @
a3911ab5
...
...
@@ -17,15 +17,14 @@
namespace
paddleaudio
{
template
<
class
F
>
StreamingFeatureTpl
<
F
>::
StreamingFeatureTpl
(
const
Options
&
opts
)
:
opts_
(
opts
),
computer_
(
opts
),
window_function_
(
opts
.
frame_opts
)
{
//window_function_(computer_.GetFrameOptions()) { the opt set to zero
StreamingFeatureTpl
<
F
>::
StreamingFeatureTpl
(
const
Options
&
opts
)
:
opts_
(
opts
),
computer_
(
opts
),
window_function_
(
opts
.
frame_opts
)
{
// window_function_(computer_.GetFrameOptions()) { the opt set to zero
}
template
<
class
F
>
bool
StreamingFeatureTpl
<
F
>::
ComputeFeature
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
wav
,
bool
StreamingFeatureTpl
<
F
>::
ComputeFeature
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
wav
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
)
{
// append remaned waves
kaldi
::
int32
wav_len
=
wav
.
Dim
();
...
...
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.cc
浏览文件 @
a3911ab5
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include "kaldi_feature_wrapper.h"
namespace
py
=
pybind11
;
namespace
py
=
pybind11
;
bool
InitFbank
(
float
samp_freq
,
// frame opts
bool
InitFbank
(
float
samp_freq
,
// frame opts
float
frame_shift_ms
,
float
frame_length_ms
,
float
dither
,
...
...
@@ -139,5 +138,7 @@ PYBIND11_MODULE(kaldi_featurepy, m) {
m
.
def
(
"InitFbank"
,
&
InitFbank
,
"init fbank"
);
m
.
def
(
"ResetFbank"
,
&
ResetFbank
,
"reset fbank"
);
m
.
def
(
"ComputeFbank"
,
&
ComputeFbank
,
"compute fbank"
);
m
.
def
(
"ComputeFbankStreaming"
,
&
ComputeFbankStreaming
,
"compute fbank streaming"
);
m
.
def
(
"ComputeFbankStreaming"
,
&
ComputeFbankStreaming
,
"compute fbank streaming"
);
}
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.h
浏览文件 @
a3911ab5
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include "kaldi_feature_wrapper.h"
namespace
py
=
pybind11
;
namespace
py
=
pybind11
;
bool
InitFbank
(
float
samp_freq
,
// frame opts
bool
InitFbank
(
float
samp_freq
,
// frame opts
float
frame_shift_ms
,
float
frame_length_ms
,
float
dither
,
...
...
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.cc
浏览文件 @
a3911ab5
...
...
@@ -12,7 +12,8 @@ bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) {
return
true
;
}
py
::
array_t
<
double
>
KaldiFeatureWrapper
::
ComputeFbank
(
const
py
::
array_t
<
double
>
wav
)
{
py
::
array_t
<
double
>
KaldiFeatureWrapper
::
ComputeFbank
(
const
py
::
array_t
<
double
>
wav
)
{
py
::
buffer_info
info
=
wav
.
request
();
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
input_wav
(
info
.
size
);
double
*
wav_ptr
=
(
double
*
)
info
.
ptr
;
...
...
@@ -38,8 +39,8 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(const py::array_t<double>
res_ptr
++
;
}
return
result
.
reshape
({
feats
.
Dim
()
/
Dim
(),
Dim
()});
/*
return
result
.
reshape
({
feats
.
Dim
()
/
Dim
(),
Dim
()});
/*
py::buffer_info info = wav.request();
std::cout << info.size << std::endl;
auto result = py::array_t<double>(info.size);
...
...
@@ -48,9 +49,10 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(const py::array_t<double>
py::buffer_info info_re = result.request();
memcpy(input_wav.Data(), (double*)info.ptr, wav.nbytes());
memcpy((double*)info_re.ptr, input_wav.Data(), input_wav.Dim()* sizeof(double));
memcpy((double*)info_re.ptr, input_wav.Data(), input_wav.Dim()*
sizeof(double));
return result;
*/
*/
}
...
...
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h
浏览文件 @
a3911ab5
#include "base/kaldi-common.h"
#include "feature_common.h"
#include "feat/feature-fbank.h"
#include "feature_common.h"
#pragma once
...
...
@@ -14,12 +14,8 @@ class KaldiFeatureWrapper {
static
KaldiFeatureWrapper
*
GetInstance
();
bool
InitFbank
(
kaldi
::
FbankOptions
opts
);
py
::
array_t
<
double
>
ComputeFbank
(
const
py
::
array_t
<
double
>
wav
);
int
Dim
()
{
return
fbank_
->
Dim
();
}
void
ResetFbank
()
{
fbank_
->
Reset
();
}
int
Dim
()
{
return
fbank_
->
Dim
();
}
void
ResetFbank
()
{
fbank_
->
Reset
();
}
private:
std
::
unique_ptr
<
paddleaudio
::
Fbank
>
fbank_
;
...
...
paddlespeech/audio/src/pybind/pybind.cpp
浏览文件 @
a3911ab5
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//All rights reserved.
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//
All rights reserved.
#include "paddlespeech/audio/src/pybind/sox/io.h"
PYBIND11_MODULE
(
_paddleaudio
,
m
)
{
m
.
def
(
"get_info_file"
,
&
paddleaudio
::
sox_io
::
get_info_file
,
m
.
def
(
"get_info_file"
,
&
paddleaudio
::
sox_io
::
get_info_file
,
"Get metadata of audio file."
);
m
.
def
(
"get_info_fileobj"
,
&
paddleaudio
::
sox_io
::
get_info_fileobj
,
m
.
def
(
"get_info_fileobj"
,
&
paddleaudio
::
sox_io
::
get_info_fileobj
,
"Get metadata of audio in file object."
);
}
\ No newline at end of file
paddlespeech/audio/src/pybind/sox/io.cpp
浏览文件 @
a3911ab5
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//All rights reserved.
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//
All rights reserved.
#include "paddlespeech/audio/src/pybind/sox/io.h"
#include "paddlespeech/audio/src/pybind/sox/utils.h"
...
...
@@ -11,7 +11,8 @@ namespace sox_io {
auto
get_info_file
(
const
std
::
string
&
path
,
const
std
::
string
&
format
)
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
{
SoxFormat
sf
(
sox_open_read
(
path
.
data
(),
SoxFormat
sf
(
sox_open_read
(
path
.
data
(),
/*signal=*/
nullptr
,
/*encoding=*/
nullptr
,
/*filetype=*/
format
.
empty
()
?
nullptr
:
format
.
data
()));
...
...
@@ -41,7 +42,9 @@ auto get_info_fileobj(py::object fileobj, const std::string &format)
// If the file is shorter than 256, then libsox cannot read the header.
auto
buf_size
=
(
num_read
>
256
)
?
num_read
:
256
;
SoxFormat
sf
(
sox_open_mem_read
(
buf
,
buf_size
,
SoxFormat
sf
(
sox_open_mem_read
(
buf
,
buf_size
,
/*signal=*/
nullptr
,
/*encoding=*/
nullptr
,
/*filetype=*/
format
.
empty
()
?
nullptr
:
format
.
data
()));
...
...
paddlespeech/audio/src/pybind/sox/io.h
浏览文件 @
a3911ab5
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//All rights reserved.
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//
All rights reserved.
#ifndef PADDLEAUDIO_PYBIND_SOX_IO_H
#define PADDLEAUDIO_PYBIND_SOX_IO_H
...
...
paddlespeech/audio/src/pybind/sox/utils.cpp
浏览文件 @
a3911ab5
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//All rights reserved.
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//
All rights reserved.
#include "paddlespeech/audio/src/pybind/sox/utils.h"
...
...
@@ -34,7 +34,8 @@ auto read_fileobj(py::object *fileobj, const uint64_t size, char *buffer)
}
if
(
chunk_len
>
request
)
{
std
::
ostringstream
message
;
message
<<
"Requested up to "
<<
request
<<
" bytes but, "
message
<<
"Requested up to "
<<
request
<<
" bytes but, "
<<
"received "
<<
chunk_len
<<
" bytes. "
<<
"The given object does not confirm to read protocol of file "
"object."
;
...
...
@@ -51,8 +52,8 @@ int64_t get_buffer_size() { return sox_get_globals()->bufsiz; }
void
validate_input_file
(
const
SoxFormat
&
sf
,
const
std
::
string
&
path
)
{
if
(
static_cast
<
sox_format_t
*>
(
sf
)
==
nullptr
)
{
throw
std
::
runtime_error
(
"Error loading audio file: failed to open file "
+
path
);
throw
std
::
runtime_error
(
"Error loading audio file: failed to open file "
+
path
);
}
if
(
sf
->
encoding
.
encoding
==
SOX_ENCODING_UNKNOWN
)
{
throw
std
::
runtime_error
(
"Error loading audio file: unknown encoding."
);
...
...
paddlespeech/audio/src/pybind/sox/utils.h
浏览文件 @
a3911ab5
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//All rights reserved.
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//
All rights reserved.
#ifndef PADDLEAUDIO_PYBIND_SOX_UTILS_H
#define PADDLEAUDIO_PYBIND_SOX_UTILS_H
...
...
@@ -25,7 +25,7 @@ struct SoxFormat {
void
close
();
private:
private:
sox_format_t
*
fd_
;
};
...
...
paddlespeech/audio/src/sox/io.cpp
浏览文件 @
a3911ab5
...
...
@@ -11,8 +11,7 @@ namespace paddleaudio {
namespace
sox_io
{
tl
::
optional
<
MetaDataTuple
>
get_info_file
(
const
std
::
string
&
path
,
const
tl
::
optional
<
std
::
string
>&
format
)
{
const
std
::
string
&
path
,
const
tl
::
optional
<
std
::
string
>&
format
)
{
SoxFormat
sf
(
sox_open_read
(
path
.
c_str
(),
/*signal=*/
nullptr
,
...
...
@@ -73,8 +72,7 @@ tl::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
path
,
effects
,
normalize
,
channels_first
,
format
);
}
void
save_audio_file
(
const
std
::
string
&
path
,
void
save_audio_file
(
const
std
::
string
&
path
,
torch
::
Tensor
tensor
,
int64_t
sample_rate
,
bool
channels_first
,
...
...
@@ -85,25 +83,23 @@ void save_audio_file(
validate_input_tensor
(
tensor
);
const
auto
filetype
=
[
&
]()
{
if
(
format
.
has_value
())
return
format
.
value
();
if
(
format
.
has_value
())
return
format
.
value
();
return
get_filetype
(
path
);
}();
if
(
filetype
==
"amr-nb"
)
{
const
auto
num_channels
=
tensor
.
size
(
channels_first
?
0
:
1
);
TORCH_CHECK
(
num_channels
==
1
,
"amr-nb format only supports single channel audio."
);
TORCH_CHECK
(
num_channels
==
1
,
"amr-nb format only supports single channel audio."
);
}
else
if
(
filetype
==
"htk"
)
{
const
auto
num_channels
=
tensor
.
size
(
channels_first
?
0
:
1
);
TORCH_CHECK
(
num_channels
==
1
,
"htk format only supports single channel audio."
);
TORCH_CHECK
(
num_channels
==
1
,
"htk format only supports single channel audio."
);
}
else
if
(
filetype
==
"gsm"
)
{
const
auto
num_channels
=
tensor
.
size
(
channels_first
?
0
:
1
);
TORCH_CHECK
(
num_channels
==
1
,
"gsm format only supports single channel audio."
);
TORCH_CHECK
(
sample_rate
==
8000
,
TORCH_CHECK
(
num_channels
==
1
,
"gsm format only supports single channel audio."
);
TORCH_CHECK
(
sample_rate
==
8000
,
"gsm format only supports a sampling rate of 8kHz."
);
}
const
auto
signal_info
=
...
...
@@ -111,8 +107,7 @@ void save_audio_file(
const
auto
encoding_info
=
get_encodinginfo_for_save
(
filetype
,
tensor
.
dtype
(),
compression
,
encoding
,
bits_per_sample
);
SoxFormat
sf
(
sox_open_write
(
path
.
c_str
(),
SoxFormat
sf
(
sox_open_write
(
path
.
c_str
(),
&
signal_info
,
&
encoding_info
,
/*filetype=*/
filetype
.
c_str
(),
...
...
@@ -134,11 +129,9 @@ void save_audio_file(
TORCH_LIBRARY_FRAGMENT
(
paddleaudio
,
m
)
{
m
.
def
(
"paddleaudio::sox_io_get_info"
,
&
paddleaudio
::
sox_io
::
get_info_file
);
m
.
def
(
"paddleaudio::sox_io_load_audio_file"
,
m
.
def
(
"paddleaudio::sox_io_load_audio_file"
,
&
paddleaudio
::
sox_io
::
load_audio_file
);
m
.
def
(
"paddleaudio::sox_io_save_audio_file"
,
m
.
def
(
"paddleaudio::sox_io_save_audio_file"
,
&
paddleaudio
::
sox_io
::
save_audio_file
);
}
...
...
paddlespeech/audio/src/sox/io.h
浏览文件 @
a3911ab5
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//All rights reserved.
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//
All rights reserved.
#ifndef PADDLEAUDIO_SOX_IO_H
#define PADDLEAUDIO_SOX_IO_H
...
...
@@ -11,8 +11,7 @@
namespace
paddleaudio
{
namespace
sox_io
{
auto
get_effects
(
const
tl
::
optional
<
int64_t
>&
frame_offset
,
auto
get_effects
(
const
tl
::
optional
<
int64_t
>&
frame_offset
,
const
tl
::
optional
<
int64_t
>&
num_frames
)
->
std
::
vector
<
std
::
vector
<
std
::
string
>>
;
...
...
@@ -20,8 +19,7 @@ using MetaDataTuple =
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
;
tl
::
optional
<
MetaDataTuple
>
get_info_file
(
const
std
::
string
&
path
,
const
tl
::
optional
<
std
::
string
>&
format
);
const
std
::
string
&
path
,
const
tl
::
optional
<
std
::
string
>&
format
);
tl
::
optional
<
std
::
tuple
<
torch
::
Tensor
,
int64_t
>>
load_audio_file
(
const
std
::
string
&
path
,
...
...
@@ -31,8 +29,7 @@ tl::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
tl
::
optional
<
bool
>
channels_first
,
const
tl
::
optional
<
std
::
string
>&
format
);
void
save_audio_file
(
const
std
::
string
&
path
,
void
save_audio_file
(
const
std
::
string
&
path
,
torch
::
Tensor
tensor
,
int64_t
sample_rate
,
bool
channels_first
,
...
...
paddlespeech/audio/src/utils.cpp
浏览文件 @
a3911ab5
setup.py
浏览文件 @
a3911ab5
...
...
@@ -18,9 +18,9 @@ import os
import
subprocess
as
sp
import
sys
from
pathlib
import
Path
from
typing
import
Union
from
typing
import
Tuple
from
typing
import
List
from
typing
import
Tuple
from
typing
import
Union
import
distutils.command.clean
from
setuptools
import
Command
...
...
@@ -38,43 +38,13 @@ VERSION = '0.0.0'
COMMITID
=
'none'
base
=
[
"editdistance"
,
"g2p_en"
,
"g2pM"
,
"h5py"
,
"inflect"
,
"jieba"
,
"jsonlines"
,
"kaldiio"
,
"librosa==0.8.1"
,
"loguru"
,
"matplotlib"
,
"nara_wpe"
,
"onnxruntime"
,
"pandas"
,
"paddlenlp"
,
"paddlespeech_feat"
,
"praatio==5.0.0"
,
"pypinyin"
,
"pypinyin-dict"
,
"python-dateutil"
,
"pyworld"
,
"resampy==0.2.2"
,
"sacrebleu"
,
"scipy"
,
"sentencepiece~=0.1.96"
,
"soundfile~=0.10"
,
"textgrid"
,
"timer"
,
"tqdm"
,
"typeguard"
,
"visualdl"
,
"webrtcvad"
,
"yacs~=0.1.8"
,
"prettytable"
,
"zhon"
,
'colorlog'
,
'pathos == 0.2.8'
"editdistance"
,
"g2p_en"
,
"g2pM"
,
"h5py"
,
"inflect"
,
"jieba"
,
"jsonlines"
,
"kaldiio"
,
"librosa==0.8.1"
,
"loguru"
,
"matplotlib"
,
"nara_wpe"
,
"onnxruntime"
,
"pandas"
,
"paddlenlp"
,
"paddlespeech_feat"
,
"praatio==5.0.0"
,
"pypinyin"
,
"pypinyin-dict"
,
"python-dateutil"
,
"pyworld"
,
"resampy==0.2.2"
,
"sacrebleu"
,
"scipy"
,
"sentencepiece~=0.1.96"
,
"soundfile~=0.10"
,
"textgrid"
,
"timer"
,
"tqdm"
,
"typeguard"
,
"visualdl"
,
"webrtcvad"
,
"yacs~=0.1.8"
,
"prettytable"
,
"zhon"
,
'colorlog'
,
'pathos == 0.2.8'
]
server
=
[
...
...
@@ -264,6 +234,7 @@ class clean(distutils.command.clean.clean):
print
(
f
"removing '
{
path
}
' (and everything under it)"
)
shutil
.
rmtree
(
str
(
path
),
ignore_errors
=
True
)
def
main
():
sha
=
check_output
([
"git"
,
"rev-parse"
,
"HEAD"
])
# commit id
branch
=
check_output
([
"git"
,
"rev-parse"
,
"--abbrev-ref"
,
"HEAD"
])
...
...
@@ -319,7 +290,8 @@ def main():
requirements
[
"develop"
],
'doc'
:
[
"sphinx"
,
"sphinx-rtd-theme"
,
"numpydoc"
,
"myst_parser"
,
"recommonmark>=0.5.0"
,
"sphinx-markdown-tables"
,
"sphinx-autobuild"
"recommonmark>=0.5.0"
,
"sphinx-markdown-tables"
,
"sphinx-autobuild"
],
'test'
:
[
'nose'
,
'torchaudio==0.10.2'
],
},
...
...
@@ -358,5 +330,6 @@ def main():
setup
(
**
setup_info
)
if
__name__
==
'__main__'
:
main
()
speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py
浏览文件 @
a3911ab5
...
...
@@ -494,6 +494,11 @@ class SymbolicShapeInference:
# contrib ops
'Attention'
,
'BiasGelu'
,
\
'EmbedLayerNormalization'
,
\
'FastGelu'
,
'Gelu'
,
'LayerNormalization'
,
\
...
...
tools/setup_helpers/__init__.py
浏览文件 @
a3911ab5
tools/setup_helpers/extension.py
浏览文件 @
a3911ab5
import
distutils.sysconfig
import
os
import
platform
import
subprocess
from
pathlib
import
Path
import
distutils.sysconfig
from
setuptools
import
Extension
from
setuptools.command.build_ext
import
build_ext
...
...
@@ -27,11 +27,13 @@ def _get_build(var, default=False):
if
val
in
trues
:
return
True
if
val
not
in
falses
:
print
(
f
"WARNING: Unexpected environment variable value `
{
var
}
=
{
val
}
`. "
f
"Expected one of
{
trues
+
falses
}
"
)
print
(
f
"WARNING: Unexpected environment variable value `
{
var
}
=
{
val
}
`. "
f
"Expected one of
{
trues
+
falses
}
"
)
return
False
_BUILD_SOX
=
False
if
platform
.
system
()
==
"Windows"
else
_get_build
(
"BUILD_SOX"
,
True
)
_BUILD_SOX
=
False
if
platform
.
system
()
==
"Windows"
else
_get_build
(
"BUILD_SOX"
,
True
)
_BUILD_MAD
=
_get_build
(
"BUILD_MAD"
,
False
)
# _BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
# _BUILD_RNNT = _get_build("BUILD_RNNT", True)
...
...
@@ -40,7 +42,8 @@ _BUILD_MAD = _get_build("BUILD_MAD", False)
# _USE_ROCM = _get_build("USE_ROCM", torch.cuda.is_available() and torch.version.hip is not None)
# _USE_CUDA = _get_build("USE_CUDA", torch.cuda.is_available() and torch.version.hip is None)
# _USE_OPENMP = _get_build("USE_OPENMP", True) and "ATen parallel backend: OpenMP" in torch.__config__.parallel_info()
_PADDLESPEECH_CUDA_ARCH_LIST
=
os
.
environ
.
get
(
"PADDLESPEECH_CUDA_ARCH_LIST"
,
None
)
_PADDLESPEECH_CUDA_ARCH_LIST
=
os
.
environ
.
get
(
"PADDLESPEECH_CUDA_ARCH_LIST"
,
None
)
def
get_ext_modules
():
...
...
@@ -71,7 +74,8 @@ class CMakeBuild(build_ext):
if
ext
.
name
!=
"paddlespeech.audio._paddleaudio"
:
return
extdir
=
os
.
path
.
abspath
(
os
.
path
.
dirname
(
self
.
get_ext_filename
(
ext
.
name
)))
extdir
=
os
.
path
.
abspath
(
os
.
path
.
dirname
(
self
.
get_ext_filename
(
ext
.
name
)))
# required for auto-detection of auxiliary "native" libs
if
not
extdir
.
endswith
(
os
.
path
.
sep
):
...
...
@@ -101,8 +105,12 @@ class CMakeBuild(build_ext):
if
_PADDLESPEECH_CUDA_ARCH_LIST
is
not
None
:
# Convert MAJOR.MINOR[+PTX] list to new style one
# defined at https://cmake.org/cmake/help/latest/prop_tgt/CUDA_ARCHITECTURES.html
_arches
=
_PADDLESPEECH_CUDA_ARCH_LIST
.
replace
(
"."
,
""
).
replace
(
" "
,
";"
).
split
(
";"
)
_arches
=
[
arch
[:
-
4
]
if
arch
.
endswith
(
"+PTX"
)
else
f
"
{
arch
}
-real"
for
arch
in
_arches
]
_arches
=
_PADDLESPEECH_CUDA_ARCH_LIST
.
replace
(
"."
,
""
).
replace
(
" "
,
";"
).
split
(
";"
)
_arches
=
[
arch
[:
-
4
]
if
arch
.
endswith
(
"+PTX"
)
else
f
"
{
arch
}
-real"
for
arch
in
_arches
]
cmake_args
+=
[
f
"-DCMAKE_CUDA_ARCHITECTURES=
{
';'
.
join
(
_arches
)
}
"
]
# Default to Ninja
...
...
@@ -131,10 +139,13 @@ class CMakeBuild(build_ext):
if
not
os
.
path
.
exists
(
self
.
build_temp
):
os
.
makedirs
(
self
.
build_temp
)
print
(
f
"cmake
{
_ROOT_DIR
}
{
' '
.
join
(
cmake_args
)
}
, cwd=
{
self
.
build_temp
}
"
)
subprocess
.
check_call
([
"cmake"
,
str
(
_ROOT_DIR
)]
+
cmake_args
,
cwd
=
self
.
build_temp
)
print
(
f
"cmake
{
_ROOT_DIR
}
{
' '
.
join
(
cmake_args
)
}
, cwd=
{
self
.
build_temp
}
"
)
subprocess
.
check_call
(
[
"cmake"
,
str
(
_ROOT_DIR
)]
+
cmake_args
,
cwd
=
self
.
build_temp
)
print
(
f
"cmake --build .
{
' '
.
join
(
build_args
)
}
, cwd=
{
self
.
build_temp
}
"
)
subprocess
.
check_call
([
"cmake"
,
"--build"
,
"."
]
+
build_args
,
cwd
=
self
.
build_temp
)
subprocess
.
check_call
(
[
"cmake"
,
"--build"
,
"."
]
+
build_args
,
cwd
=
self
.
build_temp
)
def
get_ext_filename
(
self
,
fullname
):
ext_filename
=
super
().
get_ext_filename
(
fullname
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录