Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
a3911ab5
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a3911ab5
编写于
6月 28, 2022
作者:
H
Hui Zhang
提交者:
GitHub
6月 28, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2089 from zh794390558/cpplint
[audio] format code
上级
fb7cbb34
fb1300f9
变更
32
展开全部
隐藏空白更改
内联
并排
Showing
32 changed file
with
2077 addition
and
1967 deletion
+2077
-1967
.pre-commit-config.yaml
.pre-commit-config.yaml
+2
-2
audio/audio/csrc/pybind/pybind.cpp
audio/audio/csrc/pybind/pybind.cpp
+6
-4
audio/audio/csrc/pybind/sox/io.cpp
audio/audio/csrc/pybind/sox/io.cpp
+45
-42
audio/audio/csrc/pybind/sox/io.h
audio/audio/csrc/pybind/sox/io.h
+2
-2
audio/audio/csrc/pybind/sox/utils.cpp
audio/audio/csrc/pybind/sox/utils.cpp
+66
-65
audio/audio/csrc/pybind/sox/utils.h
audio/audio/csrc/pybind/sox/utils.h
+15
-15
paddlespeech/__init__.py
paddlespeech/__init__.py
+0
-2
paddlespeech/audio/README.md
paddlespeech/audio/README.md
+1
-1
paddlespeech/audio/__init__.py
paddlespeech/audio/__init__.py
+1
-1
paddlespeech/audio/_class.py
paddlespeech/audio/_class.py
+4
-1
paddlespeech/audio/_extension.py
paddlespeech/audio/_extension.py
+3
-2
paddlespeech/audio/_internal/module_utils.py
paddlespeech/audio/_internal/module_utils.py
+18
-12
paddlespeech/audio/_ops.py
paddlespeech/audio/_ops.py
+5
-4
paddlespeech/audio/src/optional/optional.hpp
paddlespeech/audio/src/optional/optional.hpp
+1451
-1333
paddlespeech/audio/src/pybind/kaldi_frontend/feature_common.h
...lespeech/audio/src/pybind/kaldi_frontend/feature_common.h
+6
-11
paddlespeech/audio/src/pybind/kaldi_frontend/feature_common_inl.h
...eech/audio/src/pybind/kaldi_frontend/feature_common_inl.h
+7
-8
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.cc
...lespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.cc
+71
-70
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.h
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.h
+32
-33
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.cc
.../audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.cc
+24
-22
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h
...h/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h
+3
-7
paddlespeech/audio/src/pybind/pybind.cpp
paddlespeech/audio/src/pybind/pybind.cpp
+8
-6
paddlespeech/audio/src/pybind/sox/io.cpp
paddlespeech/audio/src/pybind/sox/io.cpp
+47
-44
paddlespeech/audio/src/pybind/sox/io.h
paddlespeech/audio/src/pybind/sox/io.h
+4
-4
paddlespeech/audio/src/pybind/sox/utils.cpp
paddlespeech/audio/src/pybind/sox/utils.cpp
+68
-67
paddlespeech/audio/src/pybind/sox/utils.h
paddlespeech/audio/src/pybind/sox/utils.h
+17
-17
paddlespeech/audio/src/sox/io.cpp
paddlespeech/audio/src/sox/io.cpp
+105
-112
paddlespeech/audio/src/sox/io.h
paddlespeech/audio/src/sox/io.h
+16
-19
paddlespeech/audio/src/utils.cpp
paddlespeech/audio/src/utils.cpp
+8
-8
setup.py
setup.py
+15
-42
speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py
speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py
+5
-0
tools/setup_helpers/__init__.py
tools/setup_helpers/__init__.py
+1
-1
tools/setup_helpers/extension.py
tools/setup_helpers/extension.py
+21
-10
未找到文件。
.pre-commit-config.yaml
浏览文件 @
a3911ab5
...
@@ -5,7 +5,7 @@ repos:
...
@@ -5,7 +5,7 @@ repos:
-
id
:
yapf
-
id
:
yapf
files
:
\.py$
files
:
\.py$
exclude
:
(?=third_party).*(\.py)$
exclude
:
(?=third_party).*(\.py)$
-
repo
:
https://github.com/pre-commit/pre-commit-hooks
-
repo
:
https://github.com/pre-commit/pre-commit-hooks
rev
:
a11d9314b22d8f8c7556443875b731ef05965464
rev
:
a11d9314b22d8f8c7556443875b731ef05965464
hooks
:
hooks
:
...
@@ -76,4 +76,4 @@ repos:
...
@@ -76,4 +76,4 @@ repos:
entry
:
bash .pre-commit-hooks/cpplint.hook
entry
:
bash .pre-commit-hooks/cpplint.hook
language
:
system
language
:
system
files
:
\.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
files
:
\.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
exclude
:
(?=speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin).*(\.cpp|\.cc|\.h|\.py)$
exclude
:
(?=speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|paddlespeech/audio/src/optional).*(\.cpp|\.cc|\.h|\.hpp)$
\ No newline at end of file
\ No newline at end of file
audio/audio/csrc/pybind/pybind.cpp
浏览文件 @
a3911ab5
#include "pybind/sox/io.h"
#include "pybind/sox/io.h"
PYBIND11_MODULE
(
_paddleaudio
,
m
)
{
PYBIND11_MODULE
(
_paddleaudio
,
m
)
{
m
.
def
(
"get_info_file"
,
&
paddleaudio
::
sox_io
::
get_info_file
,
m
.
def
(
"get_info_file"
,
"Get metadata of audio file."
);
&
paddleaudio
::
sox_io
::
get_info_file
,
m
.
def
(
"get_info_fileobj"
,
&
paddleaudio
::
sox_io
::
get_info_fileobj
,
"Get metadata of audio file."
);
"Get metadata of audio in file object."
);
m
.
def
(
"get_info_fileobj"
,
&
paddleaudio
::
sox_io
::
get_info_fileobj
,
"Get metadata of audio in file object."
);
}
}
\ No newline at end of file
audio/audio/csrc/pybind/sox/io.cpp
浏览文件 @
a3911ab5
...
@@ -8,51 +8,54 @@ namespace sox_io {
...
@@ -8,51 +8,54 @@ namespace sox_io {
auto
get_info_file
(
const
std
::
string
&
path
,
const
std
::
string
&
format
)
auto
get_info_file
(
const
std
::
string
&
path
,
const
std
::
string
&
format
)
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
{
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
{
SoxFormat
sf
(
sox_open_read
(
path
.
data
(),
SoxFormat
sf
(
/*signal=*/
nullptr
,
sox_open_read
(
path
.
data
(),
/*encoding=*/
nullptr
,
/*signal=*/
nullptr
,
/*filetype=*/
format
.
empty
()
?
nullptr
:
format
.
data
()));
/*encoding=*/
nullptr
,
/*filetype=*/
format
.
empty
()
?
nullptr
:
format
.
data
()));
validate_input_file
(
sf
,
path
);
validate_input_file
(
sf
,
path
);
return
std
::
make_tuple
(
static_cast
<
int64_t
>
(
sf
->
signal
.
rate
),
return
std
::
make_tuple
(
static_cast
<
int64_t
>
(
sf
->
signal
.
length
/
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
signal
.
rate
),
static_cast
<
int64_t
>
(
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
signal
.
length
/
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
encoding
.
bits_per_sample
),
static_cast
<
int64_t
>
(
sf
->
signal
.
channels
),
get_encoding
(
sf
->
encoding
.
encoding
));
static_cast
<
int64_t
>
(
sf
->
encoding
.
bits_per_sample
),
get_encoding
(
sf
->
encoding
.
encoding
));
}
}
auto
get_info_fileobj
(
py
::
object
fileobj
,
const
std
::
string
&
format
)
auto
get_info_fileobj
(
py
::
object
fileobj
,
const
std
::
string
&
format
)
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
{
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
{
const
auto
capacity
=
[
&
]()
{
const
auto
capacity
=
[
&
]()
{
const
auto
bufsiz
=
get_buffer_size
();
const
auto
bufsiz
=
get_buffer_size
();
const
int64_t
kDefaultCapacityInBytes
=
4096
;
const
int64_t
kDefaultCapacityInBytes
=
4096
;
return
(
bufsiz
>
kDefaultCapacityInBytes
)
?
bufsiz
return
(
bufsiz
>
kDefaultCapacityInBytes
)
?
bufsiz
:
kDefaultCapacityInBytes
;
:
kDefaultCapacityInBytes
;
}();
}();
std
::
string
buffer
(
capacity
,
'\0'
);
std
::
string
buffer
(
capacity
,
'\0'
);
auto
*
buf
=
const_cast
<
char
*>
(
buffer
.
data
());
auto
*
buf
=
const_cast
<
char
*>
(
buffer
.
data
());
auto
num_read
=
read_fileobj
(
&
fileobj
,
capacity
,
buf
);
auto
num_read
=
read_fileobj
(
&
fileobj
,
capacity
,
buf
);
// If the file is shorter than 256, then libsox cannot read the header.
// If the file is shorter than 256, then libsox cannot read the header.
auto
buf_size
=
(
num_read
>
256
)
?
num_read
:
256
;
auto
buf_size
=
(
num_read
>
256
)
?
num_read
:
256
;
SoxFormat
sf
(
sox_open_mem_read
(
buf
,
buf_size
,
SoxFormat
sf
(
sox_open_mem_read
(
/*signal=*/
nullptr
,
buf
,
/*encoding=*/
nullptr
,
buf_size
,
/*filetype=*/
format
.
empty
()
?
nullptr
:
format
.
data
()));
/*signal=*/
nullptr
,
/*encoding=*/
nullptr
,
// In case of streamed data, length can be 0
/*filetype=*/
format
.
empty
()
?
nullptr
:
format
.
data
()));
validate_input_memfile
(
sf
);
// In case of streamed data, length can be 0
return
std
::
make_tuple
(
validate_input_memfile
(
sf
);
static_cast
<
int64_t
>
(
sf
->
signal
.
rate
),
static_cast
<
int64_t
>
(
sf
->
signal
.
length
/
sf
->
signal
.
channels
),
return
std
::
make_tuple
(
static_cast
<
int64_t
>
(
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
signal
.
rate
),
static_cast
<
int64_t
>
(
sf
->
encoding
.
bits_per_sample
),
static_cast
<
int64_t
>
(
sf
->
signal
.
length
/
sf
->
signal
.
channels
),
get_encoding
(
sf
->
encoding
.
encoding
));
static_cast
<
int64_t
>
(
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
encoding
.
bits_per_sample
),
get_encoding
(
sf
->
encoding
.
encoding
));
}
}
}
// namespace paddleaudio
}
// namespace paddleaudio
}
// namespace sox_io
}
// namespace sox_io
audio/audio/csrc/pybind/sox/io.h
浏览文件 @
a3911ab5
...
@@ -12,7 +12,7 @@ auto get_info_file(const std::string &path, const std::string &format)
...
@@ -12,7 +12,7 @@ auto get_info_file(const std::string &path, const std::string &format)
auto
get_info_fileobj
(
py
::
object
fileobj
,
const
std
::
string
&
format
)
auto
get_info_fileobj
(
py
::
object
fileobj
,
const
std
::
string
&
format
)
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
;
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
;
}
// namespace paddleaudio
}
// namespace paddleaudio
}
// namespace sox_io
}
// namespace sox_io
#endif
#endif
audio/audio/csrc/pybind/sox/utils.cpp
浏览文件 @
a3911ab5
...
@@ -12,86 +12,87 @@ sox_format_t *SoxFormat::operator->() const noexcept { return fd_; }
...
@@ -12,86 +12,87 @@ sox_format_t *SoxFormat::operator->() const noexcept { return fd_; }
SoxFormat
::
operator
sox_format_t
*
()
const
noexcept
{
return
fd_
;
}
SoxFormat
::
operator
sox_format_t
*
()
const
noexcept
{
return
fd_
;
}
void
SoxFormat
::
close
()
{
void
SoxFormat
::
close
()
{
if
(
fd_
!=
nullptr
)
{
if
(
fd_
!=
nullptr
)
{
sox_close
(
fd_
);
sox_close
(
fd_
);
fd_
=
nullptr
;
fd_
=
nullptr
;
}
}
}
}
auto
read_fileobj
(
py
::
object
*
fileobj
,
const
uint64_t
size
,
char
*
buffer
)
auto
read_fileobj
(
py
::
object
*
fileobj
,
const
uint64_t
size
,
char
*
buffer
)
->
uint64_t
{
->
uint64_t
{
uint64_t
num_read
=
0
;
uint64_t
num_read
=
0
;
while
(
num_read
<
size
)
{
while
(
num_read
<
size
)
{
auto
request
=
size
-
num_read
;
auto
request
=
size
-
num_read
;
auto
chunk
=
static_cast
<
std
::
string
>
(
auto
chunk
=
static_cast
<
std
::
string
>
(
static_cast
<
py
::
bytes
>
(
fileobj
->
attr
(
"read"
)(
request
)));
static_cast
<
py
::
bytes
>
(
fileobj
->
attr
(
"read"
)(
request
)));
auto
chunk_len
=
chunk
.
length
();
auto
chunk_len
=
chunk
.
length
();
if
(
chunk_len
==
0
)
{
if
(
chunk_len
==
0
)
{
break
;
break
;
}
}
if
(
chunk_len
>
request
)
{
if
(
chunk_len
>
request
)
{
std
::
ostringstream
message
;
std
::
ostringstream
message
;
message
<<
"Requested up to "
<<
request
<<
" bytes but, "
message
<<
"received "
<<
chunk_len
<<
" bytes. "
<<
"Requested up to "
<<
request
<<
" bytes but, "
<<
"The given object does not confirm to read protocol of file "
<<
"received "
<<
chunk_len
<<
" bytes. "
"object."
;
<<
"The given object does not confirm to read protocol of file "
throw
std
::
runtime_error
(
message
.
str
());
"object."
;
throw
std
::
runtime_error
(
message
.
str
());
}
memcpy
(
buffer
,
chunk
.
data
(),
chunk_len
);
buffer
+=
chunk_len
;
num_read
+=
chunk_len
;
}
}
memcpy
(
buffer
,
chunk
.
data
(),
chunk_len
);
return
num_read
;
buffer
+=
chunk_len
;
num_read
+=
chunk_len
;
}
return
num_read
;
}
}
int64_t
get_buffer_size
()
{
return
sox_get_globals
()
->
bufsiz
;
}
int64_t
get_buffer_size
()
{
return
sox_get_globals
()
->
bufsiz
;
}
void
validate_input_file
(
const
SoxFormat
&
sf
,
const
std
::
string
&
path
)
{
void
validate_input_file
(
const
SoxFormat
&
sf
,
const
std
::
string
&
path
)
{
if
(
static_cast
<
sox_format_t
*>
(
sf
)
==
nullptr
)
{
if
(
static_cast
<
sox_format_t
*>
(
sf
)
==
nullptr
)
{
throw
std
::
runtime_error
(
"Error loading audio file: failed to open file "
+
throw
std
::
runtime_error
(
path
);
"Error loading audio file: failed to open file "
+
path
);
}
}
if
(
sf
->
encoding
.
encoding
==
SOX_ENCODING_UNKNOWN
)
{
if
(
sf
->
encoding
.
encoding
==
SOX_ENCODING_UNKNOWN
)
{
throw
std
::
runtime_error
(
"Error loading audio file: unknown encoding."
);
throw
std
::
runtime_error
(
"Error loading audio file: unknown encoding."
);
}
}
}
}
void
validate_input_memfile
(
const
SoxFormat
&
sf
)
{
void
validate_input_memfile
(
const
SoxFormat
&
sf
)
{
return
validate_input_file
(
sf
,
"<in memory buffer>"
);
return
validate_input_file
(
sf
,
"<in memory buffer>"
);
}
}
std
::
string
get_encoding
(
sox_encoding_t
encoding
)
{
std
::
string
get_encoding
(
sox_encoding_t
encoding
)
{
switch
(
encoding
)
{
switch
(
encoding
)
{
case
SOX_ENCODING_UNKNOWN
:
case
SOX_ENCODING_UNKNOWN
:
return
"UNKNOWN"
;
return
"UNKNOWN"
;
case
SOX_ENCODING_SIGN2
:
case
SOX_ENCODING_SIGN2
:
return
"PCM_S"
;
return
"PCM_S"
;
case
SOX_ENCODING_UNSIGNED
:
case
SOX_ENCODING_UNSIGNED
:
return
"PCM_U"
;
return
"PCM_U"
;
case
SOX_ENCODING_FLOAT
:
case
SOX_ENCODING_FLOAT
:
return
"PCM_F"
;
return
"PCM_F"
;
case
SOX_ENCODING_FLAC
:
case
SOX_ENCODING_FLAC
:
return
"FLAC"
;
return
"FLAC"
;
case
SOX_ENCODING_ULAW
:
case
SOX_ENCODING_ULAW
:
return
"ULAW"
;
return
"ULAW"
;
case
SOX_ENCODING_ALAW
:
case
SOX_ENCODING_ALAW
:
return
"ALAW"
;
return
"ALAW"
;
case
SOX_ENCODING_MP3
:
case
SOX_ENCODING_MP3
:
return
"MP3"
;
return
"MP3"
;
case
SOX_ENCODING_VORBIS
:
case
SOX_ENCODING_VORBIS
:
return
"VORBIS"
;
return
"VORBIS"
;
case
SOX_ENCODING_AMR_WB
:
case
SOX_ENCODING_AMR_WB
:
return
"AMR_WB"
;
return
"AMR_WB"
;
case
SOX_ENCODING_AMR_NB
:
case
SOX_ENCODING_AMR_NB
:
return
"AMR_NB"
;
return
"AMR_NB"
;
case
SOX_ENCODING_OPUS
:
case
SOX_ENCODING_OPUS
:
return
"OPUS"
;
return
"OPUS"
;
case
SOX_ENCODING_GSM
:
case
SOX_ENCODING_GSM
:
return
"GSM"
;
return
"GSM"
;
default:
default:
return
"UNKNOWN"
;
return
"UNKNOWN"
;
}
}
}
}
}
// namespace paddleaudio
}
// namespace paddleaudio
}
// namespace sox_utils
}
// namespace sox_utils
audio/audio/csrc/pybind/sox/utils.h
浏览文件 @
a3911ab5
...
@@ -11,19 +11,19 @@ namespace sox_utils {
...
@@ -11,19 +11,19 @@ namespace sox_utils {
/// helper class to automatically close sox_format_t*
/// helper class to automatically close sox_format_t*
struct
SoxFormat
{
struct
SoxFormat
{
explicit
SoxFormat
(
sox_format_t
*
fd
)
noexcept
;
explicit
SoxFormat
(
sox_format_t
*
fd
)
noexcept
;
SoxFormat
(
const
SoxFormat
&
other
)
=
delete
;
SoxFormat
(
const
SoxFormat
&
other
)
=
delete
;
SoxFormat
(
SoxFormat
&&
other
)
=
delete
;
SoxFormat
(
SoxFormat
&&
other
)
=
delete
;
SoxFormat
&
operator
=
(
const
SoxFormat
&
other
)
=
delete
;
SoxFormat
&
operator
=
(
const
SoxFormat
&
other
)
=
delete
;
SoxFormat
&
operator
=
(
SoxFormat
&&
other
)
=
delete
;
SoxFormat
&
operator
=
(
SoxFormat
&&
other
)
=
delete
;
~
SoxFormat
();
~
SoxFormat
();
sox_format_t
*
operator
->
()
const
noexcept
;
sox_format_t
*
operator
->
()
const
noexcept
;
operator
sox_format_t
*
()
const
noexcept
;
operator
sox_format_t
*
()
const
noexcept
;
void
close
();
void
close
();
private:
private:
sox_format_t
*
fd_
;
sox_format_t
*
fd_
;
};
};
auto
read_fileobj
(
py
::
object
*
fileobj
,
uint64_t
size
,
char
*
buffer
)
->
uint64_t
;
auto
read_fileobj
(
py
::
object
*
fileobj
,
uint64_t
size
,
char
*
buffer
)
->
uint64_t
;
...
@@ -36,7 +36,7 @@ void validate_input_memfile(const SoxFormat &sf);
...
@@ -36,7 +36,7 @@ void validate_input_memfile(const SoxFormat &sf);
std
::
string
get_encoding
(
sox_encoding_t
encoding
);
std
::
string
get_encoding
(
sox_encoding_t
encoding
);
}
// namespace paddleaudio
}
// namespace paddleaudio
}
// namespace sox_utils
}
// namespace sox_utils
#endif
#endif
paddlespeech/__init__.py
浏览文件 @
a3911ab5
...
@@ -14,5 +14,3 @@
...
@@ -14,5 +14,3 @@
import
_locale
import
_locale
_locale
.
_getdefaultlocale
=
(
lambda
*
args
:
[
'en_US'
,
'utf8'
])
_locale
.
_getdefaultlocale
=
(
lambda
*
args
:
[
'en_US'
,
'utf8'
])
paddlespeech/audio/README.md
浏览文件 @
a3911ab5
...
@@ -28,4 +28,4 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
...
@@ -28,4 +28,4 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
```
```
\ No newline at end of file
paddlespeech/audio/__init__.py
浏览文件 @
a3911ab5
...
@@ -19,4 +19,4 @@ from . import io
...
@@ -19,4 +19,4 @@ from . import io
from
.
import
metric
from
.
import
metric
from
.
import
sox_effects
from
.
import
sox_effects
from
.backends
import
load
from
.backends
import
load
from
.backends
import
save
from
.backends
import
save
\ No newline at end of file
paddlespeech/audio/_class.py
浏览文件 @
a3911ab5
import
types
import
types
class
_ClassNamespace
(
types
.
ModuleType
):
class
_ClassNamespace
(
types
.
ModuleType
):
def
__init__
(
self
,
name
):
def
__init__
(
self
,
name
):
super
(
_ClassNamespace
,
self
).
__init__
(
'paddlespeech.classes'
+
name
)
super
(
_ClassNamespace
,
self
).
__init__
(
'paddlespeech.classes'
+
name
)
...
@@ -11,6 +12,7 @@ class _ClassNamespace(types.ModuleType):
...
@@ -11,6 +12,7 @@ class _ClassNamespace(types.ModuleType):
raise
RuntimeError
(
f
'Class
{
self
.
name
}
.
{
attr
}
not registered!'
)
raise
RuntimeError
(
f
'Class
{
self
.
name
}
.
{
attr
}
not registered!'
)
return
proxy
return
proxy
class
_Classes
(
types
.
ModuleType
):
class
_Classes
(
types
.
ModuleType
):
__file__
=
'_classes.py'
__file__
=
'_classes.py'
...
@@ -43,5 +45,6 @@ class _Classes(types.ModuleType):
...
@@ -43,5 +45,6 @@ class _Classes(types.ModuleType):
"""
"""
paddlespeech
.
ops
.
load_library
(
path
)
paddlespeech
.
ops
.
load_library
(
path
)
# The classes "namespace"
# The classes "namespace"
classes
=
_Classes
()
classes
=
_Classes
()
\ No newline at end of file
paddlespeech/audio/_extension.py
浏览文件 @
a3911ab5
...
@@ -64,7 +64,8 @@ def _init_ffmpeg():
...
@@ -64,7 +64,8 @@ def _init_ffmpeg():
try
:
try
:
_load_lib
(
"libpaddlleaudio_ffmpeg"
)
_load_lib
(
"libpaddlleaudio_ffmpeg"
)
except
OSError
as
err
:
except
OSError
as
err
:
raise
ImportError
(
"FFmpeg libraries are not found. Please install FFmpeg."
)
from
err
raise
ImportError
(
"FFmpeg libraries are not found. Please install FFmpeg."
)
from
err
import
paddllespeech
.
_paddlleaudio_ffmpeg
# noqa
import
paddllespeech
.
_paddlleaudio_ffmpeg
# noqa
...
@@ -95,4 +96,4 @@ def _init_extension():
...
@@ -95,4 +96,4 @@ def _init_extension():
pass
pass
_init_extension
()
_init_extension
()
\ No newline at end of file
paddlespeech/audio/_internal/module_utils.py
浏览文件 @
a3911ab5
...
@@ -3,6 +3,7 @@ import warnings
...
@@ -3,6 +3,7 @@ import warnings
from
functools
import
wraps
from
functools
import
wraps
from
typing
import
Optional
from
typing
import
Optional
def
is_module_available
(
*
modules
:
str
)
->
bool
:
def
is_module_available
(
*
modules
:
str
)
->
bool
:
r
"""Returns if a top-level module with :attr:`name` exists *without**
r
"""Returns if a top-level module with :attr:`name` exists *without**
importing it. This is generally safer than try-catch block around a
importing it. This is generally safer than try-catch block around a
...
@@ -26,19 +27,21 @@ def requires_module(*modules: str):
...
@@ -26,19 +27,21 @@ def requires_module(*modules: str):
return
func
return
func
else
:
else
:
req
=
f
"module:
{
missing
[
0
]
}
"
if
len
(
missing
)
==
1
else
f
"modules:
{
missing
}
"
req
=
f
"module:
{
missing
[
0
]
}
"
if
len
(
missing
)
==
1
else
f
"modules:
{
missing
}
"
def
decorator
(
func
):
def
decorator
(
func
):
@
wraps
(
func
)
@
wraps
(
func
)
def
wrapped
(
*
args
,
**
kwargs
):
def
wrapped
(
*
args
,
**
kwargs
):
raise
RuntimeError
(
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
requires
{
req
}
"
)
raise
RuntimeError
(
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
requires
{
req
}
"
)
return
wrapped
return
wrapped
return
decorator
return
decorator
def
deprecated
(
direction
:
str
,
version
:
Optional
[
str
]
=
None
):
def
deprecated
(
direction
:
str
,
version
:
Optional
[
str
]
=
None
):
"""Decorator to add deprecation message
"""Decorator to add deprecation message
Args:
Args:
direction (str): Migration steps to be given to users.
direction (str): Migration steps to be given to users.
...
@@ -51,8 +54,7 @@ def deprecated(direction: str, version: Optional[str] = None):
...
@@ -51,8 +54,7 @@ def deprecated(direction: str, version: Optional[str] = None):
message
=
(
message
=
(
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
has been deprecated "
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
has been deprecated "
f
'and will be removed from
{
"future"
if
version
is
None
else
version
}
release. '
f
'and will be removed from
{
"future"
if
version
is
None
else
version
}
release. '
f
"
{
direction
}
"
f
"
{
direction
}
"
)
)
warnings
.
warn
(
message
,
stacklevel
=
2
)
warnings
.
warn
(
message
,
stacklevel
=
2
)
return
func
(
*
args
,
**
kwargs
)
return
func
(
*
args
,
**
kwargs
)
...
@@ -62,7 +64,7 @@ def deprecated(direction: str, version: Optional[str] = None):
...
@@ -62,7 +64,7 @@ def deprecated(direction: str, version: Optional[str] = None):
def
is_kaldi_available
():
def
is_kaldi_available
():
return
is_module_available
(
"paddlespeech
"
.
_paddleaudio
") and paddlespeech.ops.paddleaudio.is_kaldi_available(
)
return
is_module_available
(
"paddlespeech
.audio._paddleaudio"
)
def
requires_kaldi
():
def
requires_kaldi
():
...
@@ -76,7 +78,8 @@ def requires_kaldi():
...
@@ -76,7 +78,8 @@ def requires_kaldi():
def
decorator
(
func
):
def
decorator
(
func
):
@
wraps
(
func
)
@
wraps
(
func
)
def
wrapped
(
*
args
,
**
kwargs
):
def
wrapped
(
*
args
,
**
kwargs
):
raise RuntimeError(f"
{
func
.
__module__
}.{
func
.
__name__
}
requires
kaldi
")
raise
RuntimeError
(
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
requires kaldi"
)
return
wrapped
return
wrapped
...
@@ -91,7 +94,8 @@ def _check_soundfile_importable():
...
@@ -91,7 +94,8 @@ def _check_soundfile_importable():
return
True
return
True
except
Exception
:
except
Exception
:
warnings.warn("
Failed
to
import
soundfile.
'soundfile'
backend
is
not
available
.
")
warnings
.
warn
(
"Failed to import soundfile. 'soundfile' backend is not available."
)
return
False
return
False
...
@@ -113,7 +117,8 @@ def requires_soundfile():
...
@@ -113,7 +117,8 @@ def requires_soundfile():
def
decorator
(
func
):
def
decorator
(
func
):
@
wraps
(
func
)
@
wraps
(
func
)
def
wrapped
(
*
args
,
**
kwargs
):
def
wrapped
(
*
args
,
**
kwargs
):
raise RuntimeError(f"
{
func
.
__module__
}.{
func
.
__name__
}
requires
soundfile
")
raise
RuntimeError
(
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
requires soundfile"
)
return
wrapped
return
wrapped
...
@@ -121,7 +126,7 @@ def requires_soundfile():
...
@@ -121,7 +126,7 @@ def requires_soundfile():
def
is_sox_available
():
def
is_sox_available
():
return is_module_available("
paddlespeech
.
_paddleaudio
") and paddlespeech.ops.paddleaudio.is_sox_available(
)
return
is_module_available
(
"paddlespeech.
audio._paddleaudio"
)
def
requires_sox
():
def
requires_sox
():
...
@@ -135,8 +140,9 @@ def requires_sox():
...
@@ -135,8 +140,9 @@ def requires_sox():
def
decorator
(
func
):
def
decorator
(
func
):
@
wraps
(
func
)
@
wraps
(
func
)
def
wrapped
(
*
args
,
**
kwargs
):
def
wrapped
(
*
args
,
**
kwargs
):
raise RuntimeError(f"
{
func
.
__module__
}.{
func
.
__name__
}
requires
sox
")
raise
RuntimeError
(
f
"
{
func
.
__module__
}
.
{
func
.
__name__
}
requires sox"
)
return
wrapped
return
wrapped
return
return
\ No newline at end of file
paddlespeech/audio/_ops.py
浏览文件 @
a3911ab5
import
contextlib
import
contextlib
import
ctypes
import
ctypes
import
sys
import
os
import
os
import
sys
import
types
import
types
# Query `hasattr` only once.
# Query `hasattr` only once.
_SET_GLOBAL_FLAGS
=
hasattr
(
sys
,
'getdlopenflags'
)
and
hasattr
(
sys
,
'setdlopenflags'
)
_SET_GLOBAL_FLAGS
=
hasattr
(
sys
,
'getdlopenflags'
)
and
hasattr
(
sys
,
'setdlopenflags'
)
@
contextlib
.
contextmanager
@
contextlib
.
contextmanager
...
@@ -22,7 +23,7 @@ def dl_open_guard():
...
@@ -22,7 +23,7 @@ def dl_open_guard():
if
_SET_GLOBAL_FLAGS
:
if
_SET_GLOBAL_FLAGS
:
sys
.
setdlopenflags
(
old_flags
)
sys
.
setdlopenflags
(
old_flags
)
def
resolve_library_path
(
path
:
str
)
->
str
:
def
resolve_library_path
(
path
:
str
)
->
str
:
return
os
.
path
.
realpath
(
path
)
return
os
.
path
.
realpath
(
path
)
...
@@ -59,4 +60,4 @@ class _Ops(types.ModuleType):
...
@@ -59,4 +60,4 @@ class _Ops(types.ModuleType):
# The ops "namespace"
# The ops "namespace"
ops
=
_Ops
()
ops
=
_Ops
()
\ No newline at end of file
paddlespeech/audio/src/optional/optional.hpp
浏览文件 @
a3911ab5
此差异已折叠。
点击以展开。
paddlespeech/audio/src/pybind/kaldi_frontend/feature_common.h
浏览文件 @
a3911ab5
...
@@ -14,9 +14,9 @@
...
@@ -14,9 +14,9 @@
#pragma once
#pragma once
#include "feat/feature-window.h"
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include "feat/feature-window.h"
namespace
paddleaudio
{
namespace
paddleaudio
{
...
@@ -27,18 +27,14 @@ class StreamingFeatureTpl {
...
@@ -27,18 +27,14 @@ class StreamingFeatureTpl {
public:
public:
typedef
typename
F
::
Options
Options
;
typedef
typename
F
::
Options
Options
;
StreamingFeatureTpl
(
const
Options
&
opts
);
StreamingFeatureTpl
(
const
Options
&
opts
);
bool
ComputeFeature
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
wav
,
bool
ComputeFeature
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
wav
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
void
Reset
()
{
void
Reset
()
{
remained_wav_
.
Resize
(
0
);
}
remained_wav_
.
Resize
(
0
);
}
int
Dim
()
{
int
Dim
()
{
return
computer_
.
Dim
();
}
return
computer_
.
Dim
();
}
private:
private:
bool
Compute
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
waves
,
bool
Compute
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
waves
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
Options
opts_
;
Options
opts_
;
kaldi
::
FeatureWindowFunction
window_function_
;
kaldi
::
FeatureWindowFunction
window_function_
;
...
@@ -49,4 +45,3 @@ class StreamingFeatureTpl {
...
@@ -49,4 +45,3 @@ class StreamingFeatureTpl {
}
// namespace ppspeech
}
// namespace ppspeech
#include "feature_common_inl.h"
#include "feature_common_inl.h"
paddlespeech/audio/src/pybind/kaldi_frontend/feature_common_inl.h
浏览文件 @
a3911ab5
...
@@ -17,16 +17,15 @@
...
@@ -17,16 +17,15 @@
namespace
paddleaudio
{
namespace
paddleaudio
{
template
<
class
F
>
template
<
class
F
>
StreamingFeatureTpl
<
F
>::
StreamingFeatureTpl
(
StreamingFeatureTpl
<
F
>::
StreamingFeatureTpl
(
const
Options
&
opts
)
const
Options
&
opts
)
:
opts_
(
opts
),
computer_
(
opts
),
window_function_
(
opts
.
frame_opts
)
{
:
opts_
(
opts
),
computer_
(
opts
),
// window_function_(computer_.GetFrameOptions()) { the opt set to zero
window_function_
(
opts
.
frame_opts
)
{
//window_function_(computer_.GetFrameOptions()) { the opt set to zero
}
}
template
<
class
F
>
template
<
class
F
>
bool
StreamingFeatureTpl
<
F
>::
ComputeFeature
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
wav
,
bool
StreamingFeatureTpl
<
F
>::
ComputeFeature
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
)
{
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
wav
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
)
{
// append remaned waves
// append remaned waves
kaldi
::
int32
wav_len
=
wav
.
Dim
();
kaldi
::
int32
wav_len
=
wav
.
Dim
();
if
(
wav_len
==
0
)
return
false
;
if
(
wav_len
==
0
)
return
false
;
...
@@ -61,7 +60,7 @@ bool StreamingFeatureTpl<F>::Compute(
...
@@ -61,7 +60,7 @@ bool StreamingFeatureTpl<F>::Compute(
kaldi
::
int32
frame_length
=
frame_opts
.
WindowSize
();
kaldi
::
int32
frame_length
=
frame_opts
.
WindowSize
();
kaldi
::
int32
sample_rate
=
frame_opts
.
samp_freq
;
kaldi
::
int32
sample_rate
=
frame_opts
.
samp_freq
;
if
(
num_samples
<
frame_length
)
{
if
(
num_samples
<
frame_length
)
{
return
false
;
return
false
;
}
}
kaldi
::
int32
num_frames
=
kaldi
::
NumFrames
(
num_samples
,
frame_opts
);
kaldi
::
int32
num_frames
=
kaldi
::
NumFrames
(
num_samples
,
frame_opts
);
...
...
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.cc
浏览文件 @
a3911ab5
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include "kaldi_feature_wrapper.h"
#include "kaldi_feature_wrapper.h"
namespace
py
=
pybind11
;
namespace
py
=
pybind11
;
bool
InitFbank
(
bool
InitFbank
(
float
samp_freq
,
// frame opts
float
samp_freq
,
// frame opts
float
frame_shift_ms
,
float
frame_shift_ms
,
float
frame_length_ms
,
float
frame_length_ms
,
float
dither
,
float
dither
,
float
preemph_coeff
,
float
preemph_coeff
,
bool
remove_dc_offset
,
bool
remove_dc_offset
,
std
::
string
window_type
,
// e.g. Hamming window
std
::
string
window_type
,
// e.g. Hamming window
bool
round_to_power_of_two
,
bool
round_to_power_of_two
,
float
blackman_coeff
,
float
blackman_coeff
,
bool
snip_edges
,
bool
snip_edges
,
bool
allow_downsample
,
bool
allow_downsample
,
bool
allow_upsample
,
bool
allow_upsample
,
int
max_feature_vectors
,
int
max_feature_vectors
,
int
num_bins
,
// mel opts
int
num_bins
,
// mel opts
float
low_freq
,
float
low_freq
,
float
high_freq
,
float
high_freq
,
float
vtln_low
,
float
vtln_low
,
float
vtln_high
,
float
vtln_high
,
bool
debug_mel
,
bool
debug_mel
,
bool
htk_mode
,
bool
htk_mode
,
bool
use_energy
,
// fbank opts
bool
use_energy
,
// fbank opts
float
energy_floor
,
float
energy_floor
,
bool
raw_energy
,
bool
raw_energy
,
bool
htk_compat
,
bool
htk_compat
,
bool
use_log_fbank
,
bool
use_log_fbank
,
bool
use_power
)
{
bool
use_power
)
{
kaldi
::
FbankOptions
opts
;
kaldi
::
FbankOptions
opts
;
opts
.
frame_opts
.
samp_freq
=
samp_freq
;
// frame opts
opts
.
frame_opts
.
samp_freq
=
samp_freq
;
// frame opts
opts
.
frame_opts
.
frame_shift_ms
=
frame_shift_ms
;
opts
.
frame_opts
.
frame_shift_ms
=
frame_shift_ms
;
opts
.
frame_opts
.
frame_length_ms
=
frame_length_ms
;
opts
.
frame_opts
.
frame_length_ms
=
frame_length_ms
;
opts
.
frame_opts
.
dither
=
dither
;
opts
.
frame_opts
.
dither
=
dither
;
opts
.
frame_opts
.
preemph_coeff
=
preemph_coeff
;
opts
.
frame_opts
.
preemph_coeff
=
preemph_coeff
;
opts
.
frame_opts
.
remove_dc_offset
=
remove_dc_offset
;
opts
.
frame_opts
.
remove_dc_offset
=
remove_dc_offset
;
opts
.
frame_opts
.
window_type
=
window_type
;
opts
.
frame_opts
.
window_type
=
window_type
;
opts
.
frame_opts
.
round_to_power_of_two
=
round_to_power_of_two
;
opts
.
frame_opts
.
round_to_power_of_two
=
round_to_power_of_two
;
opts
.
frame_opts
.
blackman_coeff
=
blackman_coeff
;
opts
.
frame_opts
.
blackman_coeff
=
blackman_coeff
;
opts
.
frame_opts
.
snip_edges
=
snip_edges
;
opts
.
frame_opts
.
snip_edges
=
snip_edges
;
...
@@ -48,7 +47,7 @@ bool InitFbank(
...
@@ -48,7 +47,7 @@ bool InitFbank(
opts
.
frame_opts
.
allow_upsample
=
allow_upsample
;
opts
.
frame_opts
.
allow_upsample
=
allow_upsample
;
opts
.
frame_opts
.
max_feature_vectors
=
max_feature_vectors
;
opts
.
frame_opts
.
max_feature_vectors
=
max_feature_vectors
;
opts
.
mel_opts
.
num_bins
=
num_bins
;
// mel opts
opts
.
mel_opts
.
num_bins
=
num_bins
;
// mel opts
opts
.
mel_opts
.
low_freq
=
low_freq
;
opts
.
mel_opts
.
low_freq
=
low_freq
;
opts
.
mel_opts
.
high_freq
=
high_freq
;
opts
.
mel_opts
.
high_freq
=
high_freq
;
opts
.
mel_opts
.
vtln_low
=
vtln_low
;
opts
.
mel_opts
.
vtln_low
=
vtln_low
;
...
@@ -56,7 +55,7 @@ bool InitFbank(
...
@@ -56,7 +55,7 @@ bool InitFbank(
opts
.
mel_opts
.
debug_mel
=
debug_mel
;
opts
.
mel_opts
.
debug_mel
=
debug_mel
;
opts
.
mel_opts
.
htk_mode
=
htk_mode
;
opts
.
mel_opts
.
htk_mode
=
htk_mode
;
opts
.
use_energy
=
use_energy
;
// fbank opts
opts
.
use_energy
=
use_energy
;
// fbank opts
opts
.
energy_floor
=
energy_floor
;
opts
.
energy_floor
=
energy_floor
;
opts
.
raw_energy
=
raw_energy
;
opts
.
raw_energy
=
raw_energy
;
opts
.
htk_compat
=
htk_compat
;
opts
.
htk_compat
=
htk_compat
;
...
@@ -67,71 +66,71 @@ bool InitFbank(
...
@@ -67,71 +66,71 @@ bool InitFbank(
}
}
py
::
array_t
<
double
>
ComputeFbankStreaming
(
const
py
::
array_t
<
double
>&
wav
)
{
py
::
array_t
<
double
>
ComputeFbankStreaming
(
const
py
::
array_t
<
double
>&
wav
)
{
return
paddleaudio
::
KaldiFeatureWrapper
::
GetInstance
()
->
ComputeFbank
(
wav
);
return
paddleaudio
::
KaldiFeatureWrapper
::
GetInstance
()
->
ComputeFbank
(
wav
);
}
}
py
::
array_t
<
double
>
ComputeFbank
(
py
::
array_t
<
double
>
ComputeFbank
(
float
samp_freq
,
// frame opts
float
samp_freq
,
// frame opts
float
frame_shift_ms
,
float
frame_shift_ms
,
float
frame_length_ms
,
float
frame_length_ms
,
float
dither
,
float
dither
,
float
preemph_coeff
,
float
preemph_coeff
,
bool
remove_dc_offset
,
bool
remove_dc_offset
,
std
::
string
window_type
,
// e.g. Hamming window
std
::
string
window_type
,
// e.g. Hamming window
bool
round_to_power_of_two
,
bool
round_to_power_of_two
,
float
blackman_coeff
,
float
blackman_coeff
,
bool
snip_edges
,
bool
snip_edges
,
bool
allow_downsample
,
bool
allow_downsample
,
bool
allow_upsample
,
bool
allow_upsample
,
int
max_feature_vectors
,
int
max_feature_vectors
,
int
num_bins
,
// mel opts
int
num_bins
,
// mel opts
float
low_freq
,
float
low_freq
,
float
high_freq
,
float
high_freq
,
float
vtln_low
,
float
vtln_low
,
float
vtln_high
,
float
vtln_high
,
bool
debug_mel
,
bool
debug_mel
,
bool
htk_mode
,
bool
htk_mode
,
bool
use_energy
,
// fbank opts
bool
use_energy
,
// fbank opts
float
energy_floor
,
float
energy_floor
,
bool
raw_energy
,
bool
raw_energy
,
bool
htk_compat
,
bool
htk_compat
,
bool
use_log_fbank
,
bool
use_log_fbank
,
bool
use_power
,
bool
use_power
,
const
py
::
array_t
<
double
>&
wav
)
{
const
py
::
array_t
<
double
>&
wav
)
{
InitFbank
(
samp_freq
,
// frame opts
InitFbank
(
samp_freq
,
// frame opts
frame_shift_ms
,
frame_shift_ms
,
frame_length_ms
,
frame_length_ms
,
dither
,
dither
,
preemph_coeff
,
preemph_coeff
,
remove_dc_offset
,
remove_dc_offset
,
window_type
,
// e.g. Hamming window
window_type
,
// e.g. Hamming window
round_to_power_of_two
,
round_to_power_of_two
,
blackman_coeff
,
blackman_coeff
,
snip_edges
,
snip_edges
,
allow_downsample
,
allow_downsample
,
allow_upsample
,
allow_upsample
,
max_feature_vectors
,
max_feature_vectors
,
num_bins
,
// mel opts
num_bins
,
// mel opts
low_freq
,
low_freq
,
high_freq
,
high_freq
,
vtln_low
,
vtln_low
,
vtln_high
,
vtln_high
,
debug_mel
,
debug_mel
,
htk_mode
,
htk_mode
,
use_energy
,
// fbank opts
use_energy
,
// fbank opts
energy_floor
,
energy_floor
,
raw_energy
,
raw_energy
,
htk_compat
,
htk_compat
,
use_log_fbank
,
use_log_fbank
,
use_power
);
use_power
);
py
::
array_t
<
double
>
result
=
ComputeFbankStreaming
(
wav
);
py
::
array_t
<
double
>
result
=
ComputeFbankStreaming
(
wav
);
paddleaudio
::
KaldiFeatureWrapper
::
GetInstance
()
->
ResetFbank
();
paddleaudio
::
KaldiFeatureWrapper
::
GetInstance
()
->
ResetFbank
();
return
result
;
return
result
;
}
}
void
ResetFbank
()
{
void
ResetFbank
()
{
paddleaudio
::
KaldiFeatureWrapper
::
GetInstance
()
->
ResetFbank
();
paddleaudio
::
KaldiFeatureWrapper
::
GetInstance
()
->
ResetFbank
();
}
}
PYBIND11_MODULE
(
kaldi_featurepy
,
m
)
{
PYBIND11_MODULE
(
kaldi_featurepy
,
m
)
{
...
@@ -139,5 +138,7 @@ PYBIND11_MODULE(kaldi_featurepy, m) {
...
@@ -139,5 +138,7 @@ PYBIND11_MODULE(kaldi_featurepy, m) {
m
.
def
(
"InitFbank"
,
&
InitFbank
,
"init fbank"
);
m
.
def
(
"InitFbank"
,
&
InitFbank
,
"init fbank"
);
m
.
def
(
"ResetFbank"
,
&
ResetFbank
,
"reset fbank"
);
m
.
def
(
"ResetFbank"
,
&
ResetFbank
,
"reset fbank"
);
m
.
def
(
"ComputeFbank"
,
&
ComputeFbank
,
"compute fbank"
);
m
.
def
(
"ComputeFbank"
,
&
ComputeFbank
,
"compute fbank"
);
m
.
def
(
"ComputeFbankStreaming"
,
&
ComputeFbankStreaming
,
"compute fbank streaming"
);
m
.
def
(
"ComputeFbankStreaming"
,
&
ComputeFbankStreaming
,
"compute fbank streaming"
);
}
}
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature.h
浏览文件 @
a3911ab5
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include "kaldi_feature_wrapper.h"
#include "kaldi_feature_wrapper.h"
namespace
py
=
pybind11
;
namespace
py
=
pybind11
;
bool
InitFbank
(
bool
InitFbank
(
float
samp_freq
,
// frame opts
float
samp_freq
,
// frame opts
float
frame_shift_ms
,
float
frame_shift_ms
,
float
frame_length_ms
,
float
frame_length_ms
,
float
dither
,
float
dither
,
float
preemph_coeff
,
float
preemph_coeff
,
bool
remove_dc_offset
,
bool
remove_dc_offset
,
std
::
string
window_type
,
// e.g. Hamming window
std
::
string
window_type
,
// e.g. Hamming window
bool
round_to_power_of_two
,
bool
round_to_power_of_two
,
float
blackman_coeff
,
float
blackman_coeff
,
bool
snip_edges
,
bool
snip_edges
,
bool
allow_downsample
,
bool
allow_downsample
,
bool
allow_upsample
,
bool
allow_upsample
,
int
max_feature_vectors
,
int
max_feature_vectors
,
int
num_bins
,
// mel opts
int
num_bins
,
// mel opts
float
low_freq
,
float
low_freq
,
float
high_freq
,
float
high_freq
,
float
vtln_low
,
float
vtln_low
,
float
vtln_high
,
float
vtln_high
,
bool
debug_mel
,
bool
debug_mel
,
bool
htk_mode
,
bool
htk_mode
,
bool
use_energy
,
// fbank opts
bool
use_energy
,
// fbank opts
float
energy_floor
,
float
energy_floor
,
bool
raw_energy
,
bool
raw_energy
,
bool
htk_compat
,
bool
htk_compat
,
bool
use_log_fbank
,
bool
use_log_fbank
,
bool
use_power
);
bool
use_power
);
py
::
array_t
<
double
>
ComputeFbank
(
py
::
array_t
<
double
>
ComputeFbank
(
float
samp_freq
,
// frame opts
float
samp_freq
,
// frame opts
float
frame_shift_ms
,
float
frame_shift_ms
,
float
frame_length_ms
,
float
frame_length_ms
,
float
dither
,
float
dither
,
float
preemph_coeff
,
float
preemph_coeff
,
bool
remove_dc_offset
,
bool
remove_dc_offset
,
std
::
string
window_type
,
// e.g. Hamming window
std
::
string
window_type
,
// e.g. Hamming window
bool
round_to_power_of_two
,
bool
round_to_power_of_two
,
kaldi
::
BaseFloat
blackman_coeff
,
kaldi
::
BaseFloat
blackman_coeff
,
bool
snip_edges
,
bool
snip_edges
,
bool
allow_downsample
,
bool
allow_downsample
,
bool
allow_upsample
,
bool
allow_upsample
,
int
max_feature_vectors
,
int
max_feature_vectors
,
int
num_bins
,
// mel opts
int
num_bins
,
// mel opts
float
low_freq
,
float
low_freq
,
float
high_freq
,
float
high_freq
,
float
vtln_low
,
float
vtln_low
,
float
vtln_high
,
float
vtln_high
,
bool
debug_mel
,
bool
debug_mel
,
bool
htk_mode
,
bool
htk_mode
,
bool
use_energy
,
// fbank opts
bool
use_energy
,
// fbank opts
float
energy_floor
,
float
energy_floor
,
bool
raw_energy
,
bool
raw_energy
,
bool
htk_compat
,
bool
htk_compat
,
...
...
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.cc
浏览文件 @
a3911ab5
...
@@ -8,17 +8,18 @@ KaldiFeatureWrapper* KaldiFeatureWrapper::GetInstance() {
...
@@ -8,17 +8,18 @@ KaldiFeatureWrapper* KaldiFeatureWrapper::GetInstance() {
}
}
bool
KaldiFeatureWrapper
::
InitFbank
(
kaldi
::
FbankOptions
opts
)
{
bool
KaldiFeatureWrapper
::
InitFbank
(
kaldi
::
FbankOptions
opts
)
{
fbank_
.
reset
(
new
Fbank
(
opts
));
fbank_
.
reset
(
new
Fbank
(
opts
));
return
true
;
return
true
;
}
}
py
::
array_t
<
double
>
KaldiFeatureWrapper
::
ComputeFbank
(
const
py
::
array_t
<
double
>
wav
)
{
py
::
array_t
<
double
>
KaldiFeatureWrapper
::
ComputeFbank
(
const
py
::
array_t
<
double
>
wav
)
{
py
::
buffer_info
info
=
wav
.
request
();
py
::
buffer_info
info
=
wav
.
request
();
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
input_wav
(
info
.
size
);
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
input_wav
(
info
.
size
);
double
*
wav_ptr
=
(
double
*
)
info
.
ptr
;
double
*
wav_ptr
=
(
double
*
)
info
.
ptr
;
for
(
int
idx
=
0
;
idx
<
info
.
size
;
++
idx
)
{
for
(
int
idx
=
0
;
idx
<
info
.
size
;
++
idx
)
{
input_wav
(
idx
)
=
*
wav_ptr
;
input_wav
(
idx
)
=
*
wav_ptr
;
wav_ptr
++
;
wav_ptr
++
;
}
}
...
@@ -28,8 +29,8 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(const py::array_t<double>
...
@@ -28,8 +29,8 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(const py::array_t<double>
auto
result
=
py
::
array_t
<
double
>
(
feats
.
Dim
());
auto
result
=
py
::
array_t
<
double
>
(
feats
.
Dim
());
py
::
buffer_info
xs
=
result
.
request
();
py
::
buffer_info
xs
=
result
.
request
();
for
(
int
idx
=
0
;
idx
<
10
;
++
idx
)
{
for
(
int
idx
=
0
;
idx
<
10
;
++
idx
)
{
float
val
=
feats
(
idx
);
float
val
=
feats
(
idx
);
std
::
cout
<<
val
<<
" "
;
std
::
cout
<<
val
<<
" "
;
}
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
double
*
res_ptr
=
(
double
*
)
xs
.
ptr
;
double
*
res_ptr
=
(
double
*
)
xs
.
ptr
;
...
@@ -38,20 +39,21 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(const py::array_t<double>
...
@@ -38,20 +39,21 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(const py::array_t<double>
res_ptr
++
;
res_ptr
++
;
}
}
return
result
.
reshape
({
feats
.
Dim
()
/
Dim
(),
Dim
()});
return
result
.
reshape
({
feats
.
Dim
()
/
Dim
(),
Dim
()});
/*
/*
py::buffer_info info = wav.request();
py::buffer_info info = wav.request();
std::cout << info.size << std::endl;
std::cout << info.size << std::endl;
auto result = py::array_t<double>(info.size);
auto result = py::array_t<double>(info.size);
//kaldi::Vector<kaldi::BaseFloat> input_wav(info.size);
//kaldi::Vector<kaldi::BaseFloat> input_wav(info.size);
kaldi::Vector<double> input_wav(info.size);
kaldi::Vector<double> input_wav(info.size);
py::buffer_info info_re = result.request();
py::buffer_info info_re = result.request();
memcpy(input_wav.Data(), (double*)info.ptr, wav.nbytes());
memcpy(input_wav.Data(), (double*)info.ptr, wav.nbytes());
memcpy((double*)info_re.ptr, input_wav.Data(), input_wav.Dim()* sizeof(double));
memcpy((double*)info_re.ptr, input_wav.Data(), input_wav.Dim()*
return result;
sizeof(double));
*/
return result;
*/
}
}
}
// namespace paddleaudio
}
// namespace paddleaudio
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h
浏览文件 @
a3911ab5
#include "base/kaldi-common.h"
#include "base/kaldi-common.h"
#include "feature_common.h"
#include "feat/feature-fbank.h"
#include "feat/feature-fbank.h"
#include "feature_common.h"
#pragma once
#pragma once
...
@@ -14,12 +14,8 @@ class KaldiFeatureWrapper {
...
@@ -14,12 +14,8 @@ class KaldiFeatureWrapper {
static
KaldiFeatureWrapper
*
GetInstance
();
static
KaldiFeatureWrapper
*
GetInstance
();
bool
InitFbank
(
kaldi
::
FbankOptions
opts
);
bool
InitFbank
(
kaldi
::
FbankOptions
opts
);
py
::
array_t
<
double
>
ComputeFbank
(
const
py
::
array_t
<
double
>
wav
);
py
::
array_t
<
double
>
ComputeFbank
(
const
py
::
array_t
<
double
>
wav
);
int
Dim
()
{
int
Dim
()
{
return
fbank_
->
Dim
();
}
return
fbank_
->
Dim
();
void
ResetFbank
()
{
fbank_
->
Reset
();
}
}
void
ResetFbank
()
{
fbank_
->
Reset
();
}
private:
private:
std
::
unique_ptr
<
paddleaudio
::
Fbank
>
fbank_
;
std
::
unique_ptr
<
paddleaudio
::
Fbank
>
fbank_
;
...
...
paddlespeech/audio/src/pybind/pybind.cpp
浏览文件 @
a3911ab5
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//All rights reserved.
//
All rights reserved.
#include "paddlespeech/audio/src/pybind/sox/io.h"
#include "paddlespeech/audio/src/pybind/sox/io.h"
PYBIND11_MODULE
(
_paddleaudio
,
m
)
{
PYBIND11_MODULE
(
_paddleaudio
,
m
)
{
m
.
def
(
"get_info_file"
,
&
paddleaudio
::
sox_io
::
get_info_file
,
m
.
def
(
"get_info_file"
,
"Get metadata of audio file."
);
&
paddleaudio
::
sox_io
::
get_info_file
,
m
.
def
(
"get_info_fileobj"
,
&
paddleaudio
::
sox_io
::
get_info_fileobj
,
"Get metadata of audio file."
);
"Get metadata of audio in file object."
);
m
.
def
(
"get_info_fileobj"
,
&
paddleaudio
::
sox_io
::
get_info_fileobj
,
"Get metadata of audio in file object."
);
}
}
\ No newline at end of file
paddlespeech/audio/src/pybind/sox/io.cpp
浏览文件 @
a3911ab5
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//All rights reserved.
//
All rights reserved.
#include "paddlespeech/audio/src/pybind/sox/io.h"
#include "paddlespeech/audio/src/pybind/sox/io.h"
#include "paddlespeech/audio/src/pybind/sox/utils.h"
#include "paddlespeech/audio/src/pybind/sox/utils.h"
...
@@ -11,51 +11,54 @@ namespace sox_io {
...
@@ -11,51 +11,54 @@ namespace sox_io {
auto
get_info_file
(
const
std
::
string
&
path
,
const
std
::
string
&
format
)
auto
get_info_file
(
const
std
::
string
&
path
,
const
std
::
string
&
format
)
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
{
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
{
SoxFormat
sf
(
sox_open_read
(
path
.
data
(),
SoxFormat
sf
(
/*signal=*/
nullptr
,
sox_open_read
(
path
.
data
(),
/*encoding=*/
nullptr
,
/*signal=*/
nullptr
,
/*filetype=*/
format
.
empty
()
?
nullptr
:
format
.
data
()));
/*encoding=*/
nullptr
,
/*filetype=*/
format
.
empty
()
?
nullptr
:
format
.
data
()));
validate_input_file
(
sf
,
path
);
validate_input_file
(
sf
,
path
);
return
std
::
make_tuple
(
static_cast
<
int64_t
>
(
sf
->
signal
.
rate
),
return
std
::
make_tuple
(
static_cast
<
int64_t
>
(
sf
->
signal
.
length
/
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
signal
.
rate
),
static_cast
<
int64_t
>
(
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
signal
.
length
/
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
encoding
.
bits_per_sample
),
static_cast
<
int64_t
>
(
sf
->
signal
.
channels
),
get_encoding
(
sf
->
encoding
.
encoding
));
static_cast
<
int64_t
>
(
sf
->
encoding
.
bits_per_sample
),
get_encoding
(
sf
->
encoding
.
encoding
));
}
}
auto
get_info_fileobj
(
py
::
object
fileobj
,
const
std
::
string
&
format
)
auto
get_info_fileobj
(
py
::
object
fileobj
,
const
std
::
string
&
format
)
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
{
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
{
const
auto
capacity
=
[
&
]()
{
const
auto
capacity
=
[
&
]()
{
const
auto
bufsiz
=
get_buffer_size
();
const
auto
bufsiz
=
get_buffer_size
();
const
int64_t
kDefaultCapacityInBytes
=
4096
;
const
int64_t
kDefaultCapacityInBytes
=
4096
;
return
(
bufsiz
>
kDefaultCapacityInBytes
)
?
bufsiz
return
(
bufsiz
>
kDefaultCapacityInBytes
)
?
bufsiz
:
kDefaultCapacityInBytes
;
:
kDefaultCapacityInBytes
;
}();
}();
std
::
string
buffer
(
capacity
,
'\0'
);
std
::
string
buffer
(
capacity
,
'\0'
);
auto
*
buf
=
const_cast
<
char
*>
(
buffer
.
data
());
auto
*
buf
=
const_cast
<
char
*>
(
buffer
.
data
());
auto
num_read
=
read_fileobj
(
&
fileobj
,
capacity
,
buf
);
auto
num_read
=
read_fileobj
(
&
fileobj
,
capacity
,
buf
);
// If the file is shorter than 256, then libsox cannot read the header.
// If the file is shorter than 256, then libsox cannot read the header.
auto
buf_size
=
(
num_read
>
256
)
?
num_read
:
256
;
auto
buf_size
=
(
num_read
>
256
)
?
num_read
:
256
;
SoxFormat
sf
(
sox_open_mem_read
(
buf
,
buf_size
,
SoxFormat
sf
(
sox_open_mem_read
(
/*signal=*/
nullptr
,
buf
,
/*encoding=*/
nullptr
,
buf_size
,
/*filetype=*/
format
.
empty
()
?
nullptr
:
format
.
data
()));
/*signal=*/
nullptr
,
/*encoding=*/
nullptr
,
// In case of streamed data, length can be 0
/*filetype=*/
format
.
empty
()
?
nullptr
:
format
.
data
()));
validate_input_memfile
(
sf
);
// In case of streamed data, length can be 0
return
std
::
make_tuple
(
validate_input_memfile
(
sf
);
static_cast
<
int64_t
>
(
sf
->
signal
.
rate
),
static_cast
<
int64_t
>
(
sf
->
signal
.
length
/
sf
->
signal
.
channels
),
return
std
::
make_tuple
(
static_cast
<
int64_t
>
(
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
signal
.
rate
),
static_cast
<
int64_t
>
(
sf
->
encoding
.
bits_per_sample
),
static_cast
<
int64_t
>
(
sf
->
signal
.
length
/
sf
->
signal
.
channels
),
get_encoding
(
sf
->
encoding
.
encoding
));
static_cast
<
int64_t
>
(
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
encoding
.
bits_per_sample
),
get_encoding
(
sf
->
encoding
.
encoding
));
}
}
}
// namespace paddleaudio
}
// namespace paddleaudio
}
// namespace sox_io
}
// namespace sox_io
paddlespeech/audio/src/pybind/sox/io.h
浏览文件 @
a3911ab5
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//All rights reserved.
//
All rights reserved.
#ifndef PADDLEAUDIO_PYBIND_SOX_IO_H
#ifndef PADDLEAUDIO_PYBIND_SOX_IO_H
#define PADDLEAUDIO_PYBIND_SOX_IO_H
#define PADDLEAUDIO_PYBIND_SOX_IO_H
...
@@ -15,7 +15,7 @@ auto get_info_file(const std::string &path, const std::string &format)
...
@@ -15,7 +15,7 @@ auto get_info_file(const std::string &path, const std::string &format)
auto
get_info_fileobj
(
py
::
object
fileobj
,
const
std
::
string
&
format
)
auto
get_info_fileobj
(
py
::
object
fileobj
,
const
std
::
string
&
format
)
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
;
->
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
;
}
// namespace paddleaudio
}
// namespace paddleaudio
}
// namespace sox_io
}
// namespace sox_io
#endif
#endif
paddlespeech/audio/src/pybind/sox/utils.cpp
浏览文件 @
a3911ab5
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//All rights reserved.
//
All rights reserved.
#include "paddlespeech/audio/src/pybind/sox/utils.h"
#include "paddlespeech/audio/src/pybind/sox/utils.h"
...
@@ -15,86 +15,87 @@ sox_format_t *SoxFormat::operator->() const noexcept { return fd_; }
...
@@ -15,86 +15,87 @@ sox_format_t *SoxFormat::operator->() const noexcept { return fd_; }
SoxFormat
::
operator
sox_format_t
*
()
const
noexcept
{
return
fd_
;
}
SoxFormat
::
operator
sox_format_t
*
()
const
noexcept
{
return
fd_
;
}
void
SoxFormat
::
close
()
{
void
SoxFormat
::
close
()
{
if
(
fd_
!=
nullptr
)
{
if
(
fd_
!=
nullptr
)
{
sox_close
(
fd_
);
sox_close
(
fd_
);
fd_
=
nullptr
;
fd_
=
nullptr
;
}
}
}
}
auto
read_fileobj
(
py
::
object
*
fileobj
,
const
uint64_t
size
,
char
*
buffer
)
auto
read_fileobj
(
py
::
object
*
fileobj
,
const
uint64_t
size
,
char
*
buffer
)
->
uint64_t
{
->
uint64_t
{
uint64_t
num_read
=
0
;
uint64_t
num_read
=
0
;
while
(
num_read
<
size
)
{
while
(
num_read
<
size
)
{
auto
request
=
size
-
num_read
;
auto
request
=
size
-
num_read
;
auto
chunk
=
static_cast
<
std
::
string
>
(
auto
chunk
=
static_cast
<
std
::
string
>
(
static_cast
<
py
::
bytes
>
(
fileobj
->
attr
(
"read"
)(
request
)));
static_cast
<
py
::
bytes
>
(
fileobj
->
attr
(
"read"
)(
request
)));
auto
chunk_len
=
chunk
.
length
();
auto
chunk_len
=
chunk
.
length
();
if
(
chunk_len
==
0
)
{
if
(
chunk_len
==
0
)
{
break
;
break
;
}
}
if
(
chunk_len
>
request
)
{
if
(
chunk_len
>
request
)
{
std
::
ostringstream
message
;
std
::
ostringstream
message
;
message
<<
"Requested up to "
<<
request
<<
" bytes but, "
message
<<
"received "
<<
chunk_len
<<
" bytes. "
<<
"Requested up to "
<<
request
<<
" bytes but, "
<<
"The given object does not confirm to read protocol of file "
<<
"received "
<<
chunk_len
<<
" bytes. "
"object."
;
<<
"The given object does not confirm to read protocol of file "
throw
std
::
runtime_error
(
message
.
str
());
"object."
;
throw
std
::
runtime_error
(
message
.
str
());
}
memcpy
(
buffer
,
chunk
.
data
(),
chunk_len
);
buffer
+=
chunk_len
;
num_read
+=
chunk_len
;
}
}
memcpy
(
buffer
,
chunk
.
data
(),
chunk_len
);
return
num_read
;
buffer
+=
chunk_len
;
num_read
+=
chunk_len
;
}
return
num_read
;
}
}
int64_t
get_buffer_size
()
{
return
sox_get_globals
()
->
bufsiz
;
}
int64_t
get_buffer_size
()
{
return
sox_get_globals
()
->
bufsiz
;
}
void
validate_input_file
(
const
SoxFormat
&
sf
,
const
std
::
string
&
path
)
{
void
validate_input_file
(
const
SoxFormat
&
sf
,
const
std
::
string
&
path
)
{
if
(
static_cast
<
sox_format_t
*>
(
sf
)
==
nullptr
)
{
if
(
static_cast
<
sox_format_t
*>
(
sf
)
==
nullptr
)
{
throw
std
::
runtime_error
(
"Error loading audio file: failed to open file "
+
throw
std
::
runtime_error
(
path
);
"Error loading audio file: failed to open file "
+
path
);
}
}
if
(
sf
->
encoding
.
encoding
==
SOX_ENCODING_UNKNOWN
)
{
if
(
sf
->
encoding
.
encoding
==
SOX_ENCODING_UNKNOWN
)
{
throw
std
::
runtime_error
(
"Error loading audio file: unknown encoding."
);
throw
std
::
runtime_error
(
"Error loading audio file: unknown encoding."
);
}
}
}
}
void
validate_input_memfile
(
const
SoxFormat
&
sf
)
{
void
validate_input_memfile
(
const
SoxFormat
&
sf
)
{
return
validate_input_file
(
sf
,
"<in memory buffer>"
);
return
validate_input_file
(
sf
,
"<in memory buffer>"
);
}
}
std
::
string
get_encoding
(
sox_encoding_t
encoding
)
{
std
::
string
get_encoding
(
sox_encoding_t
encoding
)
{
switch
(
encoding
)
{
switch
(
encoding
)
{
case
SOX_ENCODING_UNKNOWN
:
case
SOX_ENCODING_UNKNOWN
:
return
"UNKNOWN"
;
return
"UNKNOWN"
;
case
SOX_ENCODING_SIGN2
:
case
SOX_ENCODING_SIGN2
:
return
"PCM_S"
;
return
"PCM_S"
;
case
SOX_ENCODING_UNSIGNED
:
case
SOX_ENCODING_UNSIGNED
:
return
"PCM_U"
;
return
"PCM_U"
;
case
SOX_ENCODING_FLOAT
:
case
SOX_ENCODING_FLOAT
:
return
"PCM_F"
;
return
"PCM_F"
;
case
SOX_ENCODING_FLAC
:
case
SOX_ENCODING_FLAC
:
return
"FLAC"
;
return
"FLAC"
;
case
SOX_ENCODING_ULAW
:
case
SOX_ENCODING_ULAW
:
return
"ULAW"
;
return
"ULAW"
;
case
SOX_ENCODING_ALAW
:
case
SOX_ENCODING_ALAW
:
return
"ALAW"
;
return
"ALAW"
;
case
SOX_ENCODING_MP3
:
case
SOX_ENCODING_MP3
:
return
"MP3"
;
return
"MP3"
;
case
SOX_ENCODING_VORBIS
:
case
SOX_ENCODING_VORBIS
:
return
"VORBIS"
;
return
"VORBIS"
;
case
SOX_ENCODING_AMR_WB
:
case
SOX_ENCODING_AMR_WB
:
return
"AMR_WB"
;
return
"AMR_WB"
;
case
SOX_ENCODING_AMR_NB
:
case
SOX_ENCODING_AMR_NB
:
return
"AMR_NB"
;
return
"AMR_NB"
;
case
SOX_ENCODING_OPUS
:
case
SOX_ENCODING_OPUS
:
return
"OPUS"
;
return
"OPUS"
;
case
SOX_ENCODING_GSM
:
case
SOX_ENCODING_GSM
:
return
"GSM"
;
return
"GSM"
;
default:
default:
return
"UNKNOWN"
;
return
"UNKNOWN"
;
}
}
}
}
}
// namespace paddleaudio
}
// namespace paddleaudio
}
// namespace sox_utils
}
// namespace sox_utils
paddlespeech/audio/src/pybind/sox/utils.h
浏览文件 @
a3911ab5
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//All rights reserved.
//
All rights reserved.
#ifndef PADDLEAUDIO_PYBIND_SOX_UTILS_H
#ifndef PADDLEAUDIO_PYBIND_SOX_UTILS_H
#define PADDLEAUDIO_PYBIND_SOX_UTILS_H
#define PADDLEAUDIO_PYBIND_SOX_UTILS_H
...
@@ -14,19 +14,19 @@ namespace sox_utils {
...
@@ -14,19 +14,19 @@ namespace sox_utils {
/// helper class to automatically close sox_format_t*
/// helper class to automatically close sox_format_t*
struct
SoxFormat
{
struct
SoxFormat
{
explicit
SoxFormat
(
sox_format_t
*
fd
)
noexcept
;
explicit
SoxFormat
(
sox_format_t
*
fd
)
noexcept
;
SoxFormat
(
const
SoxFormat
&
other
)
=
delete
;
SoxFormat
(
const
SoxFormat
&
other
)
=
delete
;
SoxFormat
(
SoxFormat
&&
other
)
=
delete
;
SoxFormat
(
SoxFormat
&&
other
)
=
delete
;
SoxFormat
&
operator
=
(
const
SoxFormat
&
other
)
=
delete
;
SoxFormat
&
operator
=
(
const
SoxFormat
&
other
)
=
delete
;
SoxFormat
&
operator
=
(
SoxFormat
&&
other
)
=
delete
;
SoxFormat
&
operator
=
(
SoxFormat
&&
other
)
=
delete
;
~
SoxFormat
();
~
SoxFormat
();
sox_format_t
*
operator
->
()
const
noexcept
;
sox_format_t
*
operator
->
()
const
noexcept
;
operator
sox_format_t
*
()
const
noexcept
;
operator
sox_format_t
*
()
const
noexcept
;
void
close
();
void
close
();
private:
private:
sox_format_t
*
fd_
;
sox_format_t
*
fd_
;
};
};
auto
read_fileobj
(
py
::
object
*
fileobj
,
uint64_t
size
,
char
*
buffer
)
->
uint64_t
;
auto
read_fileobj
(
py
::
object
*
fileobj
,
uint64_t
size
,
char
*
buffer
)
->
uint64_t
;
...
@@ -39,7 +39,7 @@ void validate_input_memfile(const SoxFormat &sf);
...
@@ -39,7 +39,7 @@ void validate_input_memfile(const SoxFormat &sf);
std
::
string
get_encoding
(
sox_encoding_t
encoding
);
std
::
string
get_encoding
(
sox_encoding_t
encoding
);
}
// namespace paddleaudio
}
// namespace paddleaudio
}
// namespace sox_utils
}
// namespace sox_utils
#endif
#endif
paddlespeech/audio/src/sox/io.cpp
浏览文件 @
a3911ab5
...
@@ -11,54 +11,53 @@ namespace paddleaudio {
...
@@ -11,54 +11,53 @@ namespace paddleaudio {
namespace
sox_io
{
namespace
sox_io
{
tl
::
optional
<
MetaDataTuple
>
get_info_file
(
tl
::
optional
<
MetaDataTuple
>
get_info_file
(
const
std
::
string
&
path
,
const
std
::
string
&
path
,
const
tl
::
optional
<
std
::
string
>&
format
)
{
const
tl
::
optional
<
std
::
string
>&
format
)
{
SoxFormat
sf
(
sox_open_read
(
SoxFormat
sf
(
sox_open_read
(
path
.
c_str
(),
path
.
c_str
(),
/*signal=*/
nullptr
,
/*signal=*/
nullptr
,
/*encoding=*/
nullptr
,
/*encoding=*/
nullptr
,
/*filetype=*/
format
.
has_value
()
?
format
.
value
().
c_str
()
:
nullptr
));
/*filetype=*/
format
.
has_value
()
?
format
.
value
().
c_str
()
:
nullptr
));
if
(
static_cast
<
sox_format_t
*>
(
sf
)
==
nullptr
||
if
(
static_cast
<
sox_format_t
*>
(
sf
)
==
nullptr
||
sf
->
encoding
.
encoding
==
SOX_ENCODING_UNKNOWN
)
{
sf
->
encoding
.
encoding
==
SOX_ENCODING_UNKNOWN
)
{
return
{};
return
{};
}
}
return
std
::
forward_as_tuple
(
return
std
::
forward_as_tuple
(
static_cast
<
int64_t
>
(
sf
->
signal
.
rate
),
static_cast
<
int64_t
>
(
sf
->
signal
.
rate
),
static_cast
<
int64_t
>
(
sf
->
signal
.
length
/
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
signal
.
length
/
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
signal
.
channels
),
static_cast
<
int64_t
>
(
sf
->
encoding
.
bits_per_sample
),
static_cast
<
int64_t
>
(
sf
->
encoding
.
bits_per_sample
),
get_encoding
(
sf
->
encoding
.
encoding
));
get_encoding
(
sf
->
encoding
.
encoding
));
}
}
std
::
vector
<
std
::
vector
<
std
::
string
>>
get_effects
(
std
::
vector
<
std
::
vector
<
std
::
string
>>
get_effects
(
const
tl
::
optional
<
int64_t
>&
frame_offset
,
const
tl
::
optional
<
int64_t
>&
frame_offset
,
const
tl
::
optional
<
int64_t
>&
num_frames
)
{
const
tl
::
optional
<
int64_t
>&
num_frames
)
{
const
auto
offset
=
frame_offset
.
value_or
(
0
);
const
auto
offset
=
frame_offset
.
value_or
(
0
);
if
(
offset
<
0
)
{
if
(
offset
<
0
)
{
throw
std
::
runtime_error
(
throw
std
::
runtime_error
(
"Invalid argument: frame_offset must be non-negative."
);
"Invalid argument: frame_offset must be non-negative."
);
}
}
const
auto
frames
=
num_frames
.
value_or
(
-
1
);
const
auto
frames
=
num_frames
.
value_or
(
-
1
);
if
(
frames
==
0
||
frames
<
-
1
)
{
if
(
frames
==
0
||
frames
<
-
1
)
{
throw
std
::
runtime_error
(
throw
std
::
runtime_error
(
"Invalid argument: num_frames must be -1 or greater than 0."
);
"Invalid argument: num_frames must be -1 or greater than 0."
);
}
}
std
::
vector
<
std
::
vector
<
std
::
string
>>
effects
;
std
::
vector
<
std
::
vector
<
std
::
string
>>
effects
;
if
(
frames
!=
-
1
)
{
if
(
frames
!=
-
1
)
{
std
::
ostringstream
os_offset
,
os_frames
;
std
::
ostringstream
os_offset
,
os_frames
;
os_offset
<<
offset
<<
"s"
;
os_offset
<<
offset
<<
"s"
;
os_frames
<<
"+"
<<
frames
<<
"s"
;
os_frames
<<
"+"
<<
frames
<<
"s"
;
effects
.
emplace_back
(
effects
.
emplace_back
(
std
::
vector
<
std
::
string
>
{
"trim"
,
os_offset
.
str
(),
os_frames
.
str
()});
std
::
vector
<
std
::
string
>
{
"trim"
,
os_offset
.
str
(),
os_frames
.
str
()});
}
else
if
(
offset
!=
0
)
{
}
else
if
(
offset
!=
0
)
{
std
::
ostringstream
os_offset
;
std
::
ostringstream
os_offset
;
os_offset
<<
offset
<<
"s"
;
os_offset
<<
offset
<<
"s"
;
effects
.
emplace_back
(
std
::
vector
<
std
::
string
>
{
"trim"
,
os_offset
.
str
()});
effects
.
emplace_back
(
std
::
vector
<
std
::
string
>
{
"trim"
,
os_offset
.
str
()});
}
}
return
effects
;
return
effects
;
}
}
tl
::
optional
<
std
::
tuple
<
torch
::
Tensor
,
int64_t
>>
load_audio_file
(
tl
::
optional
<
std
::
tuple
<
torch
::
Tensor
,
int64_t
>>
load_audio_file
(
...
@@ -68,79 +67,73 @@ tl::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
...
@@ -68,79 +67,73 @@ tl::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
tl
::
optional
<
bool
>
normalize
,
tl
::
optional
<
bool
>
normalize
,
tl
::
optional
<
bool
>
channels_first
,
tl
::
optional
<
bool
>
channels_first
,
const
tl
::
optional
<
std
::
string
>&
format
)
{
const
tl
::
optional
<
std
::
string
>&
format
)
{
auto
effects
=
get_effects
(
frame_offset
,
num_frames
);
auto
effects
=
get_effects
(
frame_offset
,
num_frames
);
return
paddleaudio
::
sox_effects
::
apply_effects_file
(
return
paddleaudio
::
sox_effects
::
apply_effects_file
(
path
,
effects
,
normalize
,
channels_first
,
format
);
path
,
effects
,
normalize
,
channels_first
,
format
);
}
}
void
save_audio_file
(
void
save_audio_file
(
const
std
::
string
&
path
,
const
std
::
string
&
path
,
torch
::
Tensor
tensor
,
torch
::
Tensor
tensor
,
int64_t
sample_rate
,
int64_t
sample_rate
,
bool
channels_first
,
bool
channels_first
,
tl
::
optional
<
double
>
compression
,
tl
::
optional
<
double
>
compression
,
tl
::
optional
<
std
::
string
>
format
,
tl
::
optional
<
std
::
string
>
format
,
tl
::
optional
<
std
::
string
>
encoding
,
tl
::
optional
<
std
::
string
>
encoding
,
tl
::
optional
<
int64_t
>
bits_per_sample
)
{
tl
::
optional
<
int64_t
>
bits_per_sample
)
{
validate_input_tensor
(
tensor
);
validate_input_tensor
(
tensor
);
const
auto
filetype
=
[
&
]()
{
const
auto
filetype
=
[
&
]()
{
if
(
format
.
has_value
())
return
format
.
value
();
if
(
format
.
has_value
())
return
get_filetype
(
path
);
return
format
.
value
();
}();
return
get_filetype
(
path
);
}();
if
(
filetype
==
"amr-nb"
)
{
const
auto
num_channels
=
tensor
.
size
(
channels_first
?
0
:
1
);
if
(
filetype
==
"amr-nb"
)
{
TORCH_CHECK
(
num_channels
==
1
,
const
auto
num_channels
=
tensor
.
size
(
channels_first
?
0
:
1
);
"amr-nb format only supports single channel audio."
);
TORCH_CHECK
(
}
else
if
(
filetype
==
"htk"
)
{
num_channels
==
1
,
"amr-nb format only supports single channel audio."
);
const
auto
num_channels
=
tensor
.
size
(
channels_first
?
0
:
1
);
}
else
if
(
filetype
==
"htk"
)
{
TORCH_CHECK
(
num_channels
==
1
,
const
auto
num_channels
=
tensor
.
size
(
channels_first
?
0
:
1
);
"htk format only supports single channel audio."
);
TORCH_CHECK
(
}
else
if
(
filetype
==
"gsm"
)
{
num_channels
==
1
,
"htk format only supports single channel audio."
);
const
auto
num_channels
=
tensor
.
size
(
channels_first
?
0
:
1
);
}
else
if
(
filetype
==
"gsm"
)
{
TORCH_CHECK
(
num_channels
==
1
,
const
auto
num_channels
=
tensor
.
size
(
channels_first
?
0
:
1
);
"gsm format only supports single channel audio."
);
TORCH_CHECK
(
TORCH_CHECK
(
sample_rate
==
8000
,
num_channels
==
1
,
"gsm format only supports single channel audio."
);
"gsm format only supports a sampling rate of 8kHz."
);
TORCH_CHECK
(
}
sample_rate
==
8000
,
const
auto
signal_info
=
"gsm format only supports a sampling rate of 8kHz."
);
get_signalinfo
(
&
tensor
,
sample_rate
,
filetype
,
channels_first
);
}
const
auto
encoding_info
=
get_encodinginfo_for_save
(
const
auto
signal_info
=
filetype
,
tensor
.
dtype
(),
compression
,
encoding
,
bits_per_sample
);
get_signalinfo
(
&
tensor
,
sample_rate
,
filetype
,
channels_first
);
const
auto
encoding_info
=
get_encodinginfo_for_save
(
SoxFormat
sf
(
sox_open_write
(
path
.
c_str
(),
filetype
,
tensor
.
dtype
(),
compression
,
encoding
,
bits_per_sample
);
&
signal_info
,
&
encoding_info
,
SoxFormat
sf
(
sox_open_write
(
/*filetype=*/
filetype
.
c_str
(),
path
.
c_str
(),
/*oob=*/
nullptr
,
&
signal_info
,
/*overwrite_permitted=*/
nullptr
));
&
encoding_info
,
/*filetype=*/
filetype
.
c_str
(),
if
(
static_cast
<
sox_format_t
*>
(
sf
)
==
nullptr
)
{
/*oob=*/
nullptr
,
throw
std
::
runtime_error
(
/*overwrite_permitted=*/
nullptr
));
"Error saving audio file: failed to open file "
+
path
);
}
if
(
static_cast
<
sox_format_t
*>
(
sf
)
==
nullptr
)
{
throw
std
::
runtime_error
(
paddleaudio
::
sox_effects_chain
::
SoxEffectsChain
chain
(
"Error saving audio file: failed to open file "
+
path
);
/*input_encoding=*/
get_tensor_encodinginfo
(
tensor
.
dtype
()),
}
/*output_encoding=*/
sf
->
encoding
);
chain
.
addInputTensor
(
&
tensor
,
sample_rate
,
channels_first
);
paddleaudio
::
sox_effects_chain
::
SoxEffectsChain
chain
(
chain
.
addOutputFile
(
sf
);
/*input_encoding=*/
get_tensor_encodinginfo
(
tensor
.
dtype
()),
chain
.
run
();
/*output_encoding=*/
sf
->
encoding
);
chain
.
addInputTensor
(
&
tensor
,
sample_rate
,
channels_first
);
chain
.
addOutputFile
(
sf
);
chain
.
run
();
}
}
TORCH_LIBRARY_FRAGMENT
(
paddleaudio
,
m
)
{
TORCH_LIBRARY_FRAGMENT
(
paddleaudio
,
m
)
{
m
.
def
(
"paddleaudio::sox_io_get_info"
,
&
paddleaudio
::
sox_io
::
get_info_file
);
m
.
def
(
"paddleaudio::sox_io_get_info"
,
&
paddleaudio
::
sox_io
::
get_info_file
);
m
.
def
(
m
.
def
(
"paddleaudio::sox_io_load_audio_file"
,
"paddleaudio::sox_io_load_audio_file"
,
&
paddleaudio
::
sox_io
::
load_audio_file
);
&
paddleaudio
::
sox_io
::
load_audio_file
);
m
.
def
(
"paddleaudio::sox_io_save_audio_file"
,
m
.
def
(
&
paddleaudio
::
sox_io
::
save_audio_file
);
"paddleaudio::sox_io_save_audio_file"
,
&
paddleaudio
::
sox_io
::
save_audio_file
);
}
}
}
// namespace sox_io
}
// namespace sox_io
}
// namespace paddleaudio
}
// namespace paddleaudio
\ No newline at end of file
\ No newline at end of file
paddlespeech/audio/src/sox/io.h
浏览文件 @
a3911ab5
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//
Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
//All rights reserved.
//
All rights reserved.
#ifndef PADDLEAUDIO_SOX_IO_H
#ifndef PADDLEAUDIO_SOX_IO_H
#define PADDLEAUDIO_SOX_IO_H
#define PADDLEAUDIO_SOX_IO_H
...
@@ -11,17 +11,15 @@
...
@@ -11,17 +11,15 @@
namespace
paddleaudio
{
namespace
paddleaudio
{
namespace
sox_io
{
namespace
sox_io
{
auto
get_effects
(
auto
get_effects
(
const
tl
::
optional
<
int64_t
>&
frame_offset
,
const
tl
::
optional
<
int64_t
>&
frame_offset
,
const
tl
::
optional
<
int64_t
>&
num_frames
)
const
tl
::
optional
<
int64_t
>&
num_frames
)
->
std
::
vector
<
std
::
vector
<
std
::
string
>>
;
->
std
::
vector
<
std
::
vector
<
std
::
string
>>
;
using
MetaDataTuple
=
using
MetaDataTuple
=
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
;
std
::
tuple
<
int64_t
,
int64_t
,
int64_t
,
int64_t
,
std
::
string
>
;
tl
::
optional
<
MetaDataTuple
>
get_info_file
(
tl
::
optional
<
MetaDataTuple
>
get_info_file
(
const
std
::
string
&
path
,
const
std
::
string
&
path
,
const
tl
::
optional
<
std
::
string
>&
format
);
const
tl
::
optional
<
std
::
string
>&
format
);
tl
::
optional
<
std
::
tuple
<
torch
::
Tensor
,
int64_t
>>
load_audio_file
(
tl
::
optional
<
std
::
tuple
<
torch
::
Tensor
,
int64_t
>>
load_audio_file
(
const
std
::
string
&
path
,
const
std
::
string
&
path
,
...
@@ -31,17 +29,16 @@ tl::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
...
@@ -31,17 +29,16 @@ tl::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
tl
::
optional
<
bool
>
channels_first
,
tl
::
optional
<
bool
>
channels_first
,
const
tl
::
optional
<
std
::
string
>&
format
);
const
tl
::
optional
<
std
::
string
>&
format
);
void
save_audio_file
(
void
save_audio_file
(
const
std
::
string
&
path
,
const
std
::
string
&
path
,
torch
::
Tensor
tensor
,
torch
::
Tensor
tensor
,
int64_t
sample_rate
,
int64_t
sample_rate
,
bool
channels_first
,
bool
channels_first
,
tl
::
optional
<
double
>
compression
,
tl
::
optional
<
double
>
compression
,
tl
::
optional
<
std
::
string
>
format
,
tl
::
optional
<
std
::
string
>
format
,
tl
::
optional
<
std
::
string
>
encoding
,
tl
::
optional
<
std
::
string
>
encoding
,
tl
::
optional
<
int64_t
>
bits_per_sample
);
tl
::
optional
<
int64_t
>
bits_per_sample
);
}
// namespace sox_io
}
// namespace sox_io
}
// namespace paddleaudio
}
// namespace paddleaudio
#endif
#endif
\ No newline at end of file
paddlespeech/audio/src/utils.cpp
浏览文件 @
a3911ab5
...
@@ -4,17 +4,17 @@ namespace {
...
@@ -4,17 +4,17 @@ namespace {
bool
is_sox_available
()
{
bool
is_sox_available
()
{
#ifdef INCLUDE_SOX
#ifdef INCLUDE_SOX
return
true
;
return
true
;
#else
#else
return
false
;
return
false
;
#endif
#endif
}
}
bool
is_kaldi_available
()
{
bool
is_kaldi_available
()
{
#ifdef INCLUDE_KALDI
#ifdef INCLUDE_KALDI
return
true
;
return
true
;
#else
#else
return
false
;
return
false
;
#endif
#endif
}
}
...
@@ -22,12 +22,12 @@ bool is_kaldi_available() {
...
@@ -22,12 +22,12 @@ bool is_kaldi_available() {
// not the runtime availability.
// not the runtime availability.
bool
is_ffmpeg_available
()
{
bool
is_ffmpeg_available
()
{
#ifdef USE_FFMPEG
#ifdef USE_FFMPEG
return
true
;
return
true
;
#else
#else
return
false
;
return
false
;
#endif
#endif
}
}
}
// namespace
}
// namespace
}
// namespace paddleaudio
}
// namespace paddleaudio
\ No newline at end of file
\ No newline at end of file
setup.py
浏览文件 @
a3911ab5
此差异已折叠。
点击以展开。
speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py
浏览文件 @
a3911ab5
此差异已折叠。
点击以展开。
tools/setup_helpers/__init__.py
浏览文件 @
a3911ab5
from
.extension
import
*
from
.extension
import
*
\ No newline at end of file
tools/setup_helpers/extension.py
浏览文件 @
a3911ab5
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录