Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
38c55e44
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
38c55e44
编写于
7月 18, 2022
作者:
Y
Yang Zhou
浏览文件
操作
浏览文件
下载
差异文件
merge audio
上级
cffe555c
a2e8b76a
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
73 addition
and
58 deletion
+73
-58
CMakeLists.txt
CMakeLists.txt
+1
-1
cmake/summary.cmake
cmake/summary.cmake
+5
-1
paddlespeech/audio/_internal/module_utils.py
paddlespeech/audio/_internal/module_utils.py
+1
-0
paddlespeech/audio/backends/no_backend.py
paddlespeech/audio/backends/no_backend.py
+14
-10
paddlespeech/audio/backends/sox_io_backend.py
paddlespeech/audio/backends/sox_io_backend.py
+12
-10
paddlespeech/audio/backends/utils.py
paddlespeech/audio/backends/utils.py
+4
-2
paddlespeech/audio/kaldi/kaldi.py
paddlespeech/audio/kaldi/kaldi.py
+33
-31
paddlespeech/audio/src/CMakeLists.txt
paddlespeech/audio/src/CMakeLists.txt
+1
-1
paddlespeech/audio/src/pybind/kaldi/feature_common.h
paddlespeech/audio/src/pybind/kaldi/feature_common.h
+2
-2
未找到文件。
CMakeLists.txt
浏览文件 @
38c55e44
...
@@ -57,7 +57,7 @@ include(openblas)
...
@@ -57,7 +57,7 @@ include(openblas)
# packages
# packages
find_package
(
Python3 COMPONENTS Interpreter Development
)
find_package
(
Python3 COMPONENTS Interpreter Development
)
find_package
(
pybind11 CONFIG
)
find_package
(
pybind11 CONFIG
REQUIRED
)
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O0 -Wall -g")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O0 -Wall -g")
...
...
cmake/summary.cmake
浏览文件 @
38c55e44
...
@@ -37,5 +37,9 @@ function (onnx_print_configuration_summary)
...
@@ -37,5 +37,9 @@ function (onnx_print_configuration_summary)
message
(
STATUS
" Python executable :
${
Python_EXECUTABLE
}
"
)
message
(
STATUS
" Python executable :
${
Python_EXECUTABLE
}
"
)
message
(
STATUS
" Python includes :
${
Python_INCLUDE_DIR
}
"
)
message
(
STATUS
" Python includes :
${
Python_INCLUDE_DIR
}
"
)
message
(
STATUS
" Python libraries :
${
Python_LIBRARY
}
"
)
message
(
STATUS
" Python libraries :
${
Python_LIBRARY
}
"
)
message
(
STATUS
" PYBIND11 :
${
pybind11_FOUND
}
"
)
message
(
STATUS
" Pybind11 version :
${
pybind11_VERSION
}
"
)
message
(
STATUS
" Pybind11 include :
${
pybind11_INCLUDE_DIR
}
"
)
message
(
STATUS
" Pybind11 includes :
${
pybind11_INCLUDE_DIRS
}
"
)
message
(
STATUS
" Pybind11 libraries :
${
pybind11_LIBRARIES
}
"
)
endfunction
()
endfunction
()
\ No newline at end of file
paddlespeech/audio/_internal/module_utils.py
浏览文件 @
38c55e44
...
@@ -5,6 +5,7 @@ from typing import Optional
...
@@ -5,6 +5,7 @@ from typing import Optional
#code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py
#code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py
def
is_module_available
(
*
modules
:
str
)
->
bool
:
def
is_module_available
(
*
modules
:
str
)
->
bool
:
r
"""Returns if a top-level module with :attr:`name` exists *without**
r
"""Returns if a top-level module with :attr:`name` exists *without**
importing it. This is generally safer than try-catch block around a
importing it. This is generally safer than try-catch block around a
...
...
paddlespeech/audio/backends/no_backend.py
浏览文件 @
38c55e44
...
@@ -8,21 +8,25 @@ from paddle import Tensor
...
@@ -8,21 +8,25 @@ from paddle import Tensor
#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py
#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py
def
load
(
def
load
(
filepath
:
Union
[
str
,
Path
],
filepath
:
Union
[
str
,
Path
],
out
:
Optional
[
Tensor
]
=
None
,
out
:
Optional
[
Tensor
]
=
None
,
normalization
:
Union
[
bool
,
float
,
Callable
]
=
True
,
normalization
:
Union
[
bool
,
float
,
Callable
]
=
True
,
channels_first
:
bool
=
True
,
channels_first
:
bool
=
True
,
num_frames
:
int
=
0
,
num_frames
:
int
=
0
,
offset
:
int
=
0
,
offset
:
int
=
0
,
filetype
:
Optional
[
str
]
=
None
,
filetype
:
Optional
[
str
]
=
None
,
)
->
Tuple
[
Tensor
,
int
]:
)
->
Tuple
[
Tensor
,
int
]:
raise
RuntimeError
(
"No audio I/O backend is available."
)
raise
RuntimeError
(
"No audio I/O backend is available."
)
def
save
(
filepath
:
str
,
src
:
Tensor
,
sample_rate
:
int
,
precision
:
int
=
16
,
channels_first
:
bool
=
True
)
->
None
:
def
save
(
filepath
:
str
,
src
:
Tensor
,
sample_rate
:
int
,
precision
:
int
=
16
,
channels_first
:
bool
=
True
)
->
None
:
raise
RuntimeError
(
"No audio I/O backend is available."
)
raise
RuntimeError
(
"No audio I/O backend is available."
)
def
info
(
filepath
:
str
)
->
None
:
def
info
(
filepath
:
str
)
->
None
:
raise
RuntimeError
(
"No audio I/O backend is available."
)
raise
RuntimeError
(
"No audio I/O backend is available."
)
\ No newline at end of file
paddlespeech/audio/backends/sox_io_backend.py
浏览文件 @
38c55e44
from
pathlib
import
Path
from
pathlib
import
Path
from
typing
import
Callable
from
typing
import
Callable
from
typing
import
Optional
from
typing
import
Optional
...
@@ -43,17 +42,20 @@ _fallback_load = _fail_load
...
@@ -43,17 +42,20 @@ _fallback_load = _fail_load
_fallback_load_filebj
=
_fail_load_fileobj
_fallback_load_filebj
=
_fail_load_fileobj
def
load
(
def
load
(
filepath
:
Union
[
str
,
Path
],
filepath
:
Union
[
str
,
Path
],
out
:
Optional
[
Tensor
]
=
None
,
out
:
Optional
[
Tensor
]
=
None
,
normalization
:
Union
[
bool
,
float
,
Callable
]
=
True
,
normalization
:
Union
[
bool
,
float
,
Callable
]
=
True
,
channels_first
:
bool
=
True
,
channels_first
:
bool
=
True
,
num_frames
:
int
=
0
,
num_frames
:
int
=
0
,
offset
:
int
=
0
,
offset
:
int
=
0
,
filetype
:
Optional
[
str
]
=
None
,
filetype
:
Optional
[
str
]
=
None
,
)
->
Tuple
[
Tensor
,
int
]:
)
->
Tuple
[
Tensor
,
int
]:
raise
RuntimeError
(
"No audio I/O backend is available."
)
raise
RuntimeError
(
"No audio I/O backend is available."
)
def
save
(
filepath
:
str
,
src
:
Tensor
,
sample_rate
:
int
,
precision
:
int
=
16
,
channels_first
:
bool
=
True
)
->
None
:
def
save
(
filepath
:
str
,
src
:
Tensor
,
sample_rate
:
int
,
precision
:
int
=
16
,
channels_first
:
bool
=
True
)
->
None
:
raise
RuntimeError
(
"No audio I/O backend is available."
)
raise
RuntimeError
(
"No audio I/O backend is available."
)
@
_mod_utils
.
requires_sox
()
@
_mod_utils
.
requires_sox
()
...
...
paddlespeech/audio/backends/utils.py
浏览文件 @
38c55e44
...
@@ -40,7 +40,8 @@ def set_audio_backend(backend: Optional[str]):
...
@@ -40,7 +40,8 @@ def set_audio_backend(backend: Optional[str]):
of the system. If ``None`` is provided the current backend is unassigned.
of the system. If ``None`` is provided the current backend is unassigned.
"""
"""
if
backend
is
not
None
and
backend
not
in
list_audio_backends
():
if
backend
is
not
None
and
backend
not
in
list_audio_backends
():
raise
RuntimeError
(
f
'Backend "
{
backend
}
" is not one of '
f
"available backends:
{
list_audio_backends
()
}
."
)
raise
RuntimeError
(
f
'Backend "
{
backend
}
" is not one of '
f
"available backends:
{
list_audio_backends
()
}
."
)
if
backend
is
None
:
if
backend
is
None
:
module
=
no_backend
module
=
no_backend
...
@@ -76,6 +77,7 @@ def _init_audio_backend():
...
@@ -76,6 +77,7 @@ def _init_audio_backend():
warnings
.
warn
(
"No audio backend is available."
)
warnings
.
warn
(
"No audio backend is available."
)
set_audio_backend
(
None
)
set_audio_backend
(
None
)
def
get_audio_backend
()
->
Optional
[
str
]:
def
get_audio_backend
()
->
Optional
[
str
]:
"""Get the name of the current backend
"""Get the name of the current backend
...
@@ -88,4 +90,4 @@ def get_audio_backend() -> Optional[str]:
...
@@ -88,4 +90,4 @@ def get_audio_backend() -> Optional[str]:
return
"sox_io"
return
"sox_io"
if
paddlespeech
.
audio
.
load
==
soundfile_backend
.
load
:
if
paddlespeech
.
audio
.
load
==
soundfile_backend
.
load
:
return
"soundfile"
return
"soundfile"
raise
ValueError
(
"Unknown backend."
)
raise
ValueError
(
"Unknown backend."
)
\ No newline at end of file
paddlespeech/audio/kaldi/kaldi.py
浏览文件 @
38c55e44
...
@@ -27,37 +27,38 @@ __all__ = [
...
@@ -27,37 +27,38 @@ __all__ = [
@
module_utils
.
requires_kaldi
()
@
module_utils
.
requires_kaldi
()
def
fbank
(
wav
,
def
fbank
(
samp_freq
:
int
=
16000
,
wav
,
frame_shift_ms
:
float
=
10.0
,
samp_freq
:
int
=
16000
,
frame_length_ms
:
float
=
25.0
,
frame_shift_ms
:
float
=
10.0
,
dither
:
float
=
0.0
,
frame_length_ms
:
float
=
25.0
,
preemph_coeff
:
float
=
0.97
,
dither
:
float
=
0.0
,
remove_dc_offset
:
bool
=
True
,
preemph_coeff
:
float
=
0.97
,
window_type
:
str
=
'povey'
,
remove_dc_offset
:
bool
=
True
,
round_to_power_of_two
:
bool
=
True
,
window_type
:
str
=
'povey'
,
blackman_coeff
:
float
=
0.42
,
round_to_power_of_two
:
bool
=
True
,
snip_edges
:
bool
=
True
,
blackman_coeff
:
float
=
0.42
,
allow_downsample
:
bool
=
False
,
snip_edges
:
bool
=
True
,
allow_upsample
:
bool
=
False
,
allow_downsample
:
bool
=
False
,
max_feature_vectors
:
int
=-
1
,
allow_upsample
:
bool
=
False
,
num_bins
:
int
=
23
,
max_feature_vectors
:
int
=-
1
,
low_freq
:
float
=
20
,
num_bins
:
int
=
23
,
high_freq
:
float
=
0
,
low_freq
:
float
=
20
,
vtln_low
:
float
=
100
,
high_freq
:
float
=
0
,
vtln_high
:
float
=-
500
,
vtln_low
:
float
=
100
,
debug_mel
:
bool
=
False
,
vtln_high
:
float
=-
500
,
htk_mode
:
bool
=
False
,
debug_mel
:
bool
=
False
,
use_energy
:
bool
=
False
,
# fbank opts
htk_mode
:
bool
=
False
,
energy_floor
:
float
=
0.0
,
use_energy
:
bool
=
False
,
# fbank opts
raw_energy
:
bool
=
True
,
energy_floor
:
float
=
0.0
,
htk_compat
:
bool
=
False
,
raw_energy
:
bool
=
True
,
use_log_fbank
:
bool
=
True
,
htk_compat
:
bool
=
False
,
use_power
:
bool
=
True
):
use_log_fbank
:
bool
=
True
,
use_power
:
bool
=
True
):
frame_opts
=
FrameExtractionOptions
()
frame_opts
=
FrameExtractionOptions
()
mel_opts
=
MelBanksOptions
()
mel_opts
=
MelBanksOptions
()
fbank_opts
=
FbankOptions
()
fbank_opts
=
FbankOptions
()
frame_opts
.
samp_freq
=
samp_freq
frame_opts
.
samp_freq
=
samp_freq
frame_opts
.
frame_shift_ms
=
frame_shift_ms
frame_opts
.
frame_shift_ms
=
frame_shift_ms
frame_opts
.
frame_length_ms
=
frame_length_ms
frame_opts
.
frame_length_ms
=
frame_length_ms
frame_opts
.
dither
=
dither
frame_opts
.
dither
=
dither
...
@@ -71,7 +72,7 @@ def fbank(wav,
...
@@ -71,7 +72,7 @@ def fbank(wav,
frame_opts
.
allow_upsample
=
allow_upsample
frame_opts
.
allow_upsample
=
allow_upsample
frame_opts
.
max_feature_vectors
=
max_feature_vectors
frame_opts
.
max_feature_vectors
=
max_feature_vectors
mel_opts
.
num_bins
=
num_bins
mel_opts
.
num_bins
=
num_bins
mel_opts
.
low_freq
=
low_freq
mel_opts
.
low_freq
=
low_freq
mel_opts
.
high_freq
=
high_freq
mel_opts
.
high_freq
=
high_freq
mel_opts
.
vtln_low
=
vtln_low
mel_opts
.
vtln_low
=
vtln_low
...
@@ -79,7 +80,7 @@ def fbank(wav,
...
@@ -79,7 +80,7 @@ def fbank(wav,
mel_opts
.
debug_mel
=
debug_mel
mel_opts
.
debug_mel
=
debug_mel
mel_opts
.
htk_mode
=
htk_mode
mel_opts
.
htk_mode
=
htk_mode
fbank_opts
.
use_energy
=
use_energy
fbank_opts
.
use_energy
=
use_energy
fbank_opts
.
energy_floor
=
energy_floor
fbank_opts
.
energy_floor
=
energy_floor
fbank_opts
.
raw_energy
=
raw_energy
fbank_opts
.
raw_energy
=
raw_energy
fbank_opts
.
htk_compat
=
htk_compat
fbank_opts
.
htk_compat
=
htk_compat
...
@@ -88,6 +89,7 @@ def fbank(wav,
...
@@ -88,6 +89,7 @@ def fbank(wav,
feat
=
ComputeFbank
(
frame_opts
,
mel_opts
,
fbank_opts
,
wav
)
feat
=
ComputeFbank
(
frame_opts
,
mel_opts
,
fbank_opts
,
wav
)
return
feat
return
feat
@
module_utils
.
requires_kaldi
()
@
module_utils
.
requires_kaldi
()
def
pitch
(
wav
,
def
pitch
(
wav
,
samp_freq
:
int
=
16000
,
samp_freq
:
int
=
16000
,
...
@@ -114,7 +116,7 @@ def pitch(wav,
...
@@ -114,7 +116,7 @@ def pitch(wav,
pitch_opts
.
samp_freq
=
samp_freq
pitch_opts
.
samp_freq
=
samp_freq
pitch_opts
.
frame_shift_ms
=
frame_shift_ms
pitch_opts
.
frame_shift_ms
=
frame_shift_ms
pitch_opts
.
frame_length_ms
=
frame_length_ms
pitch_opts
.
frame_length_ms
=
frame_length_ms
pitch_opts
.
preemph_coeff
=
preemph_coeff
pitch_opts
.
preemph_coeff
=
preemph_coeff
pitch_opts
.
min_f0
=
min_f0
pitch_opts
.
min_f0
=
min_f0
pitch_opts
.
max_f0
=
max_f0
pitch_opts
.
max_f0
=
max_f0
pitch_opts
.
soft_min_f0
=
soft_min_f0
pitch_opts
.
soft_min_f0
=
soft_min_f0
...
...
paddlespeech/audio/src/CMakeLists.txt
浏览文件 @
38c55e44
...
@@ -105,7 +105,7 @@ function(define_extension name sources include_dirs libraries definitions)
...
@@ -105,7 +105,7 @@ function(define_extension name sources include_dirs libraries definitions)
add_library
(
${
name
}
SHARED
${
sources
}
)
add_library
(
${
name
}
SHARED
${
sources
}
)
target_compile_definitions
(
${
name
}
PRIVATE
"
${
definitions
}
"
)
target_compile_definitions
(
${
name
}
PRIVATE
"
${
definitions
}
"
)
target_include_directories
(
target_include_directories
(
${
name
}
PRIVATE
${
PROJECT_SOURCE_DIR
}
${
Python_INCLUDE_DIR
}
${
include_dirs
}
)
${
name
}
PRIVATE
${
PROJECT_SOURCE_DIR
}
${
Python_INCLUDE_DIR
}
${
pybind11_INCLUDE_DIR
}
${
include_dirs
}
)
target_link_libraries
(
target_link_libraries
(
${
name
}
${
name
}
${
libraries
}
${
libraries
}
...
...
paddlespeech/audio/src/pybind/kaldi/feature_common.h
浏览文件 @
38c55e44
...
@@ -14,8 +14,8 @@
...
@@ -14,8 +14,8 @@
#pragma once
#pragma once
#include
<pybind11/numpy.h>
#include
"pybind11/pybind11.h"
#include
<pybind11/pybind11.h>
#include
"pybind11/numpy.h"
#include "feat/feature-window.h"
#include "feat/feature-window.h"
namespace
paddleaudio
{
namespace
paddleaudio
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录