Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
84e5bc03
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
84e5bc03
编写于
7月 15, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
format
上级
a7181bcd
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
67 addition
and
55 deletion
+67
-55
paddlespeech/audio/_internal/module_utils.py
paddlespeech/audio/_internal/module_utils.py
+1
-0
paddlespeech/audio/backends/no_backend.py
paddlespeech/audio/backends/no_backend.py
+14
-10
paddlespeech/audio/backends/sox_io_backend.py
paddlespeech/audio/backends/sox_io_backend.py
+14
-11
paddlespeech/audio/backends/utils.py
paddlespeech/audio/backends/utils.py
+4
-2
paddlespeech/audio/kaldi/kaldi.py
paddlespeech/audio/kaldi/kaldi.py
+34
-32
未找到文件。
paddlespeech/audio/_internal/module_utils.py
浏览文件 @
84e5bc03
...
@@ -5,6 +5,7 @@ from typing import Optional
...
@@ -5,6 +5,7 @@ from typing import Optional
#code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py
#code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py
def
is_module_available
(
*
modules
:
str
)
->
bool
:
def
is_module_available
(
*
modules
:
str
)
->
bool
:
r
"""Returns if a top-level module with :attr:`name` exists *without**
r
"""Returns if a top-level module with :attr:`name` exists *without**
importing it. This is generally safer than try-catch block around a
importing it. This is generally safer than try-catch block around a
...
...
paddlespeech/audio/backends/no_backend.py
浏览文件 @
84e5bc03
...
@@ -8,21 +8,25 @@ from paddle import Tensor
...
@@ -8,21 +8,25 @@ from paddle import Tensor
#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py
#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py
def
load
(
def
load
(
filepath
:
Union
[
str
,
Path
],
filepath
:
Union
[
str
,
Path
],
out
:
Optional
[
Tensor
]
=
None
,
out
:
Optional
[
Tensor
]
=
None
,
normalization
:
Union
[
bool
,
float
,
Callable
]
=
True
,
normalization
:
Union
[
bool
,
float
,
Callable
]
=
True
,
channels_first
:
bool
=
True
,
channels_first
:
bool
=
True
,
num_frames
:
int
=
0
,
num_frames
:
int
=
0
,
offset
:
int
=
0
,
offset
:
int
=
0
,
filetype
:
Optional
[
str
]
=
None
,
filetype
:
Optional
[
str
]
=
None
,
)
->
Tuple
[
Tensor
,
int
]:
)
->
Tuple
[
Tensor
,
int
]:
raise
RuntimeError
(
"No audio I/O backend is available."
)
raise
RuntimeError
(
"No audio I/O backend is available."
)
def
save
(
filepath
:
str
,
src
:
Tensor
,
sample_rate
:
int
,
precision
:
int
=
16
,
channels_first
:
bool
=
True
)
->
None
:
def
save
(
filepath
:
str
,
src
:
Tensor
,
sample_rate
:
int
,
precision
:
int
=
16
,
channels_first
:
bool
=
True
)
->
None
:
raise
RuntimeError
(
"No audio I/O backend is available."
)
raise
RuntimeError
(
"No audio I/O backend is available."
)
def
info
(
filepath
:
str
)
->
None
:
def
info
(
filepath
:
str
)
->
None
:
raise
RuntimeError
(
"No audio I/O backend is available."
)
raise
RuntimeError
(
"No audio I/O backend is available."
)
\ No newline at end of file
paddlespeech/audio/backends/sox_io_backend.py
浏览文件 @
84e5bc03
from
pathlib
import
Path
from
pathlib
import
Path
from
typing
import
Callable
from
typing
import
Callable
from
typing
import
Optional
from
typing
import
Optional
...
@@ -9,21 +8,25 @@ from paddle import Tensor
...
@@ -9,21 +8,25 @@ from paddle import Tensor
#https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
#https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
def
load
(
def
load
(
filepath
:
Union
[
str
,
Path
],
filepath
:
Union
[
str
,
Path
],
out
:
Optional
[
Tensor
]
=
None
,
out
:
Optional
[
Tensor
]
=
None
,
normalization
:
Union
[
bool
,
float
,
Callable
]
=
True
,
normalization
:
Union
[
bool
,
float
,
Callable
]
=
True
,
channels_first
:
bool
=
True
,
channels_first
:
bool
=
True
,
num_frames
:
int
=
0
,
num_frames
:
int
=
0
,
offset
:
int
=
0
,
offset
:
int
=
0
,
filetype
:
Optional
[
str
]
=
None
,
filetype
:
Optional
[
str
]
=
None
,
)
->
Tuple
[
Tensor
,
int
]:
)
->
Tuple
[
Tensor
,
int
]:
raise
RuntimeError
(
"No audio I/O backend is available."
)
raise
RuntimeError
(
"No audio I/O backend is available."
)
def
save
(
filepath
:
str
,
src
:
Tensor
,
sample_rate
:
int
,
precision
:
int
=
16
,
channels_first
:
bool
=
True
)
->
None
:
def
save
(
filepath
:
str
,
src
:
Tensor
,
sample_rate
:
int
,
precision
:
int
=
16
,
channels_first
:
bool
=
True
)
->
None
:
raise
RuntimeError
(
"No audio I/O backend is available."
)
raise
RuntimeError
(
"No audio I/O backend is available."
)
def
info
(
filepath
:
str
)
->
None
:
def
info
(
filepath
:
str
)
->
None
:
raise
RuntimeError
(
"No audio I/O backend is available."
)
raise
RuntimeError
(
"No audio I/O backend is available."
)
\ No newline at end of file
paddlespeech/audio/backends/utils.py
浏览文件 @
84e5bc03
...
@@ -40,7 +40,8 @@ def set_audio_backend(backend: Optional[str]):
...
@@ -40,7 +40,8 @@ def set_audio_backend(backend: Optional[str]):
of the system. If ``None`` is provided the current backend is unassigned.
of the system. If ``None`` is provided the current backend is unassigned.
"""
"""
if
backend
is
not
None
and
backend
not
in
list_audio_backends
():
if
backend
is
not
None
and
backend
not
in
list_audio_backends
():
raise
RuntimeError
(
f
'Backend "
{
backend
}
" is not one of '
f
"available backends:
{
list_audio_backends
()
}
."
)
raise
RuntimeError
(
f
'Backend "
{
backend
}
" is not one of '
f
"available backends:
{
list_audio_backends
()
}
."
)
if
backend
is
None
:
if
backend
is
None
:
module
=
no_backend
module
=
no_backend
...
@@ -76,6 +77,7 @@ def _init_audio_backend():
...
@@ -76,6 +77,7 @@ def _init_audio_backend():
warnings
.
warn
(
"No audio backend is available."
)
warnings
.
warn
(
"No audio backend is available."
)
set_audio_backend
(
None
)
set_audio_backend
(
None
)
def
get_audio_backend
()
->
Optional
[
str
]:
def
get_audio_backend
()
->
Optional
[
str
]:
"""Get the name of the current backend
"""Get the name of the current backend
...
@@ -88,4 +90,4 @@ def get_audio_backend() -> Optional[str]:
...
@@ -88,4 +90,4 @@ def get_audio_backend() -> Optional[str]:
return
"sox_io"
return
"sox_io"
if
paddlespeech
.
audio
.
load
==
soundfile_backend
.
load
:
if
paddlespeech
.
audio
.
load
==
soundfile_backend
.
load
:
return
"soundfile"
return
"soundfile"
raise
ValueError
(
"Unknown backend."
)
raise
ValueError
(
"Unknown backend."
)
\ No newline at end of file
paddlespeech/audio/kaldi/kaldi.py
浏览文件 @
84e5bc03
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
paddlespeech.audio._internal
import
module_utils
from
paddlespeech.audio._internal
import
module_utils
import
paddlespeech.audio.ops.paddleaudio.ComputeFbank
as
ComputeFbank
import
paddlespeech.audio.ops.paddleaudio.ComputeFbank
as
ComputeFbank
import
paddlespeech.audio.ops.paddleaudio.PitchExtractionOptions
as
PitchExtractionOptions
import
paddlespeech.audio.ops.paddleaudio.PitchExtractionOptions
as
PitchExtractionOptions
import
paddlespeech.audio.ops.paddleaudio.FrameExtractionOptions
as
FrameExtractionOptions
import
paddlespeech.audio.ops.paddleaudio.FrameExtractionOptions
as
FrameExtractionOptions
...
@@ -27,37 +27,38 @@ __all__ = [
...
@@ -27,37 +27,38 @@ __all__ = [
@
module_utils
.
requires_kaldi
()
@
module_utils
.
requires_kaldi
()
def
fbank
(
wav
,
def
fbank
(
samp_freq
:
int
=
16000
,
wav
,
frame_shift_ms
:
float
=
10.0
,
samp_freq
:
int
=
16000
,
frame_length_ms
:
float
=
25.0
,
frame_shift_ms
:
float
=
10.0
,
dither
:
float
=
0.0
,
frame_length_ms
:
float
=
25.0
,
preemph_coeff
:
float
=
0.97
,
dither
:
float
=
0.0
,
remove_dc_offset
:
bool
=
True
,
preemph_coeff
:
float
=
0.97
,
window_type
:
str
=
'povey'
,
remove_dc_offset
:
bool
=
True
,
round_to_power_of_two
:
bool
=
True
,
window_type
:
str
=
'povey'
,
blackman_coeff
:
float
=
0.42
,
round_to_power_of_two
:
bool
=
True
,
snip_edges
:
bool
=
True
,
blackman_coeff
:
float
=
0.42
,
allow_downsample
:
bool
=
False
,
snip_edges
:
bool
=
True
,
allow_upsample
:
bool
=
False
,
allow_downsample
:
bool
=
False
,
max_feature_vectors
:
int
=-
1
,
allow_upsample
:
bool
=
False
,
num_bins
:
int
=
23
,
max_feature_vectors
:
int
=-
1
,
low_freq
:
float
=
20
,
num_bins
:
int
=
23
,
high_freq
:
float
=
0
,
low_freq
:
float
=
20
,
vtln_low
:
float
=
100
,
high_freq
:
float
=
0
,
vtln_high
:
float
=-
500
,
vtln_low
:
float
=
100
,
debug_mel
:
bool
=
False
,
vtln_high
:
float
=-
500
,
htk_mode
:
bool
=
False
,
debug_mel
:
bool
=
False
,
use_energy
:
bool
=
False
,
# fbank opts
htk_mode
:
bool
=
False
,
energy_floor
:
float
=
0.0
,
use_energy
:
bool
=
False
,
# fbank opts
raw_energy
:
bool
=
True
,
energy_floor
:
float
=
0.0
,
htk_compat
:
bool
=
False
,
raw_energy
:
bool
=
True
,
use_log_fbank
:
bool
=
True
,
htk_compat
:
bool
=
False
,
use_power
:
bool
=
True
):
use_log_fbank
:
bool
=
True
,
use_power
:
bool
=
True
):
frame_opts
=
FrameExtractionOptions
()
frame_opts
=
FrameExtractionOptions
()
mel_opts
=
MelBanksOptions
()
mel_opts
=
MelBanksOptions
()
fbank_opts
=
FbankOptions
()
fbank_opts
=
FbankOptions
()
frame_opts
.
samp_freq
=
samp_freq
frame_opts
.
samp_freq
=
samp_freq
frame_opts
.
frame_shift_ms
=
frame_shift_ms
frame_opts
.
frame_shift_ms
=
frame_shift_ms
frame_opts
.
frame_length_ms
=
frame_length_ms
frame_opts
.
frame_length_ms
=
frame_length_ms
frame_opts
.
dither
=
dither
frame_opts
.
dither
=
dither
...
@@ -71,7 +72,7 @@ def fbank(wav,
...
@@ -71,7 +72,7 @@ def fbank(wav,
frame_opts
.
allow_upsample
=
allow_upsample
frame_opts
.
allow_upsample
=
allow_upsample
frame_opts
.
max_feature_vectors
=
max_feature_vectors
frame_opts
.
max_feature_vectors
=
max_feature_vectors
mel_opts
.
num_bins
=
num_bins
mel_opts
.
num_bins
=
num_bins
mel_opts
.
low_freq
=
low_freq
mel_opts
.
low_freq
=
low_freq
mel_opts
.
high_freq
=
high_freq
mel_opts
.
high_freq
=
high_freq
mel_opts
.
vtln_low
=
vtln_low
mel_opts
.
vtln_low
=
vtln_low
...
@@ -79,7 +80,7 @@ def fbank(wav,
...
@@ -79,7 +80,7 @@ def fbank(wav,
mel_opts
.
debug_mel
=
debug_mel
mel_opts
.
debug_mel
=
debug_mel
mel_opts
.
htk_mode
=
htk_mode
mel_opts
.
htk_mode
=
htk_mode
fbank_opts
.
use_energy
=
use_energy
fbank_opts
.
use_energy
=
use_energy
fbank_opts
.
energy_floor
=
energy_floor
fbank_opts
.
energy_floor
=
energy_floor
fbank_opts
.
raw_energy
=
raw_energy
fbank_opts
.
raw_energy
=
raw_energy
fbank_opts
.
htk_compat
=
htk_compat
fbank_opts
.
htk_compat
=
htk_compat
...
@@ -88,6 +89,7 @@ def fbank(wav,
...
@@ -88,6 +89,7 @@ def fbank(wav,
feat
=
ComputeFbank
(
frame_opts
,
mel_opts
,
fbank_opts
,
wav
)
feat
=
ComputeFbank
(
frame_opts
,
mel_opts
,
fbank_opts
,
wav
)
return
feat
return
feat
@
module_utils
.
requires_kaldi
()
@
module_utils
.
requires_kaldi
()
def
pitch
(
wav
,
def
pitch
(
wav
,
samp_freq
:
int
=
16000
,
samp_freq
:
int
=
16000
,
...
@@ -114,7 +116,7 @@ def pitch(wav,
...
@@ -114,7 +116,7 @@ def pitch(wav,
pitch_opts
.
samp_freq
=
samp_freq
pitch_opts
.
samp_freq
=
samp_freq
pitch_opts
.
frame_shift_ms
=
frame_shift_ms
pitch_opts
.
frame_shift_ms
=
frame_shift_ms
pitch_opts
.
frame_length_ms
=
frame_length_ms
pitch_opts
.
frame_length_ms
=
frame_length_ms
pitch_opts
.
preemph_coeff
=
preemph_coeff
pitch_opts
.
preemph_coeff
=
preemph_coeff
pitch_opts
.
min_f0
=
min_f0
pitch_opts
.
min_f0
=
min_f0
pitch_opts
.
max_f0
=
max_f0
pitch_opts
.
max_f0
=
max_f0
pitch_opts
.
soft_min_f0
=
soft_min_f0
pitch_opts
.
soft_min_f0
=
soft_min_f0
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录