Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
220fe203
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
220fe203
编写于
6月 15, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
test with dither, remove dc offset, preermphs
上级
42f93b2c
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
50 addition
and
43 deletion
+50
-43
third_party/paddle_audio/frontend/kaldi.py
third_party/paddle_audio/frontend/kaldi.py
+25
-24
third_party/paddle_audio/frontend/kaldi_test.py
third_party/paddle_audio/frontend/kaldi_test.py
+25
-19
未找到文件。
third_party/paddle_audio/frontend/kaldi.py
浏览文件 @
220fe203
...
...
@@ -92,7 +92,8 @@ def dither(signal:Tensor, dither_value=1.0)->Tensor:
Returns:
Tensor: [B, T, D]
"""
signal
+=
paddle
.
normal
(
shape
=
[
1
,
1
,
signal
.
shape
[
-
1
]])
*
dither_value
D
=
paddle
.
shape
(
signal
)[
-
1
]
signal
+=
paddle
.
normal
(
shape
=
[
1
,
1
,
D
])
*
dither_value
return
signal
...
...
@@ -105,7 +106,7 @@ def remove_dc_offset(signal:Tensor)->Tensor:
Returns:
Tensor: [B, T, D]
"""
signal
-=
paddle
.
mean
(
signal
,
axis
=-
1
)
signal
-=
paddle
.
mean
(
signal
,
axis
=-
1
,
keepdim
=
True
)
return
signal
def
preemphasis
(
signal
:
Tensor
,
coeff
=
0.97
)
->
Tensor
:
...
...
@@ -151,7 +152,7 @@ class STFT(nn.Layer):
sr
:
int
,
win_length
:
float
,
stride_length
:
float
,
dither
:
float
=
1
.0
,
dither
:
float
=
0
.0
,
preemph_coeff
:
float
=
0.97
,
remove_dc_offset
:
bool
=
True
,
window_type
:
str
=
'povey'
,
...
...
@@ -203,7 +204,7 @@ class STFT(nn.Layer):
batch_size
=
paddle
.
shape
(
num_samples
)
F
,
nframe
=
frames
(
x
,
num_samples
,
self
.
sr
,
self
.
win_length
,
self
.
stride_length
,
clip
=
self
.
clip
)
if
self
.
dither
:
F
=
dither
(
F
,
dither
)
F
=
dither
(
F
,
self
.
dither
)
if
self
.
remove_dc_offset
:
F
=
remove_dc_offset
(
F
)
if
self
.
preemph_coeff
:
...
...
third_party/paddle_audio/frontend/kaldi_test.py
浏览文件 @
220fe203
...
...
@@ -397,20 +397,18 @@ class TestKaldiFE(unittest.TestCase):
self
.
assertEqual
(
t_nframe
.
item
(),
fs
.
shape
[
0
])
self
.
assertTrue
(
np
.
allclose
(
t_fs
.
numpy
(),
fs
))
def
test_stft
(
self
):
sr
,
wav
=
kaldi
.
read
(
self
.
wavpath
)
wav
=
wav
[:,
0
]
for
wintype
in
[
''
,
'hamm'
,
'hann'
,
'povey'
]:
print
(
wintype
)
self
.
wintype
=
wintype
_
,
stft_c_win
,
_
,
_
=
stft_with_window
(
wav
,
samplerate
=
sr
,
winlen
=
self
.
winlen
,
winstep
=
self
.
winstep
,
nfilt
=
self
.
nfilt
,
nfft
=
self
.
nfft
,
lowfreq
=
self
.
lowfreq
,
highfreq
=
self
.
highfreq
,
wintype
=
self
.
wintype
)
print
(
'py'
,
stft_c_win
.
real
)
print
(
'py'
,
stft_c_win
.
imag
)
t_wav
=
paddle
.
to_tensor
([
wav
],
dtype
=
'float32'
)
t_wavlen
=
paddle
.
to_tensor
([
len
(
wav
)])
...
...
@@ -420,33 +418,26 @@ class TestKaldiFE(unittest.TestCase):
t_stft
=
t_stft
.
astype
(
stft_c_win
.
real
.
dtype
)[
0
]
t_real
=
t_stft
[:,
:,
0
]
t_imag
=
t_stft
[:,
:,
1
]
print
(
'pd'
,
t_real
.
numpy
())
print
(
'pd'
,
t_imag
.
numpy
())
self
.
assertEqual
(
t_nframe
.
item
(),
stft_c_win
.
real
.
shape
[
0
])
self
.
assertLess
(
np
.
sum
(
t_real
.
numpy
())
-
np
.
sum
(
stft_c_win
.
real
),
1
)
print
(
np
.
sum
(
t_real
.
numpy
()))
print
(
np
.
sum
(
stft_c_win
.
real
))
self
.
assertTrue
(
np
.
allclose
(
t_real
.
numpy
(),
stft_c_win
.
real
,
atol
=
1e-1
))
self
.
assertLess
(
np
.
sum
(
t_imag
.
numpy
())
-
np
.
sum
(
stft_c_win
.
imag
),
1
)
print
(
np
.
sum
(
t_imag
.
numpy
()))
print
(
np
.
sum
(
stft_c_win
.
imag
))
self
.
assertTrue
(
np
.
allclose
(
t_imag
.
numpy
(),
stft_c_win
.
imag
,
atol
=
1e-1
))
def
test_magspec
(
self
):
sr
,
wav
=
kaldi
.
read
(
self
.
wavpath
)
wav
=
wav
[:,
0
]
for
wintype
in
[
''
,
'hamm'
,
'hann'
,
'povey'
]:
print
(
wintype
)
self
.
wintype
=
wintype
stft_win
,
_
,
_
,
_
=
stft_with_window
(
wav
,
samplerate
=
sr
,
winlen
=
self
.
winlen
,
winstep
=
self
.
winstep
,
nfilt
=
self
.
nfilt
,
nfft
=
self
.
nfft
,
lowfreq
=
self
.
lowfreq
,
highfreq
=
self
.
highfreq
,
wintype
=
self
.
wintype
)
print
(
'py'
,
stft_win
)
t_wav
=
paddle
.
to_tensor
([
wav
],
dtype
=
'float32'
)
t_wavlen
=
paddle
.
to_tensor
([
len
(
wav
)])
...
...
@@ -455,20 +446,39 @@ class TestKaldiFE(unittest.TestCase):
t_stft
,
t_nframe
=
stft_class
(
t_wav
,
t_wavlen
)
t_stft
=
t_stft
.
astype
(
stft_win
.
dtype
)
t_spec
=
kaldi
.
magspec
(
t_stft
)[
0
]
print
(
'pd'
,
t_spec
.
numpy
())
self
.
assertEqual
(
t_nframe
.
item
(),
stft_win
.
shape
[
0
])
self
.
assertLess
(
np
.
sum
(
t_spec
.
numpy
())
-
np
.
sum
(
stft_win
),
1
)
print
(
np
.
sum
(
t_spec
.
numpy
()))
print
(
np
.
sum
(
stft_win
))
self
.
assertTrue
(
np
.
allclose
(
t_spec
.
numpy
(),
stft_win
,
atol
=
1e-1
))
def
test_magsepc_winprocess
(
self
):
sr
,
wav
=
kaldi
.
read
(
self
.
wavpath
)
wav
=
wav
[:,
0
]
fs
,
_
=
framesig
(
wav
,
self
.
winlen
*
sr
,
self
.
winstep
*
sr
,
dither
=
0.0
,
preemph
=
0.97
,
remove_dc_offset
=
True
,
wintype
=
'povey'
,
stride_trick
=
True
)
spec
=
magspec
(
fs
,
self
.
nfft
)
# nearly the same until this part
t_wav
=
paddle
.
to_tensor
([
wav
],
dtype
=
'float32'
)
t_wavlen
=
paddle
.
to_tensor
([
len
(
wav
)])
stft_class
=
kaldi
.
STFT
(
self
.
nfft
,
sr
,
self
.
winlen
,
self
.
winstep
,
window_type
=
'povey'
,
dither
=
0.0
,
preemph_coeff
=
0.97
,
remove_dc_offset
=
True
,
clip
=
False
)
t_stft
,
t_nframe
=
stft_class
(
t_wav
,
t_wavlen
)
t_stft
=
t_stft
.
astype
(
spec
.
dtype
)
t_spec
=
kaldi
.
magspec
(
t_stft
)[
0
]
self
.
assertEqual
(
t_nframe
.
item
(),
fs
.
shape
[
0
])
self
.
assertLess
(
np
.
sum
(
t_spec
.
numpy
())
-
np
.
sum
(
spec
),
1
)
self
.
assertTrue
(
np
.
allclose
(
t_spec
.
numpy
(),
spec
,
atol
=
1e-1
))
def
test_powspec
(
self
):
sr
,
wav
=
kaldi
.
read
(
self
.
wavpath
)
wav
=
wav
[:,
0
]
for
wintype
in
[
''
,
'hamm'
,
'hann'
,
'povey'
]:
print
(
wintype
)
self
.
wintype
=
wintype
stft_win
,
_
,
_
,
_
=
stft_with_window
(
wav
,
samplerate
=
sr
,
winlen
=
self
.
winlen
,
winstep
=
self
.
winstep
,
...
...
@@ -476,7 +486,6 @@ class TestKaldiFE(unittest.TestCase):
lowfreq
=
self
.
lowfreq
,
highfreq
=
self
.
highfreq
,
wintype
=
self
.
wintype
)
stft_win
=
np
.
square
(
stft_win
)
print
(
'py'
,
stft_win
)
t_wav
=
paddle
.
to_tensor
([
wav
],
dtype
=
'float32'
)
t_wavlen
=
paddle
.
to_tensor
([
len
(
wav
)])
...
...
@@ -485,13 +494,10 @@ class TestKaldiFE(unittest.TestCase):
t_stft
,
t_nframe
=
stft_class
(
t_wav
,
t_wavlen
)
t_stft
=
t_stft
.
astype
(
stft_win
.
dtype
)
t_spec
=
kaldi
.
powspec
(
t_stft
)[
0
]
print
(
'pd'
,
t_spec
.
numpy
())
self
.
assertEqual
(
t_nframe
.
item
(),
stft_win
.
shape
[
0
])
self
.
assertLess
(
np
.
sum
(
t_spec
.
numpy
()
-
stft_win
),
5e4
)
print
(
np
.
sum
(
t_spec
.
numpy
()))
print
(
np
.
sum
(
stft_win
))
self
.
assertTrue
(
np
.
allclose
(
t_spec
.
numpy
(),
stft_win
,
atol
=
1e2
))
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录