Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
a85250cf
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a85250cf
编写于
3月 09, 2022
作者:
K
KP
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add librosa and soundfile unittest.
上级
169040b4
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
363 addition
and
1 deletion
+363
-1
paddleaudio/setup.py
paddleaudio/setup.py
+3
-1
paddleaudio/tests/backends/__init__.py
paddleaudio/tests/backends/__init__.py
+13
-0
paddleaudio/tests/backends/base.py
paddleaudio/tests/backends/base.py
+34
-0
paddleaudio/tests/backends/soundfile/__init__.py
paddleaudio/tests/backends/soundfile/__init__.py
+13
-0
paddleaudio/tests/backends/soundfile/test_io.py
paddleaudio/tests/backends/soundfile/test_io.py
+73
-0
paddleaudio/tests/features/test_librosa.py
paddleaudio/tests/features/test_librosa.py
+227
-0
未找到文件。
paddleaudio/setup.py
浏览文件 @
a85250cf
...
@@ -82,7 +82,9 @@ setuptools.setup(
...
@@ -82,7 +82,9 @@ setuptools.setup(
'dtaidistance >= 2.3.6'
,
'dtaidistance >= 2.3.6'
,
'mcd >= 0.4'
,
'mcd >= 0.4'
,
],
],
setup_requires
=
[
'nose'
],
setup_requires
=
[
'nose'
,
'librosa==0.8.1'
,
'soundfile==0.10.3.post1'
,
'filecmp'
],
cmdclass
=
{
cmdclass
=
{
'install'
:
InstallCommand
,
'install'
:
InstallCommand
,
'test'
:
NoseTestCommand
,
'test'
:
NoseTestCommand
,
...
...
paddleaudio/tests/backends/__init__.py
0 → 100644
浏览文件 @
a85250cf
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddleaudio/tests/backends/base.py
0 → 100644
浏览文件 @
a85250cf
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
unittest
import
urllib.request
mono_channel_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
multi_channels_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav'


class BackendTest(unittest.TestCase):
    """Base test case that makes the sample wav files available locally.

    Before each test, ``self.files`` is filled with the local file names of
    ``mono_channel_wav`` and ``multi_channels_wav`` (in that order). A file is
    fetched into the current working directory only when it is not already
    present there.
    """

    def setUp(self):
        """Prepare the wav inputs for the test about to run."""
        self.initWavInput()

    def initWavInput(self):
        """Populate ``self.files``, downloading any sample that is missing.

        Each download goes to ``<name>.part`` first and is renamed into place
        only once ``urlretrieve`` has returned. Writing directly to the final
        name would let an interrupted transfer leave a truncated file that the
        ``isfile`` check accepts on every later run.

        Raises:
            Whatever ``urllib.request.urlretrieve`` raises when a download
            fails; the partial file is removed before the error propagates.
        """
        self.files = []
        for url in [mono_channel_wav, multi_channels_wav]:
            filename = os.path.basename(url)
            if not os.path.isfile(filename):
                partial = filename + '.part'
                try:
                    urllib.request.urlretrieve(url, partial)
                    os.replace(partial, filename)
                finally:
                    # Only present if the download or the rename failed.
                    if os.path.exists(partial):
                        os.remove(partial)
            self.files.append(filename)

    def initParmas(self):
        """Hook for subclasses; this base implementation always raises."""
        raise NotImplementedError
paddleaudio/tests/backends/soundfile/__init__.py
0 → 100644
浏览文件 @
a85250cf
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddleaudio/tests/backends/soundfile/test_io.py
0 → 100644
浏览文件 @
a85250cf
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
filecmp
import
os
import
unittest
import
numpy
as
np
import
soundfile
as
sf
import
paddleaudio
from
..base
import
BackendTest
class TestIO(BackendTest):
    """Compare paddleaudio wav loading and saving with soundfile.

    ``self.files[0]`` is used as the mono sample and ``self.files[1]`` as the
    multi-channel sample; the list is expected to come from the
    ``BackendTest`` base class imported from ``..base``.
    """

    def _assert_save_matches_soundfile(self, samples, sr):
        """Write ``samples`` with both libraries and compare the two files.

        The scratch files are removed in a ``finally`` clause so that a
        failing write or comparison does not leave them behind in the working
        directory. ``shallow=False`` forces a content comparison;
        the default shallow mode may report equality from the ``os.stat``
        signatures alone.
        """
        sf_tmp_file = 'sf_tmp.wav'
        pa_tmp_file = 'pa_tmp.wav'
        try:
            sf.write(sf_tmp_file, samples, sr)
            paddleaudio.save(samples, sr, pa_tmp_file)
            self.assertTrue(
                filecmp.cmp(sf_tmp_file, pa_tmp_file, shallow=False))
        finally:
            for path in (sf_tmp_file, pa_tmp_file):
                if os.path.exists(path):
                    os.remove(path)

    def test_load_mono_channel(self):
        """Loading the mono sample yields soundfile's dtype, rate and data."""
        sf_data, sf_sr = sf.read(self.files[0])
        pa_data, pa_sr = paddleaudio.load(
            self.files[0], normal=False, dtype='float64')

        self.assertEqual(sf_data.dtype, pa_data.dtype)
        self.assertEqual(sf_sr, pa_sr)
        np.testing.assert_array_almost_equal(sf_data, pa_data)

    def test_load_multi_channels(self):
        """Loading the multi-channel sample matches soundfile's transpose."""
        sf_data, sf_sr = sf.read(self.files[1])
        sf_data = sf_data.T  # Channel dim first
        pa_data, pa_sr = paddleaudio.load(
            self.files[1], mono=False, normal=False, dtype='float64')

        self.assertEqual(sf_data.dtype, pa_data.dtype)
        self.assertEqual(sf_sr, pa_sr)
        np.testing.assert_array_almost_equal(sf_data, pa_data)

    def test_save_mono_channel(self):
        """Saving random 1-D int16 samples matches soundfile's output file."""
        waveform = np.random.randint(
            low=-32768, high=32768, size=(48000), dtype=np.int16)
        sr = 16000
        self._assert_save_matches_soundfile(waveform, sr)

    def test_save_multi_channels(self):
        """Saving random (2, 48000) int16 samples, transposed, matches soundfile."""
        waveform = np.random.randint(
            low=-32768, high=32768, size=(2, 48000), dtype=np.int16)
        sr = 16000
        # Both writers receive the transposed array, as in the original test.
        self._assert_save_matches_soundfile(waveform.T, sr)
# Run the soundfile backend tests when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
paddleaudio/tests/features/test_librosa.py
0 → 100644
浏览文件 @
a85250cf
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
librosa
import
numpy
as
np
import
paddle
import
paddleaudio
from
.base
import
FeatTest
from
paddleaudio.functional.window
import
get_window
class TestLibrosa(FeatTest):
    """Cross-check paddle / paddleaudio audio features against librosa.

    ``self.waveform`` and ``self.sr`` are read but never assigned in this
    class; presumably the ``FeatTest`` base (imported from ``.base``, not
    visible here) provides them -- TODO confirm.
    """

    def initParmas(self):
        """Set the analysis parameters shared by the tests in this class."""
        self.window_str = 'hann'
        self.pad_mode = 'reflect'
        self.n_fft = 512
        self.hop_length = 128
        self.n_mels = 40
        self.fmin = 0.0

    def _drop_channel_axis(self):
        """Squeeze axis 0 of ``self.waveform`` when it is two-dimensional."""
        if len(self.waveform.shape) == 2:  # (C, T)
            # The librosa calls below are fed the squeezed waveform.
            self.waveform = self.waveform.squeeze(0)

    def test_stft(self):
        """``paddle.signal.stft`` agrees with ``librosa.core.stft``."""
        self._drop_channel_axis()

        expected = librosa.core.stft(
            y=self.waveform,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=self.window_str,
            center=True,
            dtype=None,
            pad_mode=self.pad_mode,
        )

        batch = paddle.to_tensor(self.waveform).unsqueeze(0)
        win = get_window(self.window_str, self.n_fft, dtype=batch.dtype)
        actual = paddle.signal.stft(
            x=batch,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=win,
            center=True,
            pad_mode=self.pad_mode,
            normalized=False,
            onesided=True,
        )
        actual = actual.squeeze(0)

        np.testing.assert_array_almost_equal(expected, actual, decimal=5)

    def test_istft(self):
        """``paddle.signal.istft`` agrees with ``librosa.core.istft``."""
        self._drop_channel_axis()

        # Both inverse transforms start from the same librosa STFT matrix.
        stft_matrix = librosa.core.stft(
            y=self.waveform,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=self.window_str,
            center=True,
            pad_mode=self.pad_mode,
        )
        expected = librosa.core.istft(
            stft_matrix=stft_matrix,
            hop_length=self.hop_length,
            win_length=None,
            window=self.window_str,
            center=True,
            dtype=None,
            length=None,
        )

        batch = paddle.to_tensor(stft_matrix).unsqueeze(0)
        win = get_window(
            self.window_str,
            self.n_fft,
            dtype=paddle.to_tensor(self.waveform).dtype)
        actual = paddle.signal.istft(
            x=batch,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=win,
            center=True,
            normalized=False,
            onesided=True,
            length=None,
            return_complex=False,
        )
        actual = actual.squeeze(0)

        np.testing.assert_array_almost_equal(expected, actual, decimal=5)

    def test_mel(self):
        """Both paddleaudio fbank builders agree with ``librosa.filters.mel``."""
        expected = librosa.filters.mel(
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            fmin=self.fmin,
            fmax=None,
            htk=False,
            norm='slaney',
            dtype=self.waveform.dtype,
        )
        from_compliance = paddleaudio.compliance.librosa.compute_fbank_matrix(
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            fmin=self.fmin,
            fmax=None,
            htk=False,
            norm='slaney',
            dtype=self.waveform.dtype,
        )

        # The tensor is built only to obtain the dtype handed to the
        # functional implementation.
        tensor = paddle.to_tensor(self.waveform)
        from_functional = paddleaudio.functional.compute_fbank_matrix(
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            f_min=self.fmin,
            f_max=None,
            htk=False,
            norm='slaney',
            dtype=tensor.dtype,
        )

        np.testing.assert_array_almost_equal(expected, from_compliance)
        np.testing.assert_array_almost_equal(expected, from_functional)

    def test_melspect(self):
        """Mel spectrograms from paddleaudio agree with librosa's."""
        self._drop_channel_axis()

        # librosa:
        expected = librosa.feature.melspectrogram(
            y=self.waveform,
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)

        # paddleaudio.compliance.librosa:
        from_compliance = paddleaudio.compliance.librosa.melspectrogram(
            x=self.waveform,
            sr=self.sr,
            window_size=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin,
            to_db=False)

        # paddleaudio.features.layer
        batch = paddle.to_tensor(self.waveform, dtype=paddle.float64)
        batch = batch.unsqueeze(0)  # Add batch dim.
        extractor = paddleaudio.features.MelSpectrogram(
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            f_min=self.fmin,
            dtype=batch.dtype)
        from_layer = extractor(batch).squeeze(0).numpy()

        np.testing.assert_array_almost_equal(
            expected, from_compliance, decimal=5)
        np.testing.assert_array_almost_equal(expected, from_layer, decimal=5)

    def test_log_melspect(self):
        """Log-mel spectrograms from paddleaudio agree with librosa's."""
        self._drop_channel_axis()

        # librosa:
        power = librosa.feature.melspectrogram(
            y=self.waveform,
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)
        expected = librosa.power_to_db(power, top_db=None)

        # paddleaudio.compliance.librosa:
        from_compliance = paddleaudio.compliance.librosa.melspectrogram(
            x=self.waveform,
            sr=self.sr,
            window_size=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)

        # paddleaudio.features.layer
        batch = paddle.to_tensor(self.waveform, dtype=paddle.float64)
        batch = batch.unsqueeze(0)  # Add batch dim.
        extractor = paddleaudio.features.LogMelSpectrogram(
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            f_min=self.fmin,
            dtype=batch.dtype)
        from_layer = extractor(batch).squeeze(0).numpy()

        np.testing.assert_array_almost_equal(
            expected, from_compliance, decimal=5)
        # The layer output is compared at a looser tolerance (4 decimals).
        np.testing.assert_array_almost_equal(expected, from_layer, decimal=4)
# Run the librosa comparison tests when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录