Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
f8301e6f
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
f8301e6f
编写于
3月 15, 2022
作者:
K
KP
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add benchmark.
上级
c3e0a8dd
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
265 addition
and
144 deletion
+265
-144
paddleaudio/setup.py
paddleaudio/setup.py
+16
-5
paddleaudio/tests/benchmark/README.md
paddleaudio/tests/benchmark/README.md
+13
-21
paddleaudio/tests/benchmark/log_melspectrogram.py
paddleaudio/tests/benchmark/log_melspectrogram.py
+6
-118
paddleaudio/tests/benchmark/melspectrogram.py
paddleaudio/tests/benchmark/melspectrogram.py
+108
-0
paddleaudio/tests/benchmark/mfcc.py
paddleaudio/tests/benchmark/mfcc.py
+122
-0
未找到文件。
paddleaudio/setup.py
浏览文件 @
f8301e6f
...
@@ -11,26 +11,37 @@
...
@@ -11,26 +11,37 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
glob
import
os
import
setuptools
import
setuptools
from
setuptools.command.install
import
install
from
setuptools.command.install
import
install
from
setuptools.command.test
import
test
as
TestCommand
from
setuptools.command.test
import
test
# set the version here
# set the version here
VERSION
=
'0.2.0'
VERSION
=
'0.2.0'
# Inspired by the example at https://pytest.org/latest/goodpractises.html
# Inspired by the example at https://pytest.org/latest/goodpractises.html
class
NoseTestCommand
(
TestCommand
):
class
TestCommand
(
test
):
def
finalize_options
(
self
):
def
finalize_options
(
self
):
TestCommand
.
finalize_options
(
self
)
test
.
finalize_options
(
self
)
self
.
test_args
=
[]
self
.
test_args
=
[]
self
.
test_suite
=
True
self
.
test_suite
=
True
def
run
(
self
):
self
.
run_benchmark
()
super
(
TestCommand
,
self
).
run
()
def
run_tests
(
self
):
def
run_tests
(
self
):
# Run nose ensuring that argv simulates running nosetests directly
# Run nose ensuring that argv simulates running nosetests directly
import
nose
import
nose
nose
.
run_exit
(
argv
=
[
'nosetests'
,
'-w'
,
'tests'
])
nose
.
run_exit
(
argv
=
[
'nosetests'
,
'-w'
,
'tests'
])
def
run_benchmark
(
self
):
for
benchmark_item
in
glob
.
glob
(
'tests/benchmark/*py'
):
os
.
system
(
f
'pytest
{
benchmark_item
}
'
)
class
InstallCommand
(
install
):
class
InstallCommand
(
install
):
def
run
(
self
):
def
run
(
self
):
...
@@ -84,11 +95,11 @@ setuptools.setup(
...
@@ -84,11 +95,11 @@ setuptools.setup(
],
],
setup_requires
=
[
setup_requires
=
[
'nose'
,
'librosa==0.8.1'
,
'soundfile==0.10.3.post1'
,
'nose'
,
'librosa==0.8.1'
,
'soundfile==0.10.3.post1'
,
'torchaudio==0.10.2'
'torchaudio==0.10.2'
,
'pytest-benchmark'
],
],
cmdclass
=
{
cmdclass
=
{
'install'
:
InstallCommand
,
'install'
:
InstallCommand
,
'test'
:
Nose
TestCommand
,
'test'
:
TestCommand
,
},
)
},
)
remove_version_py
()
remove_version_py
()
paddleaudio/tests/benchmark/README.md
浏览文件 @
f8301e6f
...
@@ -7,7 +7,7 @@ pip install pytest-benchmark
...
@@ -7,7 +7,7 @@ pip install pytest-benchmark
# 2. Run
# 2. Run
Run the specific script for profiling.
Run the specific script for profiling.
```
sh
```
sh
pytest
features
.py
pytest
melspectrogram
.py
```
```
Result:
Result:
...
@@ -17,31 +17,23 @@ platform linux -- Python 3.7.7, pytest-7.0.1, pluggy-1.0.0
...
@@ -17,31 +17,23 @@ platform linux -- Python 3.7.7, pytest-7.0.1, pluggy-1.0.0
benchmark: 3.4.1
(
defaults:
timer
=
time.perf_counter
disable_gc
=
False
min_rounds
=
5
min_time
=
0.000005
max_time
=
1.0
calibration_precision
=
10
warmup
=
False
warmup_iterations
=
100000
)
benchmark: 3.4.1
(
defaults:
timer
=
time.perf_counter
disable_gc
=
False
min_rounds
=
5
min_time
=
0.000005
max_time
=
1.0
calibration_precision
=
10
warmup
=
False
warmup_iterations
=
100000
)
rootdir: /ssd3/chenxiaojie06/PaddleSpeech/DeepSpeech/paddleaudio
rootdir: /ssd3/chenxiaojie06/PaddleSpeech/DeepSpeech/paddleaudio
plugins: typeguard-2.12.1, benchmark-3.4.1, anyio-3.5.0
plugins: typeguard-2.12.1, benchmark-3.4.1, anyio-3.5.0
collected
12
items
collected
4
items
features.py ............
[
100%]
melspectrogram.py ....
[
100%]
--------------------------------------------------------------------------------------------------
--
benchmark: 12 tests
---
-------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------
benchmark: 4 tests
-------------------------------------------------------------------------------------------------
Name
(
time
in
us
)
Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
Name
(
time
in
us
)
Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_melspect_gpu_torchaudio 210.7229
(
1.0
)
338.5879
(
1.0
)
217.4949
(
1.0
)
11.3591
(
1.02
)
214.0319
(
1.0
)
8.3707
(
1.0
)
6
;
5 4,597.8093
(
1.0
)
186 1
test_melspect_gpu_torchaudio 202.0765
(
1.0
)
360.6230
(
1.0
)
218.1168
(
1.0
)
16.3022
(
1.0
)
214.2871
(
1.0
)
21.8451
(
1.0
)
40
;
3 4,584.7001
(
1.0
)
286 1
test_log_melspect_gpu_torchaudio 375.4422
(
1.78
)
1,024.8050
(
3.03
)
387.3589
(
1.78
)
18.7080
(
1.69
)
385.2872
(
1.80
)
9.4259
(
1.13
)
31
;
31 2,581.5853
(
0.56
)
1420 1
test_melspect_gpu 657.8509
(
3.26
)
908.0470
(
2.52
)
724.2545
(
3.32
)
106.5771
(
6.54
)
669.9096
(
3.13
)
113.4719
(
5.19
)
1
;
0 1,380.7300
(
0.30
)
5 1
test_mfcc_gpu_torchaudio 422.4107
(
2.00
)
700.7364
(
2.07
)
454.9903
(
2.09
)
47.3926
(
4.27
)
436.6031
(
2.04
)
15.4376
(
1.84
)
159
;
193 2,197.8493
(
0.48
)
1078 1
test_melspect_cpu_torchaudio 1,247.6053
(
6.17
)
2,892.5799
(
8.02
)
1,443.2853
(
6.62
)
345.3732
(
21.19
)
1,262.7263
(
5.89
)
221.6385
(
10.15
)
56
;
53 692.8637
(
0.15
)
399 1
test_melspect_gpu 819.3776
(
3.89
)
1,161.9311
(
3.43
)
900.9168
(
4.14
)
147.0245
(
13.26
)
830.7453
(
3.88
)
115.4500
(
13.79
)
1
;
1 1,109.9805
(
0.24
)
5 1
test_melspect_cpu 20,326.2549
(
100.59
)
20,607.8682
(
57.15
)
20,473.4125
(
93.86
)
63.8654
(
3.92
)
20,467.0429
(
95.51
)
68.4294
(
3.13
)
8
;
1 48.8438
(
0.01
)
29 1
test_log_melspect_gpu 1,197.9323
(
5.68
)
1,280.0004
(
3.78
)
1,214.0182
(
5.58
)
11.0918
(
1.0
)
1,211.6358
(
5.66
)
10.0820
(
1.20
)
84
;
31 823.7109
(
0.18
)
533 1
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_mfcc_gpu 1,337.0719
(
6.35
)
1,601.5675
(
4.73
)
1,355.4527
(
6.23
)
26.4458
(
2.38
)
1,348.6911
(
6.30
)
13.1410
(
1.57
)
16
;
17 737.7609
(
0.16
)
193 1
test_melspect_cpu_torchaudio 1,374.8817
(
6.52
)
3,937.5033
(
11.63
)
1,574.8930
(
7.24
)
355.4223
(
32.04
)
1,409.1432
(
6.58
)
193.7435
(
23.15
)
36
;
49 634.9638
(
0.14
)
291 1
test_log_melspect_cpu_torchaudio 1,390.2634
(
6.60
)
2,121.2976
(
6.27
)
1,559.3045
(
7.17
)
220.3090
(
19.86
)
1,409.4356
(
6.59
)
349.1524
(
41.71
)
106
;
0 641.3116
(
0.14
)
445 1
test_mfcc_cpu_torchaudio 1,445.6678
(
6.86
)
3,801.8432
(
11.23
)
1,680.8559
(
7.73
)
395.5443
(
35.66
)
1,469.8748
(
6.87
)
305.6149
(
36.51
)
38
;
35 594.9350
(
0.13
)
469 1
test_melspect_cpu 20,620.2641
(
97.85
)
20,984.0760
(
61.98
)
20,721.4942
(
95.27
)
70.2757
(
6.34
)
20,717.8025
(
96.80
)
57.8668
(
6.91
)
6
;
2 48.2591
(
0.01
)
30 1
test_log_melspect_cpu 21,025.3932
(
99.78
)
48,894.0198
(
144.41
)
23,057.7049
(
106.01
)
5,440.3207
(
490.48
)
21,190.5045
(
99.01
)
190.0699
(
22.71
)
4
;
9 43.3695
(
0.01
)
44 1
test_mfcc_cpu 21,127.2798
(
100.26
)
45,811.5358
(
135.30
)
23,176.4022
(
106.56
)
5,041.0751
(
454.49
)
21,319.1714
(
99.61
)
149.0396
(
17.80
)
5
;
9 43.1473
(
0.01
)
44 1
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Legend:
Legend:
Outliers: 1 Standard Deviation from Mean
;
1.5 IQR
(
InterQuartile Range
)
from 1st Quartile and 3rd Quartile.
Outliers: 1 Standard Deviation from Mean
;
1.5 IQR
(
InterQuartile Range
)
from 1st Quartile and 3rd Quartile.
OPS: Operations Per Second, computed as 1 / Mean
OPS: Operations Per Second, computed as 1 / Mean
==========================================================================
12 passed
in
26.81s
==========================================================================
==========================================================================
4 passed
in
21.12s
=
==========================================================================
```
```
paddleaudio/tests/benchmark/
features
.py
→
paddleaudio/tests/benchmark/
log_melspectrogram
.py
浏览文件 @
f8301e6f
...
@@ -37,11 +37,6 @@ mel_conf = {
...
@@ -37,11 +37,6 @@ mel_conf = {
'hop_length'
:
128
,
'hop_length'
:
128
,
'n_mels'
:
40
,
'n_mels'
:
40
,
}
}
mfcc_conf
=
{
'n_mfcc'
:
20
,
'top_db'
:
80.0
,
}
mfcc_conf
.
update
(
mel_conf
)
mel_conf_torchaudio
=
{
mel_conf_torchaudio
=
{
'sample_rate'
:
sr
,
'sample_rate'
:
sr
,
...
@@ -51,10 +46,6 @@ mel_conf_torchaudio = {
...
@@ -51,10 +46,6 @@ mel_conf_torchaudio = {
'norm'
:
'slaney'
,
'norm'
:
'slaney'
,
'mel_scale'
:
'slaney'
,
'mel_scale'
:
'slaney'
,
}
}
mfcc_conf_torchaudio
=
{
'sample_rate'
:
sr
,
'n_mfcc'
:
20
,
}
def
enable_cpu_device
():
def
enable_cpu_device
():
...
@@ -65,58 +56,6 @@ def enable_gpu_device():
...
@@ -65,58 +56,6 @@ def enable_gpu_device():
paddle
.
set_device
(
'gpu'
)
paddle
.
set_device
(
'gpu'
)
mel_extractor
=
paddleaudio
.
features
.
MelSpectrogram
(
**
mel_conf
,
f_min
=
0.0
,
dtype
=
waveform_tensor
.
dtype
)
def
melspectrogram
():
return
mel_extractor
(
waveform_tensor
).
squeeze
(
0
)
def
test_melspect_cpu
(
benchmark
):
enable_cpu_device
()
feature_paddleaudio
=
benchmark
(
melspectrogram
)
feature_librosa
=
librosa
.
feature
.
melspectrogram
(
waveform
,
**
mel_conf
)
np
.
testing
.
assert_array_almost_equal
(
feature_librosa
,
feature_paddleaudio
,
decimal
=
3
)
def
test_melspect_gpu
(
benchmark
):
enable_gpu_device
()
feature_paddleaudio
=
benchmark
(
melspectrogram
)
feature_librosa
=
librosa
.
feature
.
melspectrogram
(
waveform
,
**
mel_conf
)
np
.
testing
.
assert_array_almost_equal
(
feature_librosa
,
feature_paddleaudio
,
decimal
=
3
)
mel_extractor_torchaudio
=
torchaudio
.
transforms
.
MelSpectrogram
(
**
mel_conf_torchaudio
,
f_min
=
0.0
)
def
melspectrogram_torchaudio
():
return
mel_extractor_torchaudio
(
waveform_tensor_torch
).
squeeze
(
0
)
def
test_melspect_cpu_torchaudio
(
benchmark
):
global
waveform_tensor_torch
,
mel_extractor_torchaudio
mel_extractor_torchaudio
=
mel_extractor_torchaudio
.
to
(
'cpu'
)
waveform_tensor_torch
=
waveform_tensor_torch
.
to
(
'cpu'
)
feature_paddleaudio
=
benchmark
(
melspectrogram_torchaudio
)
feature_librosa
=
librosa
.
feature
.
melspectrogram
(
waveform
,
**
mel_conf
)
np
.
testing
.
assert_array_almost_equal
(
feature_librosa
,
feature_paddleaudio
,
decimal
=
3
)
def
test_melspect_gpu_torchaudio
(
benchmark
):
global
waveform_tensor_torch
,
mel_extractor_torchaudio
mel_extractor_torchaudio
=
mel_extractor_torchaudio
.
to
(
'cuda'
)
waveform_tensor_torch
=
waveform_tensor_torch
.
to
(
'cuda'
)
feature_torchaudio
=
benchmark
(
melspectrogram_torchaudio
)
feature_librosa
=
librosa
.
feature
.
melspectrogram
(
waveform
,
**
mel_conf
)
np
.
testing
.
assert_array_almost_equal
(
feature_librosa
,
feature_torchaudio
.
cpu
(),
decimal
=
3
)
log_mel_extractor
=
paddleaudio
.
features
.
LogMelSpectrogram
(
log_mel_extractor
=
paddleaudio
.
features
.
LogMelSpectrogram
(
**
mel_conf
,
f_min
=
0.0
,
top_db
=
80.0
,
dtype
=
waveform_tensor
.
dtype
)
**
mel_conf
,
f_min
=
0.0
,
top_db
=
80.0
,
dtype
=
waveform_tensor
.
dtype
)
...
@@ -143,9 +82,15 @@ def test_log_melspect_gpu(benchmark):
...
@@ -143,9 +82,15 @@ def test_log_melspect_gpu(benchmark):
feature_librosa
,
feature_paddleaudio
,
decimal
=
2
)
feature_librosa
,
feature_paddleaudio
,
decimal
=
2
)
mel_extractor_torchaudio
=
torchaudio
.
transforms
.
MelSpectrogram
(
**
mel_conf_torchaudio
,
f_min
=
0.0
)
amplitude_to_DB
=
torchaudio
.
transforms
.
AmplitudeToDB
(
'power'
,
top_db
=
80.0
)
amplitude_to_DB
=
torchaudio
.
transforms
.
AmplitudeToDB
(
'power'
,
top_db
=
80.0
)
def
melspectrogram_torchaudio
():
return
mel_extractor_torchaudio
(
waveform_tensor_torch
).
squeeze
(
0
)
def
log_melspectrogram_torchaudio
():
def
log_melspectrogram_torchaudio
():
mel_specgram
=
mel_extractor_torchaudio
(
waveform_tensor_torch
)
mel_specgram
=
mel_extractor_torchaudio
(
waveform_tensor_torch
)
return
amplitude_to_DB
(
mel_specgram
).
squeeze
(
0
)
return
amplitude_to_DB
(
mel_specgram
).
squeeze
(
0
)
...
@@ -177,60 +122,3 @@ def test_log_melspect_gpu_torchaudio(benchmark):
...
@@ -177,60 +122,3 @@ def test_log_melspect_gpu_torchaudio(benchmark):
feature_librosa
=
librosa
.
power_to_db
(
feature_librosa
,
top_db
=
80.0
)
feature_librosa
=
librosa
.
power_to_db
(
feature_librosa
,
top_db
=
80.0
)
np
.
testing
.
assert_array_almost_equal
(
np
.
testing
.
assert_array_almost_equal
(
feature_librosa
,
feature_torchaudio
.
cpu
(),
decimal
=
2
)
feature_librosa
,
feature_torchaudio
.
cpu
(),
decimal
=
2
)
mfcc_extractor
=
paddleaudio
.
features
.
MFCC
(
**
mfcc_conf
,
f_min
=
0.0
,
dtype
=
waveform_tensor
.
dtype
)
def
mfcc
():
return
mfcc_extractor
(
waveform_tensor
).
squeeze
(
0
)
def
test_mfcc_cpu
(
benchmark
):
enable_cpu_device
()
feature_paddleaudio
=
benchmark
(
mfcc
)
feature_librosa
=
librosa
.
feature
.
mfcc
(
waveform
,
**
mel_conf
)
np
.
testing
.
assert_array_almost_equal
(
feature_librosa
,
feature_paddleaudio
,
decimal
=
3
)
def
test_mfcc_gpu
(
benchmark
):
enable_gpu_device
()
feature_paddleaudio
=
benchmark
(
mfcc
)
feature_librosa
=
librosa
.
feature
.
mfcc
(
waveform
,
**
mel_conf
)
np
.
testing
.
assert_array_almost_equal
(
feature_librosa
,
feature_paddleaudio
,
decimal
=
3
)
del
mel_conf_torchaudio
[
'sample_rate'
]
mfcc_extractor_torchaudio
=
torchaudio
.
transforms
.
MFCC
(
**
mfcc_conf_torchaudio
,
melkwargs
=
mel_conf_torchaudio
)
def
mfcc_torchaudio
():
return
mfcc_extractor_torchaudio
(
waveform_tensor_torch
).
squeeze
(
0
)
def
test_mfcc_cpu_torchaudio
(
benchmark
):
global
waveform_tensor_torch
,
mfcc_extractor_torchaudio
mel_extractor_torchaudio
=
mfcc_extractor_torchaudio
.
to
(
'cpu'
)
waveform_tensor_torch
=
waveform_tensor_torch
.
to
(
'cpu'
)
feature_paddleaudio
=
benchmark
(
mfcc_torchaudio
)
feature_librosa
=
librosa
.
feature
.
mfcc
(
waveform
,
**
mel_conf
)
np
.
testing
.
assert_array_almost_equal
(
feature_librosa
,
feature_paddleaudio
,
decimal
=
3
)
def
test_mfcc_gpu_torchaudio
(
benchmark
):
global
waveform_tensor_torch
,
mfcc_extractor_torchaudio
mel_extractor_torchaudio
=
mfcc_extractor_torchaudio
.
to
(
'cuda'
)
waveform_tensor_torch
=
waveform_tensor_torch
.
to
(
'cuda'
)
feature_torchaudio
=
benchmark
(
mfcc_torchaudio
)
feature_librosa
=
librosa
.
feature
.
mfcc
(
waveform
,
**
mel_conf
)
np
.
testing
.
assert_array_almost_equal
(
feature_librosa
,
feature_torchaudio
.
cpu
(),
decimal
=
3
)
paddleaudio/tests/benchmark/melspectrogram.py
0 → 100644
浏览文件 @
f8301e6f
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
urllib.request
import
librosa
import
numpy
as
np
import
paddle
import
torch
import
torchaudio
import
paddleaudio
# Audio clip shared by the benchmarks in this module. It is fetched into the
# current working directory on first use and reused afterwards.
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
_wav_name = os.path.basename(wav_url)
if not os.path.isfile(_wav_name):
    urllib.request.urlretrieve(wav_url, _wav_name)

waveform, sr = paddleaudio.load(os.path.abspath(_wav_name))

# Paddle and torch views of the same samples, each with an extra leading axis.
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)
# Feature configuration.
# Keyword arguments shared by the paddleaudio extractor and the librosa
# reference call.
mel_conf = dict(
    sr=sr,
    n_fft=512,
    hop_length=128,
    n_mels=40,
)

# Keyword arguments for the torchaudio extractor.
mel_conf_torchaudio = dict(
    sample_rate=sr,
    n_fft=512,
    hop_length=128,
    n_mels=40,
    norm='slaney',
    mel_scale='slaney',
)
def enable_cpu_device():
    """Select 'cpu' as the Paddle device through ``paddle.set_device``."""
    target = 'cpu'
    paddle.set_device(target)
def enable_gpu_device():
    """Select 'gpu' as the Paddle device through ``paddle.set_device``."""
    target = 'gpu'
    paddle.set_device(target)
# paddleaudio extractor, built once at module import (outside the benchmarked
# callable).
mel_extractor = paddleaudio.features.MelSpectrogram(
    f_min=0.0, dtype=waveform_tensor.dtype, **mel_conf)


def melspectrogram():
    """Apply ``mel_extractor`` to ``waveform_tensor`` and squeeze axis 0."""
    features = mel_extractor(waveform_tensor)
    return features.squeeze(0)
def test_melspect_cpu(benchmark):
    """Benchmark paddleaudio's mel spectrogram on CPU and compare with librosa.

    Args:
        benchmark: callable fixture; it is invoked with ``melspectrogram`` and
            its return value is the feature that gets compared.
    """
    enable_cpu_device()
    actual = benchmark(melspectrogram)
    expected = librosa.feature.melspectrogram(waveform, **mel_conf)
    # Agreement is required to 3 decimal places.
    np.testing.assert_array_almost_equal(expected, actual, decimal=3)
def test_melspect_gpu(benchmark):
    """Benchmark paddleaudio's mel spectrogram on GPU and compare with librosa.

    Args:
        benchmark: callable fixture; it is invoked with ``melspectrogram`` and
            its return value is the feature that gets compared.
    """
    enable_gpu_device()
    actual = benchmark(melspectrogram)
    expected = librosa.feature.melspectrogram(waveform, **mel_conf)
    # Agreement is required to 3 decimal places.
    np.testing.assert_array_almost_equal(expected, actual, decimal=3)
# torchaudio extractor, built once at module import; the tests below rebind it
# when they move it between devices.
mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram(
    f_min=0.0, **mel_conf_torchaudio)


def melspectrogram_torchaudio():
    """Apply ``mel_extractor_torchaudio`` to ``waveform_tensor_torch`` and squeeze axis 0."""
    features = mel_extractor_torchaudio(waveform_tensor_torch)
    return features.squeeze(0)
def test_melspect_cpu_torchaudio(benchmark):
    """Benchmark torchaudio's mel spectrogram on CPU and compare with librosa.

    Rebinds the module-level extractor and waveform tensor to their
    ``.to('cpu')`` results before timing.

    Args:
        benchmark: callable fixture; it is invoked with
            ``melspectrogram_torchaudio`` and its return value is the feature
            that gets compared.
    """
    global waveform_tensor_torch, mel_extractor_torchaudio
    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu')
    waveform_tensor_torch = waveform_tensor_torch.to('cpu')

    feature_torchaudio = benchmark(melspectrogram_torchaudio)
    reference = librosa.feature.melspectrogram(waveform, **mel_conf)
    # Agreement is required to 3 decimal places.
    np.testing.assert_array_almost_equal(
        reference, feature_torchaudio, decimal=3)
def test_melspect_gpu_torchaudio(benchmark):
    """Benchmark torchaudio's mel spectrogram on CUDA and compare with librosa.

    Rebinds the module-level extractor and waveform tensor to their
    ``.to('cuda')`` results before timing.

    Args:
        benchmark: callable fixture; it is invoked with
            ``melspectrogram_torchaudio`` and its return value is the feature
            that gets compared.
    """
    global waveform_tensor_torch, mel_extractor_torchaudio
    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cuda')
    waveform_tensor_torch = waveform_tensor_torch.to('cuda')

    timed_output = benchmark(melspectrogram_torchaudio)
    reference = librosa.feature.melspectrogram(waveform, **mel_conf)
    # The result is copied back with .cpu() before the NumPy comparison.
    np.testing.assert_array_almost_equal(
        reference, timed_output.cpu(), decimal=3)
paddleaudio/tests/benchmark/mfcc.py
0 → 100644
浏览文件 @
f8301e6f
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
urllib.request
import
librosa
import
numpy
as
np
import
paddle
import
torch
import
torchaudio
import
paddleaudio
# Audio clip shared by the benchmarks in this module. It is fetched into the
# current working directory on first use and reused afterwards.
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
_wav_name = os.path.basename(wav_url)
if not os.path.isfile(_wav_name):
    urllib.request.urlretrieve(wav_url, _wav_name)

waveform, sr = paddleaudio.load(os.path.abspath(_wav_name))

# Paddle and torch views of the same samples, each with an extra leading axis.
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)
# Feature configuration.
# Mel settings; also passed to the librosa reference call.
mel_conf = dict(
    sr=sr,
    n_fft=512,
    hop_length=128,
    n_mels=40,
)

# paddleaudio MFCC settings: the MFCC-specific keys followed by every mel key.
mfcc_conf = {
    'n_mfcc': 20,
    'top_db': 80.0,
    **mel_conf,
}

# Mel settings for torchaudio.
mel_conf_torchaudio = dict(
    sample_rate=sr,
    n_fft=512,
    hop_length=128,
    n_mels=40,
    norm='slaney',
    mel_scale='slaney',
)

# Top-level settings for the torchaudio MFCC transform.
mfcc_conf_torchaudio = dict(
    sample_rate=sr,
    n_mfcc=20,
)
def enable_cpu_device():
    """Select 'cpu' as the Paddle device through ``paddle.set_device``."""
    target = 'cpu'
    paddle.set_device(target)
def enable_gpu_device():
    """Select 'gpu' as the Paddle device through ``paddle.set_device``."""
    target = 'gpu'
    paddle.set_device(target)
# paddleaudio MFCC extractor, built once at module import (outside the
# benchmarked callable).
mfcc_extractor = paddleaudio.features.MFCC(
    f_min=0.0, dtype=waveform_tensor.dtype, **mfcc_conf)


def mfcc():
    """Apply ``mfcc_extractor`` to ``waveform_tensor`` and squeeze axis 0."""
    coefficients = mfcc_extractor(waveform_tensor)
    return coefficients.squeeze(0)
def test_mfcc_cpu(benchmark):
    """Benchmark paddleaudio's MFCC on CPU and compare with librosa.

    Args:
        benchmark: callable fixture; it is invoked with ``mfcc`` and its
            return value is the feature that gets compared.
    """
    enable_cpu_device()
    actual = benchmark(mfcc)
    expected = librosa.feature.mfcc(waveform, **mel_conf)
    # Agreement is required to 3 decimal places.
    np.testing.assert_array_almost_equal(expected, actual, decimal=3)
def test_mfcc_gpu(benchmark):
    """Benchmark paddleaudio's MFCC on GPU and compare with librosa.

    Args:
        benchmark: callable fixture; it is invoked with ``mfcc`` and its
            return value is the feature that gets compared.
    """
    enable_gpu_device()
    actual = benchmark(mfcc)
    expected = librosa.feature.mfcc(waveform, **mel_conf)
    # Agreement is required to 3 decimal places.
    np.testing.assert_array_almost_equal(expected, actual, decimal=3)
# 'sample_rate' is already supplied via mfcc_conf_torchaudio; it is dropped
# from the mel kwargs here, presumably so it is not passed a second time
# through melkwargs (confirm against the torchaudio MFCC signature).
mel_conf_torchaudio.pop('sample_rate')

# torchaudio MFCC extractor, built once at module import.
mfcc_extractor_torchaudio = torchaudio.transforms.MFCC(
    melkwargs=mel_conf_torchaudio, **mfcc_conf_torchaudio)


def mfcc_torchaudio():
    """Apply ``mfcc_extractor_torchaudio`` to ``waveform_tensor_torch`` and squeeze axis 0."""
    coefficients = mfcc_extractor_torchaudio(waveform_tensor_torch)
    return coefficients.squeeze(0)
def test_mfcc_cpu_torchaudio(benchmark):
    """Benchmark torchaudio's MFCC on CPU and compare with librosa.

    Rebinds the module-level MFCC extractor and waveform tensor to their
    ``.to('cpu')`` results before timing.

    Args:
        benchmark: callable fixture; it is invoked with ``mfcc_torchaudio``
            and its return value is the feature that gets compared.
    """
    global waveform_tensor_torch, mfcc_extractor_torchaudio
    # Assign to the name declared global above so the module-level MFCC
    # extractor is what gets rebound; a differently named target would only
    # create an unused local.
    mfcc_extractor_torchaudio = mfcc_extractor_torchaudio.to('cpu')
    waveform_tensor_torch = waveform_tensor_torch.to('cpu')

    feature_torchaudio = benchmark(mfcc_torchaudio)
    feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
    # Agreement is required to 3 decimal places.
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio, decimal=3)
def test_mfcc_gpu_torchaudio(benchmark):
    """Benchmark torchaudio's MFCC on CUDA and compare with librosa.

    Rebinds the module-level MFCC extractor and waveform tensor to their
    ``.to('cuda')`` results before timing.

    Args:
        benchmark: callable fixture; it is invoked with ``mfcc_torchaudio``
            and its return value is the feature that gets compared.
    """
    global waveform_tensor_torch, mfcc_extractor_torchaudio
    # Assign to the name declared global above so the module-level MFCC
    # extractor is what gets rebound; a differently named target would only
    # create an unused local.
    mfcc_extractor_torchaudio = mfcc_extractor_torchaudio.to('cuda')
    waveform_tensor_torch = waveform_tensor_torch.to('cuda')

    feature_torchaudio = benchmark(mfcc_torchaudio)
    feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
    # The result is copied back with .cpu() before the NumPy comparison.
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio.cpu(), decimal=3)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录