提交 c3e0a8dd 编写于 作者: K KP

Add benchmark.

上级 052d329c
......@@ -17,24 +17,31 @@ platform linux -- Python 3.7.7, pytest-7.0.1, pluggy-1.0.0
benchmark: 3.4.1 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /ssd3/chenxiaojie06/PaddleSpeech/DeepSpeech/paddleaudio
plugins: typeguard-2.12.1, benchmark-3.4.1, anyio-3.5.0
collected 6 items
collected 12 items
features.py ...... [100%]
features.py ............ [100%]
------------------------------------------------------------------------------------------------- benchmark: 6 tests ------------------------------------------------------------------------------------------------
Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_melspect_gpu 632.2041 (1.0) 898.7449 (1.0) 709.3824 (1.0) 109.7022 (6.91) 676.1923 (1.0) 115.2642 (22.19) 1;0 1,409.6768 (1.0) 5 1
test_log_melspect_gpu 912.9159 (1.44) 1,222.0535 (1.36) 931.2489 (1.31) 34.4270 (2.17) 924.9896 (1.37) 5.1949 (1.0) 4;13 1,073.8268 (0.76) 82 1
test_mfcc_gpu 1,244.8374 (1.97) 1,321.3232 (1.47) 1,262.1319 (1.78) 15.8698 (1.0) 1,258.3155 (1.86) 14.1086 (2.72) 17;9 792.3102 (0.56) 91 1
test_melspect_cpu 19,106.5744 (30.22) 46,194.2125 (51.40) 27,458.7850 (38.71) 9,786.1071 (616.65) 23,830.0692 (35.24) 14,344.4724 (>1000.0) 3;0 36.4182 (0.03) 14 1
test_log_melspect_cpu 19,513.7132 (30.87) 20,367.2443 (22.66) 19,765.4018 (27.86) 167.1289 (10.53) 19,750.2729 (29.21) 188.9346 (36.37) 16;1 50.5935 (0.04) 49 1
test_mfcc_cpu 19,881.3528 (31.45) 20,427.2158 (22.73) 20,104.6574 (28.34) 129.5621 (8.16) 20,075.8977 (29.69) 150.9022 (29.05) 12;2 49.7397 (0.04) 48 1
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------- benchmark: 12 tests ----------------------------------------------------------------------------------------------------
Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_melspect_gpu_torchaudio 210.7229 (1.0) 338.5879 (1.0) 217.4949 (1.0) 11.3591 (1.02) 214.0319 (1.0) 8.3707 (1.0) 6;5 4,597.8093 (1.0) 186 1
test_log_melspect_gpu_torchaudio 375.4422 (1.78) 1,024.8050 (3.03) 387.3589 (1.78) 18.7080 (1.69) 385.2872 (1.80) 9.4259 (1.13) 31;31 2,581.5853 (0.56) 1420 1
test_mfcc_gpu_torchaudio 422.4107 (2.00) 700.7364 (2.07) 454.9903 (2.09) 47.3926 (4.27) 436.6031 (2.04) 15.4376 (1.84) 159;193 2,197.8493 (0.48) 1078 1
test_melspect_gpu 819.3776 (3.89) 1,161.9311 (3.43) 900.9168 (4.14) 147.0245 (13.26) 830.7453 (3.88) 115.4500 (13.79) 1;1 1,109.9805 (0.24) 5 1
test_log_melspect_gpu 1,197.9323 (5.68) 1,280.0004 (3.78) 1,214.0182 (5.58) 11.0918 (1.0) 1,211.6358 (5.66) 10.0820 (1.20) 84;31 823.7109 (0.18) 533 1
test_mfcc_gpu 1,337.0719 (6.35) 1,601.5675 (4.73) 1,355.4527 (6.23) 26.4458 (2.38) 1,348.6911 (6.30) 13.1410 (1.57) 16;17 737.7609 (0.16) 193 1
test_melspect_cpu_torchaudio 1,374.8817 (6.52) 3,937.5033 (11.63) 1,574.8930 (7.24) 355.4223 (32.04) 1,409.1432 (6.58) 193.7435 (23.15) 36;49 634.9638 (0.14) 291 1
test_log_melspect_cpu_torchaudio 1,390.2634 (6.60) 2,121.2976 (6.27) 1,559.3045 (7.17) 220.3090 (19.86) 1,409.4356 (6.59) 349.1524 (41.71) 106;0 641.3116 (0.14) 445 1
test_mfcc_cpu_torchaudio 1,445.6678 (6.86) 3,801.8432 (11.23) 1,680.8559 (7.73) 395.5443 (35.66) 1,469.8748 (6.87) 305.6149 (36.51) 38;35 594.9350 (0.13) 469 1
test_melspect_cpu 20,620.2641 (97.85) 20,984.0760 (61.98) 20,721.4942 (95.27) 70.2757 (6.34) 20,717.8025 (96.80) 57.8668 (6.91) 6;2 48.2591 (0.01) 30 1
test_log_melspect_cpu 21,025.3932 (99.78) 48,894.0198 (144.41) 23,057.7049 (106.01) 5,440.3207 (490.48) 21,190.5045 (99.01) 190.0699 (22.71) 4;9 43.3695 (0.01) 44 1
test_mfcc_cpu 21,127.2798 (100.26) 45,811.5358 (135.30) 23,176.4022 (106.56) 5,041.0751 (454.49) 21,319.1714 (99.61) 149.0396 (17.80) 5;9 43.1473 (0.01) 44 1
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Legend:
Outliers: 1 Standard Deviation from Mean; 1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile.
OPS: Operations Per Second, computed as 1 / Mean
========================================================================== 6 passed in 20.51s ===========================================================================
========================================================================== 12 passed in 26.81s ==========================================================================
```
......@@ -11,15 +11,28 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import urllib.request
import librosa
import numpy as np
import paddle
import torch
import torchaudio
import paddleaudio
# Sample clip shared by every benchmark below. It is fetched into the current
# working directory only when a file with the same name is not already there.
wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
_wav_name = os.path.basename(wav_url)
if not os.path.isfile(_wav_name):
    urllib.request.urlretrieve(wav_url, _wav_name)

# Load the clip once; `sr` is reused by the feature configurations.
waveform, sr = paddleaudio.load(os.path.abspath(_wav_name))

# Paddle and torch copies of the same signal, each with a new leading axis.
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)
# Feature conf
mel_conf = {
'sr': 16000,
'sr': sr,
'n_fft': 512,
'hop_length': 128,
'n_mels': 40,
......@@ -30,9 +43,18 @@ mfcc_conf = {
}
mfcc_conf.update(mel_conf)
input_shape = (48000)
waveform = np.random.random(size=input_shape)
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
# Keyword arguments unpacked into torchaudio.transforms.MelSpectrogram.
mel_conf_torchaudio = dict(
    sample_rate=sr,
    n_fft=512,
    hop_length=128,
    n_mels=40,
    norm='slaney',
    mel_scale='slaney', )

# Keyword arguments unpacked into torchaudio.transforms.MFCC.
mfcc_conf_torchaudio = dict(
    sample_rate=sr,
    n_mfcc=20, )
def enable_cpu_device():
......@@ -56,7 +78,7 @@ def test_melspect_cpu(benchmark):
feature_paddleaudio = benchmark(melspectrogram)
feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=4)
feature_librosa, feature_paddleaudio, decimal=3)
def test_melspect_gpu(benchmark):
......@@ -64,11 +86,39 @@ def test_melspect_gpu(benchmark):
feature_paddleaudio = benchmark(melspectrogram)
feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=4)
feature_librosa, feature_paddleaudio, decimal=3)
# Shared torchaudio mel-spectrogram transform; the torchaudio tests below move
# it between devices before benchmarking.
mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram(
    f_min=0.0, **mel_conf_torchaudio)
def melspectrogram_torchaudio():
    """Run the shared torchaudio mel extractor on the shared torch waveform.

    Returns the extractor output with its leading axis squeezed away.
    """
    mel_specgram = mel_extractor_torchaudio(waveform_tensor_torch)
    return mel_specgram.squeeze(0)
def test_melspect_cpu_torchaudio(benchmark):
    """Benchmark the torchaudio mel spectrogram on CPU and compare to librosa.

    Args:
        benchmark: callable fixture that runs the given function and returns
            its result.
    """
    global waveform_tensor_torch, mel_extractor_torchaudio
    # Both the transform and its input have to live on the same device.
    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu')
    waveform_tensor_torch = waveform_tensor_torch.to('cpu')

    measured = benchmark(melspectrogram_torchaudio)
    reference = librosa.feature.melspectrogram(waveform, **mel_conf)
    np.testing.assert_array_almost_equal(reference, measured, decimal=3)
def test_melspect_gpu_torchaudio(benchmark):
    """Benchmark the torchaudio mel spectrogram on CUDA and compare to librosa.

    Args:
        benchmark: callable fixture that runs the given function and returns
            its result.
    """
    global waveform_tensor_torch, mel_extractor_torchaudio
    # Both the transform and its input have to live on the same device.
    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cuda')
    waveform_tensor_torch = waveform_tensor_torch.to('cuda')

    measured = benchmark(melspectrogram_torchaudio)
    reference = librosa.feature.melspectrogram(waveform, **mel_conf)
    # Bring the result back to host memory before the NumPy comparison.
    np.testing.assert_array_almost_equal(
        reference, measured.cpu(), decimal=3)
log_mel_extractor = paddleaudio.features.LogMelSpectrogram(
**mel_conf, f_min=0.0, dtype=waveform_tensor.dtype)
**mel_conf, f_min=0.0, top_db=80.0, dtype=waveform_tensor.dtype)
def log_melspectrogram():
......@@ -79,18 +129,54 @@ def test_log_melspect_cpu(benchmark):
enable_cpu_device()
feature_paddleaudio = benchmark(log_melspectrogram)
feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
feature_librosa = librosa.power_to_db(feature_librosa, top_db=None)
feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=4)
feature_librosa, feature_paddleaudio, decimal=3)
def test_log_melspect_gpu(benchmark):
enable_gpu_device()
feature_paddleaudio = benchmark(log_melspectrogram)
feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
feature_librosa = librosa.power_to_db(feature_librosa, top_db=None)
feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=2)
# Power-to-dB conversion applied after the mel extractor in the torchaudio
# log-mel benchmarks.
amplitude_to_DB = torchaudio.transforms.AmplitudeToDB(
    stype='power', top_db=80.0)
def log_melspectrogram_torchaudio():
    """Compute the torchaudio mel spectrogram and convert it to decibels.

    Returns the dB-scaled output with its leading axis squeezed away.
    """
    return amplitude_to_DB(
        mel_extractor_torchaudio(waveform_tensor_torch)).squeeze(0)
def test_log_melspect_cpu_torchaudio(benchmark):
    """Benchmark the torchaudio log-mel pipeline on CPU and compare to librosa.

    Args:
        benchmark: callable fixture that runs the given function and returns
            its result.
    """
    global waveform_tensor_torch, mel_extractor_torchaudio, amplitude_to_DB
    # Every stage of the pipeline, and its input, goes to the same device.
    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu')
    waveform_tensor_torch = waveform_tensor_torch.to('cpu')
    amplitude_to_DB = amplitude_to_DB.to('cpu')

    measured = benchmark(log_melspectrogram_torchaudio)
    # Reference: librosa mel power spectrogram converted to dB with the same
    # top_db as the torchaudio transform.
    reference = librosa.power_to_db(
        librosa.feature.melspectrogram(waveform, **mel_conf), top_db=80.0)
    np.testing.assert_array_almost_equal(reference, measured, decimal=3)
def test_log_melspect_gpu_torchaudio(benchmark):
global waveform_tensor_torch, mel_extractor_torchaudio, amplitude_to_DB
mel_extractor_torchaudio = mel_extractor_torchaudio.to('cuda')
waveform_tensor_torch = waveform_tensor_torch.to('cuda')
amplitude_to_DB = amplitude_to_DB.to('cuda')
feature_torchaudio = benchmark(log_melspectrogram_torchaudio)
feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf)
feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=4)
feature_librosa, feature_torchaudio.cpu(), decimal=2)
mfcc_extractor = paddleaudio.features.MFCC(
......@@ -106,7 +192,7 @@ def test_mfcc_cpu(benchmark):
feature_paddleaudio = benchmark(mfcc)
feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=4)
feature_librosa, feature_paddleaudio, decimal=3)
def test_mfcc_gpu(benchmark):
......@@ -114,4 +200,37 @@ def test_mfcc_gpu(benchmark):
feature_paddleaudio = benchmark(mfcc)
feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
np.testing.assert_array_almost_equal(
feature_librosa, feature_paddleaudio, decimal=4)
feature_librosa, feature_paddleaudio, decimal=3)
# sample_rate is already supplied through mfcc_conf_torchaudio, so it is
# removed from the mel options before they are forwarded as melkwargs
# (presumably to avoid handing it to the inner mel transform twice).
mel_conf_torchaudio.pop('sample_rate')
mfcc_extractor_torchaudio = torchaudio.transforms.MFCC(
    melkwargs=mel_conf_torchaudio, **mfcc_conf_torchaudio)
def mfcc_torchaudio():
    """Run the shared torchaudio MFCC extractor on the shared torch waveform.

    Returns the extractor output with its leading axis squeezed away.
    """
    coefficients = mfcc_extractor_torchaudio(waveform_tensor_torch)
    return coefficients.squeeze(0)
def test_mfcc_cpu_torchaudio(benchmark):
    """Benchmark the torchaudio MFCC transform on CPU and compare to librosa.

    Args:
        benchmark: callable fixture that runs the given function and returns
            its result.
    """
    global waveform_tensor_torch, mfcc_extractor_torchaudio
    # Rebind the MFCC extractor itself (the name declared global above and
    # read by mfcc_torchaudio), not an unrelated mel-extractor name.
    mfcc_extractor_torchaudio = mfcc_extractor_torchaudio.to('cpu')
    waveform_tensor_torch = waveform_tensor_torch.to('cpu')

    feature_torchaudio = benchmark(mfcc_torchaudio)
    feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio, decimal=3)
def test_mfcc_gpu_torchaudio(benchmark):
    """Benchmark the torchaudio MFCC transform on CUDA and compare to librosa.

    Args:
        benchmark: callable fixture that runs the given function and returns
            its result.
    """
    global waveform_tensor_torch, mfcc_extractor_torchaudio
    # Rebind the MFCC extractor itself (the name declared global above and
    # read by mfcc_torchaudio), not an unrelated mel-extractor name.
    mfcc_extractor_torchaudio = mfcc_extractor_torchaudio.to('cuda')
    waveform_tensor_torch = waveform_tensor_torch.to('cuda')

    feature_torchaudio = benchmark(mfcc_torchaudio)
    feature_librosa = librosa.feature.mfcc(waveform, **mel_conf)
    # Bring the result back to host memory before the NumPy comparison.
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio.cpu(), decimal=3)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册