未验证 提交 f93e9a58 编写于 作者: Y YangZhou 提交者: GitHub

[Cherry-pick][audio] rm kaiser window in audio get_window function && rm audio...

[Cherry-pick][audio] rm kaiser window in audio get_window function && rm audio utils(#47469) (#47479)

* [audio] rm kaiser window in audio get_window function && rm audio utils (#47469)

* rm kaiser window in audio window function

* rm paddle audio utils which is redundant

* rm kaiser in test_audio_functions.py
Conflicts:
	python/paddle/audio/utils/error.py
	python/paddle/tests/test_audio_functions.py

* format
上级 12b9b03e
...@@ -231,16 +231,6 @@ def _tukey( ...@@ -231,16 +231,6 @@ def _tukey(
return _truncate(w, needs_trunc) return _truncate(w, needs_trunc)
@window_function_register.register()
def _kaiser(
    M: int, beta: float, sym: bool = True, dtype: str = 'float64'
) -> Tensor:
    """Compute a Kaiser window.

    The Kaiser window is a taper formed by using a Bessel function.

    This is a registered placeholder only: the body performs no computation
    and unconditionally raises, so none of the arguments are read.

    Args:
        M (int): Accepted but unused. Presumably the window length, as in
            the sibling window functions -- confirm before implementing.
        beta (float): Accepted but unused. Presumably the Kaiser shape
            parameter -- confirm before implementing.
        sym (bool, optional): Accepted but unused. Defaults to True.
        dtype (str, optional): Accepted but unused. Defaults to 'float64'.

    Raises:
        NotImplementedError: Always; the Kaiser window is not implemented.
    """
    raise NotImplementedError()
@window_function_register.register() @window_function_register.register()
def _gaussian( def _gaussian(
M: int, std: float, sym: bool = True, dtype: str = 'float64' M: int, std: float, sym: bool = True, dtype: str = 'float64'
...@@ -346,7 +336,7 @@ def get_window( ...@@ -346,7 +336,7 @@ def get_window(
"""Return a window of a given length and type. """Return a window of a given length and type.
Args: Args:
window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'general_gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'general_gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'.
win_length (int): Number of samples. win_length (int): Number of samples.
fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True. fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True.
dtype (str, optional): The data type of the return window. Defaults to 'float64'. dtype (str, optional): The data type of the return window. Defaults to 'float64'.
...@@ -363,7 +353,7 @@ def get_window( ...@@ -363,7 +353,7 @@ def get_window(
cosine_window = paddle.audio.functional.get_window('cosine', n_fft) cosine_window = paddle.audio.functional.get_window('cosine', n_fft)
std = 7 std = 7
gussian_window = paddle.audio.functional.get_window(('gaussian',std), n_fft) gaussian_window = paddle.audio.functional.get_window(('gaussian',std), n_fft)
""" """
sym = not fftbins sym = not fftbins
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['ParameterError']
class ParameterError(Exception):
    """Exception raised when a parameter fails validation.

    A plain ``Exception`` subclass with no extra state or behavior; it only
    provides a distinct type for callers to catch.
    """

    pass
...@@ -29,12 +29,10 @@ def parameterize(*params): ...@@ -29,12 +29,10 @@ def parameterize(*params):
class TestAudioFuncitons(unittest.TestCase): class TestAudioFuncitons(unittest.TestCase):
def setUp(self): def setUp(self):
self.initParmas() self.initParmas()
def initParmas(self): def initParmas(self):
def get_wav_data(dtype: str, num_channels: int, num_frames: int): def get_wav_data(dtype: str, num_channels: int, num_frames: int):
dtype_ = getattr(paddle, dtype) dtype_ = getattr(paddle, dtype)
base = paddle.linspace(-1.0, 1.0, num_frames, dtype=dtype_) * 0.1 base = paddle.linspace(-1.0, 1.0, num_frames, dtype=dtype_) * 0.1
...@@ -54,49 +52,56 @@ class TestAudioFuncitons(unittest.TestCase): ...@@ -54,49 +52,56 @@ class TestAudioFuncitons(unittest.TestCase):
self.sr = 16000 self.sr = 16000
self.dtype = "float32" self.dtype = "float32"
self.window_size = 1024 self.window_size = 1024
waveform_tensor = get_wav_data(self.dtype, waveform_tensor = get_wav_data(
self.num_channels, self.dtype, self.num_channels, num_frames=self.duration * self.sr
num_frames=self.duration * self.sr) )
self.waveform = waveform_tensor.numpy() self.waveform = waveform_tensor.numpy()
@parameterize([1.0, 3.0, 9.0, 25.0], [True, False]) @parameterize([1.0, 3.0, 9.0, 25.0], [True, False])
def test_audio_function(self, val: float, htk_flag: bool): def test_audio_function(self, val: float, htk_flag: bool):
mel_paddle = paddle.audio.functional.hz_to_mel(val, htk_flag) mel_paddle = paddle.audio.functional.hz_to_mel(val, htk_flag)
mel_paddle_tensor = paddle.audio.functional.hz_to_mel( mel_paddle_tensor = paddle.audio.functional.hz_to_mel(
paddle.to_tensor(val), htk_flag) paddle.to_tensor(val), htk_flag
)
mel_librosa = librosa.hz_to_mel(val, htk_flag) mel_librosa = librosa.hz_to_mel(val, htk_flag)
np.testing.assert_almost_equal(mel_paddle, mel_librosa, decimal=5) np.testing.assert_almost_equal(mel_paddle, mel_librosa, decimal=5)
np.testing.assert_almost_equal(mel_paddle_tensor.numpy(), np.testing.assert_almost_equal(
mel_librosa, mel_paddle_tensor.numpy(), mel_librosa, decimal=4
decimal=4) )
hz_paddle = paddle.audio.functional.mel_to_hz(val, htk_flag) hz_paddle = paddle.audio.functional.mel_to_hz(val, htk_flag)
hz_paddle_tensor = paddle.audio.functional.mel_to_hz( hz_paddle_tensor = paddle.audio.functional.mel_to_hz(
paddle.to_tensor(val), htk_flag) paddle.to_tensor(val), htk_flag
)
hz_librosa = librosa.mel_to_hz(val, htk_flag) hz_librosa = librosa.mel_to_hz(val, htk_flag)
np.testing.assert_almost_equal(hz_paddle, hz_librosa, decimal=4) np.testing.assert_almost_equal(hz_paddle, hz_librosa, decimal=4)
np.testing.assert_almost_equal(hz_paddle_tensor.numpy(), np.testing.assert_almost_equal(
hz_librosa, hz_paddle_tensor.numpy(), hz_librosa, decimal=4
decimal=4) )
decibel_paddle = paddle.audio.functional.power_to_db( decibel_paddle = paddle.audio.functional.power_to_db(
paddle.to_tensor(val)) paddle.to_tensor(val)
)
decibel_librosa = librosa.power_to_db(val) decibel_librosa = librosa.power_to_db(val)
np.testing.assert_almost_equal(decibel_paddle.numpy(), np.testing.assert_almost_equal(
decibel_paddle, decibel_paddle.numpy(), decibel_paddle, decimal=5
decimal=5) )
@parameterize([64, 128, 256], [0.0, 0.5, 1.0], [10000, 11025], @parameterize(
[False, True]) [64, 128, 256], [0.0, 0.5, 1.0], [10000, 11025], [False, True]
def test_audio_function_mel(self, n_mels: int, f_min: float, f_max: float, )
htk_flag: bool): def test_audio_function_mel(
librosa_mel_freq = librosa.mel_frequencies(n_mels, f_min, f_max, self, n_mels: int, f_min: float, f_max: float, htk_flag: bool
htk_flag) ):
librosa_mel_freq = librosa.mel_frequencies(
n_mels, f_min, f_max, htk_flag
)
paddle_mel_freq = paddle.audio.functional.mel_frequencies( paddle_mel_freq = paddle.audio.functional.mel_frequencies(
n_mels, f_min, f_max, htk_flag, 'float64') n_mels, f_min, f_max, htk_flag, 'float64'
np.testing.assert_almost_equal(paddle_mel_freq, )
librosa_mel_freq, np.testing.assert_almost_equal(
decimal=3) paddle_mel_freq, librosa_mel_freq, decimal=3
)
@parameterize([8000, 16000], [64, 128, 256]) @parameterize([8000, 16000], [64, 128, 256])
def test_audio_function_fft(self, sr: int, n_fft: int): def test_audio_function_fft(self, sr: int, n_fft: int):
...@@ -107,63 +112,73 @@ class TestAudioFuncitons(unittest.TestCase): ...@@ -107,63 +112,73 @@ class TestAudioFuncitons(unittest.TestCase):
@parameterize([1.0, 3.0, 9.0]) @parameterize([1.0, 3.0, 9.0])
def test_audio_function_exception(self, spect: float): def test_audio_function_exception(self, spect: float):
try: try:
paddle.audio.functional.power_to_db(paddle.to_tensor([spect]), paddle.audio.functional.power_to_db(
amin=0) paddle.to_tensor([spect]), amin=0
)
except Exception: except Exception:
pass pass
try: try:
paddle.audio.functional.power_to_db(paddle.to_tensor([spect]), paddle.audio.functional.power_to_db(
ref_value=0) paddle.to_tensor([spect]), ref_value=0
)
except Exception: except Exception:
pass pass
try: try:
paddle.audio.functional.power_to_db(paddle.to_tensor([spect]), paddle.audio.functional.power_to_db(
top_db=-1) paddle.to_tensor([spect]), top_db=-1
)
except Exception: except Exception:
pass pass
@parameterize([ @parameterize(
"hamming", "hann", "triang", "bohman", "blackman", "cosine", "tukey", [
"taylor" "hamming",
], [1, 512]) "hann",
"triang",
"bohman",
"blackman",
"cosine",
"tukey",
"taylor",
],
[1, 512],
)
def test_window(self, window_type: str, n_fft: int): def test_window(self, window_type: str, n_fft: int):
window_scipy = signal.get_window(window_type, n_fft) window_scipy = signal.get_window(window_type, n_fft)
window_paddle = paddle.audio.functional.get_window(window_type, n_fft) window_paddle = paddle.audio.functional.get_window(window_type, n_fft)
np.testing.assert_array_almost_equal(window_scipy, np.testing.assert_array_almost_equal(
window_paddle.numpy(), window_scipy, window_paddle.numpy(), decimal=5
decimal=5) )
@parameterize([1, 512]) @parameterize([1, 512])
def test_gaussian_window_and_exception(self, n_fft: int): def test_gaussian_window_and_exception(self, n_fft: int):
window_scipy_gaussain = signal.windows.gaussian(n_fft, std=7) window_scipy_gaussain = signal.windows.gaussian(n_fft, std=7)
window_paddle_gaussian = paddle.audio.functional.get_window( window_paddle_gaussian = paddle.audio.functional.get_window(
('gaussian', 7), n_fft, False) ('gaussian', 7), n_fft, False
np.testing.assert_array_almost_equal(window_scipy_gaussain, )
window_paddle_gaussian.numpy(), np.testing.assert_array_almost_equal(
decimal=5) window_scipy_gaussain, window_paddle_gaussian.numpy(), decimal=5
)
window_scipy_general_gaussain = signal.windows.general_gaussian( window_scipy_general_gaussain = signal.windows.general_gaussian(
n_fft, 1, 7) n_fft, 1, 7
)
window_paddle_general_gaussian = paddle.audio.functional.get_window( window_paddle_general_gaussian = paddle.audio.functional.get_window(
('general_gaussian', 1, 7), n_fft, False) ('general_gaussian', 1, 7), n_fft, False
np.testing.assert_array_almost_equal(window_scipy_gaussain, )
window_paddle_gaussian.numpy(), np.testing.assert_array_almost_equal(
decimal=5) window_scipy_gaussain, window_paddle_gaussian.numpy(), decimal=5
)
window_scipy_exp = signal.windows.exponential(n_fft) window_scipy_exp = signal.windows.exponential(n_fft)
window_paddle_exp = paddle.audio.functional.get_window( window_paddle_exp = paddle.audio.functional.get_window(
('exponential', None, 1), n_fft, False) ('exponential', None, 1), n_fft, False
np.testing.assert_array_almost_equal(window_scipy_exp, )
window_paddle_exp.numpy(), np.testing.assert_array_almost_equal(
decimal=5) window_scipy_exp, window_paddle_exp.numpy(), decimal=5
try: )
window_paddle = paddle.audio.functional.get_window(("kaiser", 1.0),
self.n_fft)
except NotImplementedError:
pass
try: try:
window_paddle = paddle.audio.functional.get_window("hann", -1) window_paddle = paddle.audio.functional.get_window("hann", -1)
except ValueError: except ValueError:
...@@ -171,7 +186,8 @@ class TestAudioFuncitons(unittest.TestCase): ...@@ -171,7 +186,8 @@ class TestAudioFuncitons(unittest.TestCase):
try: try:
window_paddle = paddle.audio.functional.get_window( window_paddle = paddle.audio.functional.get_window(
"fake_window", self.n_fft) "fake_window", self.n_fft
)
except ValueError: except ValueError:
pass pass
...@@ -182,7 +198,6 @@ class TestAudioFuncitons(unittest.TestCase): ...@@ -182,7 +198,6 @@ class TestAudioFuncitons(unittest.TestCase):
@parameterize([5, 13, 23], [257, 513, 1025]) @parameterize([5, 13, 23], [257, 513, 1025])
def test_create_dct(self, n_mfcc: int, n_mels: int): def test_create_dct(self, n_mfcc: int, n_mels: int):
def dct(n_filters, n_input): def dct(n_filters, n_input):
basis = np.empty((n_filters, n_input)) basis = np.empty((n_filters, n_input))
basis[0, :] = 1.0 / np.sqrt(n_input) basis[0, :] = 1.0 / np.sqrt(n_input)
...@@ -196,14 +211,17 @@ class TestAudioFuncitons(unittest.TestCase): ...@@ -196,14 +211,17 @@ class TestAudioFuncitons(unittest.TestCase):
paddle_dct = paddle.audio.functional.create_dct(n_mfcc, n_mels) paddle_dct = paddle.audio.functional.create_dct(n_mfcc, n_mels)
np.testing.assert_array_almost_equal(librosa_dct, paddle_dct, decimal=5) np.testing.assert_array_almost_equal(librosa_dct, paddle_dct, decimal=5)
@parameterize([128, 256, 512], ["hamming", "hann", "triang", "bohman"], @parameterize(
[True, False]) [128, 256, 512], ["hamming", "hann", "triang", "bohman"], [True, False]
def test_stft_and_spect(self, n_fft: int, window_str: str, )
center_flag: bool): def test_stft_and_spect(
self, n_fft: int, window_str: str, center_flag: bool
):
hop_length = int(n_fft / 4) hop_length = int(n_fft / 4)
if len(self.waveform.shape) == 2: # (C, T) if len(self.waveform.shape) == 2: # (C, T)
self.waveform = self.waveform.squeeze( self.waveform = self.waveform.squeeze(
0) # 1D input for librosa.feature.melspectrogram 0
) # 1D input for librosa.feature.melspectrogram
feature_librosa = librosa.core.stft( feature_librosa = librosa.core.stft(
y=self.waveform, y=self.waveform,
n_fft=n_fft, n_fft=n_fft,
...@@ -215,9 +233,9 @@ class TestAudioFuncitons(unittest.TestCase): ...@@ -215,9 +233,9 @@ class TestAudioFuncitons(unittest.TestCase):
pad_mode=self.pad_mode, pad_mode=self.pad_mode,
) )
x = paddle.to_tensor(self.waveform).unsqueeze(0) x = paddle.to_tensor(self.waveform).unsqueeze(0)
window = paddle.audio.functional.get_window(window_str, window = paddle.audio.functional.get_window(
n_fft, window_str, n_fft, dtype=x.dtype
dtype=x.dtype) )
feature_paddle = paddle.signal.stft( feature_paddle = paddle.signal.stft(
x=x, x=x,
n_fft=n_fft, n_fft=n_fft,
...@@ -229,9 +247,9 @@ class TestAudioFuncitons(unittest.TestCase): ...@@ -229,9 +247,9 @@ class TestAudioFuncitons(unittest.TestCase):
normalized=False, normalized=False,
onesided=True, onesided=True,
).squeeze(0) ).squeeze(0)
np.testing.assert_array_almost_equal(feature_librosa, np.testing.assert_array_almost_equal(
feature_paddle, feature_librosa, feature_paddle, decimal=5
decimal=5) )
feature_bg = np.power(np.abs(feature_librosa), 2.0) feature_bg = np.power(np.abs(feature_librosa), 2.0)
feature_extractor = paddle.audio.features.Spectrogram( feature_extractor = paddle.audio.features.Spectrogram(
...@@ -244,16 +262,18 @@ class TestAudioFuncitons(unittest.TestCase): ...@@ -244,16 +262,18 @@ class TestAudioFuncitons(unittest.TestCase):
pad_mode=self.pad_mode, pad_mode=self.pad_mode,
) )
feature_layer = feature_extractor(x).squeeze(0) feature_layer = feature_extractor(x).squeeze(0)
np.testing.assert_array_almost_equal(feature_layer, np.testing.assert_array_almost_equal(
feature_bg, feature_layer, feature_bg, decimal=3
decimal=3) )
@parameterize([128, 256, 512], [64, 82], @parameterize(
["hamming", "hann", "triang", "bohman"]) [128, 256, 512], [64, 82], ["hamming", "hann", "triang", "bohman"]
)
def test_istft(self, n_fft: int, hop_length: int, window_str: str): def test_istft(self, n_fft: int, hop_length: int, window_str: str):
if len(self.waveform.shape) == 2: # (C, T) if len(self.waveform.shape) == 2: # (C, T)
self.waveform = self.waveform.squeeze( self.waveform = self.waveform.squeeze(
0) # 1D input for librosa.feature.melspectrogram 0
) # 1D input for librosa.feature.melspectrogram
# librosa # librosa
# Get stft result from librosa. # Get stft result from librosa.
stft_matrix = librosa.core.stft( stft_matrix = librosa.core.stft(
...@@ -275,10 +295,9 @@ class TestAudioFuncitons(unittest.TestCase): ...@@ -275,10 +295,9 @@ class TestAudioFuncitons(unittest.TestCase):
length=None, length=None,
) )
x = paddle.to_tensor(stft_matrix).unsqueeze(0) x = paddle.to_tensor(stft_matrix).unsqueeze(0)
window = paddle.audio.functional.get_window(window_str, window = paddle.audio.functional.get_window(
n_fft, window_str, n_fft, dtype=paddle.to_tensor(self.waveform).dtype
dtype=paddle.to_tensor( )
self.waveform).dtype)
feature_paddle = paddle.signal.istft( feature_paddle = paddle.signal.istft(
x=x, x=x,
n_fft=n_fft, n_fft=n_fft,
...@@ -292,9 +311,9 @@ class TestAudioFuncitons(unittest.TestCase): ...@@ -292,9 +311,9 @@ class TestAudioFuncitons(unittest.TestCase):
return_complex=False, return_complex=False,
).squeeze(0) ).squeeze(0)
np.testing.assert_array_almost_equal(feature_librosa, np.testing.assert_array_almost_equal(
feature_paddle, feature_librosa, feature_paddle, decimal=5
decimal=5) )
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册