From f93e9a584224449957975f87b4a6dcfca79178d9 Mon Sep 17 00:00:00 2001 From: YangZhou <56786796+SmileGoat@users.noreply.github.com> Date: Mon, 31 Oct 2022 15:24:09 +0800 Subject: [PATCH] [Cherry-pick][audio] rm kaiser window in audio get_window function && rm audio utils(#47469) (#47479) * [audio] rm kaiser window in audio get_window function && rm audio utils (#47469) * rm kaiser window in audio window function * rm paddle audio utils which is redundant * rm kaiser in test_audio_functions.py Conflicts: python/paddle/audio/utils/error.py python/paddle/tests/test_audio_functions.py * format --- python/paddle/audio/functional/window.py | 14 +- python/paddle/audio/utils/__init__.py | 13 -- python/paddle/audio/utils/error.py | 20 --- python/paddle/tests/test_audio_functions.py | 189 +++++++++++--------- 4 files changed, 106 insertions(+), 130 deletions(-) delete mode 100644 python/paddle/audio/utils/__init__.py delete mode 100644 python/paddle/audio/utils/error.py diff --git a/python/paddle/audio/functional/window.py b/python/paddle/audio/functional/window.py index 315d5a50a32..472c56b87ac 100644 --- a/python/paddle/audio/functional/window.py +++ b/python/paddle/audio/functional/window.py @@ -231,16 +231,6 @@ def _tukey( return _truncate(w, needs_trunc) -@window_function_register.register() -def _kaiser( - M: int, beta: float, sym: bool = True, dtype: str = 'float64' -) -> Tensor: - """Compute a Kaiser window. - The Kaiser window is a taper formed by using a Bessel function. - """ - raise NotImplementedError() - - @window_function_register.register() def _gaussian( M: int, std: float, sym: bool = True, dtype: str = 'float64' @@ -346,7 +336,7 @@ def get_window( """Return a window of a given length and type. Args: - window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'general_gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. + window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'general_gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. win_length (int): Number of samples. fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True. dtype (str, optional): The data type of the return window. Defaults to 'float64'. @@ -363,7 +353,7 @@ def get_window( cosine_window = paddle.audio.functional.get_window('cosine', n_fft) std = 7 - gussian_window = paddle.audio.functional.get_window(('gaussian',std), n_fft) + gaussian_window = paddle.audio.functional.get_window(('gaussian',std), n_fft) """ sym = not fftbins diff --git a/python/paddle/audio/utils/__init__.py b/python/paddle/audio/utils/__init__.py deleted file mode 100644 index 55a55c3ed0a..00000000000 --- a/python/paddle/audio/utils/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/paddle/audio/utils/error.py b/python/paddle/audio/utils/error.py deleted file mode 100644 index ab239a24970..00000000000 --- a/python/paddle/audio/utils/error.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = ['ParameterError'] - - -class ParameterError(Exception): - """Exception class for Parameter checking""" - pass diff --git a/python/paddle/tests/test_audio_functions.py b/python/paddle/tests/test_audio_functions.py index f2812700502..da1009558ff 100644 --- a/python/paddle/tests/test_audio_functions.py +++ b/python/paddle/tests/test_audio_functions.py @@ -29,12 +29,10 @@ def parameterize(*params): class TestAudioFuncitons(unittest.TestCase): - def setUp(self): self.initParmas() def initParmas(self): - def get_wav_data(dtype: str, num_channels: int, num_frames: int): dtype_ = getattr(paddle, dtype) base = paddle.linspace(-1.0, 1.0, num_frames, dtype=dtype_) * 0.1 @@ -54,49 +52,56 @@ class TestAudioFuncitons(unittest.TestCase): self.sr = 16000 self.dtype = "float32" self.window_size = 1024 - waveform_tensor = get_wav_data(self.dtype, - self.num_channels, - num_frames=self.duration * self.sr) + waveform_tensor = get_wav_data( + self.dtype, self.num_channels, num_frames=self.duration * self.sr + ) self.waveform = waveform_tensor.numpy() @parameterize([1.0, 3.0, 9.0, 25.0], [True, False]) def test_audio_function(self, val: float, htk_flag: bool): mel_paddle = paddle.audio.functional.hz_to_mel(val, htk_flag) mel_paddle_tensor = paddle.audio.functional.hz_to_mel( - paddle.to_tensor(val), htk_flag) + paddle.to_tensor(val), htk_flag + ) mel_librosa = librosa.hz_to_mel(val, htk_flag) np.testing.assert_almost_equal(mel_paddle, mel_librosa, decimal=5) - np.testing.assert_almost_equal(mel_paddle_tensor.numpy(), - mel_librosa, - decimal=4) + np.testing.assert_almost_equal( + mel_paddle_tensor.numpy(), mel_librosa, decimal=4 + ) hz_paddle = paddle.audio.functional.mel_to_hz(val, htk_flag) hz_paddle_tensor = paddle.audio.functional.mel_to_hz( - paddle.to_tensor(val), htk_flag) + paddle.to_tensor(val), htk_flag + ) hz_librosa = librosa.mel_to_hz(val, htk_flag) np.testing.assert_almost_equal(hz_paddle, hz_librosa, decimal=4) - np.testing.assert_almost_equal(hz_paddle_tensor.numpy(), - hz_librosa, - decimal=4) + np.testing.assert_almost_equal( + hz_paddle_tensor.numpy(), hz_librosa, decimal=4 + ) decibel_paddle = paddle.audio.functional.power_to_db( - paddle.to_tensor(val)) + paddle.to_tensor(val) + ) decibel_librosa = librosa.power_to_db(val) - np.testing.assert_almost_equal(decibel_paddle.numpy(), - decibel_paddle, - decimal=5) + np.testing.assert_almost_equal( + decibel_paddle.numpy(), decibel_paddle, decimal=5 + ) - @parameterize([64, 128, 256], [0.0, 0.5, 1.0], [10000, 11025], - [False, True]) - def test_audio_function_mel(self, n_mels: int, f_min: float, f_max: float, - htk_flag: bool): - librosa_mel_freq = librosa.mel_frequencies(n_mels, f_min, f_max, - htk_flag) + @parameterize( + [64, 128, 256], [0.0, 0.5, 1.0], [10000, 11025], [False, True] + ) + def test_audio_function_mel( + self, n_mels: int, f_min: float, f_max: float, htk_flag: bool + ): + librosa_mel_freq = librosa.mel_frequencies( + n_mels, f_min, f_max, htk_flag + ) paddle_mel_freq = paddle.audio.functional.mel_frequencies( - n_mels, f_min, f_max, htk_flag, 'float64') - np.testing.assert_almost_equal(paddle_mel_freq, - librosa_mel_freq, - decimal=3) + n_mels, f_min, f_max, htk_flag, 'float64' + ) + np.testing.assert_almost_equal( + paddle_mel_freq, librosa_mel_freq, decimal=3 + ) @parameterize([8000, 16000], [64, 128, 256]) def test_audio_function_fft(self, sr: int, n_fft: int): @@ -107,63 +112,73 @@ class TestAudioFuncitons(unittest.TestCase): @parameterize([1.0, 3.0, 9.0]) def test_audio_function_exception(self, spect: float): try: - paddle.audio.functional.power_to_db(paddle.to_tensor([spect]), - amin=0) + paddle.audio.functional.power_to_db( + paddle.to_tensor([spect]), amin=0 + ) except Exception: pass try: - paddle.audio.functional.power_to_db(paddle.to_tensor([spect]), - ref_value=0) + paddle.audio.functional.power_to_db( + paddle.to_tensor([spect]), ref_value=0 + ) except Exception: pass try: - paddle.audio.functional.power_to_db(paddle.to_tensor([spect]), - top_db=-1) + paddle.audio.functional.power_to_db( + paddle.to_tensor([spect]), top_db=-1 + ) except Exception: pass - @parameterize([ - "hamming", "hann", "triang", "bohman", "blackman", "cosine", "tukey", - "taylor" - ], [1, 512]) + @parameterize( + [ + "hamming", + "hann", + "triang", + "bohman", + "blackman", + "cosine", + "tukey", + "taylor", + ], + [1, 512], + ) def test_window(self, window_type: str, n_fft: int): window_scipy = signal.get_window(window_type, n_fft) window_paddle = paddle.audio.functional.get_window(window_type, n_fft) - np.testing.assert_array_almost_equal(window_scipy, - window_paddle.numpy(), - decimal=5) + np.testing.assert_array_almost_equal( + window_scipy, window_paddle.numpy(), decimal=5 + ) @parameterize([1, 512]) def test_gaussian_window_and_exception(self, n_fft: int): window_scipy_gaussain = signal.windows.gaussian(n_fft, std=7) window_paddle_gaussian = paddle.audio.functional.get_window( - ('gaussian', 7), n_fft, False) - np.testing.assert_array_almost_equal(window_scipy_gaussain, - window_paddle_gaussian.numpy(), - decimal=5) + ('gaussian', 7), n_fft, False + ) + np.testing.assert_array_almost_equal( + window_scipy_gaussain, window_paddle_gaussian.numpy(), decimal=5 + ) window_scipy_general_gaussain = signal.windows.general_gaussian( - n_fft, 1, 7) + n_fft, 1, 7 + ) window_paddle_general_gaussian = paddle.audio.functional.get_window( - ('general_gaussian', 1, 7), n_fft, False) - np.testing.assert_array_almost_equal(window_scipy_gaussain, - window_paddle_gaussian.numpy(), - decimal=5) + ('general_gaussian', 1, 7), n_fft, False + ) + np.testing.assert_array_almost_equal( + window_scipy_gaussain, window_paddle_gaussian.numpy(), decimal=5 + ) window_scipy_exp = signal.windows.exponential(n_fft) window_paddle_exp = paddle.audio.functional.get_window( - ('exponential', None, 1), n_fft, False) - np.testing.assert_array_almost_equal(window_scipy_exp, - window_paddle_exp.numpy(), - decimal=5) - try: - window_paddle = paddle.audio.functional.get_window(("kaiser", 1.0), - self.n_fft) - except NotImplementedError: - pass - + ('exponential', None, 1), n_fft, False + ) + np.testing.assert_array_almost_equal( + window_scipy_exp, window_paddle_exp.numpy(), decimal=5 + ) try: window_paddle = paddle.audio.functional.get_window("hann", -1) except ValueError: @@ -171,7 +186,8 @@ class TestAudioFuncitons(unittest.TestCase): try: window_paddle = paddle.audio.functional.get_window( - "fake_window", self.n_fft) + "fake_window", self.n_fft + ) except ValueError: pass @@ -182,7 +198,6 @@ class TestAudioFuncitons(unittest.TestCase): @parameterize([5, 13, 23], [257, 513, 1025]) def test_create_dct(self, n_mfcc: int, n_mels: int): - def dct(n_filters, n_input): basis = np.empty((n_filters, n_input)) basis[0, :] = 1.0 / np.sqrt(n_input) @@ -196,14 +211,17 @@ class TestAudioFuncitons(unittest.TestCase): paddle_dct = paddle.audio.functional.create_dct(n_mfcc, n_mels) np.testing.assert_array_almost_equal(librosa_dct, paddle_dct, decimal=5) - @parameterize([128, 256, 512], ["hamming", "hann", "triang", "bohman"], - [True, False]) - def test_stft_and_spect(self, n_fft: int, window_str: str, - center_flag: bool): + @parameterize( + [128, 256, 512], ["hamming", "hann", "triang", "bohman"], [True, False] + ) + def test_stft_and_spect( + self, n_fft: int, window_str: str, center_flag: bool + ): hop_length = int(n_fft / 4) if len(self.waveform.shape) == 2: # (C, T) self.waveform = self.waveform.squeeze( - 0) # 1D input for librosa.feature.melspectrogram + 0 + ) # 1D input for librosa.feature.melspectrogram feature_librosa = librosa.core.stft( y=self.waveform, n_fft=n_fft, @@ -215,9 +233,9 @@ class TestAudioFuncitons(unittest.TestCase): pad_mode=self.pad_mode, ) x = paddle.to_tensor(self.waveform).unsqueeze(0) - window = paddle.audio.functional.get_window(window_str, - n_fft, - dtype=x.dtype) + window = paddle.audio.functional.get_window( + window_str, n_fft, dtype=x.dtype + ) feature_paddle = paddle.signal.stft( x=x, n_fft=n_fft, @@ -229,9 +247,9 @@ class TestAudioFuncitons(unittest.TestCase): normalized=False, onesided=True, ).squeeze(0) - np.testing.assert_array_almost_equal(feature_librosa, - feature_paddle, - decimal=5) + np.testing.assert_array_almost_equal( + feature_librosa, feature_paddle, decimal=5 + ) feature_bg = np.power(np.abs(feature_librosa), 2.0) feature_extractor = paddle.audio.features.Spectrogram( @@ -244,16 +262,18 @@ class TestAudioFuncitons(unittest.TestCase): pad_mode=self.pad_mode, ) feature_layer = feature_extractor(x).squeeze(0) - np.testing.assert_array_almost_equal(feature_layer, - feature_bg, - decimal=3) + np.testing.assert_array_almost_equal( + feature_layer, feature_bg, decimal=3 + ) - @parameterize([128, 256, 512], [64, 82], - ["hamming", "hann", "triang", "bohman"]) + @parameterize( + [128, 256, 512], [64, 82], ["hamming", "hann", "triang", "bohman"] + ) def test_istft(self, n_fft: int, hop_length: int, window_str: str): if len(self.waveform.shape) == 2: # (C, T) self.waveform = self.waveform.squeeze( - 0) # 1D input for librosa.feature.melspectrogram + 0 + ) # 1D input for librosa.feature.melspectrogram # librosa # Get stft result from librosa. stft_matrix = librosa.core.stft( @@ -275,10 +295,9 @@ class TestAudioFuncitons(unittest.TestCase): length=None, ) x = paddle.to_tensor(stft_matrix).unsqueeze(0) - window = paddle.audio.functional.get_window(window_str, - n_fft, - dtype=paddle.to_tensor( - self.waveform).dtype) + window = paddle.audio.functional.get_window( + window_str, n_fft, dtype=paddle.to_tensor(self.waveform).dtype + ) feature_paddle = paddle.signal.istft( x=x, n_fft=n_fft, @@ -292,9 +311,9 @@ class TestAudioFuncitons(unittest.TestCase): return_complex=False, ).squeeze(0) - np.testing.assert_array_almost_equal(feature_librosa, - feature_paddle, - decimal=5) + np.testing.assert_array_almost_equal( + feature_librosa, feature_paddle, decimal=5 + ) if __name__ == '__main__': -- GitLab