update audio api examples (#46938)

* update audio api examples * fix format * format * fix * test api * fix format * fix static check error * fix doc error * fix ci * fix api error * update api.spec * fix ci * fix typo in window gaussian

update audio api examples (#46938)
* update audio api examples * fix format * format * fix * test api * fix format * fix static check error * fix doc error * fix ci * fix api error * update api.spec * fix ci * fix typo in window gaussian
c7d2e82c · YangZhou · GitHub · 5c0bfc18 · c7d2e82c · c7d2e82c
7 changed files
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -80,6 +80,7 @@ import paddle.onnx  # noqa: F401
 import paddle.reader  # noqa: F401
 import paddle.static  # noqa: F401
 import paddle.vision  # noqa: F401
+import paddle.audio  # noqa: F401
 import paddle.geometric  # noqa: F401

 from .tensor.attribute import is_complex  # noqa: F401

--- a/python/paddle/audio/features/__init__.py
+++ b/python/paddle/audio/features/__init__.py
@@ -11,7 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .layers import LogMelSpectrogram
-from .layers import MelSpectrogram
-from .layers import MFCC
-from .layers import Spectrogram
+from .layers import LogMelSpectrogram  # noqa: F401
+from .layers import MelSpectrogram  # noqa: F401
+from .layers import MFCC  # noqa: F401
+from .layers import Spectrogram  # noqa: F401
+
+__all__ = [  # noqa
+    'LogMelSpectrogram',
+    'MelSpectrogram',
+    'MFCC',
+    'Spectrogram',
+]
--- a/python/paddle/audio/features/layers.py
+++ b/python/paddle/audio/features/layers.py
@@ -24,13 +24,6 @@ from ..functional import create_dct
 from ..functional import power_to_db
 from ..functional.window import get_window

-__all__ = [
-    'Spectrogram',
-    'MelSpectrogram',
-    'LogMelSpectrogram',
-    'MFCC',
-]
-

 class Spectrogram(nn.Layer):
    """Compute spectrogram of given signals, typically audio waveforms.
@@ -45,6 +38,27 @@ class Spectrogram(nn.Layer):
        center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True.
        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
        dtype (str, optional): Data type of input and window. Defaults to 'float32'.
+
+    Returns:
+        :ref:`api_paddle_nn_Layer`. An instance of Spectrogram.
+
+
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.audio.features import Spectrogram
+
+            sample_rate = 16000
+            wav_duration = 0.5
+            num_channels = 1
+            num_frames = int(sample_rate * wav_duration)
+            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
+            waveform = wav_data.tile([num_channels, 1])
+
+            feature_extractor = Spectrogram(n_fft=512, window='hann', power=1.0)
+            feats = feature_extractor(waveform)
    """

    def __init__(self,
@@ -108,6 +122,25 @@ class MelSpectrogram(nn.Layer):
        htk (bool, optional): Use HTK formula in computing fbank matrix. Defaults to False.
        norm (Union[str, float], optional): Type of normalization in computing fbank matrix. Slaney-style is used by default. You can specify norm=1.0/2.0 to use customized p-norm normalization. Defaults to 'slaney'.
        dtype (str, optional): Data type of input and window. Defaults to 'float32'.
+
+    Returns:
+        :ref:`api_paddle_nn_Layer`. An instance of MelSpectrogram.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.audio.features import MelSpectrogram
+
+            sample_rate = 16000
+            wav_duration = 0.5
+            num_channels = 1
+            num_frames = int(sample_rate * wav_duration)
+            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
+            waveform = wav_data.tile([num_channels, 1])
+
+            feature_extractor = MelSpectrogram(sr=sample_rate, n_fft=512, window='hann', power=1.0)
+            feats = feature_extractor(waveform)
    """

    def __init__(self,
@@ -186,6 +219,25 @@ class LogMelSpectrogram(nn.Layer):
        amin (float, optional): The minimum value of input magnitude. Defaults to 1e-10.
        top_db (Optional[float], optional): The maximum db value of spectrogram. Defaults to None.
        dtype (str, optional): Data type of input and window. Defaults to 'float32'.
+
+    Returns:
+        :ref:`api_paddle_nn_Layer`. An instance of LogMelSpectrogram.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.audio.features import LogMelSpectrogram
+
+            sample_rate = 16000
+            wav_duration = 0.5
+            num_channels = 1
+            num_frames = int(sample_rate * wav_duration)
+            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
+            waveform = wav_data.tile([num_channels, 1])
+
+            feature_extractor = LogMelSpectrogram(sr=sample_rate, n_fft=512, window='hann', power=1.0)
+            feats = feature_extractor(waveform)
    """

    def __init__(self,
@@ -265,6 +317,25 @@ class MFCC(nn.Layer):
        amin (float, optional): The minimum value of input magnitude. Defaults to 1e-10.
        top_db (Optional[float], optional): The maximum db value of spectrogram. Defaults to None.
        dtype (str, optional): Data type of input and window. Defaults to 'float32'.
+
+    Returns:
+        :ref:`api_paddle_nn_Layer`. An instance of MFCC.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle.audio.features import MFCC
+
+            sample_rate = 16000
+            wav_duration = 0.5
+            num_channels = 1
+            num_frames = int(sample_rate * wav_duration)
+            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
+            waveform = wav_data.tile([num_channels, 1])
+
+            feature_extractor = MFCC(sr=sample_rate, n_fft=512, window='hann')
+            feats = feature_extractor(waveform)
    """

    def __init__(self,

--- a/python/paddle/audio/functional/__init__.py
+++ b/python/paddle/audio/functional/__init__.py
@@ -11,11 +11,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .functional import compute_fbank_matrix
-from .functional import create_dct
-from .functional import fft_frequencies
-from .functional import hz_to_mel
-from .functional import mel_frequencies
-from .functional import mel_to_hz
-from .functional import power_to_db
-from .window import get_window
+from .functional import compute_fbank_matrix  # noqa: F401
+from .functional import create_dct  # noqa: F401
+from .functional import fft_frequencies  # noqa: F401
+from .functional import hz_to_mel  # noqa: F401
+from .functional import mel_frequencies  # noqa: F401
+from .functional import mel_to_hz  # noqa: F401
+from .functional import power_to_db  # noqa: F401
+from .window import get_window  # noqa: F401
+
+__all__ = [  # noqa
+    'compute_fbank_matrix',
+    'create_dct',
+    'fft_frequencies',
+    'hz_to_mel',
+    'mel_frequencies',
+    'mel_to_hz',
+    'power_to_db',
+    'get_window',
+]
--- a/python/paddle/audio/functional/functional.py
+++ b/python/paddle/audio/functional/functional.py
@@ -19,16 +19,6 @@ from typing import Union
 import paddle
 from paddle import Tensor

-__all__ = [
-    'hz_to_mel',
-    'mel_to_hz',
-    'mel_frequencies',
-    'fft_frequencies',
-    'compute_fbank_matrix',
-    'power_to_db',
-    'create_dct',
-]
-

 def hz_to_mel(freq: Union[Tensor, float],
              htk: bool = False) -> Union[Tensor, float]:
@@ -40,6 +30,16 @@ def hz_to_mel(freq: Union[Tensor, float],

    Returns:
        Union[Tensor, float]: Frequency in mels.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            val = 3.0
+            htk_flag = True
+            mel_paddle_tensor = paddle.audio.functional.hz_to_mel(
+                paddle.to_tensor(val), htk_flag)
    """

    if htk:
@@ -83,6 +83,17 @@ def mel_to_hz(mel: Union[float, Tensor],

    Returns:
        Union[float, Tensor]: Frequencies in Hz.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            val = 3.0
+            htk_flag = True
+            hz_paddle_tensor = paddle.audio.functional.mel_to_hz(
+                paddle.to_tensor(val), htk_flag)
+
    """
    if htk:
        return 700.0 * (10.0**(mel / 2595.0) - 1.0)
@@ -121,6 +132,19 @@ def mel_frequencies(n_mels: int = 64,

    Returns:
        Tensor: Tensor of n_mels frequencies in Hz with shape `(n_mels,)`.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            n_mels = 64
+            f_min = 0.5
+            f_max = 10000
+            htk_flag = True
+
+            paddle_mel_freq = paddle.audio.functional.mel_frequencies(
+                n_mels, f_min, f_max, htk_flag, 'float64')
    """
    # 'Center freqs' of mel bands - uniformly spaced between limits
    min_mel = hz_to_mel(f_min, htk=htk)
@@ -140,6 +164,15 @@ def fft_frequencies(sr: int, n_fft: int, dtype: str = 'float32') -> Tensor:

    Returns:
        Tensor: FFT frequencies in Hz with shape `(n_fft//2 + 1,)`.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            sr = 16000
+            n_fft = 128
+            fft_freq = paddle.audio.functional.fft_frequencies(sr, n_fft)
    """
    return paddle.linspace(0, float(sr) / 2, int(1 + n_fft // 2), dtype=dtype)

@@ -166,6 +199,15 @@ def compute_fbank_matrix(sr: int,

    Returns:
        Tensor: Mel transform matrix with shape `(n_mels, n_fft//2 + 1)`.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            sr = 16000
+            n_fft = 512
+            fbank = paddle.audio.functional.compute_fbank_matrix(sr=sr, n_fft=n_fft)
    """

    if f_max is None:
@@ -221,6 +263,15 @@ def power_to_db(spect: Tensor,

    Returns:
        Tensor: Power spectrogram in db scale.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            val = 3.0
+            decibel_paddle = paddle.audio.functional.power_to_db(
+                paddle.to_tensor(val))
    """
    if amin <= 0:
        raise Exception("amin must be strictly positive")
@@ -254,6 +305,14 @@ def create_dct(n_mfcc: int,

    Returns:
        Tensor: The DCT matrix with shape `(n_mels, n_mfcc)`.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            n_mfcc = 23
+            n_mels = 257
+            dct = paddle.audio.functional.create_dct(n_mfcc, n_mels)
    """
    n = paddle.arange(n_mels, dtype=dtype)
    k = paddle.arange(n_mfcc, dtype=dtype).unsqueeze(1)

--- a/python/paddle/audio/functional/window.py
+++ b/python/paddle/audio/functional/window.py
@@ -18,10 +18,6 @@ from typing import Union
 import paddle
 from paddle import Tensor

-__all__ = [
-    'get_window',
-]
-

 def _cat(x: List[Tensor], data_type: str) -> Tensor:
    l = [paddle.to_tensor(_, data_type) for _ in x]
@@ -323,6 +319,17 @@ def get_window(window: Union[str, Tuple[str, float]],

    Returns:
        Tensor: The window represented as a tensor.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            n_fft = 512
+            cosine_window = paddle.audio.functional.get_window('cosine', n_fft)
+
+            std = 7
+            gaussian_window = paddle.audio.functional.get_window(('gaussian', std), n_fft)
    """
    sym = not fftbins


--- a/python/paddle/tests/test_audio_functions.py
+++ b/python/paddle/tests/test_audio_functions.py
@@ -136,7 +136,7 @@ class TestAudioFuncitons(unittest.TestCase):
                                             decimal=5)

    @parameterize([1, 512])
-    def test_gussian_window_and_exception(self, n_fft: int):
+    def test_gaussian_window_and_exception(self, n_fft: int):
        window_scipy_gaussain = signal.windows.gaussian(n_fft, std=7)
        window_paddle_gaussian = paddle.audio.functional.get_window(
            ('gaussian', 7), n_fft, False)