merge audio

38c55e44 · Yang Zhou · cffe555c · a2e8b76a · 38c55e44 · 38c55e44
9 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,7 +57,7 @@ include(openblas)
 # packages
 find_package(Python3 COMPONENTS Interpreter Development)
-find_package(pybind11 CONFIG)
+find_package(pybind11 CONFIG REQUIRED)
 # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O0 -Wall -g")

--- a/cmake/summary.cmake
+++ b/cmake/summary.cmake
@@ -37,5 +37,9 @@ function (onnx_print_configuration_summary)
  message(STATUS "    Python executable     : ${Python_EXECUTABLE}")
  message(STATUS "    Python includes       : ${Python_INCLUDE_DIR}")
  message(STATUS "    Python libraries      : ${Python_LIBRARY}")
+  message(STATUS "  PYBIND11                  : ${pybind11_FOUND}")
+  message(STATUS "    Pybind11 version        : ${pybind11_VERSION}")
+  message(STATUS "    Pybind11 include        : ${pybind11_INCLUDE_DIR}")
+  message(STATUS "    Pybind11 includes       : ${pybind11_INCLUDE_DIRS}")
+  message(STATUS "    Pybind11 libraries      : ${pybind11_LIBRARIES}")
 endfunction()
\ No newline at end of file
--- a/paddlespeech/audio/_internal/module_utils.py
+++ b/paddlespeech/audio/_internal/module_utils.py
@@ -5,6 +5,7 @@ from typing import Optional
 #code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py
 def is_module_available(*modules: str) -> bool:
    r"""Returns if a top-level module with :attr:`name` exists *without**
    importing it. This is generally safer than try-catch block around a

--- a/paddlespeech/audio/backends/no_backend.py
+++ b/paddlespeech/audio/backends/no_backend.py
@@ -8,19 +8,23 @@ from paddle import Tensor
 #code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py
 def load(
        filepath: Union[str, Path],
-    out: Optional[Tensor] = None,
+        out: Optional[Tensor]=None,
-    normalization: Union[bool, float, Callable] = True,
+        normalization: Union[bool, float, Callable]=True,
-    channels_first: bool = True,
+        channels_first: bool=True,
-    num_frames: int = 0,
+        num_frames: int=0,
-    offset: int = 0,
+        offset: int=0,
-    filetype: Optional[str] = None,
+        filetype: Optional[str]=None, ) -> Tuple[Tensor, int]:
-) -> Tuple[Tensor, int]:
    raise RuntimeError("No audio I/O backend is available.")
-def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
+def save(filepath: str,
+         src: Tensor,
+         sample_rate: int,
+         precision: int=16,
+         channels_first: bool=True) -> None:
    raise RuntimeError("No audio I/O backend is available.")

--- a/paddlespeech/audio/backends/sox_io_backend.py
+++ b/paddlespeech/audio/backends/sox_io_backend.py
 from pathlib import Path
 from typing import Callable
 from typing import Optional
@@ -44,16 +43,19 @@ _fallback_load_filebj = _fail_load_fileobj
 def load(
        filepath: Union[str, Path],
-    out: Optional[Tensor] = None,
+        out: Optional[Tensor]=None,
-    normalization: Union[bool, float, Callable] = True,
+        normalization: Union[bool, float, Callable]=True,
-    channels_first: bool = True,
+        channels_first: bool=True,
-    num_frames: int = 0,
+        num_frames: int=0,
-    offset: int = 0,
+        offset: int=0,
-    filetype: Optional[str] = None,
+        filetype: Optional[str]=None, ) -> Tuple[Tensor, int]:
-) -> Tuple[Tensor, int]:
    raise RuntimeError("No audio I/O backend is available.")
-def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
+def save(filepath: str, 
+         src: Tensor, 
+         sample_rate: int, 
+         precision: int = 16, 
+         channels_first: bool = True) -> None:
    raise RuntimeError("No audio I/O backend is available.")
 @_mod_utils.requires_sox()

--- a/paddlespeech/audio/backends/utils.py
+++ b/paddlespeech/audio/backends/utils.py
@@ -40,7 +40,8 @@ def set_audio_backend(backend: Optional[str]):
            of the system. If ``None`` is provided the  current backend is unassigned.
    """
    if backend is not None and backend not in list_audio_backends():
-        raise RuntimeError(f'Backend "{backend}" is not one of ' f"available backends: {list_audio_backends()}.")
+        raise RuntimeError(f'Backend "{backend}" is not one of '
+                           f"available backends: {list_audio_backends()}.")
    if backend is None:
        module = no_backend
@@ -76,6 +77,7 @@ def _init_audio_backend():
        warnings.warn("No audio backend is available.")
        set_audio_backend(None)
 def get_audio_backend() -> Optional[str]:
    """Get the name of the current backend

--- a/paddlespeech/audio/kaldi/kaldi.py
+++ b/paddlespeech/audio/kaldi/kaldi.py
@@ -27,7 +27,8 @@ __all__ = [
 @module_utils.requires_kaldi()
-def fbank(wav,
+def fbank(
+        wav,
        samp_freq: int=16000,
        frame_shift_ms: float=10.0,
        frame_length_ms: float=25.0,
@@ -88,6 +89,7 @@ def fbank(wav,
    feat = ComputeFbank(frame_opts, mel_opts, fbank_opts, wav)
    return feat
 @module_utils.requires_kaldi()
 def pitch(wav,
          samp_freq: int=16000,

--- a/paddlespeech/audio/src/CMakeLists.txt
+++ b/paddlespeech/audio/src/CMakeLists.txt
@@ -105,7 +105,7 @@ function(define_extension name sources include_dirs libraries definitions)
  add_library(${name} SHARED ${sources})
  target_compile_definitions(${name} PRIVATE "${definitions}")
  target_include_directories(
-    ${name} PRIVATE ${PROJECT_SOURCE_DIR} ${Python_INCLUDE_DIR} ${include_dirs})
+    ${name} PRIVATE ${PROJECT_SOURCE_DIR} ${Python_INCLUDE_DIR} ${pybind11_INCLUDE_DIR} ${include_dirs})
  target_link_libraries(
    ${name}
    ${libraries}

--- a/paddlespeech/audio/src/pybind/kaldi/feature_common.h
+++ b/paddlespeech/audio/src/pybind/kaldi/feature_common.h
@@ -14,8 +14,8 @@
 #pragma once
-#include <pybind11/numpy.h>
+#include "pybind11/pybind11.h"
-#include <pybind11/pybind11.h>
+#include "pybind11/numpy.h"
 #include "feat/feature-window.h"
 namespace paddleaudio {