未验证 提交 42ff9460 编写于 作者: Y YangZhou 提交者: GitHub

[audio] mv paddlespeech/audio to paddleaudio (#2706)

* split paddlespeech/audio to paddleaudio.
* add sox io ,sox effect, kaldi native fbank to paddleaudio.
上级 0cc54bb7
...@@ -16,6 +16,9 @@ ...@@ -16,6 +16,9 @@
build build
*output/ *output/
audio/dist/
audio/fc_patch/
docs/build/ docs/build/
docs/topic/ctc/warp-ctc/ docs/topic/ctc/warp-ctc/
...@@ -42,6 +45,7 @@ tools/python-soundfile/ ...@@ -42,6 +45,7 @@ tools/python-soundfile/
tools/onnx tools/onnx
tools/onnxruntime tools/onnxruntime
tools/Paddle2ONNX tools/Paddle2ONNX
tools/onnx-simplifier/
speechx/fc_patch/ speechx/fc_patch/
......
...@@ -3,8 +3,13 @@ repos: ...@@ -3,8 +3,13 @@ repos:
rev: v0.16.0 rev: v0.16.0
hooks: hooks:
- id: yapf - id: yapf
files: \.py$ name: yapf
exclude: (?=third_party).*(\.py)$ language: python
entry: yapf
args: [-i, -vv]
types: [python]
exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: a11d9314b22d8f8c7556443875b731ef05965464 rev: a11d9314b22d8f8c7556443875b731ef05965464
hooks: hooks:
...@@ -30,7 +35,8 @@ repos: ...@@ -30,7 +35,8 @@ repos:
- --ignore=E501,E228,E226,E261,E266,E128,E402,W503 - --ignore=E501,E228,E226,E261,E266,E128,E402,W503
- --builtins=G,request - --builtins=G,request
- --jobs=1 - --jobs=1
exclude: (?=third_party).*(\.py)$ exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$
- repo : https://github.com/Lucas-C/pre-commit-hooks - repo : https://github.com/Lucas-C/pre-commit-hooks
rev: v1.0.1 rev: v1.0.1
hooks: hooks:
...@@ -42,6 +48,7 @@ repos: ...@@ -42,6 +48,7 @@ repos:
files: \.md$ files: \.md$
- id: remove-tabs - id: remove-tabs
files: \.md$ files: \.md$
- repo: local - repo: local
hooks: hooks:
- id: clang-format - id: clang-format
...@@ -49,23 +56,17 @@ repos: ...@@ -49,23 +56,17 @@ repos:
description: Format files with ClangFormat description: Format files with ClangFormat
entry: bash .pre-commit-hooks/clang-format.hook -i entry: bash .pre-commit-hooks/clang-format.hook -i
language: system language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$ files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$
exclude: (?=speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h|\.py)$ exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h|\.hpp|\.py)$
#- id: copyright_checker
# name: copyright_checker
# entry: python .pre-commit-hooks/copyright-check.hook
# language: system
# files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
# exclude: (?=third_party|pypinyin|speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin).*(\.cpp|\.cc|\.h|\.py)$
- id: cpplint - id: cpplint
name: cpplint name: cpplint
description: Static code analysis of C/C++ files description: Static code analysis of C/C++ files
language: python language: python
files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$ files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$
exclude: (?=speechx/speechx/kaldi|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h|\.py)$ exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h|\.hpp|\.py)$
entry: cpplint --filter=-build,-whitespace,+whitespace/comma,-whitespace/indent entry: cpplint --filter=-build,-whitespace,+whitespace/comma,-whitespace/indent
- repo: https://github.com/asottile/reorder_python_imports - repo: https://github.com/asottile/reorder_python_imports
rev: v2.4.0 rev: v2.4.0
hooks: hooks:
- id: reorder-python-imports - id: reorder-python-imports
exclude: (?=third_party).*(\.py)$ exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h\.hpp|\.py)$
...@@ -23,4 +23,4 @@ python: ...@@ -23,4 +23,4 @@ python:
- requirements: docs/requirements.txt - requirements: docs/requirements.txt
- method: setuptools - method: setuptools
path: . path: .
system_packages: true system_packages: true
\ No newline at end of file
# Top-level build script for the paddleaudio C++/Python extension.
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)

# Use compiler ID "AppleClang" instead of "Clang" for XCode.
# Not setting this sometimes makes XCode C compiler gets detected as "Clang",
# even when the C++ one is detected as "AppleClang".
cmake_policy(SET CMP0010 NEW)
cmake_policy(SET CMP0025 NEW)

# Suppress warning flags in default MSVC configuration. It's not
# mandatory that we do this (and we don't if cmake is old), but it's
# nice when it's possible, and it's possible on our Windows configs.
if(NOT CMAKE_VERSION VERSION_LESS 3.15.0)
  cmake_policy(SET CMP0092 NEW)
endif()

project(paddleaudio)

# Warn when the user already forces a C++ standard through CMAKE_CXX_FLAGS,
# because it can conflict with the standard selected below.
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
if(env_cxx_standard GREATER -1)
  # message() concatenates its arguments without a separator, so the first
  # sentence carries its own trailing space.
  message(
    WARNING "C++ standard version definition detected in environment variable. "
            "paddleaudio requires -std=c++14. Please remove -std=c++ settings in your environment.")
endif()

set(CMAKE_CXX_STANDARD 14)
# Fail at configure time instead of silently decaying to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_C_STANDARD 11)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_VERBOSE_MAKEFILE ON)

# Options
option(BUILD_SOX "Build libsox statically" ON)
option(BUILD_MAD "Enable libmad" ON)
option(BUILD_KALDI "Build kaldi statically" ON)
option(BUILD_PADDLEAUDIO_PYTHON_EXTENSION "Build Python extension" ON)

# Make the project's own find-modules and external-project scripts visible.
list(APPEND CMAKE_MODULE_PATH
  "${PROJECT_SOURCE_DIR}/cmake"
  "${PROJECT_SOURCE_DIR}/cmake/external")

if(NOT MSVC)
  # find_package() runs cmake/FindGFortranLibs.cmake in this scope, so its
  # result variables (e.g. GFORTRAN_LIBRARIES_DIR) are available afterwards;
  # the module does not need to be include()d a second time.
  find_package(GFortranLibs REQUIRED)
  include(FortranCInterface)
endif()

# fc_patch dir: download/build area shared by the third-party dependencies.
set(FETCHCONTENT_QUIET off)
get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${PROJECT_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_patch})
set(THIRD_PARTY_PATH ${fc_patch})

include(openblas)

# NOTE(review): PY_VERSION and PYTHON_INCLUDE_DIR are not defined in this
# file; presumably they are passed on the command line by the Python build
# driver -- confirm.
set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
include(cmake/pybind.cmake)
include_directories(${PYTHON_INCLUDE_DIR})

# packages
find_package(Python3 COMPONENTS Interpreter Development)

# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O0 -Wall -g")
add_subdirectory(paddleaudio)

# Summary
include(cmake/summary.cmake)
onnx_print_configuration_summary()
# PaddleAudio
安装方式: pip install paddleaudio
目前支持的平台:Linux:
## Environment
## Build wheel
Linux test build whl environment:
* docker - `registry.baidubce.com/paddlepaddle/paddle:2.2.2`
* os - Ubuntu 16.04.7 LTS
* gcc/g++/gfortran - 8.2.0
* cmake - 3.18.0 (need install)
* [How to Install Docker](https://docs.docker.com/engine/install/)
* [A Docker Tutorial for Beginners](https://docker-curriculum.com/)
1. First, launch the Docker container.
```
docker run --privileged --net=host --ipc=host -it --rm -v $PWD:/workspace --name=dev registry.baidubce.com/paddlepaddle/paddle:2.2.2 /bin/bash
```
2. python setup.py bdist_wheel
MAC: test build whl environment:
* os
* gcc/g++/gfortran 12.2.0
* cpu Intel Xeon E5 x86_64
Windows:
Not supported: the paddleaudio C++ extension lib (sox io, kaldi native fbank).
python setup.py bdist_wheel
\ No newline at end of file
#.rst:
# FindGFortranLibs
# --------
# https://github.com/Argonne-National-Laboratory/PIPS/blob/master/cmake/Modules/FindGFortranLibs.cmake
# https://enccs.github.io/cmake-workshop/cxx-fortran/
#
# Find gcc Fortran compiler & library paths
#
# The module defines the following variables:
#
# ::
#
#
# GFORTRANLIBS_FOUND - true if system has gfortran
# LIBGFORTRAN_LIBRARIES - path to libgfortran
# LIBQUADMATH_LIBRARIES - path to libquadmath
# GFORTRAN_LIBRARIES_DIR - directory containing libgfortran, libquadmath
# GFORTRAN_INCLUDE_DIR - directory containing gfortran/gcc headers
# LIBGOMP_LIBRARIES - path to libgomp
# LIBGOMP_INCLUDE_DIR - directory containing omp.h header
# GFORTRAN_VERSION_STRING - version of gfortran found
#

# Honour find_package(GFortranLibs QUIET): the flag for this module is
# GFortranLibs_FIND_QUIETLY.
set(CMAKE_REQUIRED_QUIET ${GFortranLibs_FIND_QUIETLY})

if(NOT CMAKE_REQUIRED_QUIET)
  message(STATUS "Looking for gfortran related libraries...")
endif()

enable_language(Fortran)
if(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")

  # Basically, call "gfortran -v" to dump compiler info to the string
  # GFORTRAN_VERBOSE_STR, which will be used to get necessary paths
  message(STATUS "Extracting library and header information by calling 'gfortran -v'...")
  execute_process(COMMAND "${CMAKE_Fortran_COMPILER}" "-v" ERROR_VARIABLE
    GFORTRAN_VERBOSE_STR RESULT_VARIABLE FLAG)

  # For debugging
  message(STATUS "'gfortran -v' returned:")
  message(STATUS "${GFORTRAN_VERBOSE_STR}")

  # Detect gfortran version
  string(REGEX MATCH "gcc version [^\t\n ]+" GFORTRAN_VER_STR "${GFORTRAN_VERBOSE_STR}")
  string(REGEX REPLACE "gcc version ([^\t\n ]+)" "\\1" GFORTRAN_VERSION_STRING "${GFORTRAN_VER_STR}")
  message(STATUS "Detected gfortran version ${GFORTRAN_VERSION_STRING}")
  unset(GFORTRAN_VER_STR)

  # Find architecture for compiler
  string(REGEX MATCH "Target: [^\t\n ]+"
    GFORTRAN_ARCH_STR "${GFORTRAN_VERBOSE_STR}")
  message(STATUS "Architecture string: ${GFORTRAN_ARCH_STR}")
  string(REGEX REPLACE "Target: ([^\t\n ]+)" "\\1"
    GFORTRAN_ARCH "${GFORTRAN_ARCH_STR}")
  message(STATUS "Detected gfortran architecture: ${GFORTRAN_ARCH}")
  unset(GFORTRAN_ARCH_STR)

  # Find install prefix, if it exists; if not, use default
  string(REGEX MATCH "--prefix=[^\t\n ]+[\t\n ]+"
    GFORTRAN_PREFIX_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_PREFIX_STR)
    message(STATUS "Detected default gfortran prefix")
    set(GFORTRAN_PREFIX_DIR "/usr/local") # default prefix for gcc install
  else()
    # The match keeps its trailing whitespace; it is stripped below before
    # the value is used to build paths.
    string(REGEX REPLACE "--prefix=([^\t\n ]+)" "\\1"
      GFORTRAN_PREFIX_DIR "${GFORTRAN_PREFIX_STR}")
  endif()
  message(STATUS "Detected gfortran prefix: ${GFORTRAN_PREFIX_DIR}")
  unset(GFORTRAN_PREFIX_STR)

  # Find install exec-prefix, if it exists; if not, use default.
  # string(REGEX MATCH <regex> <out-var> <input>): no replacement expression
  # belongs here, otherwise the result lands in the wrong variable.
  string(REGEX MATCH "--exec-prefix=[^\t\n ]+[\t\n ]+"
    GFORTRAN_EXEC_PREFIX_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_EXEC_PREFIX_STR)
    message(STATUS "Detected default gfortran exec-prefix")
    set(GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_PREFIX_DIR}")
  else()
    string(REGEX REPLACE "--exec-prefix=([^\t\n ]+)" "\\1"
      GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_EXEC_PREFIX_STR}")
    string(STRIP "${GFORTRAN_EXEC_PREFIX_DIR}" GFORTRAN_EXEC_PREFIX_DIR)
  endif()
  message(STATUS "Detected gfortran exec-prefix: ${GFORTRAN_EXEC_PREFIX_DIR}")
  unset(GFORTRAN_EXEC_PREFIX_STR)

  # Find library directory and include directory, if library directory specified
  string(REGEX MATCH "--libdir=[^\t\n ]+"
    GFORTRAN_LIB_DIR_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_LIB_DIR_STR)
    message(STATUS "Found --libdir flag -- not found")
    message(STATUS "Using default gfortran library & include directory paths")
    string(STRIP "${GFORTRAN_PREFIX_DIR}" TMPLIBDIR)
    set(GFORTRAN_LIBRARIES_DIR "${TMPLIBDIR}/lib64")
    set(GFORTRAN_INCLUDE_DIR "${TMPLIBDIR}/include")
  else()
    message(STATUS "Found --libdir flag -- yes")
    string(REGEX REPLACE "--libdir=([^\t\n ]+)" "\\1"
      GFORTRAN_LIBRARIES_DIR "${GFORTRAN_LIB_DIR_STR}")
    string(CONCAT GFORTRAN_INCLUDE_DIR "${GFORTRAN_LIBRARIES_DIR}" "/gcc/" "${GFORTRAN_ARCH}" "/" "${GFORTRAN_VERSION_STRING}" "/include")
  endif()
  message(STATUS "gfortran libraries path: ${GFORTRAN_LIBRARIES_DIR}")
  message(STATUS "gfortran include path dir: ${GFORTRAN_INCLUDE_DIR}")
  unset(GFORTRAN_LIB_DIR_STR)

  # There are lots of other build options for gcc & gfortran. For now, the
  # options implemented above should cover a lot of common use cases.

  # Clean up by deleting the output string from "gfortran -v"
  unset(GFORTRAN_VERBOSE_STR)

  # Find paths for libgfortran, libquadmath, libgomp
  # libgomp needed for OpenMP support without Clang
  find_library(LIBGFORTRAN_LIBRARIES NAMES gfortran libgfortran
    HINTS ${GFORTRAN_LIBRARIES_DIR})
  find_library(LIBQUADMATH_LIBRARIES NAMES quadmath libquadmath
    HINTS ${GFORTRAN_LIBRARIES_DIR})
  find_library(LIBGOMP_LIBRARIES NAMES gomp libgomp
    HINTS ${GFORTRAN_LIBRARIES_DIR})

  # Find OpenMP headers
  find_path(LIBGOMP_INCLUDE_DIR NAMES omp.h HINTS ${GFORTRAN_INCLUDE_DIR})

else()
  message(STATUS "CMAKE_Fortran_COMPILER_ID does not match 'GNU'!")
endif()

include(FindPackageHandleStandardArgs)

# Required: libgfortran, libquadmath, path for gfortran libraries
# Optional: libgomp, path for OpenMP headers, path for gcc/gfortran headers
find_package_handle_standard_args(GFortranLibs
  REQUIRED_VARS LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES GFORTRAN_LIBRARIES_DIR
  VERSION_VAR GFORTRAN_VERSION_STRING)

if(GFORTRANLIBS_FOUND)
  message(STATUS "Looking for gfortran libraries -- found")
  message(STATUS "gfortran version: ${GFORTRAN_VERSION_STRING}")
else()
  message(STATUS "Looking for gfortran libraries -- not found")
endif()

mark_as_advanced(LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES
  LIBGOMP_LIBRARIES LIBGOMP_INCLUDE_DIR
  GFORTRAN_LIBRARIES_DIR GFORTRAN_INCLUDE_DIR)
# FindGFortranLIBS.cmake ends here

# Echo the detected locations so they show up in the configure log.
message(STATUS LIBGFORTRAN_LIBRARIES= ${LIBGFORTRAN_LIBRARIES})
message(STATUS LIBQUADMATH_LIBRARIES= ${LIBQUADMATH_LIBRARIES})
message(STATUS LIBGOMP_LIBRARIES= ${LIBGOMP_LIBRARIES})
message(STATUS LIBGOMP_INCLUDE_DIR= ${LIBGOMP_INCLUDE_DIR})
message(STATUS GFORTRAN_LIBRARIES_DIR= ${GFORTRAN_LIBRARIES_DIR})
message(STATUS GFORTRAN_INCLUDE_DIR= ${GFORTRAN_INCLUDE_DIR})
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds OpenBLAS as an external project and exposes it to the rest of the
# build. Non-Windows platforms build the release zip with plain `make`;
# Windows builds the git checkout through OpenBLAS' own CMake project.
include(ExternalProject)
# All paths live under THIRD_PARTY_PATH, which must be set by the includer.
set(CBLAS_PREFIX_DIR ${THIRD_PARTY_PATH}/openblas)
set(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
# Repository and tag are only consumed by the WIN32 branch below.
set(CBLAS_REPOSITORY https://github.com/xianyi/OpenBLAS.git)
set(CBLAS_TAG v0.3.10)
if(NOT WIN32)
# Expected location of the static library and headers after `make install`.
set(CBLAS_LIBRARIES
"${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
CACHE FILEPATH "openblas library." FORCE)
set(CBLAS_INC_DIR
"${CBLAS_INSTALL_DIR}/include"
CACHE PATH "openblas include directory." FORCE)
# NOTE(review): OPENBLAS_CC is only forwarded to make (through COMMON_ARGS)
# inside the APPLE branch below; on other platforms it is set but unused in
# this file.
set(OPENBLAS_CC
"${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable")
if(APPLE)
set(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}")
endif()
# Extra `make` variables; both stay empty unless the APPLE branch fills them.
set(OPTIONAL_ARGS "")
set(COMMON_ARGS "")
if(APPLE)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$")
set(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64)
endif()
set(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1)
endif()
# NOTE(review): NPROC is not defined in this file; if the caller does not
# provide it the build command becomes `make -j` -- confirm it is always set.
# NOTE(review): GIT_SHALLOW is a git-download option; with a URL download it
# presumably has no effect -- confirm.
ExternalProject_Add(
OPENBLAS
URL "https://paddleaudio.bj.bcebos.com/build/OpenBLAS-0.3.10.zip"
GIT_SHALLOW YES
DOWNLOAD_DIR ${CBLAS_PREFIX_DIR}
SOURCE_DIR ${CBLAS_PREFIX_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1
BUILD_COMMAND make -j${NPROC} ${COMMON_ARGS} ${OPTIONAL_ARGS}
INSTALL_COMMAND make install PREFIX=<INSTALL_DIR>
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_BYPRODUCTS ${CBLAS_LIBRARIES})
# Read back the install dir recorded by ExternalProject (sets INSTALL_DIR).
ExternalProject_Get_Property(OPENBLAS INSTALL_DIR)
set(OpenBLAS_INSTALL_PREFIX ${INSTALL_DIR})
# Imported static target `openblas`, built by the OPENBLAS external project.
add_library(openblas STATIC IMPORTED)
add_dependencies(openblas OPENBLAS)
set_target_properties(openblas PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES Fortran)
set_target_properties(openblas PROPERTIES IMPORTED_LOCATION ${OpenBLAS_INSTALL_PREFIX}/lib/libopenblas.a)
# Directory-scoped search paths: these apply to every target defined after
# this file is included.
link_directories(${OpenBLAS_INSTALL_PREFIX}/lib)
include_directories(${OpenBLAS_INSTALL_PREFIX}/include)
set(OPENBLAS_LIBRARIES
${OpenBLAS_INSTALL_PREFIX}/lib/libopenblas.a
)
# Interface target `libopenblas` carrying the include dir and the archive.
add_library(libopenblas INTERFACE)
add_dependencies(libopenblas openblas)
target_include_directories(libopenblas INTERFACE ${OpenBLAS_INSTALL_PREFIX}/include/openblas)
target_link_libraries(libopenblas INTERFACE ${OPENBLAS_LIBRARIES})
else()
# Windows: build OpenBLAS from git with its CMake project using clang-cl/flang.
set(CBLAS_LIBRARIES
"${CBLAS_INSTALL_DIR}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
CACHE FILEPATH "openblas library." FORCE)
set(CBLAS_INC_DIR
"${CBLAS_INSTALL_DIR}/include/openblas"
CACHE PATH "openblas include directory." FORCE)
# NOTE(review): EXTERNAL_PROJECT_LOG_ARGS is not defined in this file;
# presumably provided by the includer -- confirm.
ExternalProject_Add(
extern_openblas
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY ${CBLAS_REPOSITORY}
GIT_TAG ${CBLAS_TAG}
PREFIX ${CBLAS_PREFIX_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 0
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_C_COMPILER=clang-cl
-DCMAKE_CXX_COMPILER=clang-cl
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_INSTALL_PREFIX=${CBLAS_INSTALL_DIR}
-DCMAKE_BUILD_TYPE=Release #${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_MT=mt
-DUSE_THREAD=OFF
-DBUILD_WITHOUT_LAPACK=NO
-DCMAKE_Fortran_COMPILER=flang
-DNOFORTRAN=0
-DDYNAMIC_ARCH=ON
#${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${CBLAS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release #${THIRD_PARTY_BUILD_TYPE}
# ninja need to know where openblas.lib comes from
BUILD_BYPRODUCTS ${CBLAS_LIBRARIES})
set(OPENBLAS_SHARED_LIB
${CBLAS_INSTALL_DIR}/bin/openblas${CMAKE_SHARED_LIBRARY_SUFFIX})
# Interface target `openblas` that only orders the build after
# extern_openblas; headers and libs are added at directory scope below.
add_library(openblas INTERFACE)
add_dependencies(openblas extern_openblas)
include_directories(${CBLAS_INC_DIR})
link_libraries(${CBLAS_LIBRARIES})
endif()
#the pybind11 is from:https://github.com/pybind/pybind11
# Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
#
# Downloads the pybind11 release archive into FETCHCONTENT_BASE_DIR (set by
# the includer), unpacks it to ${FETCHCONTENT_BASE_DIR}/pybind11 and adds its
# headers to the include path.

set(PYBIND_ZIP "v2.10.0.zip")
set(LOCAL_PYBIND_ZIP ${FETCHCONTENT_BASE_DIR}/${PYBIND_ZIP})
set(PYBIND_SRC ${FETCHCONTENT_BASE_DIR}/pybind11)
set(DOWNLOAD_URL "https://paddleaudio.bj.bcebos.com/build/v2.10.0.zip")
set(PYBIND_TIMEOUT 600 CACHE STRING "Timeout in seconds when downloading pybind.")

if(NOT EXISTS "${LOCAL_PYBIND_ZIP}")
  file(DOWNLOAD "${DOWNLOAD_URL}"
    "${LOCAL_PYBIND_ZIP}"
    TIMEOUT ${PYBIND_TIMEOUT}
    STATUS ERR
    SHOW_PROGRESS
  )
  # STATUS is a two-element list "<code>;<message>"; only the code decides.
  list(GET ERR 0 pybind_download_code)
  if(pybind_download_code EQUAL 0)
    message(STATUS "download pybind success")
  else()
    # Drop the partial file, otherwise the EXISTS guard above would skip the
    # download on the next configure and try to unpack a broken archive.
    file(REMOVE "${LOCAL_PYBIND_ZIP}")
    message(FATAL_ERROR "download pybind fail: ${ERR}")
  endif()
endif()

if(NOT EXISTS "${PYBIND_SRC}")
  execute_process(
    COMMAND ${CMAKE_COMMAND} -E tar xfz "${LOCAL_PYBIND_ZIP}"
    WORKING_DIRECTORY "${FETCHCONTENT_BASE_DIR}"
    RESULT_VARIABLE tar_result
  )
  # Check the extraction before touching its output: a numeric comparison
  # (not a regex match) so that exit codes such as 10 count as failures.
  if(tar_result EQUAL 0)
    message(STATUS "unzip pybind success")
  else()
    message(FATAL_ERROR "unzip pybind fail")
  endif()
  # The archive unpacks to pybind11-2.10.0; give it a version-free name.
  file(RENAME "${FETCHCONTENT_BASE_DIR}/pybind11-2.10.0" "${PYBIND_SRC}")
endif()

include_directories(${PYBIND_SRC}/include)
# SPDX-License-Identifier: Apache-2.0
# Prints accumulated ONNX configuration summary
#
# onnx_print_configuration_summary()
#   Takes no arguments. Emits one message(STATUS ...) line per setting:
#   CMake/compiler information, the directory's COMPILE_DEFINITIONS, then
#   ONNX, Protobuf, Python and pybind11 variables as currently set in the
#   calling scope. A variable that is not set simply prints as empty.
#
# NOTE(review): the ONNX_*/PROTOBUF_* variables are not set anywhere in the
# visible build scripts, so those lines presumably print empty values here.
# NOTE(review): the Python lines read Python_* variables, while the top-level
# script calls find_package(Python3), which defines Python3_*; confirm which
# prefix the build driver actually provides.
function (onnx_print_configuration_summary)
message(STATUS "")
message(STATUS "******** Summary ********")
message(STATUS " CMake version : ${CMAKE_VERSION}")
message(STATUS " CMake command : ${CMAKE_COMMAND}")
message(STATUS " System : ${CMAKE_SYSTEM_NAME}")
message(STATUS " C++ compiler : ${CMAKE_CXX_COMPILER}")
message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}")
message(STATUS " Build type : ${CMAKE_BUILD_TYPE}")
# Compile definitions attached to the project's top-level directory.
get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR} COMPILE_DEFINITIONS)
message(STATUS " Compile definitions : ${tmp}")
message(STATUS " CMAKE_PREFIX_PATH : ${CMAKE_PREFIX_PATH}")
message(STATUS " CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}")
message(STATUS " CMAKE_MODULE_PATH : ${CMAKE_MODULE_PATH}")
message(STATUS "")
message(STATUS " ONNX version : ${ONNX_VERSION}")
message(STATUS " ONNX NAMESPACE : ${ONNX_NAMESPACE}")
message(STATUS " ONNX_USE_LITE_PROTO : ${ONNX_USE_LITE_PROTO}")
message(STATUS " USE_PROTOBUF_SHARED_LIBS : ${ONNX_USE_PROTOBUF_SHARED_LIBS}")
message(STATUS " Protobuf_USE_STATIC_LIBS : ${Protobuf_USE_STATIC_LIBS}")
message(STATUS " ONNX_DISABLE_EXCEPTIONS : ${ONNX_DISABLE_EXCEPTIONS}")
message(STATUS " ONNX_WERROR : ${ONNX_WERROR}")
message(STATUS " ONNX_BUILD_TESTS : ${ONNX_BUILD_TESTS}")
message(STATUS " ONNX_BUILD_BENCHMARKS : ${ONNX_BUILD_BENCHMARKS}")
message(STATUS " ONNXIFI_DUMMY_BACKEND : ${ONNXIFI_DUMMY_BACKEND}")
message(STATUS " ONNXIFI_ENABLE_EXT : ${ONNXIFI_ENABLE_EXT}")
message(STATUS "")
message(STATUS " Protobuf compiler : ${PROTOBUF_PROTOC_EXECUTABLE}")
message(STATUS " Protobuf includes : ${PROTOBUF_INCLUDE_DIRS}")
message(STATUS " Protobuf libraries : ${PROTOBUF_LIBRARIES}")
message(STATUS " BUILD_ONNX_PYTHON : ${BUILD_ONNX_PYTHON}")
message(STATUS " Python version : ${Python_VERSION}")
message(STATUS " Python executable : ${Python_EXECUTABLE}")
message(STATUS " Python includes : ${Python_INCLUDE_DIR}")
message(STATUS " Python libraries : ${Python_LIBRARY}")
message(STATUS " PYBIND11 : ${pybind11_FOUND}")
message(STATUS " Pybind11 version : ${pybind11_VERSION}")
message(STATUS " Pybind11 include : ${pybind11_INCLUDE_DIR}")
message(STATUS " Pybind11 includes : ${pybind11_INCLUDE_DIRS}")
message(STATUS " Pybind11 libraries : ${pybind11_LIBRARIES}")
endfunction()
\ No newline at end of file
# Third-party dependencies are configured before the paddleaudio sources.
add_subdirectory(third_party)
add_subdirectory(src)

# Copy the GCC/gfortran runtime libraries found under GFORTRAN_LIBRARIES_DIR
# into the `lib` directory next to this file. APPLE implies UNIX, so the
# second branch only runs on non-Apple Unix systems.
if(APPLE)
  file(COPY ${GFORTRAN_LIBRARIES_DIR}/libgcc_s.1.1.dylib
    DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/lib)
elseif(UNIX)
  foreach(runtime_lib libgfortran.so.5 libquadmath.so.0 libgcc_s.so.1)
    file(COPY ${GFORTRAN_LIBRARIES_DIR}/${runtime_lib}
      DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/lib FOLLOW_SYMLINK_CHAIN)
  endforeach()
endif()
...@@ -11,9 +11,12 @@ ...@@ -11,9 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .soundfile_backend import depth_convert from . import _extension
from .soundfile_backend import load from . import backends
from .soundfile_backend import normalize from . import compliance
from .soundfile_backend import resample from . import datasets
from .soundfile_backend import save from . import features
from .soundfile_backend import to_mono from . import functional
from . import metric
from . import sox_effects
from . import utils
import contextlib
import ctypes
import os
import sys
import types
import warnings
from pathlib import Path
from ._internal import module_utils as _mod_utils # noqa: F401
# Query `hasattr` only once.
_SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys,
                                                               'setdlopenflags')


@contextlib.contextmanager
def dl_open_guard():
    """Temporarily add ``RTLD_GLOBAL`` to the interpreter's dlopen flags.

    Context manager used while opening a shared library that registers
    custom operators. See
    https://manpages.debian.org/bullseye/manpages-dev/dlopen.3.en.html

    On interpreters without ``sys.getdlopenflags``/``sys.setdlopenflags``
    the guard is a no-op. The previous flags are restored when the block
    exits, including when the guarded code raises.
    """
    if not _SET_GLOBAL_FLAGS:
        yield
        return

    old_flags = sys.getdlopenflags()
    sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL)
    try:
        yield
    finally:
        # Without the finally, a failing load would leave RTLD_GLOBAL set
        # for every later import in the process.
        sys.setdlopenflags(old_flags)
def resolve_library_path(path: str) -> str:
    """Return the canonical form of ``path`` with symbolic links resolved."""
    canonical = os.path.realpath(path)
    return canonical
class _Ops(types.ModuleType):
    """Module object named ``paddleaudio.ops`` that records loaded libraries."""

    #__file__ = '_ops.py'

    def __init__(self):
        super().__init__('paddleaudio.ops')
        # Resolved paths of every library loaded through `load_library`.
        self.loaded_libraries = set()

    def load_library(self, path):
        """Load the shared library at ``path`` into the current process.

        Use this to load custom operators at runtime: compile the operator
        and its static registration code into a shared object, then call
        ``paddleaudio.ops.load_library('path/to/libcustom.so')``.

        Once loaded, the resolved path is added to
        ``paddleaudio.ops.loaded_libraries``, a set that can be inspected for
        the paths of all libraries loaded through this function.

        Args:
            path (str): A path to a shared library to load.
        """
        resolved = resolve_library_path(path)
        with dl_open_guard():
            # https://docs.python.org/3/library/ctypes.html?highlight=ctypes#loading-shared-libraries
            # Opening the library runs its static (global) initializers,
            # which is how the custom operators register themselves.
            ctypes.CDLL(resolved)
        self.loaded_libraries.add(resolved)
_LIB_DIR = Path(__file__).parent / "lib"
def _get_lib_path(lib: str):
suffix = "pyd" if os.name == "nt" else "so"
path = _LIB_DIR / f"{lib}.{suffix}"
return path
def _load_lib(lib: str) -> bool:
    """Load the extension library ``lib`` from the package ``lib`` directory.

    Note:
        When `paddleaudio` is deployed in `pex` format the library file is
        not in its standard location. In that case `libpaddleaudio` is
        expected to be reachable through the dynamic loader's search path,
        so that importing `_paddleaudio` makes the loader find and load
        `libpaddleaudio`. This is why a missing file only produces a
        warning instead of an error.

    Returns:
        bool:
            True if the library file exists AND was loaded without failure.
            False if the library file does not exist (for instance in a pex
            deployment where the shared library lives elsewhere).

    Raises:
        Exception:
            If the file exists but loading it fails (the underlying
            `ctypes.CDLL` call raises), the exception is propagated as-is
            rather than converted to a bool. The expected case is `OSError`
            for a missing dynamic dependency, which is not recoverable
            here: the user has to install the dependency.
    """
    lib_file = _get_lib_path(lib)
    if lib_file.exists():
        ops.load_library(lib_file)
        return True
    warnings.warn("lib path is not exists:" + str(lib_file))
    return False
# Set to True once `_init_ffmpeg` has completed, so it runs at most once.
_FFMPEG_INITIALIZED = False


def _init_ffmpeg():
    """Initialize the optional FFmpeg integration of the C++ extension.

    Does nothing when a previous call already succeeded.

    Raises:
        RuntimeError: If the extension reports that it was built without
            FFmpeg support.
        ImportError: If loading the FFmpeg helper library raises `OSError`.
    """
    global _FFMPEG_INITIALIZED
    if _FFMPEG_INITIALIZED:
        return

    # NOTE(review): the name `paddleaudio` is not imported in the visible
    # part of this module, and `_paddlleaudio` / `paddllespeech` look like
    # misspellings of `_paddleaudio` / `paddlespeech`. As written this
    # presumably fails on first use, and the only visible caller swallows
    # the exception. Confirm whether FFmpeg support is intended at all.
    if not paddleaudio._paddlleaudio.is_ffmpeg_available():
        raise RuntimeError(
            "paddlleaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling paddlleaudio."
        )

    try:
        _load_lib("libpaddlleaudio_ffmpeg")
    except OSError as err:
        raise ImportError(
            "FFmpeg libraries are not found. Please install FFmpeg.") from err

    import paddllespeech.audio._paddlleaudio_ffmpeg # noqa

    paddleaudio._paddlleaudio.ffmpeg_init()
    # Cap the FFmpeg log level at 8 when it is currently higher.
    if paddleaudio._paddlleaudio.ffmpeg_get_log_level() > 8:
        paddleaudio._paddlleaudio.ffmpeg_set_log_level(8)

    _FFMPEG_INITIALIZED = True
def _init_extension():
    """Load the paddleaudio C++ extension, warning instead of failing.

    If the `paddleaudio._paddleaudio` module cannot be found or imported, a
    warning is emitted and the function returns. FFmpeg initialization is
    attempted last and any failure there is ignored.
    """
    unavailable_msg = "paddleaudio C++ extension is not available. sox_io, sox_effect, kaldi raw feature is not supported!!!"

    if not _mod_utils.is_module_available("paddleaudio._paddleaudio"):
        warnings.warn(unavailable_msg)
        return

    _load_lib("libpaddleaudio")

    # Importing the module initializes the methods registered via PyBind11;
    # it has to happen after the base library is loaded.
    try:
        from paddleaudio import _paddleaudio  # noqa
    except Exception:
        warnings.warn(unavailable_msg)
        return

    # This runs as part of `import paddleaudio`, so an initialization failure
    # is ignored here. If the FFmpeg integration is not properly initialized,
    # a detailed error is raised when client code uses the dedicated feature.
    with contextlib.suppress(Exception):
        _init_ffmpeg()


# Module-level registry of loaded libraries, then one-time extension setup.
ops = _Ops()
_init_extension()
import importlib.util
import platform
import warnings
from functools import wraps
from typing import Optional
#code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py with modification.
def is_module_available(*modules: str) -> bool:
    r"""Return whether every module named in ``modules`` can be found.

    The modules themselves are not imported, which is generally safer than a
    try/except block around ``import X``: it avoids third party libraries
    breaking assumptions of some of our tests, e.g., setting multiprocessing
    start method when imported (see librosa/#747, torchvision/#544).

    For a dotted name such as ``"pkg.mod"`` the parent package is imported by
    ``importlib.util.find_spec``; if that import fails the module is reported
    as unavailable instead of propagating the error.

    Args:
        *modules (str): Module names to look up.

    Returns:
        bool: True if all modules are found (also when no name is given),
        False as soon as one is missing.
    """
    for name in modules:
        try:
            spec = importlib.util.find_spec(name)
        except ImportError:
            # Raised (as ModuleNotFoundError) when a parent package of a
            # dotted name does not exist.
            return False
        if spec is None:
            return False
    return True
def requires_module(*modules: str):
    """Build a decorator that guards a function behind optional modules.

    Availability is checked once, when this factory is called. If every
    module is available the decorator returns the function unchanged.
    Otherwise the decorated function raises ``RuntimeError`` naming the
    missing module(s) when invoked, which gives users a clearer message than
    ``NameError: name 'module' is not defined`` at random places.
    """
    unavailable = [name for name in modules if not is_module_available(name)]

    if unavailable:
        if len(unavailable) == 1:
            req = f"module: {unavailable[0]}"
        else:
            req = f"modules: {unavailable}"

        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
                raise RuntimeError(
                    f"{func.__module__}.{func.__name__} requires {req}")

            return wrapped
    else:
        # Nothing is missing, so there is no need to wrap the function.
        def decorator(func):
            return func

    return decorator
def deprecated(direction: str, version: Optional[str]=None):
    """Build a decorator that warns about deprecation on every call.

    Args:
        direction (str): Migration steps to be given to users.
        version (str or int): The version when the object will be removed.
            When omitted, the message says "future" release.
    """

    def decorator(func):
        @wraps(func)
        def wrapped(*args, **kwargs):
            release = "future" if version is None else version
            notice = (f"{func.__module__}.{func.__name__} has been deprecated "
                      f"and will be removed from {release} release. "
                      f"{direction}")
            # stacklevel=2 attributes the warning to the caller's line.
            warnings.warn(notice, stacklevel=2)
            return func(*args, **kwargs)

        return wrapped

    return decorator
def is_kaldi_available():
    """Return whether the ``paddleaudio._paddleaudio`` module can be found."""
    extension_module = "paddleaudio._paddleaudio"
    return is_module_available(extension_module)
def requires_kaldi():
    """Build a decorator that guards a function behind kaldi support.

    Availability is checked once, when this factory is called. When kaldi
    support is unavailable the decorated function raises ``RuntimeError``
    on every call; otherwise the function is returned unchanged.
    """
    if not is_kaldi_available():

        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
                raise RuntimeError(
                    f"{func.__module__}.{func.__name__} requires libpaddleaudio build with kaldi")

            return wrapped
    else:

        def decorator(func):
            return func

    return decorator
def _check_soundfile_importable():
    """Return True only if ``soundfile`` is both found and importable.

    A module that is found but fails to import triggers a warning and
    yields False.
    """
    if is_module_available("soundfile"):
        try:
            import soundfile  # noqa: F401
        except Exception:
            warnings.warn(
                "Failed to import soundfile. 'soundfile' backend is not available.")
            return False
        return True
    return False


# Evaluated once at import time; `is_soundfile_available` reports this value.
_is_soundfile_importable = _check_soundfile_importable()


def is_soundfile_available():
    """Return the import-time result of the soundfile importability check."""
    return _is_soundfile_importable
def requires_soundfile():
    """Build a decorator that guards a function behind ``soundfile``.

    Availability is checked once, when this factory is called. When
    soundfile is unavailable the decorated function raises ``RuntimeError``
    on every call; otherwise the function is returned unchanged.
    """
    if not is_soundfile_available():

        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
                raise RuntimeError(
                    f"{func.__module__}.{func.__name__} requires soundfile")

            return wrapped
    else:

        def decorator(func):
            return func

    return decorator
def is_sox_available():
    """Return whether sox support can be used.

    Always False on Windows; elsewhere it is True when the
    ``paddleaudio._paddleaudio`` module can be found.
    """
    on_windows = platform.system() == "Windows"
    # sox is not supported on Windows, so the module lookup is skipped there.
    return (not on_windows) and is_module_available("paddleaudio._paddleaudio")
def requires_sox():
    """Build a decorator that guards a function behind sox support.

    Availability is checked once, when this factory is called. When sox
    support is unavailable the decorated function raises ``RuntimeError``
    on every call; otherwise the function is returned unchanged.
    """
    if not is_sox_available():

        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
                raise RuntimeError(
                    f"{func.__module__}.{func.__name__} requires libpaddleaudio build with sox")

            return wrapped
    else:

        def decorator(func):
            return func

    return decorator
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import utils
from .soundfile_backend import depth_convert
from .soundfile_backend import normalize
from .soundfile_backend import resample
from .soundfile_backend import soundfile_load
from .soundfile_backend import soundfile_save
from .soundfile_backend import to_mono
from .utils import get_audio_backend
from .utils import list_audio_backends
from .utils import set_audio_backend
utils._init_audio_backend()
# Taken from https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py with modifications.
class AudioInfo:
    """Metadata returned by the ``info`` function of an audio backend.

    This class is used by :ref:`"sox_io" backend<sox_io_backend>` and
    :ref:`"soundfile" backend with the new interface<soundfile_backend>`.

    :ivar int sample_rate: Sample rate
    :ivar int num_frames: The number of frames
    :ivar int num_channels: The number of channels
    :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats,
        or when it cannot be accurately inferred.
    :ivar str encoding: Audio encoding
        The values encoding can take are one of the following:

            * ``PCM_S``: Signed integer linear PCM
            * ``PCM_U``: Unsigned integer linear PCM
            * ``PCM_F``: Floating point linear PCM
            * ``FLAC``: Flac, Free Lossless Audio Codec
            * ``ULAW``: Mu-law
            * ``ALAW``: A-law
            * ``MP3`` : MP3, MPEG-1 Audio Layer III
            * ``VORBIS``: OGG Vorbis
            * ``AMR_WB``: Adaptive Multi-Rate Wideband
            * ``AMR_NB``: Adaptive Multi-Rate (narrowband)
            * ``OPUS``: Opus
            * ``HTK``: Single channel 16-bit PCM
            * ``UNKNOWN`` : None of above
    """

    def __init__(
            self,
            sample_rate: int,
            num_frames: int,
            num_channels: int,
            bits_per_sample: int,
            encoding: str, ):
        """Store the five metadata fields verbatim on the instance."""
        self.sample_rate = sample_rate
        self.num_frames = num_frames
        self.num_channels = num_channels
        self.bits_per_sample = bits_per_sample
        self.encoding = encoding

    def __str__(self):
        """Render the fields as ``<ClassName>(field=value, ...)``."""
        # Use the real class name; the previous hard-coded "AudioMetaData"
        # was a leftover from the torchaudio class this was adapted from.
        return (f"{type(self).__name__}("
                f"sample_rate={self.sample_rate}, "
                f"num_frames={self.num_frames}, "
                f"num_channels={self.num_channels}, "
                f"bits_per_sample={self.bits_per_sample}, "
                f"encoding={self.encoding}"
                f")")
from pathlib import Path
from typing import Callable
from typing import Optional
from typing import Tuple
from typing import Union
from paddle import Tensor
#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py
def load(
        filepath: Union[str, Path],
        out: Optional[Tensor]=None,
        normalization: Union[bool, float, Callable]=True,
        channels_first: bool=True,
        num_frames: int=0,
        offset: int=0,
        filetype: Optional[str]=None, ) -> Tuple[Tensor, int]:
    """Placeholder ``load`` for the "no backend" module.

    All arguments are accepted and ignored.

    Raises:
        RuntimeError: Always.
    """
    message = "No audio I/O backend is available."
    raise RuntimeError(message)
def save(
        filepath: str,
        src: Tensor,
        sample_rate: int,
        precision: int=16,
        channels_first: bool=True, ) -> None:
    """Placeholder ``save`` for the "no backend" module.

    All arguments are accepted and ignored.

    Raises:
        RuntimeError: Always.
    """
    message = "No audio I/O backend is available."
    raise RuntimeError(message)
def info(filepath: str) -> None:
    """Placeholder ``info`` for the "no backend" module.

    Raises:
        RuntimeError: Always.
    """
    message = "No audio I/O backend is available."
    raise RuntimeError(message)
import os
from typing import Optional
from typing import Tuple
import paddle
import paddleaudio
from paddle import Tensor
from paddleaudio._internal import module_utils as _mod_utils
from .common import AudioInfo
#https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py
def _fail_info(filepath: str, format: Optional[str]) -> AudioInfo:
    """Fallback for path-based ``info``; always raises.

    Raises:
        RuntimeError: Always, naming ``filepath`` in the message.
    """
    message = "Failed to fetch metadata from {}".format(filepath)
    raise RuntimeError(message)
def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioInfo:
    """Fallback for file-object ``info``; always raises.

    Raises:
        RuntimeError: Always, naming ``fileobj`` in the message.
    """
    message = "Failed to fetch metadata from {}".format(fileobj)
    raise RuntimeError(message)
# Note: need to comply TorchScript syntax -- need annotation and no f-string
def _fail_load(
        filepath: str,
        frame_offset: int=0,
        num_frames: int=-1,
        normalize: bool=True,
        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[Tensor, int]:
    """Fallback for path-based ``load``; always raises.

    Only ``filepath`` is used (in the error message); the remaining
    parameters mirror ``load`` so the two are call-compatible.

    Raises:
        RuntimeError: Always.
    """
    message = "Failed to load audio from {}".format(filepath)
    raise RuntimeError(message)
def _fail_load_fileobj(fileobj, *args, **kwargs):
raise RuntimeError(f"Failed to load audio from {fileobj}")
# Fallback handlers used by load()/info() when the sox bindings return None.
_fallback_info = _fail_info
_fallback_info_fileobj = _fail_info_fileobj
_fallback_load = _fail_load
# load() refers to this handler as `_fallback_load_fileobj`; the name was
# previously misspelled, so the file-object failure path raised NameError.
_fallback_load_fileobj = _fail_load_fileobj
# Keep the old misspelled name as an alias for backward compatibility.
_fallback_load_filebj = _fallback_load_fileobj
@_mod_utils.requires_sox()
def load(
        filepath: str,
        frame_offset: int=0,
        num_frames: int=-1,
        normalize: bool=True,
        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[Tensor, int]:
    """Load audio through the sox bindings of ``paddleaudio._paddleaudio``.

    Args:
        filepath: A path-like object, or any object exposing ``read``
            (treated as a file object).
        frame_offset, num_frames, normalize, channels_first, format:
            Forwarded unchanged to the extension call.

    Returns:
        A tuple whose first item is the first element of the extension's
        result converted with ``paddle.to_tensor`` and whose second item is
        the result's second element (presumably the sample rate; confirm
        against the extension).

    Raises:
        RuntimeError: Via the fallback handlers when the extension returns
            ``None``.
    """
    if hasattr(filepath, "read"):
        # File-like input: hand the object itself to the extension.
        result = paddleaudio._paddleaudio.load_audio_fileobj(
            filepath, frame_offset, num_frames, normalize, channels_first,
            format)
        if result is None:
            return _fallback_load_fileobj(filepath, frame_offset, num_frames,
                                          normalize, channels_first, format)
    else:
        filepath = os.fspath(filepath)
        result = paddleaudio._paddleaudio.sox_io_load_audio_file(
            filepath, frame_offset, num_frames, normalize, channels_first,
            format)
        if result is None:
            return _fallback_load(filepath, frame_offset, num_frames,
                                  normalize, channels_first, format)
    return (paddle.to_tensor(result[0]), result[1])
@_mod_utils.requires_sox()
def save(
        filepath: str,
        src: Tensor,
        sample_rate: int,
        channels_first: bool=True,
        compression: Optional[float]=None,
        format: Optional[str]=None,
        encoding: Optional[str]=None,
        bits_per_sample: Optional[int]=None, ):
    """Save a tensor as audio through the sox bindings.

    Args:
        filepath: A path-like object, or any object exposing ``write``
            (treated as a file object).
        src: Tensor to save; converted with ``src.numpy()`` before the call.
        sample_rate, channels_first, compression, format, encoding,
        bits_per_sample: Forwarded unchanged to the extension call.

    Returns:
        None.
    """
    samples = src.numpy()
    if hasattr(filepath, "write"):
        # File-like destination: hand the object itself to the extension.
        paddleaudio._paddleaudio.save_audio_fileobj(
            filepath, samples, sample_rate, channels_first, compression,
            format, encoding, bits_per_sample)
    else:
        filepath = os.fspath(filepath)
        paddleaudio._paddleaudio.sox_io_save_audio_file(
            filepath, samples, sample_rate, channels_first, compression,
            format, encoding, bits_per_sample)
@_mod_utils.requires_sox()
def info(
        filepath: str,
        format: Optional[str]=None, ) -> AudioInfo:
    """Fetch audio metadata through the sox bindings.

    Args:
        filepath: A path-like object, or any object exposing ``read``
            (treated as a file object).
        format: Forwarded unchanged to the extension call.

    Returns:
        ``AudioInfo`` built by unpacking the extension's result.

    Raises:
        RuntimeError: Via the fallback handlers when the extension returns
            ``None``.
    """
    if hasattr(filepath, "read"):
        # File-like input: hand the object itself to the extension.
        raw = paddleaudio._paddleaudio.get_info_fileobj(filepath, format)
        if raw is None:
            return _fallback_info_fileobj(filepath, format)
    else:
        filepath = os.fspath(filepath)
        raw = paddleaudio._paddleaudio.get_info_file(filepath, format)
        if raw is None:
            return _fallback_info(filepath, format)
    return AudioInfo(*raw)
"""Defines utilities for switching audio backends"""
#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/utils.py
import warnings
from typing import List
from typing import Optional
import paddleaudio
from paddleaudio._internal import module_utils as _mod_utils
from . import no_backend
from . import soundfile_backend
from . import sox_io_backend
# Names exported by `from <module> import *`: the backend-switching helpers.
__all__ = [
"list_audio_backends",
"get_audio_backend",
"set_audio_backend",
]
def list_audio_backends() -> List[str]:
    """List available backends

    Returns:
        List[str]: The list of available backends, ``"soundfile"`` first
        and ``"sox_io"`` second when both are present.
    """
    # Each probe runs exactly once, in this order.
    probes = (
        ("soundfile", _mod_utils.is_module_available("soundfile")),
        ("sox_io", _mod_utils.is_sox_available()), )
    return [name for name, available in probes if available]
def set_audio_backend(backend: Optional[str]):
    """Set the backend for I/O operation

    Rebinds ``paddleaudio.save``, ``paddleaudio.load`` and
    ``paddleaudio.info`` to the functions of the chosen backend module.

    Args:
        backend (str or None): Name of the backend.
            One of ``"sox_io"`` or ``"soundfile"`` based on availability
            of the system. If ``None`` is provided the current backend is unassigned.

    Raises:
        RuntimeError: If ``backend`` is not ``None`` and is not listed by
            ``list_audio_backends()``.
        NotImplementedError: If ``backend`` passes the availability check
            but has no module mapped to it.
    """
    if backend is not None and backend not in list_audio_backends():
        raise RuntimeError(f'Backend "{backend}" is not one of '
                           f"available backends: {list_audio_backends()}.")

    backend_modules = {
        None: no_backend,
        "sox_io": sox_io_backend,
        "soundfile": soundfile_backend,
    }
    if backend not in backend_modules:
        raise NotImplementedError(f'Unexpected backend "{backend}"')
    module = backend_modules[backend]

    for func in ("save", "load", "info"):
        setattr(paddleaudio, func, getattr(module, func))
def _init_audio_backend():
    """Pick the default backend: ``soundfile`` first, then ``sox_io``.

    When neither is available, a warning is emitted and the backend is
    unassigned via ``set_audio_backend(None)``.
    """
    available = list_audio_backends()
    for preferred in ("soundfile", "sox_io"):
        if preferred in available:
            set_audio_backend(preferred)
            return
    warnings.warn("No audio backend is available.")
    set_audio_backend(None)
def get_audio_backend() -> Optional[str]:
    """Get the name of the current backend

    The backend is identified by comparing ``paddleaudio.load`` with the
    ``load`` function of each known backend module.

    Returns:
        Optional[str]: The name of the current backend or ``None`` if no backend is assigned.

    Raises:
        ValueError: If ``paddleaudio.load`` matches none of the known backends.
    """
    known = (
        (no_backend, None),
        (sox_io_backend, "sox_io"),
        (soundfile_backend, "soundfile"), )
    for module, name in known:
        if paddleaudio.load == module.load:
            return name
    raise ValueError("Unknown backend.")
...@@ -16,7 +16,7 @@ from typing import List ...@@ -16,7 +16,7 @@ from typing import List
import numpy as np import numpy as np
import paddle import paddle
from ..backends import load as load_audio from ..backends.soundfile_backend import soundfile_load as load_audio
from ..compliance.kaldi import fbank as kaldi_fbank from ..compliance.kaldi import fbank as kaldi_fbank
from ..compliance.kaldi import mfcc as kaldi_mfcc from ..compliance.kaldi import mfcc as kaldi_mfcc
from ..compliance.librosa import melspectrogram from ..compliance.librosa import melspectrogram
......
...@@ -16,8 +16,8 @@ import os ...@@ -16,8 +16,8 @@ import os
from typing import List from typing import List
from typing import Tuple from typing import Tuple
from ..utils import DATA_HOME
from ..utils.download import download_and_decompress from ..utils.download import download_and_decompress
from ..utils.env import DATA_HOME
from .dataset import AudioClassificationDataset from .dataset import AudioClassificationDataset
__all__ = ['ESC50'] __all__ = ['ESC50']
......
...@@ -17,8 +17,8 @@ import random ...@@ -17,8 +17,8 @@ import random
from typing import List from typing import List
from typing import Tuple from typing import Tuple
from ..utils import DATA_HOME
from ..utils.download import download_and_decompress from ..utils.download import download_and_decompress
from ..utils.env import DATA_HOME
from .dataset import AudioClassificationDataset from .dataset import AudioClassificationDataset
__all__ = ['GTZAN'] __all__ = ['GTZAN']
......
...@@ -20,8 +20,8 @@ from typing import List ...@@ -20,8 +20,8 @@ from typing import List
from paddle.io import Dataset from paddle.io import Dataset
from tqdm import tqdm from tqdm import tqdm
from ..backends import load as load_audio from ..backends.soundfile_backend import soundfile_load as load_audio
from ..backends import save as save_wav from ..backends.soundfile_backend import soundfile_save as save_wav
from ..utils import DATA_HOME from ..utils import DATA_HOME
from ..utils.download import download_and_decompress from ..utils.download import download_and_decompress
from .dataset import feat_funcs from .dataset import feat_funcs
......
...@@ -17,8 +17,8 @@ import random ...@@ -17,8 +17,8 @@ import random
from typing import List from typing import List
from typing import Tuple from typing import Tuple
from ..utils import DATA_HOME
from ..utils.download import download_and_decompress from ..utils.download import download_and_decompress
from ..utils.env import DATA_HOME
from .dataset import AudioClassificationDataset from .dataset import AudioClassificationDataset
__all__ = ['TESS'] __all__ = ['TESS']
......
...@@ -16,8 +16,8 @@ import os ...@@ -16,8 +16,8 @@ import os
from typing import List from typing import List
from typing import Tuple from typing import Tuple
from ..utils import DATA_HOME
from ..utils.download import download_and_decompress from ..utils.download import download_and_decompress
from ..utils.env import DATA_HOME
from .dataset import AudioClassificationDataset from .dataset import AudioClassificationDataset
__all__ = ['UrbanSound8K'] __all__ = ['UrbanSound8K']
......
...@@ -23,7 +23,7 @@ from paddle.io import Dataset ...@@ -23,7 +23,7 @@ from paddle.io import Dataset
from pathos.multiprocessing import Pool from pathos.multiprocessing import Pool
from tqdm import tqdm from tqdm import tqdm
from ..backends import load as load_audio from ..backends.soundfile_backend import soundfile_load as load_audio
from ..utils import DATA_HOME from ..utils import DATA_HOME
from ..utils import decompress from ..utils import decompress
from ..utils.download import download_and_decompress from ..utils.download import download_and_decompress
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -18,127 +18,156 @@ from typing import Union ...@@ -18,127 +18,156 @@ from typing import Union
import paddle import paddle
from paddle import Tensor from paddle import Tensor
# Names exported by `from <module> import *`.
__all__ = [
'get_window',
]
class WindowFunctionRegister(object):
    """Name-to-function registry populated through a decorator."""

    def __init__(self):
        # Maps each registered function's ``__name__`` to the function object.
        self._functions_dict = {}

    def register(self):
        """Return a decorator that records a function under its own name.

        The decorated function is returned unchanged.
        """

        def _record(func):
            self._functions_dict[func.__name__] = func
            return func

        return _record

    def get(self, name):
        """Return the function registered as ``name``.

        Raises:
            KeyError: If nothing was registered under ``name``.
        """
        return self._functions_dict[name]


window_function_register = WindowFunctionRegister()
@window_function_register.register()
def _cat(x: List[Tensor], data_type: str) -> Tensor: def _cat(x: List[Tensor], data_type: str) -> Tensor:
l = [paddle.to_tensor(_, data_type) for _ in x] l = [paddle.to_tensor(_, data_type) for _ in x]
return paddle.concat(l) return paddle.concat(l)
@window_function_register.register()
def _acosh(x: Union[Tensor, float]) -> Tensor: def _acosh(x: Union[Tensor, float]) -> Tensor:
if isinstance(x, float): if isinstance(x, float):
return math.log(x + math.sqrt(x**2 - 1)) return math.log(x + math.sqrt(x**2 - 1))
return paddle.log(x + paddle.sqrt(paddle.square(x) - 1)) return paddle.log(x + paddle.sqrt(paddle.square(x) - 1))
@window_function_register.register()
def _extend(M: int, sym: bool) -> bool: def _extend(M: int, sym: bool) -> bool:
"""Extend window by 1 sample if needed for DFT-even symmetry. """ """Extend window by 1 sample if needed for DFT-even symmetry."""
if not sym: if not sym:
return M + 1, True return M + 1, True
else: else:
return M, False return M, False
@window_function_register.register()
def _len_guards(M: int) -> bool: def _len_guards(M: int) -> bool:
"""Handle small or incorrect window lengths. """ """Handle small or incorrect window lengths."""
if int(M) != M or M < 0: if int(M) != M or M < 0:
raise ValueError('Window length M must be a non-negative integer') raise ValueError('Window length M must be a non-negative integer')
return M <= 1 return M <= 1
@window_function_register.register()
def _truncate(w: Tensor, needed: bool) -> Tensor: def _truncate(w: Tensor, needed: bool) -> Tensor:
"""Truncate window by 1 sample if needed for DFT-even symmetry. """ """Truncate window by 1 sample if needed for DFT-even symmetry."""
if needed: if needed:
return w[:-1] return w[:-1]
else: else:
return w return w
def _general_gaussian(M: int, p, sig, sym: bool=True, @window_function_register.register()
dtype: str='float64') -> Tensor: def _general_gaussian(
M: int, p, sig, sym: bool = True, dtype: str = 'float64'
) -> Tensor:
"""Compute a window with a generalized Gaussian shape. """Compute a window with a generalized Gaussian shape.
This function is consistent with scipy.signal.windows.general_gaussian(). This function is consistent with scipy.signal.windows.general_gaussian().
""" """
if _len_guards(M): if _len_guards(M):
return paddle.ones((M, ), dtype=dtype) return paddle.ones((M,), dtype=dtype)
M, needs_trunc = _extend(M, sym) M, needs_trunc = _extend(M, sym)
n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0 n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0
w = paddle.exp(-0.5 * paddle.abs(n / sig)**(2 * p)) w = paddle.exp(-0.5 * paddle.abs(n / sig) ** (2 * p))
return _truncate(w, needs_trunc) return _truncate(w, needs_trunc)
def _general_cosine(M: int, a: float, sym: bool=True, @window_function_register.register()
dtype: str='float64') -> Tensor: def _general_cosine(
M: int, a: float, sym: bool = True, dtype: str = 'float64'
) -> Tensor:
"""Compute a generic weighted sum of cosine terms window. """Compute a generic weighted sum of cosine terms window.
This function is consistent with scipy.signal.windows.general_cosine(). This function is consistent with scipy.signal.windows.general_cosine().
""" """
if _len_guards(M): if _len_guards(M):
return paddle.ones((M, ), dtype=dtype) return paddle.ones((M,), dtype=dtype)
M, needs_trunc = _extend(M, sym) M, needs_trunc = _extend(M, sym)
fac = paddle.linspace(-math.pi, math.pi, M, dtype=dtype) fac = paddle.linspace(-math.pi, math.pi, M, dtype=dtype)
w = paddle.zeros((M, ), dtype=dtype) w = paddle.zeros((M,), dtype=dtype)
for k in range(len(a)): for k in range(len(a)):
w += a[k] * paddle.cos(k * fac) w += a[k] * paddle.cos(k * fac)
return _truncate(w, needs_trunc) return _truncate(w, needs_trunc)
def _general_hamming(M: int, alpha: float, sym: bool=True, @window_function_register.register()
dtype: str='float64') -> Tensor: def _general_hamming(
M: int, alpha: float, sym: bool = True, dtype: str = 'float64'
) -> Tensor:
"""Compute a generalized Hamming window. """Compute a generalized Hamming window.
This function is consistent with scipy.signal.windows.general_hamming() This function is consistent with scipy.signal.windows.general_hamming()
""" """
return _general_cosine(M, [alpha, 1. - alpha], sym, dtype=dtype) return _general_cosine(M, [alpha, 1.0 - alpha], sym, dtype=dtype)
def _taylor(M: int, @window_function_register.register()
nbar=4, def _taylor(
sll=30, M: int, nbar=4, sll=30, norm=True, sym: bool = True, dtype: str = 'float64'
norm=True, ) -> Tensor:
sym: bool=True,
dtype: str='float64') -> Tensor:
"""Compute a Taylor window. """Compute a Taylor window.
The Taylor window taper function approximates the Dolph-Chebyshev window's The Taylor window taper function approximates the Dolph-Chebyshev window's
constant sidelobe level for a parameterized number of near-in sidelobes. constant sidelobe level for a parameterized number of near-in sidelobes.
""" """
if _len_guards(M): if _len_guards(M):
return paddle.ones((M, ), dtype=dtype) return paddle.ones((M,), dtype=dtype)
M, needs_trunc = _extend(M, sym) M, needs_trunc = _extend(M, sym)
# Original text uses a negative sidelobe level parameter and then negates # Original text uses a negative sidelobe level parameter and then negates
# it in the calculation of B. To keep consistent with other methods we # it in the calculation of B. To keep consistent with other methods we
# assume the sidelobe level parameter to be positive. # assume the sidelobe level parameter to be positive.
B = 10**(sll / 20) B = 10 ** (sll / 20)
A = _acosh(B) / math.pi A = _acosh(B) / math.pi
s2 = nbar**2 / (A**2 + (nbar - 0.5)**2) s2 = nbar**2 / (A**2 + (nbar - 0.5) ** 2)
ma = paddle.arange(1, nbar, dtype=dtype) ma = paddle.arange(1, nbar, dtype=dtype)
Fm = paddle.empty((nbar - 1, ), dtype=dtype) Fm = paddle.empty((nbar - 1,), dtype=dtype)
signs = paddle.empty_like(ma) signs = paddle.empty_like(ma)
signs[::2] = 1 signs[::2] = 1
signs[1::2] = -1 signs[1::2] = -1
m2 = ma * ma m2 = ma * ma
for mi in range(len(ma)): for mi in range(len(ma)):
numer = signs[mi] * paddle.prod(1 - m2[mi] / s2 / (A**2 + (ma - 0.5)**2 numer = signs[mi] * paddle.prod(
)) 1 - m2[mi] / s2 / (A**2 + (ma - 0.5) ** 2)
)
if mi == 0: if mi == 0:
denom = 2 * paddle.prod(1 - m2[mi] / m2[mi + 1:]) denom = 2 * paddle.prod(1 - m2[mi] / m2[mi + 1 :])
elif mi == len(ma) - 1: elif mi == len(ma) - 1:
denom = 2 * paddle.prod(1 - m2[mi] / m2[:mi]) denom = 2 * paddle.prod(1 - m2[mi] / m2[:mi])
else: else:
denom = 2 * paddle.prod(1 - m2[mi] / m2[:mi]) * paddle.prod(1 - m2[ denom = (
mi] / m2[mi + 1:]) 2
* paddle.prod(1 - m2[mi] / m2[:mi])
* paddle.prod(1 - m2[mi] / m2[mi + 1 :])
)
Fm[mi] = numer / denom Fm[mi] = numer / denom
def W(n): def W(n):
return 1 + 2 * paddle.matmul( return 1 + 2 * paddle.matmul(
Fm.unsqueeze(0), Fm.unsqueeze(0),
paddle.cos(2 * math.pi * ma.unsqueeze(1) * (n - M / 2. + 0.5) / M)) paddle.cos(2 * math.pi * ma.unsqueeze(1) * (n - M / 2.0 + 0.5) / M),
)
w = W(paddle.arange(0, M, dtype=dtype)) w = W(paddle.arange(0, M, dtype=dtype))
...@@ -150,7 +179,8 @@ def _taylor(M: int, ...@@ -150,7 +179,8 @@ def _taylor(M: int,
return _truncate(w, needs_trunc) return _truncate(w, needs_trunc)
def _hamming(M: int, sym: bool=True, dtype: str='float64') -> Tensor: @window_function_register.register()
def _hamming(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
"""Compute a Hamming window. """Compute a Hamming window.
The Hamming window is a taper formed by using a raised cosine with The Hamming window is a taper formed by using a raised cosine with
non-zero endpoints, optimized to minimize the nearest side lobe. non-zero endpoints, optimized to minimize the nearest side lobe.
...@@ -158,7 +188,8 @@ def _hamming(M: int, sym: bool=True, dtype: str='float64') -> Tensor: ...@@ -158,7 +188,8 @@ def _hamming(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
return _general_hamming(M, 0.54, sym, dtype=dtype) return _general_hamming(M, 0.54, sym, dtype=dtype)
def _hann(M: int, sym: bool=True, dtype: str='float64') -> Tensor: @window_function_register.register()
def _hann(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
"""Compute a Hann window. """Compute a Hann window.
The Hann window is a taper formed by using a raised cosine or sine-squared The Hann window is a taper formed by using a raised cosine or sine-squared
with ends that touch zero. with ends that touch zero.
...@@ -166,15 +197,18 @@ def _hann(M: int, sym: bool=True, dtype: str='float64') -> Tensor: ...@@ -166,15 +197,18 @@ def _hann(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
return _general_hamming(M, 0.5, sym, dtype=dtype) return _general_hamming(M, 0.5, sym, dtype=dtype)
def _tukey(M: int, alpha=0.5, sym: bool=True, dtype: str='float64') -> Tensor: @window_function_register.register()
def _tukey(
M: int, alpha=0.5, sym: bool = True, dtype: str = 'float64'
) -> Tensor:
"""Compute a Tukey window. """Compute a Tukey window.
The Tukey window is also known as a tapered cosine window. The Tukey window is also known as a tapered cosine window.
""" """
if _len_guards(M): if _len_guards(M):
return paddle.ones((M, ), dtype=dtype) return paddle.ones((M,), dtype=dtype)
if alpha <= 0: if alpha <= 0:
return paddle.ones((M, ), dtype=dtype) return paddle.ones((M,), dtype=dtype)
elif alpha >= 1.0: elif alpha >= 1.0:
return hann(M, sym=sym) return hann(M, sym=sym)
...@@ -182,53 +216,48 @@ def _tukey(M: int, alpha=0.5, sym: bool=True, dtype: str='float64') -> Tensor: ...@@ -182,53 +216,48 @@ def _tukey(M: int, alpha=0.5, sym: bool=True, dtype: str='float64') -> Tensor:
n = paddle.arange(0, M, dtype=dtype) n = paddle.arange(0, M, dtype=dtype)
width = int(alpha * (M - 1) / 2.0) width = int(alpha * (M - 1) / 2.0)
n1 = n[0:width + 1] n1 = n[0 : width + 1]
n2 = n[width + 1:M - width - 1] n2 = n[width + 1 : M - width - 1]
n3 = n[M - width - 1:] n3 = n[M - width - 1 :]
w1 = 0.5 * (1 + paddle.cos(math.pi * (-1 + 2.0 * n1 / alpha / (M - 1)))) w1 = 0.5 * (1 + paddle.cos(math.pi * (-1 + 2.0 * n1 / alpha / (M - 1))))
w2 = paddle.ones(n2.shape, dtype=dtype) w2 = paddle.ones(n2.shape, dtype=dtype)
w3 = 0.5 * (1 + paddle.cos(math.pi * (-2.0 / alpha + 1 + 2.0 * n3 / alpha / w3 = 0.5 * (
(M - 1)))) 1
+ paddle.cos(math.pi * (-2.0 / alpha + 1 + 2.0 * n3 / alpha / (M - 1)))
)
w = paddle.concat([w1, w2, w3]) w = paddle.concat([w1, w2, w3])
return _truncate(w, needs_trunc) return _truncate(w, needs_trunc)
def _kaiser(M: int, beta: float, sym: bool=True, @window_function_register.register()
dtype: str='float64') -> Tensor: def _gaussian(
"""Compute a Kaiser window. M: int, std: float, sym: bool = True, dtype: str = 'float64'
The Kaiser window is a taper formed by using a Bessel function. ) -> Tensor:
"""
raise NotImplementedError()
def _gaussian(M: int, std: float, sym: bool=True,
dtype: str='float64') -> Tensor:
"""Compute a Gaussian window. """Compute a Gaussian window.
The Gaussian widows has a Gaussian shape defined by the standard deviation(std). The Gaussian widows has a Gaussian shape defined by the standard deviation(std).
""" """
if _len_guards(M): if _len_guards(M):
return paddle.ones((M, ), dtype=dtype) return paddle.ones((M,), dtype=dtype)
M, needs_trunc = _extend(M, sym) M, needs_trunc = _extend(M, sym)
n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0 n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0
sig2 = 2 * std * std sig2 = 2 * std * std
w = paddle.exp(-n**2 / sig2) w = paddle.exp(-(n**2) / sig2)
return _truncate(w, needs_trunc) return _truncate(w, needs_trunc)
def _exponential(M: int, @window_function_register.register()
center=None, def _exponential(
tau=1., M: int, center=None, tau=1.0, sym: bool = True, dtype: str = 'float64'
sym: bool=True, ) -> Tensor:
dtype: str='float64') -> Tensor: """Compute an exponential (or Poisson) window."""
"""Compute an exponential (or Poisson) window. """
if sym and center is not None: if sym and center is not None:
raise ValueError("If sym==True, center must be None.") raise ValueError("If sym==True, center must be None.")
if _len_guards(M): if _len_guards(M):
return paddle.ones((M, ), dtype=dtype) return paddle.ones((M,), dtype=dtype)
M, needs_trunc = _extend(M, sym) M, needs_trunc = _extend(M, sym)
if center is None: if center is None:
...@@ -240,11 +269,11 @@ def _exponential(M: int, ...@@ -240,11 +269,11 @@ def _exponential(M: int,
return _truncate(w, needs_trunc) return _truncate(w, needs_trunc)
def _triang(M: int, sym: bool=True, dtype: str='float64') -> Tensor: @window_function_register.register()
"""Compute a triangular window. def _triang(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
""" """Compute a triangular window."""
if _len_guards(M): if _len_guards(M):
return paddle.ones((M, ), dtype=dtype) return paddle.ones((M,), dtype=dtype)
M, needs_trunc = _extend(M, sym) M, needs_trunc = _extend(M, sym)
n = paddle.arange(1, (M + 1) // 2 + 1, dtype=dtype) n = paddle.arange(1, (M + 1) // 2 + 1, dtype=dtype)
...@@ -258,23 +287,26 @@ def _triang(M: int, sym: bool=True, dtype: str='float64') -> Tensor: ...@@ -258,23 +287,26 @@ def _triang(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
return _truncate(w, needs_trunc) return _truncate(w, needs_trunc)
def _bohman(M: int, sym: bool=True, dtype: str='float64') -> Tensor: @window_function_register.register()
def _bohman(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
"""Compute a Bohman window. """Compute a Bohman window.
The Bohman window is the autocorrelation of a cosine window. The Bohman window is the autocorrelation of a cosine window.
""" """
if _len_guards(M): if _len_guards(M):
return paddle.ones((M, ), dtype=dtype) return paddle.ones((M,), dtype=dtype)
M, needs_trunc = _extend(M, sym) M, needs_trunc = _extend(M, sym)
fac = paddle.abs(paddle.linspace(-1, 1, M, dtype=dtype)[1:-1]) fac = paddle.abs(paddle.linspace(-1, 1, M, dtype=dtype)[1:-1])
w = (1 - fac) * paddle.cos(math.pi * fac) + 1.0 / math.pi * paddle.sin( w = (1 - fac) * paddle.cos(math.pi * fac) + 1.0 / math.pi * paddle.sin(
math.pi * fac) math.pi * fac
)
w = _cat([0, w, 0], dtype) w = _cat([0, w, 0], dtype)
return _truncate(w, needs_trunc) return _truncate(w, needs_trunc)
def _blackman(M: int, sym: bool=True, dtype: str='float64') -> Tensor: @window_function_register.register()
def _blackman(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
"""Compute a Blackman window. """Compute a Blackman window.
The Blackman window is a taper formed by using the first three terms of The Blackman window is a taper formed by using the first three terms of
a summation of cosines. It was designed to have close to the minimal a summation of cosines. It was designed to have close to the minimal
...@@ -284,31 +316,44 @@ def _blackman(M: int, sym: bool=True, dtype: str='float64') -> Tensor: ...@@ -284,31 +316,44 @@ def _blackman(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
return _general_cosine(M, [0.42, 0.50, 0.08], sym, dtype=dtype) return _general_cosine(M, [0.42, 0.50, 0.08], sym, dtype=dtype)
def _cosine(M: int, sym: bool=True, dtype: str='float64') -> Tensor: @window_function_register.register()
"""Compute a window with a simple cosine shape. def _cosine(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
""" """Compute a window with a simple cosine shape."""
if _len_guards(M): if _len_guards(M):
return paddle.ones((M, ), dtype=dtype) return paddle.ones((M,), dtype=dtype)
M, needs_trunc = _extend(M, sym) M, needs_trunc = _extend(M, sym)
w = paddle.sin(math.pi / M * (paddle.arange(0, M, dtype=dtype) + .5)) w = paddle.sin(math.pi / M * (paddle.arange(0, M, dtype=dtype) + 0.5))
return _truncate(w, needs_trunc) return _truncate(w, needs_trunc)
def get_window(window: Union[str, Tuple[str, float]], def get_window(
win_length: int, window: Union[str, Tuple[str, float]],
fftbins: bool=True, win_length: int,
dtype: str='float64') -> Tensor: fftbins: bool = True,
dtype: str = 'float64',
) -> Tensor:
"""Return a window of a given length and type. """Return a window of a given length and type.
Args: Args:
window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'general_gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'.
win_length (int): Number of samples. win_length (int): Number of samples.
fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True. fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True.
dtype (str, optional): The data type of the return window. Defaults to 'float64'. dtype (str, optional): The data type of the return window. Defaults to 'float64'.
Returns: Returns:
Tensor: The window represented as a tensor. Tensor: The window represented as a tensor.
Examples:
.. code-block:: python
import paddle
n_fft = 512
cosine_window = paddle.audio.functional.get_window('cosine', n_fft)
std = 7
gaussian_window = paddle.audio.functional.get_window(('gaussian',std), n_fft)
""" """
sym = not fftbins sym = not fftbins
...@@ -319,19 +364,22 @@ def get_window(window: Union[str, Tuple[str, float]], ...@@ -319,19 +364,22 @@ def get_window(window: Union[str, Tuple[str, float]],
args = window[1:] args = window[1:]
elif isinstance(window, str): elif isinstance(window, str):
if window in ['gaussian', 'exponential']: if window in ['gaussian', 'exponential']:
raise ValueError("The '" + window + "' window needs one or " raise ValueError(
"more parameters -- pass a tuple.") "The '" + window + "' window needs one or "
"more parameters -- pass a tuple."
)
else: else:
winstr = window winstr = window
else: else:
raise ValueError("%s as window type is not supported." % raise ValueError(
str(type(window))) "%s as window type is not supported." % str(type(window))
)
try: try:
winfunc = eval('_' + winstr) winfunc = window_function_register.get('_' + winstr)
except KeyError as e: except KeyError as e:
raise ValueError("Unknown window type.") from e raise ValueError("Unknown window type.") from e
params = (win_length, ) + args params = (win_length,) + args
kwargs = {'sym': sym} kwargs = {'sym': sym}
return winfunc(*params, dtype=dtype, **kwargs) return winfunc(*params, dtype=dtype, **kwargs)
...@@ -11,3 +11,5 @@ ...@@ -11,3 +11,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .kaldi import fbank
from .kaldi import pitch
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddleaudio
from paddleaudio._internal import module_utils
# Names exported by `from <module> import *`.
__all__ = [
'fbank',
'pitch',
]
@module_utils.requires_kaldi()
def fbank(
        wav,
        samp_freq: int=16000,
        frame_shift_ms: float=10.0,
        frame_length_ms: float=25.0,
        dither: float=0.0,
        preemph_coeff: float=0.97,
        remove_dc_offset: bool=True,
        window_type: str='povey',
        round_to_power_of_two: bool=True,
        blackman_coeff: float=0.42,
        snip_edges: bool=True,
        allow_downsample: bool=False,
        allow_upsample: bool=False,
        max_feature_vectors: int=-1,
        num_bins: int=23,
        low_freq: float=20,
        high_freq: float=0,
        vtln_low: float=100,
        vtln_high: float=-500,
        debug_mel: bool=False,
        htk_mode: bool=False,
        use_energy: bool=False,  # fbank opts
        energy_floor: float=0.0,
        raw_energy: bool=True,
        htk_compat: bool=False,
        use_log_fbank: bool=True,
        use_power: bool=True):
    """Compute filter-bank features with the compiled paddleaudio extension.

    Each keyword argument is copied onto the same-named attribute of one of
    three option objects created from ``paddleaudio._paddleaudio``
    (``FrameExtractionOptions``, ``MelBanksOptions``, ``FbankOptions``),
    which are then passed together with ``wav`` to ``ComputeFbank``.

    Args:
        wav: Input handed to ``ComputeFbank`` unchanged.
        samp_freq .. max_feature_vectors: Frame-extraction options.
        num_bins .. htk_mode: Mel-bank options.
        use_energy .. use_power: Fbank options.

    Returns:
        Whatever ``paddleaudio._paddleaudio.ComputeFbank`` returns.
    """
    frame_opts = paddleaudio._paddleaudio.FrameExtractionOptions()
    mel_opts = paddleaudio._paddleaudio.MelBanksOptions()
    fbank_opts = paddleaudio._paddleaudio.FbankOptions()

    # Option values grouped by the object they configure. Dicts preserve
    # insertion order, so attributes are set in the listed order.
    frame_settings = {
        "samp_freq": samp_freq,
        "frame_shift_ms": frame_shift_ms,
        "frame_length_ms": frame_length_ms,
        "dither": dither,
        "preemph_coeff": preemph_coeff,
        "remove_dc_offset": remove_dc_offset,
        "window_type": window_type,
        "round_to_power_of_two": round_to_power_of_two,
        "blackman_coeff": blackman_coeff,
        "snip_edges": snip_edges,
        "allow_downsample": allow_downsample,
        "allow_upsample": allow_upsample,
        "max_feature_vectors": max_feature_vectors,
    }
    mel_settings = {
        "num_bins": num_bins,
        "low_freq": low_freq,
        "high_freq": high_freq,
        "vtln_low": vtln_low,
        "vtln_high": vtln_high,
        "debug_mel": debug_mel,
        "htk_mode": htk_mode,
    }
    fbank_settings = {
        "use_energy": use_energy,
        "energy_floor": energy_floor,
        "raw_energy": raw_energy,
        "htk_compat": htk_compat,
        "use_log_fbank": use_log_fbank,
        "use_power": use_power,
    }

    for options, settings in ((frame_opts, frame_settings),
                              (mel_opts, mel_settings),
                              (fbank_opts, fbank_settings)):
        for attr_name, value in settings.items():
            setattr(options, attr_name, value)

    return paddleaudio._paddleaudio.ComputeFbank(frame_opts, mel_opts,
                                                 fbank_opts, wav)
@module_utils.requires_kaldi()
def pitch(wav,
          samp_freq: int=16000,
          frame_shift_ms: float=10.0,
          frame_length_ms: float=25.0,
          preemph_coeff: float=0.0,
          min_f0: int=50,
          max_f0: int=400,
          soft_min_f0: float=10.0,
          penalty_factor: float=0.1,
          lowpass_cutoff: int=1000,
          resample_freq: int=4000,
          delta_pitch: float=0.005,
          nccf_ballast: int=7000,
          lowpass_filter_width: int=1,
          upsample_filter_width: int=5,
          max_frames_latency: int=0,
          frames_per_chunk: int=0,
          simulate_first_pass_online: bool=False,
          recompute_frame: int=500,
          nccf_ballast_online: bool=False,
          snip_edges: bool=True):
    """Compute pitch features through the compiled kaldi binding.

    Each keyword argument is copied, under the same name, onto a
    ``PitchExtractionOptions`` object from ``paddleaudio._paddleaudio``, which
    is then passed to ``ComputeKaldiPitch`` together with ``wav``.

    Args:
        wav: Waveform forwarded unchanged to ``ComputeKaldiPitch``.
            NOTE(review): presumably a 1-D float32 numpy array -- confirm
            against the binding signature.
        samp_freq ... snip_edges: Values for the equally named fields of
            ``PitchExtractionOptions``.

    Returns:
        The value returned by ``paddleaudio._paddleaudio.ComputeKaldiPitch``.
    """
    pitch_opts = paddleaudio._paddleaudio.PitchExtractionOptions()

    # (field name, value) pairs, applied in the listed order.
    option_fields = (
        ('samp_freq', samp_freq),
        ('frame_shift_ms', frame_shift_ms),
        ('frame_length_ms', frame_length_ms),
        ('preemph_coeff', preemph_coeff),
        ('min_f0', min_f0),
        ('max_f0', max_f0),
        ('soft_min_f0', soft_min_f0),
        ('penalty_factor', penalty_factor),
        ('lowpass_cutoff', lowpass_cutoff),
        ('resample_freq', resample_freq),
        ('delta_pitch', delta_pitch),
        ('nccf_ballast', nccf_ballast),
        ('lowpass_filter_width', lowpass_filter_width),
        ('upsample_filter_width', upsample_filter_width),
        ('max_frames_latency', max_frames_latency),
        ('frames_per_chunk', frames_per_chunk),
        ('simulate_first_pass_online', simulate_first_pass_online),
        ('recompute_frame', recompute_frame),
        ('nccf_ballast_online', nccf_ballast_online),
        ('snip_edges', snip_edges), )
    for field_name, field_value in option_fields:
        setattr(pitch_opts, field_name, field_value)

    features = paddleaudio._paddleaudio.ComputeKaldiPitch(pitch_opts, wav)
    return features
from paddleaudio._internal import module_utils as _mod_utils
from .sox_effects import apply_effects_file
from .sox_effects import apply_effects_tensor
from .sox_effects import effect_names
from .sox_effects import init_sox_effects
from .sox_effects import shutdown_sox_effects
# Import-time side effect: when the sox backend is reported as available,
# initialize the sox effects once and register the matching shutdown so it
# runs at interpreter exit.
if _mod_utils.is_sox_available():
    import atexit
    init_sox_effects()
    atexit.register(shutdown_sox_effects)
__all__ = [
"init_sox_effects",
"shutdown_sox_effects",
"effect_names",
"apply_effects_tensor",
"apply_effects_file",
]
import os
from typing import List
from typing import Optional
from typing import Tuple
import paddle
import paddleaudio
from paddleaudio._internal import module_utils as _mod_utils
from paddleaudio.utils.sox_utils import list_effects
#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/sox_effects/sox_effects.py
@_mod_utils.requires_sox()
def init_sox_effects():
    """Set up the resources that sox effects need.

    Note:
        Calling this by hand is normally unnecessary; it is invoked
        automatically. Repeated calls are safe as long as
        :func:`shutdown_sox_effects` has not been called. After
        :func:`shutdown_sox_effects`, SoX effects can no longer be used and
        initializing again results in an error.
    """
    initialize = paddleaudio._paddleaudio.sox_effects_initialize_sox_effects
    initialize()
@_mod_utils.requires_sox()
def shutdown_sox_effects():
    """Release the resources that sox effects hold.

    Note:
        Calling this by hand is normally unnecessary; it is invoked
        automatically. Calling it several times is safe. After it has run,
        SoX effects can no longer be used and initializing again results in
        an error.
    """
    shutdown = paddleaudio._paddleaudio.sox_effects_shutdown_sox_effects
    shutdown()
@_mod_utils.requires_sox()
def effect_names() -> List[str]:
    """Return the names of the sox effects that can be used.

    Returns:
        List[str]: The keys of the mapping returned by ``list_effects()``.

    Example
        >>> paddleaudio.sox_effects.effect_names()
        ['allpass', 'band', 'bandpass', ... ]
    """
    available = list_effects()
    names = list(available.keys())
    return names
@_mod_utils.requires_sox()
def apply_effects_tensor(
        tensor: paddle.Tensor,
        sample_rate: int,
        effects: List[List[str]],
        channels_first: bool=True, ) -> Tuple[paddle.Tensor, int]:
    """Apply sox effects to the given Tensor.

    .. devices:: CPU

    Note:
        This function only works on CPU Tensors.
        It behaves much like the ``sox`` command, with slight differences.
        For example, the ``sox`` command adds certain effects automatically
        (such as a ``rate`` effect after ``speed``, ``pitch`` and other
        effects), whereas this function applies only the effects it is given.
        (Therefore, to actually apply the ``speed`` effect, you also need to
        give a ``rate`` effect with the desired sampling rate.)

    Args:
        tensor (paddle.Tensor): Input 2D CPU Tensor.
        sample_rate (int): Sample rate.
        effects (List[List[str]]): List of effects.
        channels_first (bool, optional): Indicates whether the input Tensor's
            dimension is `[channels, time]` or `[time, channels]`.

    Returns:
        (Tensor, int): Resulting Tensor and sample rate.
        The resulting Tensor has the same ``dtype`` as the input Tensor, and
        the same channels order. The shape of the Tensor can be different
        based on the effects applied. Sample rate can also be different based
        on the effects applied.

    Raises:
        RuntimeError: If the native call returns ``None``.

    Example - Basic usage
        >>>
        >>> # Defines the effects to apply
        >>> effects = [
        ...     ['gain', '-n'],  # normalises to 0dB
        ...     ['pitch', '5'],  # 5 cent pitch shift
        ...     ['rate', '8000'],  # resample to 8000 Hz
        ... ]
        >>>
        >>> # Generate pseudo wave:
        >>> # normalized, channels first, 2ch, sampling rate 16000, 1 second
        >>> sample_rate = 16000
        >>> waveform = 2 * paddle.rand([2, sample_rate * 1]) - 1
        >>> waveform.shape
        [2, 16000]
        >>> waveform
        tensor([[ 0.3138,  0.7620, -0.9019,  ..., -0.7495, -0.4935,  0.5442],
                [-0.0832,  0.0061,  0.8233,  ..., -0.5176, -0.9140, -0.2434]])
        >>>
        >>> # Apply effects
        >>> waveform, sample_rate = apply_effects_tensor(
        ...     waveform, sample_rate, effects, channels_first=True)
        >>>
        >>> # Check the result
        >>> # The new waveform is sampling rate 8000, 1 second.
        >>> # normalization and channel order are preserved
        >>> waveform.shape
        [2, 8000]
        >>> waveform
        tensor([[ 0.5054, -0.5518, -0.4800,  ..., -0.0076,  0.0096, -0.0110],
                [ 0.1331,  0.0436, -0.3783,  ..., -0.0035,  0.0012,  0.0008]])
        >>> sample_rate
        8000
    """
    # The native binding is called with a numpy array, not a paddle Tensor.
    samples = tensor.numpy()
    ret = paddleaudio._paddleaudio.sox_effects_apply_effects_tensor(
        samples, sample_rate, effects, channels_first)
    if ret is None:
        raise RuntimeError("Failed to apply sox effect")
    processed, out_sample_rate = ret[0], ret[1]
    return (paddle.to_tensor(processed), out_sample_rate)
@_mod_utils.requires_sox()
def apply_effects_file(
        path: str,
        effects: List[List[str]],
        normalize: bool=True,
        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[paddle.Tensor, int]:
    """Apply sox effects to an audio file and load the result as a Tensor.

    Note:
        This function behaves much like the ``sox`` command, with slight
        differences. For example, the ``sox`` command adds certain effects
        automatically (such as a ``rate`` effect after ``speed``, ``pitch``
        etc), whereas this function applies only the given effects.
        Therefore, to actually apply the ``speed`` effect, you also need to
        give a ``rate`` effect with the desired sampling rate, because
        internally the ``speed`` effect only alters the sampling rate and
        leaves the samples untouched.

    Args:
        path (path-like object or file-like object):
            Source of the audio. An object with a ``read`` attribute is
            passed to the file-object binding; anything else is converted
            with ``os.fspath`` and passed to the file binding.
        effects (List[List[str]]): List of effects.
        normalize (bool, optional):
            When ``True``, this function always returns ``float32``, and
            sample values are normalized to ``[-1.0, 1.0]``.
            If the input file is integer WAV, giving ``False`` will change
            the resulting Tensor type to integer type. This argument has no
            effect for formats other than integer WAV type.
        channels_first (bool, optional): When True, the returned Tensor has
            dimension `[channel, time]`.
            Otherwise, the returned Tensor's dimension is `[time, channel]`.
        format (str or None, optional):
            Override the format detection with the given format.
            Providing the argument might help when libsox can not infer the
            format from header or extension.

    Returns:
        (Tensor, int): Resulting Tensor and sample rate.
        If ``normalize=True``, the resulting Tensor is always ``float32``
        type. If ``normalize=False`` and the input audio file is an integer
        WAV file, then the resulting Tensor has the corresponding integer
        type. (Note 24 bit integer type is not supported)
        If ``channels_first=True``, the resulting Tensor has dimension
        `[channel, time]`, otherwise `[time, channel]`.

    Raises:
        RuntimeError: If the native call returns ``None``.

    Example - Basic usage
        >>>
        >>> # Defines the effects to apply
        >>> effects = [
        ...     ['gain', '-n'],  # normalises to 0dB
        ...     ['pitch', '5'],  # 5 cent pitch shift
        ...     ['rate', '8000'],  # resample to 8000 Hz
        ... ]
        >>>
        >>> # Apply effects and load data with channels_first=True
        >>> waveform, sample_rate = apply_effects_file("data.wav", effects, channels_first=True)
        >>>
        >>> # Check the result
        >>> waveform.shape
        [2, 8000]
        >>> waveform
        tensor([[ 5.1151e-03,  1.8073e-02,  2.2188e-02,  ...,  1.0431e-07,
                 -1.4761e-07,  1.8114e-07],
                [-2.6924e-03,  2.1860e-03,  1.0650e-02,  ...,  6.4122e-07,
                 -5.6159e-07,  4.8103e-07]])
        >>> sample_rate
        8000

    Example - Apply random speed perturbation to dataset
        >>>
        >>> # Load data from file, apply random speed perturbation
        >>> class RandomPerturbationFile(paddle.io.Dataset):
        ...     \"\"\"Given flist, apply random speed perturbation
        ...
        ...     Suppose all the input files are at least one second long.
        ...     \"\"\"
        ...     def __init__(self, flist: List[str], sample_rate: int):
        ...         super().__init__()
        ...         self.flist = flist
        ...         self.sample_rate = sample_rate
        ...
        ...     def __getitem__(self, index):
        ...         speed = 0.5 + 1.5 * random.random()
        ...         effects = [
        ...             ['gain', '-n', '-10'],  # apply 10 db attenuation
        ...             ['remix', '-'],  # merge all the channels
        ...             ['speed', f'{speed:.5f}'],  # duration is now 0.5 ~ 2.0 seconds.
        ...             ['rate', f'{self.sample_rate}'],
        ...             ['pad', '0', '1.5'],  # add 1.5 seconds silence at the end
        ...             ['trim', '0', '2'],  # get the first 2 seconds
        ...         ]
        ...         waveform, _ = paddleaudio.sox_effects.apply_effects_file(
        ...             self.flist[index], effects)
        ...         return waveform
        ...
        ...     def __len__(self):
        ...         return len(self.flist)
        ...
        >>> dataset = RandomPerturbationFile(file_list, sample_rate=8000)
        >>> loader = paddle.io.DataLoader(dataset, batch_size=32)
        >>> for batch in loader:
        >>>     pass
    """
    if hasattr(path, "read"):
        # File-like object: hand it to the file-object binding unchanged.
        ret = paddleaudio._paddleaudio.apply_effects_fileobj(
            path, effects, normalize, channels_first, format)
    else:
        # Path-like object: normalize it to its file-system representation.
        path = os.fspath(path)
        ret = paddleaudio._paddleaudio.sox_effects_apply_effects_file(
            path, effects, normalize, channels_first, format)

    if ret is None:
        raise RuntimeError("Failed to load audio from {}".format(path))
    return (paddle.to_tensor(ret[0]), ret[1])
# MSVC: export every symbol of shared libraries built in this directory.
if(MSVC)
  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()

# Apple: give shared libraries the ".so" suffix.
if(APPLE)
  set(CMAKE_SHARED_LIBRARY_SUFFIX ".so")
endif()
################################################################################
# libpaddleaudio
################################################################################
# Inputs for the core shared library; the optional components below append
# to the link-library and compile-definition lists.
set(LIBPADDLEAUDIO_SOURCES utils.cpp)
set(LIBPADDLEAUDIO_INCLUDE_DIRS ${PROJECT_SOURCE_DIR})
# set() without a value leaves these two unset, i.e. they start out empty.
set(LIBPADDLEAUDIO_LINK_LIBRARIES)
set(LIBPADDLEAUDIO_COMPILE_DEFINITIONS)
#------------------------------------------------------------------------------#
# START OF CUSTOMIZATION LOGICS
#------------------------------------------------------------------------------#
# SoX support: link libsox and define INCLUDE_SOX.
if(BUILD_SOX)
  list(APPEND LIBPADDLEAUDIO_LINK_LIBRARIES libsox)
  # No sox-specific sources are added to the core library here.
  list(APPEND LIBPADDLEAUDIO_SOURCES)
  list(APPEND LIBPADDLEAUDIO_COMPILE_DEFINITIONS INCLUDE_SOX)
endif()

# Kaldi support: link libkaldi and define INCLUDE_KALDI and
# COMPILE_WITHOUT_OPENFST.
if(BUILD_KALDI)
  list(APPEND LIBPADDLEAUDIO_LINK_LIBRARIES libkaldi)
  list(APPEND LIBPADDLEAUDIO_COMPILE_DEFINITIONS
    INCLUDE_KALDI
    COMPILE_WITHOUT_OPENFST
  )
endif()
#------------------------------------------------------------------------------#
# END OF CUSTOMIZATION LOGICS
#------------------------------------------------------------------------------#
# define_library(<name> <source> <include_dirs> <link_libraries> <compile_defs>)
#
# Creates the SHARED library <name> from the list <source>, with private
# include directories and compile definitions, linked against
# <link_libraries>. The file name carries no "lib" prefix (PREFIX is empty)
# and uses the ".pyd" suffix under MSVC. The target is installed into "lib".
# Each argument after <name> is a ;-separated list passed as one quoted value.
function(define_library name source include_dirs link_libraries compile_defs)
  add_library(${name} SHARED ${source})

  target_include_directories(${name} PRIVATE ${include_dirs})
  target_compile_definitions(${name} PRIVATE ${compile_defs})
  target_link_libraries(${name} ${link_libraries})

  set_target_properties(${name} PROPERTIES PREFIX "")
  if(MSVC)
    set_target_properties(${name} PROPERTIES SUFFIX ".pyd")
  endif()

  install(
    TARGETS ${name}
    LIBRARY DESTINATION lib
    RUNTIME DESTINATION lib  # For Windows
  )
endfunction()
define_library(
  libpaddleaudio
  "${LIBPADDLEAUDIO_SOURCES}"
  "${LIBPADDLEAUDIO_INCLUDE_DIRS}"
  "${LIBPADDLEAUDIO_LINK_LIBRARIES}"
  "${LIBPADDLEAUDIO_COMPILE_DEFINITIONS}"
)

if(APPLE)
  # After linking, point the libgcc_s reference that uses the absolute
  # ${GFORTRAN_LIBRARIES_DIR} path at a copy next to the library instead.
  # $<TARGET_FILE:...> names the built artifact wherever the generator places
  # it (the previous hard-coded "libpaddleaudio.so" only worked when the
  # command ran in the output directory); VERBATIM keeps argument escaping
  # platform independent.
  add_custom_command(
    TARGET libpaddleaudio
    POST_BUILD
    COMMAND
      install_name_tool -change
      "${GFORTRAN_LIBRARIES_DIR}/libgcc_s.1.1.dylib"
      "@loader_path/libgcc_s.1.1.dylib"
      "$<TARGET_FILE:libpaddleaudio>"
    VERBATIM
  )
  set(AUDIO_LIBRARY libpaddleaudio CACHE INTERNAL "")
else()
  if(UNIX)
    # Resolve runtime dependencies relative to the installed library itself.
    set_target_properties(libpaddleaudio PROPERTIES INSTALL_RPATH "$ORIGIN")
  endif()
  # Wrap the library in --no-as-needed/--as-needed for consumers of
  # AUDIO_LIBRARY.
  # NOTE(review): this branch is also taken on Windows, where these GNU ld
  # flags may not be understood -- confirm whether AUDIO_LIBRARY is used there.
  set(AUDIO_LIBRARY -Wl,--no-as-needed libpaddleaudio -Wl,--as-needed CACHE INTERNAL "")
endif()
################################################################################
# _paddleaudio.so
################################################################################
if (BUILD_PADDLEAUDIO_PYTHON_EXTENSION)
# On Windows the extension must link against the Python import library, so
# locate the Development component of the requested Python version.
# REQUIRED makes a failed lookup stop configuration here with a clear message
# instead of surfacing later as a missing Python3::Python link target.
if(WIN32)
  find_package(Python3 ${PYTHON_VERSION} EXACT REQUIRED COMPONENTS Development)
  set(ADDITIONAL_ITEMS Python3::Python)
endif()
# define_extension(<name> <sources> <include_dirs> <libraries> <definitions>)
#
# Creates the SHARED library <name> (a Python extension module) from
# <sources>. Besides <include_dirs>, the project source directory and the
# Python and pybind11 include directories are added; besides <libraries>,
# ${PYTHON_LIBRARY} and ${ADDITIONAL_ITEMS} are linked. The file name carries
# no "lib" prefix and uses the ".pyd" suffix under MSVC. The target is
# installed into the root of the install prefix.
function(define_extension name sources include_dirs libraries definitions)
  add_library(${name} SHARED ${sources})

  target_compile_definitions(${name} PRIVATE "${definitions}")
  target_include_directories(
    ${name}
    PRIVATE
      ${PROJECT_SOURCE_DIR}
      ${Python_INCLUDE_DIR}
      ${pybind11_INCLUDE_DIR}
      ${include_dirs}
  )
  target_link_libraries(${name} ${libraries} ${PYTHON_LIBRARY} ${ADDITIONAL_ITEMS})

  set_target_properties(${name} PROPERTIES PREFIX "")
  if(MSVC)
    set_target_properties(${name} PROPERTIES SUFFIX ".pyd")
  endif()
  if(APPLE)
    # Leave undefined symbols to be resolved when the module is loaded.
    # https://github.com/facebookarchive/caffe2/issues/854#issuecomment-364538485
    # https://github.com/pytorch/pytorch/commit/73f6715f4725a0723d8171d3131e09ac7abf0666
    set_target_properties(${name} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
  endif()

  install(
    TARGETS ${name}
    LIBRARY DESTINATION .
    RUNTIME DESTINATION .  # For Windows
  )
endfunction()
# Sources of the _paddleaudio extension module; optional components append
# their binding sources below.
set(EXTENSION_SOURCES pybind/pybind.cpp)

#----------------------------------------------------------------------------#
# START OF CUSTOMIZATION LOGICS
#----------------------------------------------------------------------------#
if(BUILD_SOX)
  list(APPEND EXTENSION_SOURCES
    pybind/sox/effects.cpp
    pybind/sox/effects_chain.cpp
    pybind/sox/io.cpp
    pybind/sox/types.cpp
    pybind/sox/utils.cpp
  )
endif()

if(BUILD_KALDI)
  list(APPEND EXTENSION_SOURCES
    pybind/kaldi/kaldi_feature_wrapper.cc
    pybind/kaldi/kaldi_feature.cc
  )
endif()
#----------------------------------------------------------------------------#
# END OF CUSTOMIZATION LOGICS
#----------------------------------------------------------------------------#

# The extension has no extra include directories, links against
# libpaddleaudio and reuses the core library's compile definitions.
define_extension(
  _paddleaudio
  "${EXTENSION_SOURCES}"
  ""
  libpaddleaudio
  "${LIBPADDLEAUDIO_COMPILE_DEFINITIONS}"
)
# if(BUILD_CTC_DECODER)
# set(
# DECODER_EXTENSION_SOURCES
# decoder/bindings/pybind.cpp
# )
# define_extension(
# _paddleaudio_decoder
# "${DECODER_EXTENSION_SOURCES}"
# ""
# "libpaddleaudio_decoder"
# "${LIBPADDLEAUDIO_DECODER_DEFINITIONS}"
# )
# endif()
# if(USE_FFMPEG)
# set(
# FFMPEG_EXTENSION_SOURCES
# ffmpeg/pybind/typedefs.cpp
# ffmpeg/pybind/pybind.cpp
# ffmpeg/pybind/stream_reader.cpp
# )
# define_extension(
# _paddleaudio_ffmpeg
# "${FFMPEG_EXTENSION_SOURCES}"
# "${FFMPEG_INCLUDE_DIRS}"
# "libpaddleaudio_ffmpeg"
# "${LIBPADDLEAUDIO_DECODER_DEFINITIONS}"
# )
# endif()
endif()
# Post-processing of the _paddleaudio extension module. The target is only
# created when BUILD_PADDLEAUDIO_PYTHON_EXTENSION is enabled, so guard on its
# existence: referencing a missing target in add_custom_command(TARGET ...) or
# set_target_properties() is a configure-time error.
if(TARGET _paddleaudio)
  if(APPLE)
    # Point the libgcc_s reference that uses the absolute
    # ${GFORTRAN_LIBRARIES_DIR} path at the copy under "lib/" next to the
    # module. $<TARGET_FILE:...> names the built artifact wherever the
    # generator places it; VERBATIM keeps argument escaping portable.
    add_custom_command(
      TARGET _paddleaudio
      POST_BUILD
      COMMAND
        install_name_tool -change
        "${GFORTRAN_LIBRARIES_DIR}/libgcc_s.1.1.dylib"
        "@loader_path/lib/libgcc_s.1.1.dylib"
        "$<TARGET_FILE:_paddleaudio>"
      VERBATIM
    )
  elseif(UNIX)
    # Resolve runtime dependencies from the "lib" directory next to the module.
    set_target_properties(_paddleaudio PROPERTIES INSTALL_RPATH "$ORIGIN/lib")
  endif()
endif()
Creative Commons Legal Code
CC0 1.0 Universal
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
HEREUNDER.
Statement of Purpose
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator
and subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for
the purpose of contributing to a commons of creative, cultural and
scientific works ("Commons") that the public can reliably and without fear
of later claims of infringement build upon, modify, incorporate in other
works, reuse and redistribute as freely as possible in any form whatsoever
and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free
culture and the further production of creative, cultural and scientific
works, or to gain reputation or greater distribution for their Work in
part through the use and efforts of others.
For these and/or other purposes and motivations, and without any
expectation of additional consideration or compensation, the person
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
is an owner of Copyright and Related Rights in the Work, voluntarily
elects to apply CC0 to the Work and publicly distribute the Work under its
terms, with knowledge of his or her Copyright and Related Rights in the
Work and the meaning and intended legal effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not
limited to, the following:
i. the right to reproduce, adapt, distribute, perform, display,
communicate, and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or
likeness depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data
in a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation
thereof, including any amended or successor version of such
directive); and
vii. other similar, equivalent or corresponding rights throughout the
world based on applicable law or treaty, and any national
implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention
of, applicable law, Affirmer hereby overtly, fully, permanently,
irrevocably and unconditionally waives, abandons, and surrenders all of
Affirmer's Copyright and Related Rights and associated claims and causes
of action, whether now known or unknown (including existing as well as
future claims and causes of action), in the Work (i) in all territories
worldwide, (ii) for the maximum duration provided by applicable law or
treaty (including future time extensions), (iii) in any current or future
medium and for any number of copies, and (iv) for any purpose whatsoever,
including without limitation commercial, advertising or promotional
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
member of the public at large and to the detriment of Affirmer's heirs and
successors, fully intending that such Waiver shall not be subject to
revocation, rescission, cancellation, termination, or any other legal or
equitable action to disrupt the quiet enjoyment of the Work by the public
as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason
be judged legally invalid or ineffective under applicable law, then the
Waiver shall be preserved to the maximum extent permitted taking into
account Affirmer's express Statement of Purpose. In addition, to the
extent the Waiver is so judged Affirmer hereby grants to each affected
person a royalty-free, non transferable, non sublicensable, non exclusive,
irrevocable and unconditional license to exercise Affirmer's Copyright and
Related Rights in the Work (i) in all territories worldwide, (ii) for the
maximum duration provided by applicable law or treaty (including future
time extensions), (iii) in any current or future medium and for any number
of copies, and (iv) for any purpose whatsoever, including without
limitation commercial, advertising or promotional purposes (the
"License"). The License shall be deemed effective as of the date CC0 was
applied by Affirmer to the Work. Should any part of the License for any
reason be judged legally invalid or ineffective under applicable law, such
partial invalidity or ineffectiveness shall not invalidate the remainder
of the License, and in such case Affirmer hereby affirms that he or she
will not (i) exercise any of his or her remaining Copyright and Related
Rights in the Work or (ii) assert any associated claims and causes of
action with respect to the Work, in either case contrary to Affirmer's
express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the present or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the
Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.
此差异已折叠。
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "pybind11/pybind11.h"
#include "pybind11/numpy.h"
#include "feat/feature-window.h"
namespace paddleaudio {
namespace kaldi {

namespace py = pybind11;

// Chunk-wise feature extractor built around a feature computer `F`.
//
// `F` must provide a nested `Options` type (with a `frame_opts` member) and
// the members `Dim()`, `GetFrameOptions()`, `NeedRawLogEnergy()` and
// `Compute(...)` used by the inline implementation. Samples that are not
// consumed by a call to ComputeFeature() are kept in `remained_wav_` and
// prepended to the next chunk.
template <class F>
class StreamingFeatureTpl {
  public:
    using Options = typename F::Options;

    StreamingFeatureTpl(const Options& opts);

    // Appends `wav` to the buffered samples and computes features for them.
    // Returns false when `wav` is empty.
    bool ComputeFeature(const ::kaldi::VectorBase<::kaldi::BaseFloat>& wav,
                        ::kaldi::Vector<::kaldi::BaseFloat>* feats);

    // Drops all buffered samples.
    void Reset() { remained_wav_.Resize(0); }

    // Dimension of one feature frame, as reported by the computer.
    int Dim() { return computer_.Dim(); }

  private:
    // Computes the features of every frame in `waves` into `feats`
    // (row-major, Dim() values per frame).
    bool Compute(const ::kaldi::Vector<::kaldi::BaseFloat>& waves,
                 ::kaldi::Vector<::kaldi::BaseFloat>* feats);

    // Members are initialized in this order, whatever the order written in
    // the constructor's initializer list.
    Options opts_;
    ::kaldi::FeatureWindowFunction window_function_;
    ::kaldi::Vector<::kaldi::BaseFloat> remained_wav_;
    F computer_;
};

}  // namespace kaldi
}  // namespace paddleaudio
#include "feature_common_inl.h"
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
namespace paddleaudio {
namespace kaldi {
// Builds the extractor from `opts`.
//
// The initializer list follows the member declaration order (opts_,
// window_function_, computer_), which is the order the members are actually
// initialized in. window_function_ is therefore built from `opts.frame_opts`
// and not from `computer_.GetFrameOptions()`: computer_ is declared after
// window_function_ and would not be constructed yet at that point (the
// original note on that variant reads "the opt set to zero").
template <class F>
StreamingFeatureTpl<F>::StreamingFeatureTpl(const Options& opts)
    : opts_(opts), window_function_(opts.frame_opts), computer_(opts) {}
// Prepends the samples left over from the previous call to `wav`, stores the
// samples beyond the last complete frame shift for the next call, and computes
// features for the combined signal into `feats`.
//
// Returns false when `wav` is empty and true otherwise.
// NOTE(review): the result of Compute() is not propagated, so true is also
// returned when Compute() bails out and leaves `feats` untouched -- callers
// presumably check `feats` themselves; confirm.
template <class F>
bool StreamingFeatureTpl<F>::ComputeFeature(
    const ::kaldi::VectorBase<::kaldi::BaseFloat>& wav,
    ::kaldi::Vector<::kaldi::BaseFloat>* feats) {
    const ::kaldi::int32 incoming_len = wav.Dim();
    if (incoming_len == 0) {
        return false;
    }

    // Concatenate the buffered remainder and the new chunk.
    const ::kaldi::int32 carried_len = remained_wav_.Dim();
    ::kaldi::Vector<::kaldi::BaseFloat> waves(carried_len + incoming_len);
    waves.Range(0, carried_len).CopyFromVec(remained_wav_);
    waves.Range(carried_len, incoming_len).CopyFromVec(wav);

    // Keep everything past the last full frame shift for the next call.
    ::kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions();
    const ::kaldi::int32 num_frames =
        ::kaldi::NumFrames(waves.Dim(), frame_opts);
    const ::kaldi::int32 consumed = frame_opts.WindowShift() * num_frames;
    const ::kaldi::int32 leftover = waves.Dim() - consumed;
    remained_wav_.Resize(leftover);
    remained_wav_.CopyFromVec(waves.Range(consumed, leftover));

    // Compute the features of the combined signal.
    Compute(waves, feats);
    return true;
}
// Computes the features of every frame in `waves`.
//
// `feats` is resized to num_frames * Dim() and filled frame after frame
// (Dim() values per frame). Returns false, leaving `feats` untouched, when
// `waves` holds fewer samples than one analysis window; returns true
// otherwise. VTLN warping is fixed to 1.0.
template <class F>
bool StreamingFeatureTpl<F>::Compute(
    const ::kaldi::Vector<::kaldi::BaseFloat>& waves,
    ::kaldi::Vector<::kaldi::BaseFloat>* feats) {
    const ::kaldi::BaseFloat vtln_warp = 1.0;
    const ::kaldi::FrameExtractionOptions& frame_opts =
        computer_.GetFrameOptions();

    const ::kaldi::int32 num_samples = waves.Dim();
    const ::kaldi::int32 frame_length = frame_opts.WindowSize();
    if (num_samples < frame_length) {
        return false;
    }

    const ::kaldi::int32 num_frames =
        ::kaldi::NumFrames(num_samples, frame_opts);
    const ::kaldi::int32 feat_dim = Dim();
    feats->Resize(num_frames * feat_dim);

    ::kaldi::Vector<::kaldi::BaseFloat> window;
    const bool need_raw_log_energy = computer_.NeedRawLogEnergy();
    for (::kaldi::int32 frame = 0; frame < num_frames; frame++) {
        ::kaldi::BaseFloat raw_log_energy = 0.0;
        // The raw log energy is only requested when the computer needs it.
        ::kaldi::ExtractWindow(0,
                               waves,
                               frame,
                               frame_opts,
                               window_function_,
                               &window,
                               need_raw_log_energy ? &raw_log_energy : nullptr);

        ::kaldi::Vector<::kaldi::BaseFloat> this_feature(feat_dim,
                                                         ::kaldi::kUndefined);
        computer_.Compute(raw_log_energy, vtln_warp, &window, &this_feature);

        // Copy the frame into its slot of the flat output vector.
        ::kaldi::SubVector<::kaldi::BaseFloat> output_row(
            feats->Data() + frame * feat_dim, feat_dim);
        output_row.CopyFromVec(this_feature);
    }
    return true;
}
} // namespace kaldi
} // namespace paddleaudio
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddleaudio/src/pybind/kaldi/kaldi_feature.h"
#include "feat/pitch-functions.h"
namespace paddleaudio {
namespace kaldi {
// Assembles a ::kaldi::FbankOptions from the three option groups and
// initializes the fbank computer of the KaldiFeatureWrapper instance with it.
// Always returns true.
bool InitFbank(::kaldi::FrameExtractionOptions frame_opts,
               ::kaldi::MelBanksOptions mel_opts,
               FbankOptions fbank_opts) {
    ::kaldi::FbankOptions kaldi_opts;
    kaldi_opts.frame_opts = frame_opts;
    kaldi_opts.mel_opts = mel_opts;

    // Copy the fbank-specific switches field by field.
    kaldi_opts.use_energy = fbank_opts.use_energy;
    kaldi_opts.energy_floor = fbank_opts.energy_floor;
    kaldi_opts.raw_energy = fbank_opts.raw_energy;
    kaldi_opts.htk_compat = fbank_opts.htk_compat;
    kaldi_opts.use_log_fbank = fbank_opts.use_log_fbank;
    kaldi_opts.use_power = fbank_opts.use_power;

    KaldiFeatureWrapper* wrapper =
        paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance();
    wrapper->InitFbank(kaldi_opts);
    return true;
}
// Forwards `wav` to ComputeFbank() of the KaldiFeatureWrapper instance and
// returns its result. The wrapper is not initialized or reset here.
py::array_t<float> ComputeFbankStreaming(const py::array_t<float>& wav) {
    KaldiFeatureWrapper* wrapper =
        paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance();
    return wrapper->ComputeFbank(wav);
}
// One-shot fbank computation: initializes the KaldiFeatureWrapper instance
// with the given options, computes the features of `wav`, then resets the
// wrapper's fbank state before returning the features.
py::array_t<float> ComputeFbank(::kaldi::FrameExtractionOptions frame_opts,
                                ::kaldi::MelBanksOptions mel_opts,
                                FbankOptions fbank_opts,
                                const py::array_t<float>& wav) {
    InitFbank(frame_opts, mel_opts, fbank_opts);
    py::array_t<float> features = ComputeFbankStreaming(wav);

    KaldiFeatureWrapper* wrapper =
        paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance();
    wrapper->ResetFbank();
    return features;
}
void ResetFbank() {
paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ResetFbank();
}
// Runs ::kaldi::ComputeKaldiPitch on the samples of `wav` and returns the
// resulting feature matrix as a 2-D array of shape (rows, cols).
// NOTE(review): the buffer is read as `info.size` consecutive floats, which
// assumes `wav` is contiguous -- confirm against the callers.
py::array_t<float> ComputeKaldiPitch(
    const ::kaldi::PitchExtractionOptions& opts,
    const py::array_t<float>& wav) {
    // View the input buffer as a kaldi vector without copying it.
    py::buffer_info wav_info = wav.request();
    ::kaldi::SubVector<::kaldi::BaseFloat> input_wav(
        static_cast<float*>(wav_info.ptr), wav_info.size);

    ::kaldi::Matrix<::kaldi::BaseFloat> features;
    ::kaldi::ComputeKaldiPitch(opts, input_wav, &features);

    // Copy the matrix out one row at a time.
    auto result = py::array_t<float>({features.NumRows(), features.NumCols()});
    const size_t row_bytes = sizeof(float) * features.NumCols();
    for (int row_idx = 0; row_idx < features.NumRows(); ++row_idx) {
        const float* src_row = features.Row(row_idx).Data();
        std::memcpy(result.mutable_data(row_idx), src_row, row_bytes);
    }
    return result;
}
} // namespace kaldi
} // namespace paddleaudio
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <string>
#include "paddleaudio/src/pybind/kaldi/kaldi_feature_wrapper.h"
#include "feat/pitch-functions.h"
namespace py = pybind11;
namespace paddleaudio {
namespace kaldi {
// Plain option bundle exposed to Python; InitFbank copies each field into the
// matching member of ::kaldi::FbankOptions.
struct FbankOptions {
    // Append an extra dimension with energy to the filter banks.
    bool use_energy;
    // Copied to ::kaldi::FbankOptions::energy_floor.
    float energy_floor;
    // If true, compute energy before preemphasis and windowing.
    bool raw_energy;
    // If true, put energy last (if using energy).
    bool htk_compat;
    // If true (default), produce log-filterbank, else linear.
    bool use_log_fbank;
    // Copied to ::kaldi::FbankOptions::use_power.
    bool use_power;

    FbankOptions()
        : use_energy(false)
        , energy_floor(0.0)
        , raw_energy(true)
        , htk_compat(false)
        , use_log_fbank(true)
        , use_power(true) {}
};
// (Re)creates the process-wide fbank extractor from the given options.
// The implementation always returns true.
bool InitFbank(
::kaldi::FrameExtractionOptions frame_opts,
::kaldi::MelBanksOptions mel_opts,
FbankOptions fbank_opts);
// One-shot fbank: initializes the shared extractor with the given options,
// computes features for `wav`, then resets the extractor.
py::array_t<float> ComputeFbank(
::kaldi::FrameExtractionOptions frame_opts,
::kaldi::MelBanksOptions mel_opts,
FbankOptions fbank_opts,
const py::array_t<float>& wav);
// Feeds `wav` to the shared extractor without re-initializing or resetting it.
// NOTE(review): looks like InitFbank must have been called first — confirm.
py::array_t<float> ComputeFbankStreaming(const py::array_t<float>& wav);
// Resets the shared fbank extractor.
void ResetFbank();
// Runs ::kaldi::ComputeKaldiPitch over `wav` and returns the feature matrix
// as a 2-D float array.
py::array_t<float> ComputeKaldiPitch(
const ::kaldi::PitchExtractionOptions& opts,
const py::array_t<float>& wav);
} // namespace kaldi
} // namespace paddleaudio
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddleaudio/src/pybind/kaldi/kaldi_feature_wrapper.h"
namespace paddleaudio {
namespace kaldi {
// Returns the address of a single function-local static wrapper, so every
// caller shares the same instance.
KaldiFeatureWrapper* KaldiFeatureWrapper::GetInstance() {
static KaldiFeatureWrapper instance;
return &instance;
}
// Replaces the held fbank extractor with a new one built from `opts`;
// any previous extractor is destroyed. Always returns true.
bool KaldiFeatureWrapper::InitFbank(::kaldi::FbankOptions opts) {
fbank_.reset(new Fbank(opts));
return true;
}
// Feeds `wav` to the held fbank extractor and returns the produced features.
//
// The extractor writes a flat vector; it is returned reshaped to
// (feats.Dim() / Dim(), Dim()). An empty array is returned when the extractor
// reports failure or produces no values.
//
// Throws py::value_error (ValueError in Python) when `wav` cannot be turned
// into a dense float32 array.
// NOTE(review): `fbank_` is dereferenced without a null check, so InitFbank
// presumably has to be called first — confirm with callers.
py::array_t<float> KaldiFeatureWrapper::ComputeFbank(
    const py::array_t<float> wav) {
    // `py::array_t<float>` also accepts strided or reversed views, while the
    // extractor reads `size` consecutive floats from `ptr`. Force a
    // C-contiguous buffer (no copy when the input is already dense).
    auto dense =
        py::array_t<float, py::array::c_style | py::array::forcecast>::ensure(
            wav);
    if (!dense) {
        throw py::value_error(
            "ComputeFbank: wav must be convertible to a contiguous float32 "
            "array.");
    }
    py::buffer_info info = dense.request();
    ::kaldi::SubVector<::kaldi::BaseFloat> input_wav(
        static_cast<float*>(info.ptr), info.size);

    ::kaldi::Vector<::kaldi::BaseFloat> feats;
    bool flag = fbank_->ComputeFeature(input_wav, &feats);
    if (flag == false || feats.Dim() == 0) return py::array_t<float>();

    // Copy element-wise into a fresh 1-D array. The stray
    // `std::cout << std::endl` debug output that used to sit here is gone.
    auto result = py::array_t<float>(feats.Dim());
    float* res_ptr = result.mutable_data();
    for (int idx = 0; idx < feats.Dim(); ++idx) {
        res_ptr[idx] = feats(idx);
    }
    return result.reshape({feats.Dim() / Dim(), Dim()});
}
} // namespace kaldi
} // namespace paddleaudio
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "base/kaldi-common.h"
#include "feat/feature-fbank.h"
#include "paddleaudio/src/pybind/kaldi/feature_common.h"
namespace paddleaudio {
namespace kaldi {
// Streaming feature extractor specialised for Kaldi's fbank computer.
typedef StreamingFeatureTpl<::kaldi::FbankComputer> Fbank;
// Owner of one shared fbank extractor, reached through GetInstance().
class KaldiFeatureWrapper {
public:
// Returns the shared wrapper instance.
static KaldiFeatureWrapper* GetInstance();
// Builds a new extractor from `opts`, replacing any existing one.
bool InitFbank(::kaldi::FbankOptions opts);
// Runs the extractor over `wav` and returns the features as a 2-D array.
py::array_t<float> ComputeFbank(const py::array_t<float> wav);
// Forwards to the extractor's Dim(); dereferences fbank_ unchecked.
int Dim() { return fbank_->Dim(); }
// Forwards to the extractor's Reset(); dereferences fbank_ unchecked.
void ResetFbank() { fbank_->Reset(); }
private:
// Only assigned by InitFbank, so it is null until InitFbank has run.
std::unique_ptr<paddleaudio::kaldi::Fbank> fbank_;
};
} // namespace kaldi
} // namespace paddleaudio
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#include "paddleaudio/src/pybind/kaldi/kaldi_feature.h"
#include "paddleaudio/third_party/kaldi/feat/feature-fbank.h"
#ifdef INCLUDE_SOX
#include "paddleaudio/src/pybind/sox/io.h"
#include "paddleaudio/src/pybind/sox/effects.h"
#endif
#include <pybind11/stl.h>
#include <pybind11/pybind11.h>
// `tl::optional`
#ifdef INCLUDE_SOX
// Teach pybind11 to convert `tl::optional<T>` by reusing its generic
// optional_caster, so the sox bindings can take and return tl::optional.
namespace pybind11 { namespace detail {
template <typename T>
struct type_caster<tl::optional<T>> : optional_caster<tl::optional<T>> {};
}}
#endif
// Entry point of the `_paddleaudio` extension module.
//
// With INCLUDE_SOX the libsox-backed I/O, utility and effect functions are
// bound; with INCLUDE_KALDI the Kaldi option structs and feature functions
// are bound. Each option class is registered BEFORE any function that takes
// it as a parameter, so pybind11 can put the Python class name (not the raw
// C++ type name) into the generated signature docstring.
PYBIND11_MODULE(_paddleaudio, m) {
#ifdef INCLUDE_SOX
    m.def("get_info_file",
          &paddleaudio::sox_io::get_info_file,
          "Get metadata of audio file.");
    // support obj later
    m.def("get_info_fileobj",
          &paddleaudio::sox_io::get_info_fileobj,
          "Get metadata of audio in file object.");
    m.def("load_audio_fileobj",
          &paddleaudio::sox_io::load_audio_fileobj,
          "Load audio from file object.");
    m.def("save_audio_fileobj",
          &paddleaudio::sox_io::save_audio_fileobj,
          "Save audio to file obj.");

    // sox io
    m.def("sox_io_get_info", &paddleaudio::sox_io::get_info_file);
    m.def("sox_io_load_audio_file", &paddleaudio::sox_io::load_audio_file);
    m.def("sox_io_save_audio_file", &paddleaudio::sox_io::save_audio_file);

    // sox utils
    m.def("sox_utils_set_seed", &paddleaudio::sox_utils::set_seed);
    m.def("sox_utils_set_verbosity", &paddleaudio::sox_utils::set_verbosity);
    m.def("sox_utils_set_use_threads",
          &paddleaudio::sox_utils::set_use_threads);
    m.def("sox_utils_set_buffer_size",
          &paddleaudio::sox_utils::set_buffer_size);
    m.def("sox_utils_list_effects", &paddleaudio::sox_utils::list_effects);
    m.def("sox_utils_list_read_formats",
          &paddleaudio::sox_utils::list_read_formats);
    m.def("sox_utils_list_write_formats",
          &paddleaudio::sox_utils::list_write_formats);
    m.def("sox_utils_get_buffer_size",
          &paddleaudio::sox_utils::get_buffer_size);

    // effect
    m.def("apply_effects_fileobj",
          &paddleaudio::sox_effects::apply_effects_fileobj,
          "Decode audio data from file-like obj and apply effects.");
    m.def("sox_effects_initialize_sox_effects",
          &paddleaudio::sox_effects::initialize_sox_effects);
    m.def("sox_effects_shutdown_sox_effects",
          &paddleaudio::sox_effects::shutdown_sox_effects);
    m.def("sox_effects_apply_effects_tensor",
          &paddleaudio::sox_effects::apply_effects_tensor);
    m.def("sox_effects_apply_effects_file",
          &paddleaudio::sox_effects::apply_effects_file);
#endif

#ifdef INCLUDE_KALDI
    py::class_<kaldi::PitchExtractionOptions>(m, "PitchExtractionOptions")
        .def(py::init<>())
        .def_readwrite("samp_freq", &kaldi::PitchExtractionOptions::samp_freq)
        .def_readwrite("frame_shift_ms", &kaldi::PitchExtractionOptions::frame_shift_ms)
        .def_readwrite("frame_length_ms", &kaldi::PitchExtractionOptions::frame_length_ms)
        .def_readwrite("preemph_coeff", &kaldi::PitchExtractionOptions::preemph_coeff)
        .def_readwrite("min_f0", &kaldi::PitchExtractionOptions::min_f0)
        .def_readwrite("max_f0", &kaldi::PitchExtractionOptions::max_f0)
        .def_readwrite("soft_min_f0", &kaldi::PitchExtractionOptions::soft_min_f0)
        .def_readwrite("penalty_factor", &kaldi::PitchExtractionOptions::penalty_factor)
        .def_readwrite("lowpass_cutoff", &kaldi::PitchExtractionOptions::lowpass_cutoff)
        .def_readwrite("resample_freq", &kaldi::PitchExtractionOptions::resample_freq)
        .def_readwrite("delta_pitch", &kaldi::PitchExtractionOptions::delta_pitch)
        .def_readwrite("nccf_ballast", &kaldi::PitchExtractionOptions::nccf_ballast)
        .def_readwrite("lowpass_filter_width", &kaldi::PitchExtractionOptions::lowpass_filter_width)
        .def_readwrite("upsample_filter_width", &kaldi::PitchExtractionOptions::upsample_filter_width)
        .def_readwrite("max_frames_latency", &kaldi::PitchExtractionOptions::max_frames_latency)
        .def_readwrite("frames_per_chunk", &kaldi::PitchExtractionOptions::frames_per_chunk)
        .def_readwrite("simulate_first_pass_online", &kaldi::PitchExtractionOptions::simulate_first_pass_online)
        .def_readwrite("recompute_frame", &kaldi::PitchExtractionOptions::recompute_frame)
        .def_readwrite("nccf_ballast_online", &kaldi::PitchExtractionOptions::nccf_ballast_online)
        .def_readwrite("snip_edges", &kaldi::PitchExtractionOptions::snip_edges);
    m.def("ComputeKaldiPitch", &paddleaudio::kaldi::ComputeKaldiPitch, "compute kaldi pitch");

    py::class_<kaldi::FrameExtractionOptions>(m, "FrameExtractionOptions")
        .def(py::init<>())
        .def_readwrite("samp_freq", &kaldi::FrameExtractionOptions::samp_freq)
        .def_readwrite("frame_shift_ms", &kaldi::FrameExtractionOptions::frame_shift_ms)
        .def_readwrite("frame_length_ms", &kaldi::FrameExtractionOptions::frame_length_ms)
        .def_readwrite("dither", &kaldi::FrameExtractionOptions::dither)
        .def_readwrite("preemph_coeff", &kaldi::FrameExtractionOptions::preemph_coeff)
        .def_readwrite("remove_dc_offset", &kaldi::FrameExtractionOptions::remove_dc_offset)
        .def_readwrite("window_type", &kaldi::FrameExtractionOptions::window_type)
        .def_readwrite("round_to_power_of_two", &kaldi::FrameExtractionOptions::round_to_power_of_two)
        .def_readwrite("blackman_coeff", &kaldi::FrameExtractionOptions::blackman_coeff)
        .def_readwrite("snip_edges", &kaldi::FrameExtractionOptions::snip_edges)
        .def_readwrite("allow_downsample", &kaldi::FrameExtractionOptions::allow_downsample)
        .def_readwrite("allow_upsample", &kaldi::FrameExtractionOptions::allow_upsample)
        .def_readwrite("max_feature_vectors", &kaldi::FrameExtractionOptions::max_feature_vectors);

    py::class_<kaldi::MelBanksOptions>(m, "MelBanksOptions")
        .def(py::init<>())
        .def_readwrite("num_bins", &kaldi::MelBanksOptions::num_bins)
        .def_readwrite("low_freq", &kaldi::MelBanksOptions::low_freq)
        .def_readwrite("high_freq", &kaldi::MelBanksOptions::high_freq)
        .def_readwrite("vtln_low", &kaldi::MelBanksOptions::vtln_low)
        .def_readwrite("vtln_high", &kaldi::MelBanksOptions::vtln_high)
        .def_readwrite("debug_mel", &kaldi::MelBanksOptions::debug_mel)
        .def_readwrite("htk_mode", &kaldi::MelBanksOptions::htk_mode);

    py::class_<paddleaudio::kaldi::FbankOptions>(m, "FbankOptions")
        .def(py::init<>())
        .def_readwrite("use_energy", &paddleaudio::kaldi::FbankOptions::use_energy)
        .def_readwrite("energy_floor", &paddleaudio::kaldi::FbankOptions::energy_floor)
        .def_readwrite("raw_energy", &paddleaudio::kaldi::FbankOptions::raw_energy)
        .def_readwrite("htk_compat", &paddleaudio::kaldi::FbankOptions::htk_compat)
        .def_readwrite("use_log_fbank", &paddleaudio::kaldi::FbankOptions::use_log_fbank)
        .def_readwrite("use_power", &paddleaudio::kaldi::FbankOptions::use_power);

    // Bound last: its signature uses FrameExtractionOptions, MelBanksOptions
    // and FbankOptions, all of which are registered above.
    m.def("ComputeFbank", &paddleaudio::kaldi::ComputeFbank, "compute fbank");
#endif
}
// the code is from https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/effects.cpp with modification.
#include <mutex>
#include <sox.h>
#include "paddleaudio/src/pybind/sox/effects.h"
#include "paddleaudio/src/pybind/sox/effects_chain.h"
#include "paddleaudio/src/pybind/sox/utils.h"
using namespace paddleaudio::sox_utils;
namespace paddleaudio::sox_effects {
// Streaming decoding over file-like object is tricky because libsox operates on
// FILE pointer. The following is what `sox` and `play` commands do
// - file input -> FILE pointer
// - URL input -> call wget in subprocess and pipe the data -> FILE pointer
// - stdin -> FILE pointer
//
// We want to, instead, fetch byte strings chunk by chunk, consume them, and
// discard.
//
// Here is the approach
// 1. Initialize sox_format_t using sox_open_mem_read, providing the initial
// chunk of byte string
// This will perform header-based format detection, if necessary, then fill
// the metadata of sox_format_t. Internally, sox_open_mem_read uses fmemopen,
// which returns FILE* which points the buffer of the provided byte string.
// 2. Each time sox reads a chunk from the FILE*, we update the underlying
// buffer in a way that it
// starts with unseen data, and append the new data read from the given
// fileobj. This will trick libsox as if it keeps reading from the FILE*
// continuously.
// For Step 2. see `fileobj_input_drain` function in effects_chain.cpp
// Decodes audio from a Python file-like object, runs it through the given
// libsox effects, and returns (samples, sample_rate).
//
// `normalize` and `channels_first` both default to true when not supplied.
// `format`, when supplied, is passed to libsox as the file type.
// Returns an empty optional when libsox cannot open the data or reports an
// unknown encoding.
auto apply_effects_fileobj(
py::object fileobj,
const std::vector<std::vector<std::string>>& effects,
tl::optional<bool> normalize,
tl::optional<bool> channels_first,
tl::optional<std::string> format)
-> tl::optional<std::tuple<py::array, int64_t>> {
// Prepare the buffer used throughout the lifecycle of SoxEffectChain.
//
// For certain format (such as FLAC), libsox keeps reading the content at
// the initialization unless it reaches EOF even when the header is properly
// parsed. (Making buffer size 8192, which is way bigger than the header,
// resulted in libsox consuming all the buffer content at the time it opens
// the file.) Therefore buffer has to always contain valid data, except after
// EOF. We default to `sox_get_globals()->bufsiz`* for buffer size and we
// first check if there is enough data to fill the buffer. `read_fileobj`
// repeatedly calls `read` method until it receives the requested length of
// bytes or it reaches EOF. If we get bytes shorter than requested, that means
// the whole audio data are fetched.
//
// * This can be changed with `paddleaudio.utils.sox_utils.set_buffer_size`.
const auto capacity = [&]() {
// NOTE:
// Use the abstraction provided by `libpaddleaudio` to access the global
// config defined by libsox. Directly using `sox_get_globals` function will
// end up retrieving the static variable defined in `_paddleaudio`, which is
// not correct.
const auto bufsiz = get_buffer_size();
const int64_t kDefaultCapacityInBytes = 256;
return (bufsiz > kDefaultCapacityInBytes) ? bufsiz
: kDefaultCapacityInBytes;
}();
// The string owns the bytes; in_buf is a writable view into it that is
// handed to libsox and to the effects chain below.
std::string buffer(capacity, '\0');
auto* in_buf = const_cast<char*>(buffer.data());
auto num_read = read_fileobj(&fileobj, capacity, in_buf);
// If the file is shorter than 256, then libsox cannot read the header.
auto in_buffer_size = (num_read > 256) ? num_read : 256;
// Open file (this starts reading the header)
// When opening a file there are two functions that can touch FILE*.
// * `auto_detect_format`
// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L43
// * `startread` handler of detected format.
// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L574
// To see the handler of a particular format, go to
// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/<FORMAT>.c
// For example, vorbis can be found
// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/vorbis.c#L97-L158
SoxFormat sf(sox_open_mem_read(
in_buf,
in_buffer_size,
/*signal=*/nullptr,
/*encoding=*/nullptr,
/*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
// In case of streamed data, length can be 0
if (static_cast<sox_format_t*>(sf) == nullptr ||
sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
return {};
}
// Prepare output buffer
std::vector<sox_sample_t> out_buffer;
out_buffer.reserve(sf->signal.length);
// Create and run SoxEffectsChain
const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);
paddleaudio::sox_effects_chain::SoxEffectsChainPyBind chain(
/*input_encoding=*/sf->encoding,
/*output_encoding=*/get_tensor_encodinginfo(dtype));
chain.addInputFileObj(sf, in_buf, in_buffer_size, &fileobj);
for (const auto& effect : effects) {
chain.addEffect(effect);
}
chain.addOutputBuffer(&out_buffer);
chain.run();
// Create tensor from buffer
bool channels_first_ = channels_first.value_or(true);
auto tensor = convert_to_tensor(
/*buffer=*/out_buffer.data(),
/*num_samples=*/out_buffer.size(),
/*num_channels=*/chain.getOutputNumChannels(),
dtype,
normalize.value_or(true),
channels_first_);
return std::forward_as_tuple(
tensor, static_cast<int64_t>(chain.getOutputSampleRate()));
}
namespace {
// Lifecycle of the libsox effects library within this process. The
// transitions are NotInitialized -> Initialized (initialize_sox_effects) and
// Initialized -> ShutDown (shutdown_sox_effects); re-initializing after
// ShutDown is rejected.
enum SoxEffectsResourceState { NotInitialized, Initialized, ShutDown };
// Current lifecycle state; read and written only while holding the mutex below.
SoxEffectsResourceState SOX_RESOURCE_STATE = NotInitialized;
// Guards SOX_RESOURCE_STATE in initialize_sox_effects / shutdown_sox_effects.
std::mutex SOX_RESOUCE_STATE_MUTEX;
} // namespace
// Initializes the libsox effects library on first use.
//
// Calling it again while already initialized is a no-op. Calling it after
// shutdown_sox_effects() throws std::runtime_error, as does a failing
// sox_init().
void initialize_sox_effects() {
    const std::lock_guard<std::mutex> guard(SOX_RESOUCE_STATE_MUTEX);

    if (SOX_RESOURCE_STATE == Initialized) {
        return;
    }
    if (SOX_RESOURCE_STATE == ShutDown) {
        throw std::runtime_error(
            "SoX Effects has been shut down. Cannot initialize again.");
    }
    if (SOX_RESOURCE_STATE == NotInitialized) {
        if (sox_init() != SOX_SUCCESS) {
            throw std::runtime_error("Failed to initialize sox effects.");
        }
        SOX_RESOURCE_STATE = Initialized;
    }
}
// Shuts the libsox effects library down.
//
// Throws std::runtime_error when the library was never initialized or when
// sox_quit() fails. Calling it again after a successful shutdown is a no-op.
void shutdown_sox_effects() {
    const std::lock_guard<std::mutex> lock(SOX_RESOUCE_STATE_MUTEX);
    switch (SOX_RESOURCE_STATE) {
        case NotInitialized:
            throw std::runtime_error(
                "SoX Effects is not initialized. Cannot shutdown.");
        case Initialized:
            if (sox_quit() != SOX_SUCCESS) {
                // This used to reuse the *initialize* failure text, which
                // pointed at the wrong operation.
                throw std::runtime_error("Failed to shut down sox effects.");
            }
            SOX_RESOURCE_STATE = ShutDown;
            break;
        case ShutDown:
            break;
    }
}
// Applies libsox effects to an in-memory waveform.
//
// The array is validated, pushed through an effects chain whose input and
// output encodings are both derived from the array's dtype, and the result
// is converted back (without normalization) into an array of the same dtype.
// Returns (processed samples, output sample rate).
auto apply_effects_tensor(
    py::array waveform,
    int64_t sample_rate,
    const std::vector<std::vector<std::string>>& effects,
    bool channels_first) -> std::tuple<py::array, int64_t> {
    validate_input_tensor(waveform);

    // Same dtype on both ends of the chain.
    const auto dtype = waveform.dtype();
    paddleaudio::sox_effects_chain::SoxEffectsChain chain(
        get_tensor_encodinginfo(dtype), get_tensor_encodinginfo(dtype));

    // Samples emitted by the chain are collected here.
    std::vector<sox_sample_t> processed;
    processed.reserve(waveform.size());

    // input -> user effects -> output sink
    chain.addInputTensor(&waveform, sample_rate, channels_first);
    for (auto it = effects.begin(); it != effects.end(); ++it) {
        chain.addEffect(*it);
    }
    chain.addOutputBuffer(&processed);
    chain.run();

    auto out_tensor = convert_to_tensor(processed.data(),
                                        processed.size(),
                                        chain.getOutputNumChannels(),
                                        dtype,
                                        /*normalize=*/false,
                                        channels_first);
    return std::tuple<py::array, int64_t>(out_tensor,
                                          chain.getOutputSampleRate());
}
// Decodes the audio file at `path`, applies the given libsox effects and
// returns (samples, output sample rate).
//
// `normalize` and `channels_first` default to true when not supplied;
// `format`, when supplied, is passed to libsox as the file type. An empty
// optional is returned when libsox cannot open the file or reports an
// unknown encoding.
auto apply_effects_file(
    const std::string& path,
    const std::vector<std::vector<std::string>>& effects,
    tl::optional<bool> normalize,
    tl::optional<bool> channels_first,
    const tl::optional<std::string>& format)
    -> tl::optional<std::tuple<py::array, int64_t>> {
    // Open input file
    const char* filetype =
        format.has_value() ? format.value().c_str() : nullptr;
    SoxFormat sf(sox_open_read(path.c_str(),
                               /*signal=*/nullptr,
                               /*encoding=*/nullptr,
                               filetype));

    const bool opened = static_cast<sox_format_t*>(sf) != nullptr;
    if (!opened || sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
        return {};
    }

    const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);

    // Samples emitted by the chain are collected here.
    std::vector<sox_sample_t> decoded;
    decoded.reserve(sf->signal.length);

    // file input -> user effects -> output sink
    paddleaudio::sox_effects_chain::SoxEffectsChain chain(
        sf->encoding, get_tensor_encodinginfo(dtype));
    chain.addInputFile(sf);
    for (auto it = effects.begin(); it != effects.end(); ++it) {
        chain.addEffect(*it);
    }
    chain.addOutputBuffer(&decoded);
    chain.run();

    // Create tensor from buffer
    const bool want_channels_first = channels_first.value_or(true);
    auto tensor = convert_to_tensor(decoded.data(),
                                    decoded.size(),
                                    chain.getOutputNumChannels(),
                                    dtype,
                                    normalize.value_or(true),
                                    want_channels_first);
    return std::tuple<py::array, int64_t>(tensor,
                                          chain.getOutputSampleRate());
}
} // namespace paddleaudio::sox_effects
// the code is from https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/effects.h with modification.
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include "paddleaudio/src/optional/optional.hpp"
namespace py = pybind11;
namespace paddleaudio::sox_effects {
// Decodes audio from a Python file-like object and applies `effects`.
// Returns (samples, sample_rate), or an empty optional on failure to decode.
auto apply_effects_fileobj(
py::object fileobj,
const std::vector<std::vector<std::string>>& effects,
tl::optional<bool> normalize,
tl::optional<bool> channels_first,
tl::optional<std::string> format)
-> tl::optional<std::tuple<py::array, int64_t>>;
// Initializes the libsox effects library; throws if it was already shut down.
void initialize_sox_effects();
// Shuts the libsox effects library down; throws if it was never initialized.
void shutdown_sox_effects();
// Applies `effects` to an in-memory waveform.
// Returns (processed samples, output sample rate).
auto apply_effects_tensor(
py::array waveform,
int64_t sample_rate,
const std::vector<std::vector<std::string>>& effects,
bool channels_first) -> std::tuple<py::array, int64_t>;
// Decodes the file at `path` and applies `effects`.
// Returns (samples, sample_rate), or an empty optional on failure to decode.
auto apply_effects_file(
const std::string& path,
const std::vector<std::vector<std::string>>& effects,
tl::optional<bool> normalize,
tl::optional<bool> channels_first,
const tl::optional<std::string>& format)
-> tl::optional<std::tuple<py::array, int64_t>>;
} // namespace paddleaudio::sox_effects
// the code is from https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/effects_chain.cpp with modification.
#include <sox.h>
#include <iostream>
#include <vector>
#include "paddleaudio/src/pybind/sox/effects_chain.h"
#include "paddleaudio/src/pybind/sox/utils.h"
using namespace paddleaudio::sox_utils;
namespace paddleaudio::sox_effects_chain {
namespace {
/// helper classes for passing the location of input tensor and output buffer
///
/// drain/flow callback functions require plain C style function signature and
/// the way to pass extra data is to attach data to sox_effect_t::priv pointer.
/// The following structs will be assigned to sox_effect_t::priv pointer which
/// gives sox_effect_t an access to input Tensor and output buffer object.
// Private state of the "input_tensor" effect; populated by
// SoxEffectsChain::addInputTensor and consumed by tensor_input_drain.
struct TensorInputPriv {
// Number of samples already handed to libsox (advanced by tensor_input_drain).
size_t index;
// Source array; not owned by this struct.
py::array* waveform;
// Sample rate passed to addInputTensor.
int64_t sample_rate;
// Selects the index order used when reading `waveform`:
// (channel, frame) when true, (frame, channel) when false.
bool channels_first;
};
// Private state of the "output_tensor" effect.
struct TensorOutputPriv {
// Destination that tensor_output_flow appends samples to; not owned.
std::vector<sox_sample_t>* buffer;
};
// Private state of the "output_file" effect.
struct FileOutputPriv {
// Open libsox output handle that file_output_flow writes to; not owned.
sox_format_t* sf;
};
/// Callback function to feed Tensor data to SoxEffectChain.
///
/// Converts up to `*osamp` samples of the input array, starting at the
/// position stored in the effect's TensorInputPriv, into `sox_sample_t`
/// values written to `obuf` in (frame, channel) interleaved order. On return
/// `*osamp` holds the number of samples actually written, which is always a
/// multiple of the channel count.
///
/// The array is addressed with two indices, so it is expected to be 2-D.
/// Returns SOX_EOF once every sample has been delivered, SOX_SUCCESS
/// otherwise. Throws std::runtime_error for an unsupported dtype.
int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
    // Retrieve the input Tensor and current index
    auto priv = static_cast<TensorInputPriv*>(effp->priv);
    const size_t index = priv->index;
    // Borrow the array; copying the handle on every callback is unnecessary.
    const py::array& tensor = *(priv->waveform);
    const size_t num_channels = effp->out_signal.channels;
    const bool channels_first = priv->channels_first;

    // Adjust the number of samples to read
    const size_t num_samples = tensor.size();
    if (index + *osamp > num_samples) {
        *osamp = num_samples - index;
    }
    // Ensure that it's a multiple of the number of channels
    *osamp -= *osamp % num_channels;
    const size_t count = *osamp;

    // Address of the element at flat interleaved position `index + offset`.
    // Positions are kept in size_t so very long inputs do not overflow `int`.
    auto element_at = [&](size_t offset) -> const void* {
        const size_t flat = index + offset;
        const auto frame = static_cast<int64_t>(flat / num_channels);
        const auto channel = static_cast<int64_t>(flat % num_channels);
        return channels_first ? tensor.data(channel, frame)
                              : tensor.data(frame, channel);
    };

    // Convert to sox_sample_t (int32_t), writing straight into obuf.
    // The case labels are NumPy dtype numbers.
    switch (tensor.dtype().num()) {
        case 11: {  // float32
            for (size_t i = 0; i < count; ++i) {
                // Need to convert to 64-bit precision so that
                // values around INT32_MIN/MAX are handled correctly.
                double elem = *static_cast<const float*>(element_at(i));
                elem = elem * 2147483648.;
                if (elem > INT32_MAX) {
                    obuf[i] = INT32_MAX;
                } else if (elem < INT32_MIN) {
                    obuf[i] = INT32_MIN;
                } else {
                    obuf[i] = static_cast<sox_sample_t>(elem);
                }
            }
            break;
        }
        case 5: {  // int32
            for (size_t i = 0; i < count; ++i) {
                obuf[i] = *static_cast<const int*>(element_at(i));
            }
            break;
        }
        case 3: {  // int16
            for (size_t i = 0; i < count; ++i) {
                obuf[i] = *static_cast<const int16_t*>(element_at(i)) * 65536;
            }
            break;
        }
        case 1: {  // byte
            // NOTE(review): dtype number 1 is the signed 8-bit type, yet
            // `(x - 128) * 2^24` is the conversion for unsigned 8-bit data
            // and overflows for negative inputs. Behavior is kept as-is;
            // confirm whether uint8 (dtype number 2) was intended.
            for (size_t i = 0; i < count; ++i) {
                const int8_t elem = *static_cast<const int8_t*>(element_at(i));
                obuf[i] = (elem - 128) * 16777216;
            }
            break;
        }
        default:
            throw std::runtime_error("Unexpected dtype.");
    }

    priv->index += count;
    return (priv->index == num_samples) ? SOX_EOF : SOX_SUCCESS;
}
/// Callback function to fetch data from SoxEffectChain.
///
/// Appends the `*isamp` incoming samples to the vector registered in
/// TensorOutputPriv. Nothing is forwarded downstream, so `*osamp` is set
/// to 0. Always returns SOX_SUCCESS.
int tensor_output_flow(
    sox_effect_t* effp,
    sox_sample_t const* ibuf,
    sox_sample_t* obuf LSX_UNUSED,
    size_t* isamp,
    size_t* osamp) {
    *osamp = 0;

    auto* sink = static_cast<TensorOutputPriv*>(effp->priv);
    std::vector<sox_sample_t>& samples = *(sink->buffer);
    sox_sample_t const* const last = ibuf + *isamp;
    samples.insert(samples.end(), ibuf, last);
    return SOX_SUCCESS;
}
/// Flow callback that writes the incoming samples to the libsox output
/// handle stored in FileOutputPriv.
///
/// Nothing is forwarded downstream (`*osamp` is set to 0). Returns
/// SOX_SUCCESS when there is nothing to write or everything was written,
/// SOX_EOF on a short write without an error code, and throws
/// std::runtime_error when libsox reports an error.
int file_output_flow(
    sox_effect_t* effp,
    sox_sample_t const* ibuf,
    sox_sample_t* obuf LSX_UNUSED,
    size_t* isamp,
    size_t* osamp) {
    *osamp = 0;
    if (!*isamp) {
        return SOX_SUCCESS;
    }

    auto sf = static_cast<FileOutputPriv*>(effp->priv)->sf;
    if (sox_write(sf, ibuf, *isamp) == *isamp) {
        return SOX_SUCCESS;
    }

    // Short write: surface the libsox error if there is one.
    if (sf->sox_errno) {
        std::ostringstream msg;
        msg << sf->sox_errstr << " " << sox_strerror(sf->sox_errno) << " "
            << sf->filename;
        throw std::runtime_error(msg.str());
    }
    return SOX_EOF;
}
/// Returns the handler describing the "input_tensor" effect: a source with
/// only a drain callback (tensor_input_drain) and TensorInputPriv as its
/// private state. The handler is a function-local static, so the same
/// pointer is returned on every call.
sox_effect_handler_t* get_tensor_input_handler() {
    static sox_effect_handler_t input_handler{
        /*name=*/"input_tensor",
        /*usage=*/nullptr,
        /*flags=*/SOX_EFF_MCHAN,
        /*getopts=*/nullptr,
        /*start=*/nullptr,
        /*flow=*/nullptr,
        /*drain=*/tensor_input_drain,
        /*stop=*/nullptr,
        /*kill=*/nullptr,
        /*priv_size=*/sizeof(TensorInputPriv)};
    return &input_handler;
}
/// Returns the handler describing the "output_tensor" effect: a sink with
/// only a flow callback (tensor_output_flow) and TensorOutputPriv as its
/// private state. The handler is a function-local static, so the same
/// pointer is returned on every call.
sox_effect_handler_t* get_tensor_output_handler() {
    static sox_effect_handler_t output_handler{
        /*name=*/"output_tensor",
        /*usage=*/nullptr,
        /*flags=*/SOX_EFF_MCHAN,
        /*getopts=*/nullptr,
        /*start=*/nullptr,
        /*flow=*/tensor_output_flow,
        /*drain=*/nullptr,
        /*stop=*/nullptr,
        /*kill=*/nullptr,
        /*priv_size=*/sizeof(TensorOutputPriv)};
    return &output_handler;
}
/// Returns the handler describing the "output_file" effect: a sink with only
/// a flow callback (file_output_flow) and FileOutputPriv as its private
/// state. The handler is a function-local static, so the same pointer is
/// returned on every call.
sox_effect_handler_t* get_file_output_handler() {
    static sox_effect_handler_t file_handler{
        /*name=*/"output_file",
        /*usage=*/nullptr,
        /*flags=*/SOX_EFF_MCHAN,
        /*getopts=*/nullptr,
        /*start=*/nullptr,
        /*flow=*/file_output_flow,
        /*drain=*/nullptr,
        /*stop=*/nullptr,
        /*kill=*/nullptr,
        /*priv_size=*/sizeof(FileOutputPriv)};
    return &file_handler;
}
} // namespace
// Takes over the given effect pointer; the destructor releases it with free().
SoxEffect::SoxEffect(sox_effect_t* se) noexcept : se_(se) {}
// Releases the wrapped effect struct with free().
SoxEffect::~SoxEffect() {
    // free() ignores a null pointer, so no explicit check is needed.
    free(se_);
}
// Exposes the wrapped pointer so a SoxEffect can be passed to libsox calls
// that take a sox_effect_t*.
SoxEffect::operator sox_effect_t*() const {
return se_;
}
// Member access on the wrapped effect (e.g. `e->priv`).
auto SoxEffect::operator->() noexcept -> sox_effect_t* {
return se_;
}
// Stores the two encodings, zero-initializes the signal descriptors, and
// creates the libsox effects chain from pointers to the stored encodings.
// Throws std::runtime_error when libsox fails to create the chain.
SoxEffectsChain::SoxEffectsChain(
sox_encodinginfo_t input_encoding,
sox_encodinginfo_t output_encoding)
: in_enc_(input_encoding),
out_enc_(output_encoding),
in_sig_(),
interm_sig_(),
out_sig_(),
// The chain is handed the addresses of the member copies above.
sec_(sox_create_effects_chain(&in_enc_, &out_enc_)) {
if (!sec_) {
throw std::runtime_error("Failed to create effect chain.");
}
}
// Destroys the underlying libsox effects chain, if one exists.
SoxEffectsChain::~SoxEffectsChain() {
    if (sec_ == nullptr) {
        return;
    }
    sox_delete_effects_chain(sec_);
}
// Runs every effect in the chain to completion. No flow callback or client
// data is supplied, and the status returned by libsox is not inspected.
void SoxEffectsChain::run() {
    sox_flow_effects(sec_, /*callback=*/nullptr, /*client_data=*/nullptr);
}
// Installs the "input_tensor" source effect that reads samples from
// `waveform`. Also derives the chain's input signal description from the
// array and uses it as the starting intermediate signal.
// Throws std::runtime_error when libsox rejects the effect.
void SoxEffectsChain::addInputTensor(
    py::array* waveform,
    int64_t sample_rate,
    bool channels_first) {
    in_sig_ = get_signalinfo(waveform, sample_rate, "wav", channels_first);
    interm_sig_ = in_sig_;

    SoxEffect source(sox_create_effect(get_tensor_input_handler()));

    // Hand the drain callback everything it needs, starting at sample 0.
    auto* state = static_cast<TensorInputPriv*>(source->priv);
    state->waveform = waveform;
    state->sample_rate = sample_rate;
    state->channels_first = channels_first;
    state->index = 0;

    const int status = sox_add_effect(sec_, source, &interm_sig_, &in_sig_);
    if (status != SOX_SUCCESS) {
        throw std::runtime_error(
            "Internal Error: Failed to add effect: input_tensor");
    }
}
void SoxEffectsChain::addOutputBuffer(
std::vector<sox_sample_t>* output_buffer) {
SoxEffect e(sox_create_effect(get_tensor_output_handler()));
static_cast<TensorOutputPriv*>(e->priv)->buffer = output_buffer;
if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
throw std::runtime_error(
"Internal Error: Failed to add effect: output_tensor");
}
}
// Installs libsox's built-in "input" effect reading from the open handle
// `sf`, and takes the chain's input signal description from that handle.
// Throws std::runtime_error when libsox rejects the effect.
void SoxEffectsChain::addInputFile(sox_format_t* sf) {
    in_sig_ = sf->signal;
    interm_sig_ = in_sig_;

    SoxEffect source(sox_create_effect(sox_find_effect("input")));

    // The "input" effect receives the format handle through its single
    // option slot, disguised as a char*.
    char* option_slot[] = {(char*)sf};
    sox_effect_options(source, 1, option_slot);

    const int status = sox_add_effect(sec_, source, &interm_sig_, &in_sig_);
    if (status != SOX_SUCCESS) {
        std::ostringstream msg;
        msg << "Internal Error: Failed to add effect: input " << sf->filename;
        throw std::runtime_error(msg.str());
    }
}
// Installs the "output_file" sink effect writing to the open handle `sf`,
// and takes the chain's output signal description from that handle.
// Throws std::runtime_error when libsox rejects the effect.
void SoxEffectsChain::addOutputFile(sox_format_t* sf) {
    out_sig_ = sf->signal;

    SoxEffect sink(sox_create_effect(get_file_output_handler()));
    auto* state = static_cast<FileOutputPriv*>(sink->priv);
    state->sf = sf;

    const int status = sox_add_effect(sec_, sink, &interm_sig_, &out_sig_);
    if (status != SOX_SUCCESS) {
        std::ostringstream msg;
        msg << "Internal Error: Failed to add effect: output " << sf->filename;
        throw std::runtime_error(msg.str());
    }
}
// Appends one user-specified effect to the chain.
//
// `effect[0]` is the effect name and the remaining entries are its options.
// Throws std::runtime_error when the list is empty, the effect is listed in
// UNSUPPORTED_EFFECTS or unknown to libsox, the options are rejected, or
// libsox refuses to add the effect.
void SoxEffectsChain::addEffect(const std::vector<std::string> effect) {
    if (effect.empty()) {
        throw std::runtime_error("Invalid argument: empty effect.");
    }
    const std::string& name = effect.front();

    // Explicitly disabled effects and effects libsox does not know are
    // reported identically; libsox is only consulted for the former check's
    // survivors.
    const bool disabled =
        UNSUPPORTED_EFFECTS.find(name) != UNSUPPORTED_EFFECTS.end();
    const auto* handler = disabled ? nullptr : sox_find_effect(name.c_str());
    if (handler == nullptr) {
        std::ostringstream msg;
        msg << "Unsupported effect: " << name;
        throw std::runtime_error(msg.str());
    }

    SoxEffect e(sox_create_effect(handler));

    // libsox expects the options as an argv-style array of char*.
    const auto num_options = effect.size() - 1;
    std::vector<char*> argv;
    for (auto it = effect.begin() + 1; it != effect.end(); ++it) {
        argv.push_back((char*)it->c_str());
    }
    char** argv_ptr = num_options ? argv.data() : nullptr;
    if (sox_effect_options(e, num_options, argv_ptr) != SOX_SUCCESS) {
        std::ostringstream msg;
        msg << "Invalid effect option:";
        for (const auto& token : effect) {
            msg << " " << token;
        }
        throw std::runtime_error(msg.str());
    }

    if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
        std::ostringstream msg;
        msg << "Internal Error: Failed to add effect: \"" << name;
        for (auto it = effect.begin() + 1; it != effect.end(); ++it) {
            msg << " " << *it;
        }
        msg << "\"";
        throw std::runtime_error(msg.str());
    }
}
// Channel count of the intermediate signal, i.e. the signal description
// that was passed to sox_add_effect for every effect added so far.
int64_t SoxEffectsChain::getOutputNumChannels() {
return interm_sig_.channels;
}
// Sample rate of the intermediate signal, converted to int64_t on return.
int64_t SoxEffectsChain::getOutputSampleRate() {
return interm_sig_.rate;
}
namespace {
/// helper classes for passing file-like object to SoxEffectChain
///
/// Private state of the file-object input effect, read by fileobj_input_drain.
struct FileObjInputPriv {
// libsox handle whose underlying FILE* is queried and repositioned by the drain.
sox_format_t* sf;
// Python file-like object that further bytes are read from; not owned.
py::object* fileobj;
// Set once a read from `fileobj` returns fewer bytes than requested.
bool eof_reached;
// Start of the byte buffer that the drain callback refills.
char* buffer;
// Size of `buffer` in bytes; the drain rejects a file position beyond it.
uint64_t buffer_size;
};
// Private state for writing to a Python file-like object.
// NOTE(review): its consumer is not visible in this part of the file; the
// field notes below are inferred from the types only — confirm.
struct FileObjOutputPriv {
// libsox output handle.
sox_format_t* sf;
// Python file-like object that receives the data; not owned.
py::object* fileobj;
// Pointer to the buffer pointer (presumably so the buffer can be replaced).
char** buffer;
// Pointer to the buffer size that goes with `buffer`.
size_t* buffer_size;
};
/// Callback function to feed byte string
/// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/sox.h#L1268-L1278
///
/// Drain callback of the file-object input effect. It first tops up the
/// in-memory buffer with fresh bytes from the Python file-like object and
/// then decodes the next batch of samples with `sox_read`.
///
/// @param effp  Effect instance; `effp->priv` holds a `FileObjInputPriv`.
/// @param obuf  Destination for the decoded samples.
/// @param osamp In: capacity value supplied by libsox. Out: number of samples
///              actually decoded into `obuf`.
/// @return `SOX_EOF` once the file object is exhausted and no sample could be
///         decoded, `SOX_SUCCESS` otherwise.
/// @throws std::runtime_error if the stream position cannot be queried or
///         restored, or if it lies beyond the end of the buffer.
auto fileobj_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp)
    -> int {
  auto priv = static_cast<FileObjInputPriv*>(effp->priv);
  auto sf = priv->sf;
  auto buffer = priv->buffer;

  // 1. Refresh the buffer
  //
  // NOTE:
  //   Since the underlying FILE* was opened with `fmemopen`, the only way
  //   libsox detects EOF is by reaching the end of the buffer (a null byte
  //   won't help). Therefore the content has to be aligned to the end of the
  //   buffer, otherwise libsox keeps reading beyond the intended length.
  //
  // Before:
  //
  //     |<-------consumed------>|<---remaining--->|
  //     |***********************|-----------------|
  //                             ^ ftell
  //
  // After:
  //
  //     |<-offset->|<---remaining--->|<-new data->|
  //     |**********|-----------------|++++++++++++|
  //                ^ ftell
  //
  // NOTE:
  //   Do not use `sf->tell_off` here. Presumably, `tell_off` and `fseek` are
  //   supposed to be in sync, but there are cases (Vorbis) where they are not
  //   and `tell_off` has a seemingly uninitialized value, which leads
  //   num_remain to be negative and causes a segmentation fault in `memmove`.
  const auto tell = ftell((FILE*)sf->fp);
  if (tell < 0) {
    throw std::runtime_error("Internal Error: ftell failed.");
  }
  const auto num_consumed = static_cast<size_t>(tell);
  if (num_consumed > priv->buffer_size) {
    throw std::runtime_error("Internal Error: buffer overrun.");
  }
  const auto num_remain = priv->buffer_size - num_consumed;

  // 1.1. Fetch the data to see if there is data to fill the buffer
  size_t num_refill = 0;
  std::string chunk(num_consumed, '\0');
  if (num_consumed && !priv->eof_reached) {
    num_refill = read_fileobj(
        priv->fileobj, num_consumed, const_cast<char*>(chunk.data()));
    if (num_refill < num_consumed) {
      priv->eof_reached = true;
    }
  }
  const auto offset = num_consumed - num_refill;

  // 1.2. Move the unconsumed data towards the beginning of buffer.
  if (num_remain) {
    auto src = static_cast<void*>(buffer + num_consumed);
    auto dst = static_cast<void*>(buffer + offset);
    memmove(dst, src, num_remain);
  }

  // 1.3. Refill the remaining buffer.
  if (num_refill) {
    auto src = static_cast<void*>(const_cast<char*>(chunk.c_str()));
    auto dst = buffer + offset + num_remain;
    memcpy(dst, src, num_refill);
  }

  // 1.4. Set the file pointer to the new offset.
  // A failed seek would leave the decoder reading from a stale position, so
  // it is reported instead of being ignored.
  sf->tell_off = offset;
  if (fseek((FILE*)sf->fp, static_cast<long>(offset), SEEK_SET) != 0) {
    throw std::runtime_error("Internal Error: fseek failed.");
  }

  // 2. Perform decoding operation
  // The following part is practically same as "input" effect
  // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/input.c#L30-L48

  // At this point, osamp represents the buffer size in bytes,
  // but sox_read expects the maximum number of samples ready to read.
  // Normally, this is fine, but in case when the samples are not 4-byte
  // aligned, (e.g. sample is 24bits), the resulting signal is not correct.
  // https://github.com/pytorch/audio/issues/2083
  //
  // Encodings that use fewer than 8 bits per sample yield a quotient of zero;
  // the scaling is skipped for them to avoid dividing by zero.
  const auto bytes_per_sample = sf->encoding.bits_per_sample / 8;
  if (bytes_per_sample > 0) {
    *osamp /= bytes_per_sample;
  }

  // Ensure that it's a multiple of the number of channels
  *osamp -= *osamp % effp->out_signal.channels;

  // Read up to *osamp samples into obuf;
  // store the actual number read back to *osamp
  *osamp = sox_read(sf, obuf, *osamp);

  // Decoding is finished when fileobject is exhausted and sox can no longer
  // decode a sample.
  return (priv->eof_reached && !*osamp) ? SOX_EOF : SOX_SUCCESS;
}
/// Flow callback of the file-object output effect. It encodes the incoming
/// samples with `sox_write`, forwards the bytes produced so far to the
/// Python file-like object and rewinds the in-memory stream.
///
/// @param effp  Effect instance; `effp->priv` holds a `FileObjOutputPriv`.
/// @param ibuf  Samples to encode.
/// @param obuf  Unused; this effect produces no downstream samples.
/// @param isamp Number of samples available in `ibuf`.
/// @param osamp Always set to 0.
/// @return `SOX_SUCCESS`, or `SOX_EOF` when fewer samples than requested were
///         written without an error being recorded on the format handle.
/// @throws std::runtime_error if the stream position cannot be queried, or
///         if libsox recorded an error while writing.
auto fileobj_output_flow(
    sox_effect_t* effp,
    sox_sample_t const* ibuf,
    sox_sample_t* obuf LSX_UNUSED,
    size_t* isamp,
    size_t* osamp) -> int {
  *osamp = 0;
  if (*isamp) {
    auto priv = static_cast<FileObjOutputPriv*>(effp->priv);
    auto sf = priv->sf;
    auto fp = static_cast<FILE*>(sf->fp);
    auto fileobj = priv->fileobj;
    auto buffer = priv->buffer;

    // Encode chunk
    auto num_samples_written = sox_write(sf, ibuf, *isamp);
    fflush(fp);

    // The stream position is the number of encoded bytes in the buffer.
    // A negative value signals failure and must not be used as a length.
    const auto num_encoded = ftell(fp);
    if (num_encoded < 0) {
      throw std::runtime_error("Internal Error: ftell failed.");
    }

    // Copy the encoded chunk to python object.
    fileobj->attr("write")(
        py::bytes(*buffer, static_cast<size_t>(num_encoded)));

    // Reset FILE*
    sf->tell_off = 0;
    fseek(fp, 0, SEEK_SET);

    if (num_samples_written != *isamp) {
      if (sf->sox_errno) {
        std::ostringstream stream;
        stream << sf->sox_errstr << " " << sox_strerror(sf->sox_errno) << " "
               << sf->filename;
        throw std::runtime_error(stream.str());
      }
      return SOX_EOF;
    }
  }
  return SOX_SUCCESS;
}
/// Returns the process-wide handler describing the file-object input effect.
/// Only the name, flags, drain callback and private-state size are set; every
/// other member stays zero/null.
auto get_fileobj_input_handler() -> sox_effect_handler_t* {
  static sox_effect_handler_t handler = [] {
    sox_effect_handler_t h{};  // value-initialised: all members zero/null
    h.name = "input_fileobj_object";
    h.flags = SOX_EFF_MCHAN;
    h.drain = fileobj_input_drain;
    h.priv_size = sizeof(FileObjInputPriv);
    return h;
  }();
  return &handler;
}
/// Returns the process-wide handler describing the file-object output effect.
/// Only the name, flags, flow callback and private-state size are set; every
/// other member stays zero/null.
auto get_fileobj_output_handler() -> sox_effect_handler_t* {
  static sox_effect_handler_t handler = [] {
    sox_effect_handler_t h{};  // value-initialised: all members zero/null
    h.name = "output_fileobj_object";
    h.flags = SOX_EFF_MCHAN;
    h.flow = fileobj_output_flow;
    h.priv_size = sizeof(FileObjOutputPriv);
    return h;
  }();
  return &handler;
}
} // namespace
/// Appends the file-object input effect to the chain.
///
/// The input and intermediate signal descriptions are taken from `sf`, and
/// the effect's private state is pointed at the decoder handle, the Python
/// file object and the memory buffer.
///
/// @throws std::runtime_error if libsox refuses to add the effect.
void SoxEffectsChainPyBind::addInputFileObj(
    sox_format_t* sf,
    char* buffer,
    uint64_t buffer_size,
    py::object* fileobj) {
  in_sig_ = sf->signal;
  interm_sig_ = in_sig_;

  SoxEffect effect(sox_create_effect(get_fileobj_input_handler()));

  // Fill the effect's private state in one go (field order: sf, fileobj,
  // eof_reached, buffer, buffer_size).
  auto* state = static_cast<FileObjInputPriv*>(effect->priv);
  *state = FileObjInputPriv{sf, fileobj, false, buffer, buffer_size};

  const bool added =
      sox_add_effect(sec_, effect, &interm_sig_, &in_sig_) == SOX_SUCCESS;
  if (!added) {
    throw std::runtime_error(
        "Internal Error: Failed to add effect: input fileobj");
  }
}
/// Appends the file-object output effect to the chain.
///
/// The output signal description is taken from `sf`, and the effect's private
/// state is pointed at the encoder handle, the Python file object and the
/// buffer pointer/size pair.
///
/// @throws std::runtime_error if libsox refuses to add the effect.
void SoxEffectsChainPyBind::addOutputFileObj(
    sox_format_t* sf,
    char** buffer,
    size_t* buffer_size,
    py::object* fileobj) {
  out_sig_ = sf->signal;

  SoxEffect effect(sox_create_effect(get_fileobj_output_handler()));

  // Fill the effect's private state in one go (field order: sf, fileobj,
  // buffer, buffer_size).
  auto* state = static_cast<FileObjOutputPriv*>(effect->priv);
  *state = FileObjOutputPriv{sf, fileobj, buffer, buffer_size};

  const bool added =
      sox_add_effect(sec_, effect, &interm_sig_, &out_sig_) == SOX_SUCCESS;
  if (!added) {
    throw std::runtime_error(
        "Internal Error: Failed to add effect: output fileobj");
  }
}
} // namespace paddleaudio::sox_effects_chain
// the code is from https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/effects_chain.h with modification.
#pragma once
#include <sox.h>
#include "paddleaudio/src/pybind/sox/utils.h"
namespace paddleaudio::sox_effects_chain {
// Helper struct to safely close sox_effect_t* pointer returned by
// sox_create_effect
//
// The wrapper can be neither copied nor moved; all four operations are
// deleted below.
struct SoxEffect {
// Wraps the given effect pointer.
explicit SoxEffect(sox_effect_t* se) noexcept;
SoxEffect(const SoxEffect& other) = delete;
// NOTE(review): takes `const SoxEffect&&`; a move constructor is normally
// declared with `SoxEffect&&` -- harmless here since it is deleted.
SoxEffect(const SoxEffect&& other) = delete;
auto operator=(const SoxEffect& other) -> SoxEffect& = delete;
auto operator=(SoxEffect&& other) -> SoxEffect& = delete;
// Defined out of line; presumably releases the wrapped effect -- confirm in
// the implementation file.
~SoxEffect();
// Implicit conversion so the wrapper can be passed where libsox expects a
// raw sox_effect_t* (e.g. sox_add_effect).
operator sox_effect_t*() const;
// Member access on the wrapped effect (e.g. `e->priv`).
auto operator->() noexcept -> sox_effect_t*;
private:
// The wrapped effect pointer.
sox_effect_t* se_;
};
// Helper struct to safely close sox_effects_chain_t with handy methods
//
// The chain can be neither copied nor moved; all four operations are deleted
// below. Most member functions are defined in the implementation file.
class SoxEffectsChain {
// Encoding descriptions supplied at construction time.
const sox_encodinginfo_t in_enc_;
const sox_encodinginfo_t out_enc_;
protected:
// Signal description of the chain's input.
sox_signalinfo_t in_sig_;
// Signal description passed to sox_add_effect as each effect is appended;
// getOutputNumChannels/getOutputSampleRate read from it.
sox_signalinfo_t interm_sig_;
// Signal description of the chain's output.
sox_signalinfo_t out_sig_;
// The underlying libsox effects chain.
sox_effects_chain_t* sec_;
public:
explicit SoxEffectsChain(
sox_encodinginfo_t input_encoding,
sox_encodinginfo_t output_encoding);
SoxEffectsChain(const SoxEffectsChain& other) = delete;
// NOTE(review): takes `const SoxEffectsChain&&`; harmless since deleted.
SoxEffectsChain(const SoxEffectsChain&& other) = delete;
SoxEffectsChain& operator=(const SoxEffectsChain& other) = delete;
SoxEffectsChain& operator=(SoxEffectsChain&& other) = delete;
~SoxEffectsChain();
// Runs the chain (defined out of line).
void run();
// Input stage fed from an in-memory array.
void addInputTensor(
py::array* waveform,
int64_t sample_rate,
bool channels_first);
// Input stage fed from an opened sox format handle.
void addInputFile(sox_format_t* sf);
// Output stage collecting samples into a vector.
void addOutputBuffer(std::vector<sox_sample_t>* output_buffer);
// Output stage writing to an opened sox format handle.
void addOutputFile(sox_format_t* sf);
// Appends a sox effect given as {name, arg1, arg2, ...}.
void addEffect(const std::vector<std::string> effect);
// Channel count / sample rate taken from `interm_sig_`.
int64_t getOutputNumChannels();
int64_t getOutputSampleRate();
};
// Extension of SoxEffectsChain whose input/output stages exchange encoded
// bytes with Python file-like objects.
class SoxEffectsChainPyBind : public SoxEffectsChain {
// Inherit the base-class constructors.
using SoxEffectsChain::SoxEffectsChain;
public:
// Adds an input stage that decodes from `buffer` (of `buffer_size` bytes)
// and refills it from `fileobj`.
void addInputFileObj(
sox_format_t* sf,
char* buffer,
uint64_t buffer_size,
py::object* fileobj);
// Adds an output stage that encodes through `sf` and forwards the bytes
// found at `*buffer` to `fileobj`.
void addOutputFileObj(
sox_format_t* sf,
char** buffer,
size_t* buffer_size,
py::object* fileobj);
};
} // namespace paddleaudio::sox_effects_chain
// the code is from https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/io.cpp with modification.
#include "paddleaudio/src/pybind/sox/io.h"
#include "paddleaudio/src/pybind/sox/effects.h"
#include "paddleaudio/src/pybind/sox/types.h"
#include "paddleaudio/src/pybind/sox/effects_chain.h"
#include "paddleaudio/src/pybind/sox/utils.h"
#include "paddleaudio/src/optional/optional.hpp"
using namespace paddleaudio::sox_utils;
namespace paddleaudio {
namespace sox_io {
/// Opens the audio file at `path` and reports its basic properties.
///
/// @param path   Location of the audio file.
/// @param format Optional file type passed to libsox; when absent libsox is
///               given a null file type.
/// @return (sample rate, frames per channel, channel count, bits per sample,
///         encoding name).
auto get_info_file(const std::string &path,
                   const tl::optional<std::string> &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
  const char *filetype = format.has_value() ? format.value().c_str() : nullptr;

  SoxFormat sf(sox_open_read(path.data(),
                             /*signal=*/nullptr,
                             /*encoding=*/nullptr,
                             /*filetype=*/filetype));
  validate_input_file(sf, path);

  const auto sample_rate = static_cast<int64_t>(sf->signal.rate);
  // `signal.length` counts samples over all channels.
  const auto num_frames =
      static_cast<int64_t>(sf->signal.length / sf->signal.channels);
  const auto num_channels = static_cast<int64_t>(sf->signal.channels);
  const auto bits_per_sample =
      static_cast<int64_t>(sf->encoding.bits_per_sample);

  return std::make_tuple(sample_rate,
                         num_frames,
                         num_channels,
                         bits_per_sample,
                         get_encoding(sf->encoding.encoding));
}
/// Builds the sox effect list that restricts loading to a frame range.
///
/// @param frame_offset First frame to keep; defaults to 0 and must not be
///                     negative.
/// @param num_frames   Number of frames to keep; defaults to -1 (keep
///                     everything) and must otherwise be positive.
/// @return An empty list when nothing has to be trimmed, otherwise a single
///         "trim" effect whose arguments are expressed in samples ("<n>s").
/// @throws std::runtime_error on an invalid offset or frame count.
std::vector<std::vector<std::string>> get_effects(
    const tl::optional<int64_t>& frame_offset,
    const tl::optional<int64_t>& num_frames) {
  const auto start = frame_offset.value_or(0);
  if (start < 0) {
    throw std::runtime_error(
        "Invalid argument: frame_offset must be non-negative.");
  }
  const auto count = num_frames.value_or(-1);
  if (count == 0 || count < -1) {
    throw std::runtime_error(
        "Invalid argument: num_frames must be -1 or greater than 0.");
  }

  // Formats "<prefix><value>s", the sample-count notation of sox's trim.
  const auto in_samples = [](const char* prefix, int64_t value) {
    std::ostringstream os;
    os << prefix << value << "s";
    return os.str();
  };

  std::vector<std::vector<std::string>> effects;
  if (count == -1) {
    // No length limit: trimming is only needed for a non-zero start.
    if (start != 0) {
      effects.push_back({"trim", in_samples("", start)});
    }
  } else {
    effects.push_back({"trim", in_samples("", start), in_samples("+", count)});
  }
  return effects;
}
/// Reads the beginning of a Python file-like object and reports the basic
/// properties of the audio it contains.
///
/// @param fileobj Python file-like object to read from.
/// @param format  Optional file type passed to libsox; when absent libsox is
///                given a null file type.
/// @return (sample rate, frames per channel, channel count, bits per sample,
///         encoding name).
auto get_info_fileobj(py::object fileobj,
                      const tl::optional<std::string> &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
  // Read at least 4096 bytes, or more when the sox buffer size is larger.
  const int64_t kMinCapacityInBytes = 4096;
  const auto sox_bufsiz = get_buffer_size();
  const auto capacity =
      (sox_bufsiz > kMinCapacityInBytes) ? sox_bufsiz : kMinCapacityInBytes;

  std::string header(capacity, '\0');
  auto *data = const_cast<char *>(header.data());
  auto num_read = read_fileobj(&fileobj, capacity, data);

  // If the file is shorter than 256, then libsox cannot read the header.
  auto mem_size = (num_read > 256) ? num_read : 256;

  const char *filetype = format.has_value() ? format.value().c_str() : nullptr;
  SoxFormat sf(sox_open_mem_read(data,
                                 mem_size,
                                 /*signal=*/nullptr,
                                 /*encoding=*/nullptr,
                                 /*filetype=*/filetype));

  // In case of streamed data, length can be 0
  validate_input_memfile(sf);

  const auto sample_rate = static_cast<int64_t>(sf->signal.rate);
  const auto num_frames =
      static_cast<int64_t>(sf->signal.length / sf->signal.channels);
  const auto num_channels = static_cast<int64_t>(sf->signal.channels);
  const auto bits_per_sample =
      static_cast<int64_t>(sf->encoding.bits_per_sample);

  return std::make_tuple(sample_rate,
                         num_frames,
                         num_channels,
                         bits_per_sample,
                         get_encoding(sf->encoding.encoding));
}
/// Loads audio from a Python file-like object, optionally restricted to a
/// frame range.
///
/// The range is turned into a "trim" effect by `get_effects`, and decoding is
/// delegated to `sox_effects::apply_effects_fileobj`.
///
/// @param fileobj        Python file-like object to read from.
/// @param frame_offset   First frame to load (see `get_effects`).
/// @param num_frames     Number of frames to load (see `get_effects`).
/// @param normalize      Forwarded unchanged.
/// @param channels_first Forwarded unchanged.
/// @param format         Optional file type, forwarded unchanged.
/// @return Whatever `apply_effects_fileobj` returns.
/// @throws std::runtime_error for an invalid frame range.
tl::optional<std::tuple<py::array, int64_t>> load_audio_fileobj(
    py::object fileobj,
    const tl::optional<int64_t>& frame_offset,
    const tl::optional<int64_t>& num_frames,
    tl::optional<bool> normalize,
    tl::optional<bool> channels_first,
    const tl::optional<std::string>& format) {
  auto effects = get_effects(frame_offset, num_frames);
  // `format` is a const reference, so it is passed as-is: wrapping it in
  // std::move could never move from it and only obscured that fact.
  return paddleaudio::sox_effects::apply_effects_fileobj(
      std::move(fileobj), effects, normalize, channels_first, format);
}
/// Loads audio from the file at `path`, optionally restricted to a frame
/// range.
///
/// The range is turned into a "trim" effect by `get_effects`; the remaining
/// arguments are forwarded unchanged to `sox_effects::apply_effects_file`,
/// whose result is returned.
tl::optional<std::tuple<py::array, int64_t>> load_audio_file(
    const std::string& path,
    const tl::optional<int64_t>& frame_offset,
    const tl::optional<int64_t>& num_frames,
    tl::optional<bool> normalize,
    tl::optional<bool> channels_first,
    const tl::optional<std::string>& format) {
  namespace fx = paddleaudio::sox_effects;

  auto trim_chain = get_effects(frame_offset, num_frames);
  return fx::apply_effects_file(
      path, trim_chain, normalize, channels_first, format);
}
/// Encodes `tensor` and writes it to the file at `path`.
///
/// @param path            Destination file.
/// @param tensor          Audio samples; validated by
///                        `validate_input_tensor`.
/// @param sample_rate     Sample rate of `tensor`.
/// @param channels_first  Selects which tensor dimension holds the channels
///                        (dimension 0 when true, dimension 1 otherwise).
/// @param compression     Optional compression setting for the encoder.
/// @param format          Optional file type; derived from `path` when
///                        absent.
/// @param encoding        Optional encoding name.
/// @param bits_per_sample Optional bit depth.
/// @throws std::runtime_error if the format's channel or sample-rate
///         constraint is violated, or if the file cannot be opened.
void save_audio_file(const std::string& path,
                     py::array tensor,
                     int64_t sample_rate,
                     bool channels_first,
                     tl::optional<double> compression,
                     tl::optional<std::string> format,
                     tl::optional<std::string> encoding,
                     tl::optional<int64_t> bits_per_sample) {
  validate_input_tensor(tensor);

  const auto filetype = [&]() {
    if (format.has_value()) return format.value();
    return get_filetype(path);
  }();

  // Format-specific constraints. These are reported with exceptions rather
  // than `assert`, which is compiled out when NDEBUG is defined and would
  // leave release builds without any validation.
  if (filetype == "amr-nb") {
    const auto num_channels = tensor.shape(channels_first ? 0 : 1);
    if (num_channels != 1) {
      throw std::runtime_error(
          "amr-nb format only supports single channel audio.");
    }
  } else if (filetype == "htk") {
    const auto num_channels = tensor.shape(channels_first ? 0 : 1);
    if (num_channels != 1) {
      throw std::runtime_error(
          "htk format only supports single channel audio.");
    }
  } else if (filetype == "gsm") {
    const auto num_channels = tensor.shape(channels_first ? 0 : 1);
    if (num_channels != 1) {
      throw std::runtime_error(
          "gsm format only supports single channel audio.");
    }
    if (sample_rate != 8000) {
      throw std::runtime_error(
          "gsm format only supports a sampling rate of 8kHz.");
    }
  }

  const auto signal_info =
      get_signalinfo(&tensor, sample_rate, filetype, channels_first);
  const auto encoding_info = get_encodinginfo_for_save(
      filetype, tensor.dtype(), compression, encoding, bits_per_sample);

  SoxFormat sf(sox_open_write(path.c_str(),
                              &signal_info,
                              &encoding_info,
                              /*filetype=*/filetype.c_str(),
                              /*oob=*/nullptr,
                              /*overwrite_permitted=*/nullptr));

  if (static_cast<sox_format_t*>(sf) == nullptr) {
    throw std::runtime_error(
        "Error saving audio file: failed to open file " + path);
  }

  // Tensor -> encoder pipeline.
  paddleaudio::sox_effects_chain::SoxEffectsChain chain(
      /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()),
      /*output_encoding=*/sf->encoding);
  chain.addInputTensor(&tensor, sample_rate, channels_first);
  chain.addOutputFile(sf);
  chain.run();
}
namespace {
// Scope guard for the buffer used by save_audio_fileobj: whatever `ptr`
// points to when the guard goes out of scope is handed to free().
// The guard starts out empty and can be neither copied nor moved.
struct AutoReleaseBuffer {
  char* ptr = nullptr;
  size_t size = 0;

  AutoReleaseBuffer() = default;
  AutoReleaseBuffer(const AutoReleaseBuffer& other) = delete;
  AutoReleaseBuffer(AutoReleaseBuffer&& other) = delete;
  auto operator=(const AutoReleaseBuffer& other) -> AutoReleaseBuffer& = delete;
  auto operator=(AutoReleaseBuffer&& other) -> AutoReleaseBuffer& = delete;

  // free() accepts a null pointer, so no explicit check is required.
  ~AutoReleaseBuffer() { free(ptr); }
};
} // namespace
/// Encodes `tensor` and writes the result to a Python file-like object.
///
/// The audio is encoded into a memory stream; encoded chunks are forwarded to
/// `fileobj.write` while the chain runs, and whatever the buffer holds after
/// the encoder is closed is written at the end.
///
/// @param fileobj         Python object providing `write`.
/// @param tensor          Audio samples.
/// @param sample_rate     Sample rate of `tensor`.
/// @param channels_first  Selects which tensor dimension holds the channels
///                        (dimension 0 when true, dimension 1 otherwise).
/// @param compression     Optional compression setting for the encoder.
/// @param format          File type; mandatory for file objects.
/// @param encoding        Optional encoding name.
/// @param bits_per_sample Optional bit depth.
/// @throws std::runtime_error if `format` is missing, a format constraint is
///         violated, or the memory stream cannot be opened.
void save_audio_fileobj(
    py::object fileobj,
    py::array tensor,
    int64_t sample_rate,
    bool channels_first,
    tl::optional<double> compression,
    tl::optional<std::string> format,
    tl::optional<std::string> encoding,
    tl::optional<int64_t> bits_per_sample) {
  if (!format.has_value()) {
    throw std::runtime_error(
        "`format` is required when saving to file object.");
  }
  const auto filetype = format.value();

  // Throws `message` unless the tensor holds exactly one channel.
  const auto require_mono = [&](const char* message) {
    if (tensor.shape(channels_first ? 0 : 1) != 1) {
      throw std::runtime_error(message);
    }
  };

  if (filetype == "amr-nb") {
    require_mono("amr-nb format only supports single channel audio.");
  } else if (filetype == "htk") {
    require_mono("htk format only supports single channel audio.");
  } else if (filetype == "gsm") {
    require_mono("gsm format only supports single channel audio.");
    if (sample_rate != 8000) {
      throw std::runtime_error(
          "gsm format only supports a sampling rate of 8kHz.");
    }
  }

  const auto signal_info =
      get_signalinfo(&tensor, sample_rate, filetype, channels_first);
  const auto encoding_info = get_encodinginfo_for_save(
      filetype,
      tensor.dtype(),
      compression,
      std::move(encoding),
      bits_per_sample);

  // Memory stream that receives the encoded bytes; released on scope exit.
  AutoReleaseBuffer encoded;

  SoxFormat sf(sox_open_memstream_write(
      &encoded.ptr,
      &encoded.size,
      &signal_info,
      &encoding_info,
      filetype.c_str(),
      /*oob=*/nullptr));

  if (static_cast<sox_format_t*>(sf) == nullptr) {
    throw std::runtime_error(
        "Error saving audio file: failed to open memory stream.");
  }

  paddleaudio::sox_effects_chain::SoxEffectsChainPyBind chain(
      /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()),
      /*output_encoding=*/sf->encoding);
  chain.addInputTensor(&tensor, sample_rate, channels_first);
  chain.addOutputFileObj(sf, &encoded.ptr, &encoded.size, &fileobj);
  chain.run();

  // Closing the sox_format_t is necessary for flushing the last chunk to the
  // buffer
  sf.close();

  fileobj.attr("write")(py::bytes(encoded.ptr, encoded.size));
}
} // namespace sox_io
} // namespace paddleaudio
// the code is from https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/io.h with modification.
#pragma once
#include "paddleaudio/src/pybind/sox/utils.h"
namespace py = pybind11;
namespace paddleaudio {
namespace sox_io {
// Returns (sample rate, frames per channel, channel count, bits per sample,
// encoding name) for the audio file at `path`.
auto get_info_file(const std::string &path,
const tl::optional<std::string> &format)
-> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;
// Same as get_info_file, but reads the header from a Python file-like object.
auto get_info_fileobj(py::object fileobj,
const tl::optional<std::string> &format)
-> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;
// Loads audio from a Python file-like object, optionally restricted to the
// frame range described by `frame_offset` / `num_frames`.
tl::optional<std::tuple<py::array, int64_t>> load_audio_fileobj(
py::object fileobj,
const tl::optional<int64_t>& frame_offset,
const tl::optional<int64_t>& num_frames,
tl::optional<bool> normalize,
tl::optional<bool> channels_first,
const tl::optional<std::string>& format);
// Encodes `tensor` and writes it to a Python file-like object; `format` must
// be provided.
void save_audio_fileobj(
py::object fileobj,
py::array tensor,
int64_t sample_rate,
bool channels_first,
tl::optional<double> compression,
tl::optional<std::string> format,
tl::optional<std::string> encoding,
tl::optional<int64_t> bits_per_sample);
// Builds the sox "trim" effect list for a frame range: `frame_offset`
// defaults to 0, `num_frames` defaults to -1 (no limit).
auto get_effects(const tl::optional<int64_t>& frame_offset,
const tl::optional<int64_t>& num_frames)
-> std::vector<std::vector<std::string>>;
// Loads audio from the file at `path`, optionally restricted to the frame
// range described by `frame_offset` / `num_frames`.
tl::optional<std::tuple<py::array, int64_t>> load_audio_file(
const std::string& path,
const tl::optional<int64_t>& frame_offset,
const tl::optional<int64_t>& num_frames,
tl::optional<bool> normalize,
tl::optional<bool> channels_first,
const tl::optional<std::string>& format);
// Encodes `tensor` and writes it to the file at `path`; the file type is
// derived from `path` when `format` is absent.
void save_audio_file(const std::string& path,
py::array tensor,
int64_t sample_rate,
bool channels_first,
tl::optional<double> compression,
tl::optional<std::string> format,
tl::optional<std::string> encoding,
tl::optional<int64_t> bits_per_sample);
} // namespace sox_io
} // namespace paddleaudio
//code is from: https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/types.cpp
#include "paddleaudio/src/pybind/sox/types.h"
#include <ostream>
#include <sstream>
namespace paddleaudio {
namespace sox_utils {
/// Maps a file-type name to the corresponding `Format` value.
/// Both "ogg" and "vorbis" map to `Format::VORBIS`.
///
/// @throws std::runtime_error for a name that is not recognised.
Format get_format_from_string(const std::string& format) {
  struct Entry {
    const char* name;
    Format value;
  };
  static const Entry kKnownFormats[] = {
      {"wav", Format::WAV},
      {"mp3", Format::MP3},
      {"flac", Format::FLAC},
      {"ogg", Format::VORBIS},
      {"vorbis", Format::VORBIS},
      {"amr-nb", Format::AMR_NB},
      {"amr-wb", Format::AMR_WB},
      {"amb", Format::AMB},
      {"sph", Format::SPHERE},
      {"htk", Format::HTK},
      {"gsm", Format::GSM},
  };

  for (const auto& entry : kKnownFormats) {
    if (format == entry.name) {
      return entry.value;
    }
  }

  std::ostringstream stream;
  stream << "Internal Error: unexpected format value: " << format;
  throw std::runtime_error(stream.str());
}
/// Returns the textual name of an `Encoding` value.
///
/// @throws std::runtime_error for a value without a name in the list below.
std::string to_string(Encoding v) {
  const char* label = nullptr;
  switch (v) {
    case Encoding::UNKNOWN:
      label = "UNKNOWN";
      break;
    case Encoding::PCM_SIGNED:
      label = "PCM_S";
      break;
    case Encoding::PCM_UNSIGNED:
      label = "PCM_U";
      break;
    case Encoding::PCM_FLOAT:
      label = "PCM_F";
      break;
    case Encoding::FLAC:
      label = "FLAC";
      break;
    case Encoding::ULAW:
      label = "ULAW";
      break;
    case Encoding::ALAW:
      label = "ALAW";
      break;
    case Encoding::MP3:
      label = "MP3";
      break;
    case Encoding::VORBIS:
      label = "VORBIS";
      break;
    case Encoding::AMR_WB:
      label = "AMR_WB";
      break;
    case Encoding::AMR_NB:
      label = "AMR_NB";
      break;
    case Encoding::OPUS:
      label = "OPUS";
      break;
    default:
      break;
  }
  if (label == nullptr) {
    throw std::runtime_error("Internal Error: unexpected encoding.");
  }
  return label;
}
/// Converts an optional encoding name into an `Encoding` value.
///
/// @return `Encoding::NOT_PROVIDED` when no name is given, otherwise the
///         value matching "PCM_S", "PCM_U", "PCM_F", "ULAW" or "ALAW".
/// @throws std::runtime_error for any other name.
Encoding get_encoding_from_option(const tl::optional<std::string> encoding) {
  if (!encoding.has_value()) {
    return Encoding::NOT_PROVIDED;
  }
  const std::string& requested = encoding.value();

  struct Choice {
    const char* name;
    Encoding value;
  };
  static const Choice kChoices[] = {
      {"PCM_S", Encoding::PCM_SIGNED},
      {"PCM_U", Encoding::PCM_UNSIGNED},
      {"PCM_F", Encoding::PCM_FLOAT},
      {"ULAW", Encoding::ULAW},
      {"ALAW", Encoding::ALAW},
  };
  for (const auto& choice : kChoices) {
    if (requested == choice.name) {
      return choice.value;
    }
  }

  std::ostringstream stream;
  stream << "Internal Error: unexpected encoding value: " << requested;
  throw std::runtime_error(stream.str());
}
/// Converts an optional bit depth into a `BitDepth` value.
///
/// @return `BitDepth::NOT_PROVIDED` when no depth is given, otherwise the
///         value matching 8, 16, 24, 32 or 64.
/// @throws std::runtime_error for any other depth.
BitDepth get_bit_depth_from_option(const tl::optional<int64_t> bit_depth) {
  if (!bit_depth.has_value()) {
    return BitDepth::NOT_PROVIDED;
  }
  const int64_t bits = bit_depth.value();

  if (bits == 8) return BitDepth::B8;
  if (bits == 16) return BitDepth::B16;
  if (bits == 24) return BitDepth::B24;
  if (bits == 32) return BitDepth::B32;
  if (bits == 64) return BitDepth::B64;

  std::ostringstream s;
  s << "Internal Error: unexpected bit depth value: " << bits;
  throw std::runtime_error(s.str());
}
/// Returns the textual name of a libsox encoding constant.
/// Encodings without an entry in the table are reported as "UNKNOWN".
std::string get_encoding(sox_encoding_t encoding) {
  struct Label {
    sox_encoding_t id;
    const char* text;
  };
  static const Label kLabels[] = {
      {SOX_ENCODING_UNKNOWN, "UNKNOWN"},
      {SOX_ENCODING_SIGN2, "PCM_S"},
      {SOX_ENCODING_UNSIGNED, "PCM_U"},
      {SOX_ENCODING_FLOAT, "PCM_F"},
      {SOX_ENCODING_FLAC, "FLAC"},
      {SOX_ENCODING_ULAW, "ULAW"},
      {SOX_ENCODING_ALAW, "ALAW"},
      {SOX_ENCODING_MP3, "MP3"},
      {SOX_ENCODING_VORBIS, "VORBIS"},
      {SOX_ENCODING_AMR_WB, "AMR_WB"},
      {SOX_ENCODING_AMR_NB, "AMR_NB"},
      {SOX_ENCODING_OPUS, "OPUS"},
      {SOX_ENCODING_GSM, "GSM"},
  };

  for (const auto& label : kLabels) {
    if (label.id == encoding) {
      return label.text;
    }
  }
  return "UNKNOWN";
}
} // namespace sox_utils
} // namespace paddleaudio
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
# Append -fvisibility=hidden to the directory-wide C++ flags, so symbols are
# hidden by default for the C++ code built from this directory and the
# subdirectories added below.
# NOTE(review): the CXX_VISIBILITY_PRESET target property may be a more
# targeted alternative -- confirm against the targets defined in sox/ and
# kaldi/ before changing.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
################################################################################
# sox
################################################################################
# The sox subdirectory is only configured when BUILD_SOX is enabled.
if (BUILD_SOX)
add_subdirectory(sox)
endif()
################################################################################
# kaldi
################################################################################
# The kaldi subdirectory is only configured when BUILD_KALDI is enabled.
if (BUILD_KALDI)
add_subdirectory(kaldi)
endif()
\ No newline at end of file
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
...@@ -11,3 +11,10 @@ ...@@ -11,3 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
__all__ = ['ParameterError']
# Raised to signal that a parameter failed validation. The class adds nothing
# to Exception beyond its own type and docstring.
class ParameterError(Exception):
    """Exception class for Parameter checking"""
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册