diff --git a/.mergify.yml b/.mergify.yml index 68b2481015a87a27817cb0aeb279114e7438378f..5cb1f4865d2090965f4ed60e249a31ed684ce363 100644 --- a/.mergify.yml +++ b/.mergify.yml @@ -52,7 +52,7 @@ pull_request_rules: add: ["T2S"] - name: "auto add label=Audio" conditions: - - files~=^paddleaudio/ + - files~=^paddlespeech/audio/ actions: label: add: ["Audio"] @@ -100,7 +100,7 @@ pull_request_rules: add: ["README"] - name: "auto add label=Documentation" conditions: - - files~=^(docs/|CHANGELOG.md|paddleaudio/CHANGELOG.md) + - files~=^(docs/|CHANGELOG.md) actions: label: add: ["Documentation"] diff --git a/audio/.gitignore b/audio/.gitignore deleted file mode 100644 index 1c930053d56ac426518959387705c79f8a394a7c..0000000000000000000000000000000000000000 --- a/audio/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -.eggs -*.wav diff --git a/audio/CHANGELOG.md b/audio/CHANGELOG.md deleted file mode 100644 index 925d77696845c0765dcfbc6bbc487d898887c804..0000000000000000000000000000000000000000 --- a/audio/CHANGELOG.md +++ /dev/null @@ -1,9 +0,0 @@ -# Changelog - -Date: 2022-3-15, Author: Xiaojie Chen. - - kaldi and librosa mfcc, fbank, spectrogram. - - unit test and benchmark. - -Date: 2022-2-25, Author: Hui Zhang. - - Refactor architecture. - - dtw distance and mcd style dtw. diff --git a/audio/README.md b/audio/README.md deleted file mode 100644 index 697c017394044e862883fe7f9589d00c77127868..0000000000000000000000000000000000000000 --- a/audio/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# PaddleAudio - -PaddleAudio is an audio library for PaddlePaddle. - -## Install - -`pip install .` diff --git a/audio/docs/Makefile b/audio/docs/Makefile deleted file mode 100644 index 69fe55ecfa9aade66e1412aef0ee7d04a9bcde86..0000000000000000000000000000000000000000 --- a/audio/docs/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line. 
-SPHINXOPTS = -SPHINXBUILD = sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/audio/docs/README.md b/audio/docs/README.md deleted file mode 100644 index 20626f52bfced5b52b8fa014a0a540ed69ece8a7..0000000000000000000000000000000000000000 --- a/audio/docs/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Build docs for PaddleAudio - -Execute the following steps in **current directory**. - -## 1. Install - -`pip install Sphinx sphinx_rtd_theme` - - -## 2. Generate API docs - -Generate API docs from doc string. - -`sphinx-apidoc -fMeT -o source ../paddleaudio ../paddleaudio/utils --templatedir source/_templates` - - -## 3. Build - -`sphinx-build source _html` - - -## 4. Preview - -Open `_html/index.html` for page preview. diff --git a/audio/docs/images/paddle.png b/audio/docs/images/paddle.png deleted file mode 100644 index bc1135abfab7aa48f29392da4bca614f688314af..0000000000000000000000000000000000000000 Binary files a/audio/docs/images/paddle.png and /dev/null differ diff --git a/audio/docs/make.bat b/audio/docs/make.bat deleted file mode 100644 index 543c6b13b473ff3c586d5d97ae418d267ee795c4..0000000000000000000000000000000000000000 --- a/audio/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% - -:end -popd diff --git a/audio/paddleaudio/utils/env.py b/audio/paddleaudio/utils/env.py deleted file mode 100644 index a2d14b89ef4da32187cf8b7349cd23c546602244..0000000000000000000000000000000000000000 --- a/audio/paddleaudio/utils/env.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -''' -This module is used to store environmental variables in PaddleAudio. -PPAUDIO_HOME --> the root directory for storing PaddleAudio related data. Default to ~/.paddleaudio. Users can change the -├ default value through the PPAUDIO_HOME environment variable. -├─ MODEL_HOME --> Store model files. -└─ DATA_HOME --> Store automatically downloaded datasets. 
-''' -import os - -__all__ = [ - 'USER_HOME', - 'PPAUDIO_HOME', - 'MODEL_HOME', - 'DATA_HOME', -] - - -def _get_user_home(): - return os.path.expanduser('~') - - -def _get_ppaudio_home(): - if 'PPAUDIO_HOME' in os.environ: - home_path = os.environ['PPAUDIO_HOME'] - if os.path.exists(home_path): - if os.path.isdir(home_path): - return home_path - else: - raise RuntimeError( - 'The environment variable PPAUDIO_HOME {} is not a directory.'. - format(home_path)) - else: - return home_path - return os.path.join(_get_user_home(), '.paddleaudio') - - -def _get_sub_home(directory): - home = os.path.join(_get_ppaudio_home(), directory) - if not os.path.exists(home): - os.makedirs(home) - return home - - -USER_HOME = _get_user_home() -PPAUDIO_HOME = _get_ppaudio_home() -MODEL_HOME = _get_sub_home('models') -DATA_HOME = _get_sub_home('datasets') diff --git a/audio/setup.py b/audio/setup.py deleted file mode 100644 index 80fe07b7a91791aafd5f671f9456899178d44531..0000000000000000000000000000000000000000 --- a/audio/setup.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import glob -import os - -import setuptools -from setuptools.command.install import install -from setuptools.command.test import test - -# set the version here -VERSION = '0.0.0' - - -# Inspired by the example at https://pytest.org/latest/goodpractises.html -class TestCommand(test): - def finalize_options(self): - test.finalize_options(self) - self.test_args = [] - self.test_suite = True - - def run(self): - self.run_benchmark() - super(TestCommand, self).run() - - def run_tests(self): - # Run nose ensuring that argv simulates running nosetests directly - import nose - nose.run_exit(argv=['nosetests', '-w', 'tests']) - - def run_benchmark(self): - for benchmark_item in glob.glob('tests/benchmark/*py'): - os.system(f'pytest {benchmark_item}') - - -class InstallCommand(install): - def run(self): - install.run(self) - - -def write_version_py(filename='paddleaudio/__init__.py'): - with open(filename, "a") as f: - f.write(f"__version__ = '{VERSION}'") - - -def remove_version_py(filename='paddleaudio/__init__.py'): - with open(filename, "r") as f: - lines = f.readlines() - with open(filename, "w") as f: - for line in lines: - if "__version__" not in line: - f.write(line) - - -remove_version_py() -write_version_py() - -setuptools.setup( - name="paddleaudio", - version=VERSION, - author="", - author_email="", - description="PaddleAudio, in development", - long_description="", - long_description_content_type="text/markdown", - url="", - packages=setuptools.find_packages(include=['paddleaudio*']), - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], - python_requires='>=3.6', - install_requires=[ - 'numpy >= 1.15.0', 'scipy >= 1.0.0', 'resampy >= 0.2.2', - 'soundfile >= 0.9.0', 'colorlog', 'pathos == 0.2.8' - ], - extras_require={ - 'test': [ - 'nose', 'librosa==0.8.1', 'soundfile==0.10.3.post1', - 'torchaudio==0.10.2', 'pytest-benchmark' - ], - }, - cmdclass={ - 'install': 
InstallCommand, - 'test': TestCommand, - }, ) - -remove_version_py() diff --git a/audio/tests/.gitkeep b/audio/tests/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/demos/audio_searching/README.md b/demos/audio_searching/README.md index e829d991aa9863259d20b07c9dc6af664eb8dc27..db38d14ed4d61d74833fcd81413d74af20fe473d 100644 --- a/demos/audio_searching/README.md +++ b/demos/audio_searching/README.md @@ -89,7 +89,7 @@ Then to start the system server, and it provides HTTP backend services. Then start the server with Fastapi. ```bash - export PYTHONPATH=$PYTHONPATH:./src:../../paddleaudio + export PYTHONPATH=$PYTHONPATH:./src python src/audio_search.py ``` diff --git a/demos/audio_searching/README_cn.md b/demos/audio_searching/README_cn.md index c13742af7a1613a089e1e14c069ec7a3340dd669..6d38b91f59f1edc181505fecefda036749fad350 100644 --- a/demos/audio_searching/README_cn.md +++ b/demos/audio_searching/README_cn.md @@ -91,7 +91,7 @@ ffce340b3790 minio/minio:RELEASE.2020-12-03T00-03-10Z "/usr/bin/docker-ent…" 启动用 Fastapi 构建的服务 ```bash - export PYTHONPATH=$PYTHONPATH:./src:../../paddleaudio + export PYTHONPATH=$PYTHONPATH:./src python src/audio_search.py ``` diff --git a/audio/docs/source/_static/custom.css b/docs/source/audio/_static/custom.css similarity index 100% rename from audio/docs/source/_static/custom.css rename to docs/source/audio/_static/custom.css diff --git a/audio/docs/source/_templates/module.rst_t b/docs/source/audio/_templates/module.rst_t similarity index 100% rename from audio/docs/source/_templates/module.rst_t rename to docs/source/audio/_templates/module.rst_t diff --git a/audio/docs/source/_templates/package.rst_t b/docs/source/audio/_templates/package.rst_t similarity index 100% rename from audio/docs/source/_templates/package.rst_t rename to docs/source/audio/_templates/package.rst_t diff --git a/audio/docs/source/_templates/toc.rst_t 
b/docs/source/audio/_templates/toc.rst_t similarity index 100% rename from audio/docs/source/_templates/toc.rst_t rename to docs/source/audio/_templates/toc.rst_t diff --git a/audio/docs/source/conf.py b/docs/source/audio/conf.py similarity index 100% rename from audio/docs/source/conf.py rename to docs/source/audio/conf.py diff --git a/audio/docs/source/index.rst b/docs/source/audio/index.rst similarity index 100% rename from audio/docs/source/index.rst rename to docs/source/audio/index.rst diff --git a/docs/source/cls/custom_dataset.md b/docs/source/cls/custom_dataset.md index aaf5943c55fe7c2b7df8c9da06724eef42ba9134..e39dcf12d94b8baf40bf58782b3fd11d3c133f4f 100644 --- a/docs/source/cls/custom_dataset.md +++ b/docs/source/cls/custom_dataset.md @@ -1,8 +1,8 @@ # Customize Dataset for Audio Classification -Following this tutorial you can customize your dataset for audio classification task by using `paddlespeech` and `paddleaudio`. +Following this tutorial you can customize your dataset for audio classification task by using `paddlespeech`. -A base class of classification dataset is `paddleaudio.dataset.AudioClassificationDataset`. To customize your dataset you should write a dataset class derived from `AudioClassificationDataset`. +A base class of classification dataset is `paddlespeech.audio.datasets.dataset.AudioClassificationDataset`. To customize your dataset you should write a dataset class derived from `AudioClassificationDataset`. Assuming you have some wave files that stored in your own directory. You should prepare a meta file with the information of filepaths and labels. For example the absolute path of it is `/PATH/TO/META_FILE.txt`: ``` @@ -14,7 +14,7 @@ Assuming you have some wave files that stored in your own directory. 
You should Here is an example to build your custom dataset in `custom_dataset.py`: ```python -from paddleaudio.datasets.dataset import AudioClassificationDataset +from paddlespeech.audio.datasets.dataset import AudioClassificationDataset class CustomDataset(AudioClassificationDataset): meta_file = '/PATH/TO/META_FILE.txt' @@ -48,7 +48,7 @@ class CustomDataset(AudioClassificationDataset): Then you can build dataset and data loader from `CustomDataset`: ```python import paddle -from paddleaudio.features import LogMelSpectrogram +from paddlespeech.audio.features import LogMelSpectrogram from custom_dataset import CustomDataset diff --git a/examples/esc50/cls0/conf/panns.yaml b/examples/esc50/cls0/conf/panns.yaml index 3a9d42aa5e52de31c6c461d471988bb2067e5cdf..1f0323f0d316aacb7697015082053f14b649a739 100644 --- a/examples/esc50/cls0/conf/panns.yaml +++ b/examples/esc50/cls0/conf/panns.yaml @@ -1,5 +1,5 @@ data: - dataset: 'paddleaudio.datasets:ESC50' + dataset: 'paddlespeech.audio.datasets:ESC50' num_classes: 50 train: mode: 'train' diff --git a/examples/hey_snips/kws0/conf/mdtc.yaml b/examples/hey_snips/kws0/conf/mdtc.yaml index 4bd0708ceb88c79cc8172fb5271879a858768e45..76e47bc7c4ba7604f68331bbaa5e2f26befe3ffd 100644 --- a/examples/hey_snips/kws0/conf/mdtc.yaml +++ b/examples/hey_snips/kws0/conf/mdtc.yaml @@ -2,7 +2,7 @@ ########################################### # Data # ########################################### -dataset: 'paddleaudio.datasets:HeySnips' +dataset: 'paddlespeech.audio.datasets:HeySnips' data_dir: '/PATH/TO/DATA/hey_snips_research_6k_en_train_eval_clean_ter' ############################################ diff --git a/examples/voxceleb/sv0/local/data_prepare.py b/examples/voxceleb/sv0/local/data_prepare.py index b4486b6f00cb91957dd0b27dffbc56dfba086df5..e5a5dff7bb96fa773546447bc5956153d9619bed 100644 --- a/examples/voxceleb/sv0/local/data_prepare.py +++ b/examples/voxceleb/sv0/local/data_prepare.py @@ -14,9 +14,9 @@ import argparse import paddle -from 
paddleaudio.datasets.voxceleb import VoxCeleb from yacs.config import CfgNode +from paddlespeech.audio.datasets.voxceleb import VoxCeleb from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.training.seeding import seed_everything diff --git a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py index 0d0163f159e518d152bce3366a3cc187181e4364..7ad9bd6ec699ad95fe7b864b32d0d6b989a73cff 100644 --- a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py @@ -21,9 +21,9 @@ import os from typing import List import tqdm -from paddleaudio import load as load_audio from yacs.config import CfgNode +from paddlespeech.audio import load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py index ffd0d212ddda7e31eeaf171f28f5096aaee5ea06..40adf53de6f611c1219f169c8e7bc7b899b219c0 100644 --- a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py @@ -22,9 +22,9 @@ import os import random import tqdm -from paddleaudio import load as load_audio from yacs.config import CfgNode +from paddlespeech.audio import load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/audio/paddleaudio/__init__.py b/paddlespeech/audio/__init__.py similarity index 100% rename from audio/paddleaudio/__init__.py rename to paddlespeech/audio/__init__.py diff --git a/audio/paddleaudio/backends/__init__.py b/paddlespeech/audio/backends/__init__.py similarity index 100% rename from 
audio/paddleaudio/backends/__init__.py rename to paddlespeech/audio/backends/__init__.py diff --git a/audio/paddleaudio/backends/soundfile_backend.py b/paddlespeech/audio/backends/soundfile_backend.py similarity index 100% rename from audio/paddleaudio/backends/soundfile_backend.py rename to paddlespeech/audio/backends/soundfile_backend.py diff --git a/audio/paddleaudio/backends/sox_backend.py b/paddlespeech/audio/backends/sox_backend.py similarity index 100% rename from audio/paddleaudio/backends/sox_backend.py rename to paddlespeech/audio/backends/sox_backend.py diff --git a/audio/paddleaudio/compliance/__init__.py b/paddlespeech/audio/compliance/__init__.py similarity index 100% rename from audio/paddleaudio/compliance/__init__.py rename to paddlespeech/audio/compliance/__init__.py diff --git a/audio/paddleaudio/compliance/kaldi.py b/paddlespeech/audio/compliance/kaldi.py similarity index 100% rename from audio/paddleaudio/compliance/kaldi.py rename to paddlespeech/audio/compliance/kaldi.py diff --git a/audio/paddleaudio/compliance/librosa.py b/paddlespeech/audio/compliance/librosa.py similarity index 100% rename from audio/paddleaudio/compliance/librosa.py rename to paddlespeech/audio/compliance/librosa.py diff --git a/audio/paddleaudio/datasets/__init__.py b/paddlespeech/audio/datasets/__init__.py similarity index 100% rename from audio/paddleaudio/datasets/__init__.py rename to paddlespeech/audio/datasets/__init__.py diff --git a/audio/paddleaudio/datasets/dataset.py b/paddlespeech/audio/datasets/dataset.py similarity index 100% rename from audio/paddleaudio/datasets/dataset.py rename to paddlespeech/audio/datasets/dataset.py diff --git a/audio/paddleaudio/datasets/esc50.py b/paddlespeech/audio/datasets/esc50.py similarity index 100% rename from audio/paddleaudio/datasets/esc50.py rename to paddlespeech/audio/datasets/esc50.py diff --git a/audio/paddleaudio/datasets/gtzan.py b/paddlespeech/audio/datasets/gtzan.py similarity index 100% rename from 
audio/paddleaudio/datasets/gtzan.py rename to paddlespeech/audio/datasets/gtzan.py diff --git a/audio/paddleaudio/datasets/hey_snips.py b/paddlespeech/audio/datasets/hey_snips.py similarity index 100% rename from audio/paddleaudio/datasets/hey_snips.py rename to paddlespeech/audio/datasets/hey_snips.py diff --git a/audio/paddleaudio/datasets/rirs_noises.py b/paddlespeech/audio/datasets/rirs_noises.py similarity index 100% rename from audio/paddleaudio/datasets/rirs_noises.py rename to paddlespeech/audio/datasets/rirs_noises.py diff --git a/audio/paddleaudio/datasets/tess.py b/paddlespeech/audio/datasets/tess.py similarity index 100% rename from audio/paddleaudio/datasets/tess.py rename to paddlespeech/audio/datasets/tess.py diff --git a/audio/paddleaudio/datasets/urban_sound.py b/paddlespeech/audio/datasets/urban_sound.py similarity index 100% rename from audio/paddleaudio/datasets/urban_sound.py rename to paddlespeech/audio/datasets/urban_sound.py diff --git a/audio/paddleaudio/datasets/voxceleb.py b/paddlespeech/audio/datasets/voxceleb.py similarity index 100% rename from audio/paddleaudio/datasets/voxceleb.py rename to paddlespeech/audio/datasets/voxceleb.py diff --git a/audio/paddleaudio/features/__init__.py b/paddlespeech/audio/features/__init__.py similarity index 100% rename from audio/paddleaudio/features/__init__.py rename to paddlespeech/audio/features/__init__.py diff --git a/audio/paddleaudio/features/layers.py b/paddlespeech/audio/features/layers.py similarity index 100% rename from audio/paddleaudio/features/layers.py rename to paddlespeech/audio/features/layers.py diff --git a/audio/paddleaudio/functional/__init__.py b/paddlespeech/audio/functional/__init__.py similarity index 100% rename from audio/paddleaudio/functional/__init__.py rename to paddlespeech/audio/functional/__init__.py diff --git a/audio/paddleaudio/functional/functional.py b/paddlespeech/audio/functional/functional.py similarity index 100% rename from 
audio/paddleaudio/functional/functional.py rename to paddlespeech/audio/functional/functional.py diff --git a/audio/paddleaudio/functional/window.py b/paddlespeech/audio/functional/window.py similarity index 100% rename from audio/paddleaudio/functional/window.py rename to paddlespeech/audio/functional/window.py diff --git a/audio/paddleaudio/io/__init__.py b/paddlespeech/audio/io/__init__.py similarity index 100% rename from audio/paddleaudio/io/__init__.py rename to paddlespeech/audio/io/__init__.py diff --git a/audio/paddleaudio/metric/__init__.py b/paddlespeech/audio/metric/__init__.py similarity index 100% rename from audio/paddleaudio/metric/__init__.py rename to paddlespeech/audio/metric/__init__.py diff --git a/audio/paddleaudio/metric/eer.py b/paddlespeech/audio/metric/eer.py similarity index 100% rename from audio/paddleaudio/metric/eer.py rename to paddlespeech/audio/metric/eer.py diff --git a/audio/paddleaudio/sox_effects/__init__.py b/paddlespeech/audio/sox_effects/__init__.py similarity index 100% rename from audio/paddleaudio/sox_effects/__init__.py rename to paddlespeech/audio/sox_effects/__init__.py diff --git a/audio/paddleaudio/utils/__init__.py b/paddlespeech/audio/utils/__init__.py similarity index 88% rename from audio/paddleaudio/utils/__init__.py rename to paddlespeech/audio/utils/__init__.py index afb9cedd889613c2942b196059c138abfa1cb733..742f9f8efdf50b5992712fa7f2d48b0a16902dd5 100644 --- a/audio/paddleaudio/utils/__init__.py +++ b/paddlespeech/audio/utils/__init__.py @@ -11,13 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from ...cli.utils import DATA_HOME +from ...cli.utils import MODEL_HOME from .download import decompress from .download import download_and_decompress from .download import load_state_dict_from_url -from .env import DATA_HOME -from .env import MODEL_HOME -from .env import PPAUDIO_HOME -from .env import USER_HOME from .error import ParameterError from .log import Logger from .log import logger diff --git a/audio/paddleaudio/utils/download.py b/paddlespeech/audio/utils/download.py similarity index 100% rename from audio/paddleaudio/utils/download.py rename to paddlespeech/audio/utils/download.py diff --git a/audio/paddleaudio/utils/error.py b/paddlespeech/audio/utils/error.py similarity index 100% rename from audio/paddleaudio/utils/error.py rename to paddlespeech/audio/utils/error.py diff --git a/audio/paddleaudio/utils/log.py b/paddlespeech/audio/utils/log.py similarity index 100% rename from audio/paddleaudio/utils/log.py rename to paddlespeech/audio/utils/log.py diff --git a/audio/paddleaudio/utils/numeric.py b/paddlespeech/audio/utils/numeric.py similarity index 100% rename from audio/paddleaudio/utils/numeric.py rename to paddlespeech/audio/utils/numeric.py diff --git a/audio/paddleaudio/utils/time.py b/paddlespeech/audio/utils/time.py similarity index 100% rename from audio/paddleaudio/utils/time.py rename to paddlespeech/audio/utils/time.py diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index d31379b8893192c3c114b3712d77175ee2301617..f4e8baea6af743f3450b29d1b3e5dc08cc2d80b3 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -27,6 +27,8 @@ from paddleaudio.features import LogMelSpectrogram from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper +from paddlespeech.audio import load +from paddlespeech.audio.features import LogMelSpectrogram __all__ = ['CLSExecutor'] diff --git a/paddlespeech/cli/utils.py b/paddlespeech/cli/utils.py index 
128767e627091dc636da2900e5e65b58bdd650ca..21c887e997c6e294431654b35b747bab44cffa78 100644 --- a/paddlespeech/cli/utils.py +++ b/paddlespeech/cli/utils.py @@ -24,11 +24,11 @@ from typing import Any from typing import Dict import paddle -import paddleaudio import requests import yaml from paddle.framework import load +import paddlespeech.audio from . import download from .entry import commands try: @@ -190,6 +190,7 @@ def _get_sub_home(directory): PPSPEECH_HOME = _get_paddlespcceh_home() MODEL_HOME = _get_sub_home('models') CONF_HOME = _get_sub_home('conf') +DATA_HOME = _get_sub_home('datasets') def _md5(text: str): @@ -281,7 +282,7 @@ def _note_one_stat(cls_name, params={}): if 'audio_file' in params: try: - _, sr = paddleaudio.load(params['audio_file']) + _, sr = paddlespeech.audio.load(params['audio_file']) except Exception: sr = -1 diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index d049ba7da2f95704a8cff06659fe4cb72ca6355b..c736a53e158b6d4200344de9728edf73404a1604 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -29,6 +29,8 @@ from yacs.config import CfgNode from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.modules.sid_model import SpeakerIdetification diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py index ee566ed4f8e29171a41364b8e86bb143fb570796..fe1c93fa891a1690cd6e2f3ea374fe8073c147b9 100644 --- a/paddlespeech/cls/exps/panns/deploy/predict.py +++ b/paddlespeech/cls/exps/panns/deploy/predict.py @@ -16,11 +16,12 @@ import os import numpy as np from paddle import inference -from paddleaudio.backends import load as load_audio -from paddleaudio.datasets import ESC50 -from 
paddleaudio.features import melspectrogram from scipy.special import softmax +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.datasets import ESC50 +from paddlespeech.audio.features import melspectrogram + # yapf: disable parser = argparse.ArgumentParser() parser.add_argument("--model_dir", type=str, required=True, default="./export", help="The directory to static model.") diff --git a/paddlespeech/cls/exps/panns/export_model.py b/paddlespeech/cls/exps/panns/export_model.py index 63b22981adb62d213f0b2bd79ae9d4b180d06591..e62d58f02dd1d5f809d9b45de9a8eda07d2c5f69 100644 --- a/paddlespeech/cls/exps/panns/export_model.py +++ b/paddlespeech/cls/exps/panns/export_model.py @@ -15,8 +15,8 @@ import argparse import os import paddle -from paddleaudio.datasets import ESC50 +from paddlespeech.audio.datasets import ESC50 from paddlespeech.cls.models import cnn14 from paddlespeech.cls.models import SoundClassifier diff --git a/paddlespeech/cls/exps/panns/predict.py b/paddlespeech/cls/exps/panns/predict.py index d0b963545c74228c4e7ed6e95dc6a3ae8226c1a7..97759a89d6dbc3f535dc1b4211b6166420291121 100644 --- a/paddlespeech/cls/exps/panns/predict.py +++ b/paddlespeech/cls/exps/panns/predict.py @@ -17,10 +17,10 @@ import os import paddle import paddle.nn.functional as F import yaml -from paddleaudio.backends import load as load_audio -from paddleaudio.features import LogMelSpectrogram -from paddleaudio.utils import logger +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.features import LogMelSpectrogram +from paddlespeech.audio.utils import logger from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/paddlespeech/cls/exps/panns/train.py b/paddlespeech/cls/exps/panns/train.py index 8e06273dedb57b8fed789ec2402254311cfc03f6..fba38a01c7f74503f9f20cbada2b3a3c3ce11eed 100644 --- a/paddlespeech/cls/exps/panns/train.py +++ 
b/paddlespeech/cls/exps/panns/train.py @@ -16,10 +16,10 @@ import os import paddle import yaml -from paddleaudio.features import LogMelSpectrogram -from paddleaudio.utils import logger -from paddleaudio.utils import Timer +from paddlespeech.audio.features import LogMelSpectrogram +from paddlespeech.audio.utils import logger +from paddlespeech.audio.utils import Timer from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/paddlespeech/cls/models/panns/panns.py b/paddlespeech/cls/models/panns/panns.py index b442b2fd1224df90d0ec519124d2fc65397928c5..f2a1b9aead9f0d1be679dca64d735fb4e83424d8 100644 --- a/paddlespeech/cls/models/panns/panns.py +++ b/paddlespeech/cls/models/panns/panns.py @@ -15,8 +15,9 @@ import os import paddle.nn as nn import paddle.nn.functional as F -from paddleaudio.utils.download import load_state_dict_from_url -from paddleaudio.utils.env import MODEL_HOME + +from paddlespeech.audio.utils.download import load_state_dict_from_url +from paddlespeech.audio.utils import MODEL_HOME __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6'] diff --git a/paddlespeech/kws/exps/mdtc/train.py b/paddlespeech/kws/exps/mdtc/train.py index 5a9ca92d16ff3eada36840da6914397227005b49..94e45d59048b3a6b514a4585b31d3c2b34f33d85 100644 --- a/paddlespeech/kws/exps/mdtc/train.py +++ b/paddlespeech/kws/exps/mdtc/train.py @@ -14,10 +14,10 @@ import os import paddle -from paddleaudio.utils import logger -from paddleaudio.utils import Timer from yacs.config import CfgNode +from paddlespeech.audio.utils import logger +from paddlespeech.audio.utils import Timer from paddlespeech.kws.exps.mdtc.collate import collate_features from paddlespeech.kws.models.loss import max_pooling_loss from paddlespeech.kws.models.mdtc import KWSModel diff --git a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py index 
22329d5e028ebed0a87af69c30eba1e6513d6226..ac5720fd5e315f76ee4003886e996881369bcdba 100644 --- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py +++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py @@ -14,10 +14,11 @@ """Contains the audio featurizer class.""" import numpy as np import paddle -import paddleaudio.compliance.kaldi as kaldi from python_speech_features import delta from python_speech_features import mfcc +import paddlespeech.audio.compliance.kaldi as kaldi + class AudioFeaturizer(): """Audio featurizer, for extracting features from audio contents of diff --git a/paddlespeech/s2t/transform/spectrogram.py b/paddlespeech/s2t/transform/spectrogram.py index 2a93bedc87edbb9f33799fb145e19de4e0ee9d57..19f0237bff0effeec29ee96e98928448e72f057d 100644 --- a/paddlespeech/s2t/transform/spectrogram.py +++ b/paddlespeech/s2t/transform/spectrogram.py @@ -15,9 +15,10 @@ import librosa import numpy as np import paddle -import paddleaudio.compliance.kaldi as kaldi from python_speech_features import logfbank +import paddlespeech.audio.compliance.kaldi as kaldi + def stft(x, n_fft, diff --git a/paddlespeech/server/engine/vector/python/vector_engine.py b/paddlespeech/server/engine/vector/python/vector_engine.py index 854303701d1717478b5083a27879a7071434702f..3c72f55d4b61328db8ca91b976d4f34071974195 100644 --- a/paddlespeech/server/engine/vector/python/vector_engine.py +++ b/paddlespeech/server/engine/vector/python/vector_engine.py @@ -16,9 +16,9 @@ from collections import OrderedDict import numpy as np import paddle -from paddleaudio.backends import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.cli.log import logger from paddlespeech.cli.vector.infer import VectorExecutor from paddlespeech.server.engine.base_engine import BaseEngine diff --git a/paddlespeech/server/util.py 
b/paddlespeech/server/util.py index 13f2ddf6ee00477290d7e66f662c4ab8d2862f5f..32546a3300b0ce837c4f2491ef6fd3884d1eb913 100644 --- a/paddlespeech/server/util.py +++ b/paddlespeech/server/util.py @@ -24,11 +24,11 @@ from typing import Any from typing import Dict import paddle -import paddleaudio import requests import yaml from paddle.framework import load +import paddlespeech.audio from .entry import client_commands from .entry import server_commands from paddlespeech.cli import download @@ -289,7 +289,7 @@ def _note_one_stat(cls_name, params={}): if 'audio_file' in params: try: - _, sr = paddleaudio.load(params['audio_file']) + _, sr = paddlespeech.audio.load(params['audio_file']) except Exception: sr = -1 diff --git a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py index e8d91bf3a50425c567b68eb4a408f1046f3b96ce..cd4538bb52d67c6608ea0a7a9667eac48dfbc937 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py @@ -16,10 +16,10 @@ import os import time import paddle -from paddleaudio.backends import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn diff --git a/paddlespeech/vector/exps/ecapa_tdnn/test.py b/paddlespeech/vector/exps/ecapa_tdnn/test.py index f15dbf9b7a111720de9481b9ce62104d47ea9e95..6c87dbe7bd01849b978f2653b044cf6e1f287581 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/test.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/test.py @@ -18,10 +18,10 @@ import numpy as np import paddle from paddle.io import BatchSampler from paddle.io import DataLoader -from paddleaudio.metric import compute_eer from 
tqdm import tqdm from yacs.config import CfgNode +from paddlespeech.audio.metric import compute_eer from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import batch_feature_normalize from paddlespeech.vector.io.dataset import CSVDataset diff --git a/paddlespeech/vector/exps/ecapa_tdnn/train.py b/paddlespeech/vector/exps/ecapa_tdnn/train.py index bf014045d0a85d253d9ef6056cf402a2042988b9..961b75e2934d314ac8ced2d0302e965f2e1cf8bb 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/train.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/train.py @@ -20,9 +20,9 @@ import paddle from paddle.io import BatchSampler from paddle.io import DataLoader from paddle.io import DistributedBatchSampler -from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.io.augment import waveform_augment diff --git a/paddlespeech/vector/io/dataset.py b/paddlespeech/vector/io/dataset.py index 1b514f3d624f1f8ddce610835670bc4f680ecbd7..245b29592edcd1b96eda2434afb3992fccb1236c 100644 --- a/paddlespeech/vector/io/dataset.py +++ b/paddlespeech/vector/io/dataset.py @@ -15,9 +15,9 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset -from paddleaudio import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram +from paddlespeech.audio import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log logger = Log(__name__).getlog() diff --git a/paddlespeech/vector/io/dataset_from_json.py b/paddlespeech/vector/io/dataset_from_json.py index bf04e1132950287d77786b6dc7050322a09b4226..12e845771268a11d3e5d17e63d0cb47e5bff9de3 100644 --- a/paddlespeech/vector/io/dataset_from_json.py +++ 
b/paddlespeech/vector/io/dataset_from_json.py @@ -16,9 +16,10 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset -from paddleaudio import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram -from paddleaudio.compliance.librosa import mfcc + +from paddlespeech.audio import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram +from paddlespeech.audio.compliance.librosa import mfcc @dataclass diff --git a/setup.py b/setup.py index 657de6c5f8bab1a2b5efae66359da07f98fc6967..679549b4d8dc4b2c4906b1e35fb6312ab9872d1e 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ from setuptools import find_packages from setuptools import setup from setuptools.command.develop import develop from setuptools.command.install import install +from setuptools.command.test import test HERE = Path(os.path.abspath(os.path.dirname(__file__))) @@ -31,42 +32,13 @@ VERSION = '0.0.0' COMMITID = 'none' base = [ - "editdistance", - "g2p_en", - "g2pM", - "h5py", - "inflect", - "jieba", - "jsonlines", - "kaldiio", - "librosa==0.8.1", - "loguru", - "matplotlib", - "nara_wpe", - "onnxruntime", - "pandas", - "paddleaudio", - "paddlenlp", - "paddlespeech_feat", - "praatio==5.0.0", - "pypinyin", - "pypinyin-dict", - "python-dateutil", - "pyworld", - "resampy==0.2.2", - "sacrebleu", - "scipy", - "sentencepiece~=0.1.96", - "soundfile~=0.10", - "textgrid", - "timer", - "tqdm", - "typeguard", - "visualdl", - "webrtcvad", - "yacs~=0.1.8", - "prettytable", - "zhon", + "editdistance", "g2p_en", "g2pM", "h5py", "inflect", "jieba", "jsonlines", + "kaldiio", "librosa==0.8.1", "loguru", "matplotlib", "nara_wpe", + "onnxruntime", "pandas", "paddlenlp", "paddlespeech_feat", "praatio==5.0.0", + "pypinyin", "pypinyin-dict", "python-dateutil", "pyworld", "resampy==0.2.2", + "sacrebleu", "scipy", "sentencepiece~=0.1.96", "soundfile~=0.10", + "textgrid", "timer", "tqdm", "typeguard", "visualdl", "webrtcvad", + 
"yacs~=0.1.8", "prettytable", "zhon", 'colorlog', 'pathos == 0.2.8' ] server = [ @@ -177,7 +149,19 @@ class InstallCommand(install): install.run(self) - # cmd: python setup.py upload +class TestCommand(test): + def finalize_options(self): + test.finalize_options(self) + self.test_args = [] + self.test_suite = True + + def run_tests(self): + # Run nose ensuring that argv simulates running nosetests directly + import nose + nose.run_exit(argv=['nosetests', '-w', 'tests']) + + +# cmd: python setup.py upload class UploadCommand(Command): description = "Build and publish the package." user_options = [] @@ -279,11 +263,13 @@ setup_info = dict( "sphinx", "sphinx-rtd-theme", "numpydoc", "myst_parser", "recommonmark>=0.5.0", "sphinx-markdown-tables", "sphinx-autobuild" ], + 'test': ['nose', 'torchaudio==0.10.2'], }, cmdclass={ 'develop': DevelopCommand, 'install': InstallCommand, 'upload': UploadCommand, + 'test': TestCommand, }, # Package info diff --git a/audio/tests/benchmark/README.md b/tests/benchmark/audio/README.md similarity index 97% rename from audio/tests/benchmark/README.md rename to tests/benchmark/audio/README.md index b9034100d4be4468d1508f076fc596b7763ba1dd..9cade74e0bdd8d847ba77849d6e597e259f029a2 100644 --- a/audio/tests/benchmark/README.md +++ b/tests/benchmark/audio/README.md @@ -15,7 +15,6 @@ Result: ========================================================================== test session starts ========================================================================== platform linux -- Python 3.7.7, pytest-7.0.1, pluggy-1.0.0 benchmark: 3.4.1 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000) -rootdir: /ssd3/chenxiaojie06/PaddleSpeech/DeepSpeech/paddleaudio plugins: typeguard-2.12.1, benchmark-3.4.1, anyio-3.5.0 collected 4 items diff --git a/audio/tests/benchmark/log_melspectrogram.py b/tests/benchmark/audio/log_melspectrogram.py similarity index 
87% rename from audio/tests/benchmark/log_melspectrogram.py rename to tests/benchmark/audio/log_melspectrogram.py index 9832aed4d1b80a4565efac8a551946feb7a7a117..c85fcecfbadb394807eee4e9fcb2b5b23fcc20ab 100644 --- a/audio/tests/benchmark/log_melspectrogram.py +++ b/tests/benchmark/audio/log_melspectrogram.py @@ -17,15 +17,17 @@ import urllib.request import librosa import numpy as np import paddle -import paddleaudio import torch import torchaudio +import paddlespeech.audio + wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' if not os.path.isfile(os.path.basename(wav_url)): urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) -waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url))) +waveform, sr = paddlespeech.audio.load( + os.path.abspath(os.path.basename(wav_url))) waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) @@ -55,7 +57,7 @@ def enable_gpu_device(): paddle.set_device('gpu') -log_mel_extractor = paddleaudio.features.LogMelSpectrogram( +log_mel_extractor = paddlespeech.audio.features.LogMelSpectrogram( **mel_conf, f_min=0.0, top_db=80.0, dtype=waveform_tensor.dtype) @@ -65,20 +67,20 @@ def log_melspectrogram(): def test_log_melspect_cpu(benchmark): enable_cpu_device() - feature_paddleaudio = benchmark(log_melspectrogram) + feature_audio = benchmark(log_melspectrogram) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_log_melspect_gpu(benchmark): enable_gpu_device() - feature_paddleaudio = benchmark(log_melspectrogram) + feature_audio = benchmark(log_melspectrogram) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0) 
np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=2) + feature_librosa, feature_audio, decimal=2) mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram( @@ -102,11 +104,11 @@ def test_log_melspect_cpu_torchaudio(benchmark): waveform_tensor_torch = waveform_tensor_torch.to('cpu') amplitude_to_DB = amplitude_to_DB.to('cpu') - feature_paddleaudio = benchmark(log_melspectrogram_torchaudio) + feature_audio = benchmark(log_melspectrogram_torchaudio) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_log_melspect_gpu_torchaudio(benchmark): diff --git a/audio/tests/benchmark/melspectrogram.py b/tests/benchmark/audio/melspectrogram.py similarity index 85% rename from audio/tests/benchmark/melspectrogram.py rename to tests/benchmark/audio/melspectrogram.py index 5fe3f2481820810a394350b56bdd3c315e08cb46..498158941d55f5fae9fb952b6cae2c984fe9b574 100644 --- a/audio/tests/benchmark/melspectrogram.py +++ b/tests/benchmark/audio/melspectrogram.py @@ -17,15 +17,17 @@ import urllib.request import librosa import numpy as np import paddle -import paddleaudio import torch import torchaudio +import paddlespeech.audio + wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' if not os.path.isfile(os.path.basename(wav_url)): urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) -waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url))) +waveform, sr = paddlespeech.audio.load( + os.path.abspath(os.path.basename(wav_url))) waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) @@ -55,7 +57,7 @@ def enable_gpu_device(): paddle.set_device('gpu') -mel_extractor = paddleaudio.features.MelSpectrogram( +mel_extractor = 
paddlespeech.audio.features.MelSpectrogram( **mel_conf, f_min=0.0, dtype=waveform_tensor.dtype) @@ -65,18 +67,18 @@ def melspectrogram(): def test_melspect_cpu(benchmark): enable_cpu_device() - feature_paddleaudio = benchmark(melspectrogram) + feature_audio = benchmark(melspectrogram) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_melspect_gpu(benchmark): enable_gpu_device() - feature_paddleaudio = benchmark(melspectrogram) + feature_audio = benchmark(melspectrogram) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram( @@ -91,10 +93,10 @@ def test_melspect_cpu_torchaudio(benchmark): global waveform_tensor_torch, mel_extractor_torchaudio mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu') waveform_tensor_torch = waveform_tensor_torch.to('cpu') - feature_paddleaudio = benchmark(melspectrogram_torchaudio) + feature_audio = benchmark(melspectrogram_torchaudio) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_melspect_gpu_torchaudio(benchmark): diff --git a/audio/tests/benchmark/mfcc.py b/tests/benchmark/audio/mfcc.py similarity index 87% rename from audio/tests/benchmark/mfcc.py rename to tests/benchmark/audio/mfcc.py index c6a8c85f90905442a8c2ee19ac52b1f0727aa50a..4e286de907aada17f5a0eda033403bfda03f15ad 100644 --- a/audio/tests/benchmark/mfcc.py +++ b/tests/benchmark/audio/mfcc.py @@ -17,15 +17,17 @@ import urllib.request import librosa import numpy as np import paddle -import paddleaudio import torch import torchaudio 
+import paddlespeech.audio + wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' if not os.path.isfile(os.path.basename(wav_url)): urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) -waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url))) +waveform, sr = paddlespeech.audio.load( + os.path.abspath(os.path.basename(wav_url))) waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) @@ -64,7 +66,7 @@ def enable_gpu_device(): paddle.set_device('gpu') -mfcc_extractor = paddleaudio.features.MFCC( +mfcc_extractor = paddlespeech.audio.features.MFCC( **mfcc_conf, f_min=0.0, dtype=waveform_tensor.dtype) @@ -74,18 +76,18 @@ def mfcc(): def test_mfcc_cpu(benchmark): enable_cpu_device() - feature_paddleaudio = benchmark(mfcc) + feature_audio = benchmark(mfcc) feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_mfcc_gpu(benchmark): enable_gpu_device() - feature_paddleaudio = benchmark(mfcc) + feature_audio = benchmark(mfcc) feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) del mel_conf_torchaudio['sample_rate'] @@ -103,10 +105,10 @@ def test_mfcc_cpu_torchaudio(benchmark): mel_extractor_torchaudio = mfcc_extractor_torchaudio.to('cpu') waveform_tensor_torch = waveform_tensor_torch.to('cpu') - feature_paddleaudio = benchmark(mfcc_torchaudio) + feature_audio = benchmark(mfcc_torchaudio) feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_mfcc_gpu_torchaudio(benchmark): diff --git a/audio/tests/backends/__init__.py 
b/tests/unit/audio/backends/__init__.py similarity index 100% rename from audio/tests/backends/__init__.py rename to tests/unit/audio/backends/__init__.py diff --git a/audio/tests/backends/base.py b/tests/unit/audio/backends/base.py similarity index 100% rename from audio/tests/backends/base.py rename to tests/unit/audio/backends/base.py diff --git a/audio/tests/backends/soundfile/__init__.py b/tests/unit/audio/backends/soundfile/__init__.py similarity index 100% rename from audio/tests/backends/soundfile/__init__.py rename to tests/unit/audio/backends/soundfile/__init__.py diff --git a/audio/tests/backends/soundfile/test_io.py b/tests/unit/audio/backends/soundfile/test_io.py similarity index 90% rename from audio/tests/backends/soundfile/test_io.py rename to tests/unit/audio/backends/soundfile/test_io.py index 9d092902da49e4651574201fa6d050d2a12b9c92..26276751f7d89579bed8d958699ab0ac348f0414 100644 --- a/audio/tests/backends/soundfile/test_io.py +++ b/tests/unit/audio/backends/soundfile/test_io.py @@ -16,16 +16,16 @@ import os import unittest import numpy as np -import paddleaudio import soundfile as sf +import paddlespeech.audio from ..base import BackendTest class TestIO(BackendTest): def test_load_mono_channel(self): sf_data, sf_sr = sf.read(self.files[0]) - pa_data, pa_sr = paddleaudio.load( + pa_data, pa_sr = paddlespeech.audio.load( self.files[0], normal=False, dtype='float64') self.assertEqual(sf_data.dtype, pa_data.dtype) @@ -35,7 +35,7 @@ class TestIO(BackendTest): def test_load_multi_channels(self): sf_data, sf_sr = sf.read(self.files[1]) sf_data = sf_data.T # Channel dim first - pa_data, pa_sr = paddleaudio.load( + pa_data, pa_sr = paddlespeech.audio.load( self.files[1], mono=False, normal=False, dtype='float64') self.assertEqual(sf_data.dtype, pa_data.dtype) @@ -49,7 +49,7 @@ class TestIO(BackendTest): pa_tmp_file = 'pa_tmp.wav' sf.write(sf_tmp_file, waveform, sr) - paddleaudio.save(waveform, sr, pa_tmp_file) + paddlespeech.audio.save(waveform, sr, 
pa_tmp_file) self.assertTrue(filecmp.cmp(sf_tmp_file, pa_tmp_file)) for file in [sf_tmp_file, pa_tmp_file]: @@ -62,7 +62,7 @@ class TestIO(BackendTest): pa_tmp_file = 'pa_tmp.wav' sf.write(sf_tmp_file, waveform.T, sr) - paddleaudio.save(waveform.T, sr, pa_tmp_file) + paddlespeech.audio.save(waveform.T, sr, pa_tmp_file) self.assertTrue(filecmp.cmp(sf_tmp_file, pa_tmp_file)) for file in [sf_tmp_file, pa_tmp_file]: diff --git a/audio/tests/features/__init__.py b/tests/unit/audio/features/__init__.py similarity index 100% rename from audio/tests/features/__init__.py rename to tests/unit/audio/features/__init__.py diff --git a/audio/tests/features/base.py b/tests/unit/audio/features/base.py similarity index 97% rename from audio/tests/features/base.py rename to tests/unit/audio/features/base.py index 476f6b8eeb7f14247fa00fd0943741c2eca53e66..6d59f72b5c0206bdc2f3ea88bc15996cb58c3972 100644 --- a/audio/tests/features/base.py +++ b/tests/unit/audio/features/base.py @@ -17,7 +17,8 @@ import urllib.request import numpy as np import paddle -from paddleaudio import load + +from paddlespeech.audio import load wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' diff --git a/audio/tests/features/test_istft.py b/tests/unit/audio/features/test_istft.py similarity index 96% rename from audio/tests/features/test_istft.py rename to tests/unit/audio/features/test_istft.py index 9cf8cdd65582c0300d59749db621155eebd3faee..f1e6e4e339a168906ba19b339b649074dfdf9d5f 100644 --- a/audio/tests/features/test_istft.py +++ b/tests/unit/audio/features/test_istft.py @@ -15,9 +15,9 @@ import unittest import numpy as np import paddle -from paddleaudio.functional.window import get_window from .base import FeatTest +from paddlespeech.audio.functional.window import get_window from paddlespeech.s2t.transform.spectrogram import IStft from paddlespeech.s2t.transform.spectrogram import Stft diff --git a/audio/tests/features/test_kaldi.py b/tests/unit/audio/features/test_kaldi.py similarity index 
87% rename from audio/tests/features/test_kaldi.py rename to tests/unit/audio/features/test_kaldi.py index 00a576f6f48ee71405f5942ff961ae8f6e8edf55..2b0ece890b206a7e42525154a3eb5f4ffc02a472 100644 --- a/audio/tests/features/test_kaldi.py +++ b/tests/unit/audio/features/test_kaldi.py @@ -15,10 +15,10 @@ import unittest import numpy as np import paddle -import paddleaudio import torch import torchaudio +import paddlespeech.audio from .base import FeatTest @@ -40,17 +40,17 @@ class TestKaldi(FeatTest): self.window_size, periodic=False, dtype=eval(f'torch.{self.dtype}')).pow(0.85) - p_hann_window = paddleaudio.functional.window.get_window( + p_hann_window = paddlespeech.audio.functional.window.get_window( 'hann', self.window_size, fftbins=False, dtype=eval(f'paddle.{self.dtype}')) - p_hamm_window = paddleaudio.functional.window.get_window( + p_hamm_window = paddlespeech.audio.functional.window.get_window( 'hamming', self.window_size, fftbins=False, dtype=eval(f'paddle.{self.dtype}')) - p_povey_window = paddleaudio.functional.window.get_window( + p_povey_window = paddlespeech.audio.functional.window.get_window( 'hann', self.window_size, fftbins=False, @@ -63,7 +63,7 @@ class TestKaldi(FeatTest): def test_fbank(self): ta_features = torchaudio.compliance.kaldi.fbank( torch.from_numpy(self.waveform.astype(self.dtype))) - pa_features = paddleaudio.compliance.kaldi.fbank( + pa_features = paddlespeech.audio.compliance.kaldi.fbank( paddle.to_tensor(self.waveform.astype(self.dtype))) np.testing.assert_array_almost_equal( ta_features, pa_features, decimal=4) @@ -71,7 +71,7 @@ class TestKaldi(FeatTest): def test_mfcc(self): ta_features = torchaudio.compliance.kaldi.mfcc( torch.from_numpy(self.waveform.astype(self.dtype))) - pa_features = paddleaudio.compliance.kaldi.mfcc( + pa_features = paddlespeech.audio.compliance.kaldi.mfcc( paddle.to_tensor(self.waveform.astype(self.dtype))) np.testing.assert_array_almost_equal( ta_features, pa_features, decimal=4) diff --git 
a/audio/tests/features/test_librosa.py b/tests/unit/audio/features/test_librosa.py similarity index 89% rename from audio/tests/features/test_librosa.py rename to tests/unit/audio/features/test_librosa.py index a1d3e8400dbc62924b68a1519605231d5da70bd8..ffdec3e788860320faf0ce28d0afb05ddc0238ba 100644 --- a/audio/tests/features/test_librosa.py +++ b/tests/unit/audio/features/test_librosa.py @@ -16,10 +16,10 @@ import unittest import librosa import numpy as np import paddle -import paddleaudio -from paddleaudio.functional.window import get_window +import paddlespeech.audio from .base import FeatTest +from paddlespeech.audio.functional.window import get_window class TestLibrosa(FeatTest): @@ -117,7 +117,7 @@ class TestLibrosa(FeatTest): htk=False, norm='slaney', dtype=self.waveform.dtype, ) - feature_compliance = paddleaudio.compliance.librosa.compute_fbank_matrix( + feature_compliance = paddlespeech.audio.compliance.librosa.compute_fbank_matrix( sr=self.sr, n_fft=self.n_fft, n_mels=self.n_mels, @@ -127,7 +127,7 @@ class TestLibrosa(FeatTest): norm='slaney', dtype=self.waveform.dtype, ) x = paddle.to_tensor(self.waveform) - feature_functional = paddleaudio.functional.compute_fbank_matrix( + feature_functional = paddlespeech.audio.functional.compute_fbank_matrix( sr=self.sr, n_fft=self.n_fft, n_mels=self.n_mels, @@ -156,8 +156,8 @@ class TestLibrosa(FeatTest): n_mels=self.n_mels, fmin=self.fmin) - # paddleaudio.compliance.librosa: - feature_compliance = paddleaudio.compliance.librosa.melspectrogram( + # paddlespeech.audio.compliance.librosa: + feature_compliance = paddlespeech.audio.compliance.librosa.melspectrogram( x=self.waveform, sr=self.sr, window_size=self.n_fft, @@ -166,10 +166,10 @@ class TestLibrosa(FeatTest): fmin=self.fmin, to_db=False) - # paddleaudio.features.layer + # paddlespeech.audio.features.layer x = paddle.to_tensor( self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim. 
- feature_extractor = paddleaudio.features.MelSpectrogram( + feature_extractor = paddlespeech.audio.features.MelSpectrogram( sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length, @@ -198,8 +198,8 @@ class TestLibrosa(FeatTest): fmin=self.fmin) feature_librosa = librosa.power_to_db(feature_librosa, top_db=None) - # paddleaudio.compliance.librosa: - feature_compliance = paddleaudio.compliance.librosa.melspectrogram( + # paddlespeech.audio.compliance.librosa: + feature_compliance = paddlespeech.audio.compliance.librosa.melspectrogram( x=self.waveform, sr=self.sr, window_size=self.n_fft, @@ -207,10 +207,10 @@ class TestLibrosa(FeatTest): n_mels=self.n_mels, fmin=self.fmin) - # paddleaudio.features.layer + # paddlespeech.audio.features.layer x = paddle.to_tensor( self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim. - feature_extractor = paddleaudio.features.LogMelSpectrogram( + feature_extractor = paddlespeech.audio.features.LogMelSpectrogram( sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length, @@ -243,8 +243,8 @@ class TestLibrosa(FeatTest): n_mels=self.n_mels, fmin=self.fmin) - # paddleaudio.compliance.librosa: - feature_compliance = paddleaudio.compliance.librosa.mfcc( + # paddlespeech.audio.compliance.librosa: + feature_compliance = paddlespeech.audio.compliance.librosa.mfcc( x=self.waveform, sr=self.sr, n_mfcc=self.n_mfcc, @@ -257,10 +257,10 @@ class TestLibrosa(FeatTest): fmin=self.fmin, top_db=self.top_db) - # paddleaudio.features.layer + # paddlespeech.audio.features.layer x = paddle.to_tensor( self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim. 
- feature_extractor = paddleaudio.features.MFCC( + feature_extractor = paddlespeech.audio.features.MFCC( sr=self.sr, n_mfcc=self.n_mfcc, n_fft=self.n_fft, diff --git a/audio/tests/features/test_log_melspectrogram.py b/tests/unit/audio/features/test_log_melspectrogram.py similarity index 90% rename from audio/tests/features/test_log_melspectrogram.py rename to tests/unit/audio/features/test_log_melspectrogram.py index 0383c2b8b200a261cbb3e9a8a354f432e28e10a2..59eb73e8ca3ae1671f36b482cffa98032ba7d9d1 100644 --- a/audio/tests/features/test_log_melspectrogram.py +++ b/tests/unit/audio/features/test_log_melspectrogram.py @@ -15,8 +15,8 @@ import unittest import numpy as np import paddle -import paddleaudio +import paddlespeech.audio from .base import FeatTest from paddlespeech.s2t.transform.spectrogram import LogMelSpectrogram @@ -33,8 +33,7 @@ class TestLogMelSpectrogram(FeatTest): ps_res = ps_melspect(self.waveform.T).squeeze(1).T x = paddle.to_tensor(self.waveform) - # paddlespeech.s2t的特征存在幅度谱和功率谱滥用的情况 - ps_melspect = paddleaudio.features.LogMelSpectrogram( + ps_melspect = paddlespeech.audio.features.LogMelSpectrogram( self.sr, self.n_fft, self.hop_length, diff --git a/audio/tests/features/test_spectrogram.py b/tests/unit/audio/features/test_spectrogram.py similarity index 93% rename from audio/tests/features/test_spectrogram.py rename to tests/unit/audio/features/test_spectrogram.py index 1774fe61975c4b4ae11b7ff2c9200a4d67499efe..7d908a7ef5b8a12a950161a5a8ea2997f7ef4275 100644 --- a/audio/tests/features/test_spectrogram.py +++ b/tests/unit/audio/features/test_spectrogram.py @@ -15,8 +15,8 @@ import unittest import numpy as np import paddle -import paddleaudio +import paddlespeech.audio from .base import FeatTest from paddlespeech.s2t.transform.spectrogram import Spectrogram @@ -31,7 +31,7 @@ class TestSpectrogram(FeatTest): ps_res = ps_spect(self.waveform.T).squeeze(1).T # Magnitude x = paddle.to_tensor(self.waveform) - pa_spect = paddleaudio.features.Spectrogram( + 
pa_spect = paddlespeech.audio.features.Spectrogram( self.n_fft, self.hop_length, power=1.0) pa_res = pa_spect(x).squeeze(0).numpy() diff --git a/audio/tests/features/test_stft.py b/tests/unit/audio/features/test_stft.py similarity index 95% rename from audio/tests/features/test_stft.py rename to tests/unit/audio/features/test_stft.py index 58792ffe2477058958a4e31ed122263306e83388..03448ca806abf5a83659f601f46ba91ece862759 100644 --- a/audio/tests/features/test_stft.py +++ b/tests/unit/audio/features/test_stft.py @@ -15,9 +15,9 @@ import unittest import numpy as np import paddle -from paddleaudio.functional.window import get_window from .base import FeatTest +from paddlespeech.audio.functional.window import get_window from paddlespeech.s2t.transform.spectrogram import Stft