diff --git a/examples/esc50/cls0/local/infer.sh b/examples/esc50/cls0/local/infer.sh new file mode 100755 index 0000000000000000000000000000000000000000..57fc157a45222a8a9350ab098ec7ed9b5efe2c63 --- /dev/null +++ b/examples/esc50/cls0/local/infer.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +device=$1 +audio_file=$2 +ckpt_dir=$3 +feat_backend=$4 + +python3 ${BIN_DIR}/predict.py \ +--device ${device} \ +--wav ${audio_file} \ +--feat_backend ${feat_backend} \ +--top_k 10 \ +--checkpoint ${ckpt_dir}/model.pdparams \ No newline at end of file diff --git a/examples/esc50/cls0/local/train.sh b/examples/esc50/cls0/local/train.sh new file mode 100755 index 0000000000000000000000000000000000000000..194904723a0a14e8b4218ef43ed18ec8a11f6eb5 --- /dev/null +++ b/examples/esc50/cls0/local/train.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +ngpu=$1 +device=$2 +feat_backend=$3 + +num_epochs=50 +batch_size=16 +ckpt_dir=./checkpoint +save_freq=10 + +if [ ${ngpu} -gt 1 ]; then + python3 -m paddle.distributed.launch --gpus $CUDA_VISIBLE_DEVICES ${BIN_DIR}/train.py \ + --epochs ${num_epochs} \ + --feat_backend ${feat_backend} \ + --batch_size ${batch_size} \ + --checkpoint_dir ${ckpt_dir} \ + --save_freq ${save_freq} +else + python3 ${BIN_DIR}/train.py \ + --device ${device} \ + --epochs ${num_epochs} \ + --feat_backend ${feat_backend} \ + --batch_size ${batch_size} \ + --checkpoint_dir ${ckpt_dir} \ + --save_freq ${save_freq} +fi diff --git a/examples/esc50/cls0/path.sh b/examples/esc50/cls0/path.sh index 38a242a4ab3dd01e29873e8f827f9bdb4656fb57..2cc73e27ae5b77947547ade1c700e8591e665975 100644 --- a/examples/esc50/cls0/path.sh +++ b/examples/esc50/cls0/path.sh @@ -1,3 +1,4 @@ +#!/bin/bash export MAIN_ROOT=`realpath ${PWD}/../../../` export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH} @@ -8,4 +9,5 @@ export PYTHONDONTWRITEBYTECODE=1 export PYTHONIOENCODING=UTF-8 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/ +MODEL=PANNs +export BIN_DIR=${MAIN_ROOT}/paddlespeech/cls/exps/${MODEL} \ No newline at end of file diff --git a/examples/esc50/cls0/run.sh b/examples/esc50/cls0/run.sh index 6d3a09c6d78c07b65c296f76db3d58f2d6c85c99..e75ad51776e44bbeacf4c4c65203aa084198c2d0 100755 --- a/examples/esc50/cls0/run.sh +++ b/examples/esc50/cls0/run.sh @@ -11,41 +11,17 @@ fi stage=$1 stop_stage=100 - -num_epochs=50 -batch_size=16 -ckpt_dir=./checkpoint -save_freq=10 feat_backend=numpy if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - if [ ${ngpu} -gt 1 ]; then - python -m paddle.distributed.launch --gpus $CUDA_VISIBLE_DEVICES local/train.py \ - --epochs ${num_epochs} \ - --feat_backend ${feat_backend} \ - --batch_size ${batch_size} \ - --checkpoint_dir ${ckpt_dir} \ - --save_freq ${save_freq} - else - python local/train.py \ - --device ${device} \ - --epochs ${num_epochs} \ - --feat_backend ${feat_backend} \ - --batch_size ${batch_size} \ - --checkpoint_dir ${ckpt_dir} \ - --save_freq ${save_freq} - fi + ./local/train.sh ${ngpu} ${device} ${feat_backend} || exit -1 fi audio_file=~/cat.wav -ckpt=./checkpoint/epoch_50/model.pdparams +ckpt_dir=./checkpoint/epoch_50 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - python local/predict.py \ - --device ${device} \ - --wav ${audio_file} \ - --feat_backend ${feat_backend} \ - --top_k 10 \ - --checkpoint ${ckpt} + ./local/infer.sh ${device} ${audio_file} ${ckpt_dir} ${feat_backend} || exit -1 fi + exit 0 \ No newline at end of file diff --git a/paddleaudio/__init__.py b/paddleaudio/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2685cf57c600b5931f17f5257a9443796662b916 --- /dev/null +++ b/paddleaudio/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .backends import * +from .features import * diff --git a/paddlespeech/cls/backends/__init__.py b/paddleaudio/backends/__init__.py similarity index 100% rename from paddlespeech/cls/backends/__init__.py rename to paddleaudio/backends/__init__.py diff --git a/paddlespeech/cls/backends/audio.py b/paddleaudio/backends/audio.py similarity index 100% rename from paddlespeech/cls/backends/audio.py rename to paddleaudio/backends/audio.py diff --git a/paddlespeech/cls/datasets/__init__.py b/paddleaudio/datasets/__init__.py similarity index 100% rename from paddlespeech/cls/datasets/__init__.py rename to paddleaudio/datasets/__init__.py diff --git a/paddlespeech/cls/datasets/dataset.py b/paddleaudio/datasets/dataset.py similarity index 100% rename from paddlespeech/cls/datasets/dataset.py rename to paddleaudio/datasets/dataset.py diff --git a/paddlespeech/cls/datasets/esc50.py b/paddleaudio/datasets/esc50.py similarity index 100% rename from paddlespeech/cls/datasets/esc50.py rename to paddleaudio/datasets/esc50.py diff --git a/paddlespeech/cls/datasets/gtzan.py b/paddleaudio/datasets/gtzan.py similarity index 100% rename from paddlespeech/cls/datasets/gtzan.py rename to paddleaudio/datasets/gtzan.py diff --git a/paddlespeech/cls/datasets/tess.py b/paddleaudio/datasets/tess.py similarity index 100% rename from paddlespeech/cls/datasets/tess.py rename to paddleaudio/datasets/tess.py diff --git a/paddlespeech/cls/datasets/urban_sound.py b/paddleaudio/datasets/urban_sound.py similarity index 100% rename from paddlespeech/cls/datasets/urban_sound.py rename to paddleaudio/datasets/urban_sound.py diff --git a/paddlespeech/cls/features/__init__.py b/paddleaudio/features/__init__.py similarity index 100% rename from paddlespeech/cls/features/__init__.py rename to paddleaudio/features/__init__.py diff --git a/paddlespeech/cls/features/augment.py b/paddleaudio/features/augment.py similarity index 100% rename from paddlespeech/cls/features/augment.py rename to paddleaudio/features/augment.py diff --git a/paddlespeech/cls/features/core.py b/paddleaudio/features/core.py similarity index 100% rename from paddlespeech/cls/features/core.py rename to paddleaudio/features/core.py diff --git a/paddlespeech/cls/features/spectrum.py b/paddleaudio/features/spectrum.py similarity index 100% rename from paddlespeech/cls/features/spectrum.py rename to paddleaudio/features/spectrum.py diff --git a/paddlespeech/cls/features/window.py b/paddleaudio/features/window.py similarity index 100% rename from paddlespeech/cls/features/window.py rename to paddleaudio/features/window.py diff --git a/paddlespeech/cls/utils/__init__.py b/paddleaudio/utils/__init__.py similarity index 100% rename from paddlespeech/cls/utils/__init__.py rename to paddleaudio/utils/__init__.py diff --git a/paddlespeech/cls/utils/download.py b/paddleaudio/utils/download.py similarity index 65% rename from paddlespeech/cls/utils/download.py rename to paddleaudio/utils/download.py index 0a36f29b97f024afea5347773de0db576e8ed875..45a8e57ba0ad31f0921fd03a5a2156f02d34a1cb 100644 --- a/paddlespeech/cls/utils/download.py +++ b/paddleaudio/utils/download.py @@ -17,7 +17,6 @@ from typing import List from paddle.framework import load as load_state_dict from paddle.utils import download -from pathos.multiprocessing import ProcessPool from .log import logger @@ -32,27 +31,18 @@ def decompress(file: str): download._decompress(file) -def download_and_decompress(archives: List[Dict[str, str]], - path: str, - n_workers: int=0): +def download_and_decompress(archives: List[Dict[str, str]], path: str): """ Download archieves and decompress to specific path. """ if not os.path.isdir(path): os.makedirs(path) - if n_workers <= 0: - for archive in archives: - assert 'url' in archive and 'md5' in archive, \ - 'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archieve.keys())}' + for archive in archives: + assert 'url' in archive and 'md5' in archive, \ + 'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archieve.keys())}' - download.get_path_from_url(archive['url'], path, archive['md5']) - else: - pool = ProcessPool(nodes=n_workers) - pool.imap(download.get_path_from_url, [_['url'] for _ in archives], - [path] * len(archives), [_['md5'] for _ in archives]) - pool.close() - pool.join() + download.get_path_from_url(archive['url'], path, archive['md5']) def load_state_dict_from_url(url: str, path: str, md5: str=None): diff --git a/paddlespeech/cls/utils/env.py b/paddleaudio/utils/env.py similarity index 64% rename from paddlespeech/cls/utils/env.py rename to paddleaudio/utils/env.py index c455af0008b90a8ed95ce72ac9b6cbc90cf37900..59c6b62197b82c5783afd864d7c8c8fc6c43b4a6 100644 --- a/paddlespeech/cls/utils/env.py +++ b/paddleaudio/utils/env.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. ''' -This module is used to store environmental variables in PaddleSpeech. -PPSPEECH_HOME --> the root directory for storing PaddleSpeech related data. Default to ~/.paddlespeech. Users can change the -├ default value through the PPSPEECH_HOME environment variable. +This module is used to store environmental variables in PaddleAudio. +PPAUDIO_HOME --> the root directory for storing PaddleAudio related data. Default to ~/.paddleaudio. Users can change the +├ default value through the PPAUDIO_HOME environment variable. ├─ MODEL_HOME --> Store model files. └─ DATA_HOME --> Store automatically downloaded datasets. ''' @@ -25,29 +25,29 @@ def _get_user_home(): return os.path.expanduser('~') -def _get_package_home(): - if 'PPSPEECH_HOME' in os.environ: - home_path = os.environ['PPSPEECH_HOME'] +def _get_ppaudio_home(): + if 'PPAUDIO_HOME' in os.environ: + home_path = os.environ['PPAUDIO_HOME'] if os.path.exists(home_path): if os.path.isdir(home_path): return home_path else: raise RuntimeError( - 'The environment variable PPSPEECH_HOME {} is not a directory.'. + 'The environment variable PPAUDIO_HOME {} is not a directory.'. format(home_path)) else: return home_path - return os.path.join(_get_user_home(), '.paddlespeech') + return os.path.join(_get_user_home(), '.paddleaudio') def _get_sub_home(directory): - home = os.path.join(_get_package_home(), directory) + home = os.path.join(_get_ppaudio_home(), directory) if not os.path.exists(home): os.makedirs(home) return home USER_HOME = _get_user_home() -PPSPEECH_HOME = _get_package_home() -MODEL_HOME = _get_sub_home('pretrained_models') +PPAUDIO_HOME = _get_ppaudio_home() +MODEL_HOME = _get_sub_home('models') DATA_HOME = _get_sub_home('datasets') diff --git a/paddlespeech/cls/utils/error.py b/paddleaudio/utils/error.py similarity index 100% rename from paddlespeech/cls/utils/error.py rename to paddleaudio/utils/error.py diff --git a/paddlespeech/cls/utils/log.py b/paddleaudio/utils/log.py similarity index 96% rename from paddlespeech/cls/utils/log.py rename to paddleaudio/utils/log.py index f4146c4f594ad6981b5913cdcb508189ad7684f5..5e7db68a937b3e0bd97f4bfe151225821b4598e2 100644 --- a/paddlespeech/cls/utils/log.py +++ b/paddleaudio/utils/log.py @@ -55,13 +55,13 @@ log_config = { class Logger(object): ''' - Deafult logger in PaddleSpeech + Deafult logger in PaddleAudio Args: - name(str) : Logger name, default is 'PaddleSpeech' + name(str) : Logger name, default is 'PaddleAudio' ''' def __init__(self, name: str=None): - name = 'PaddleSpeech' if not name else name + name = 'PaddleAudio' if not name else name self.logger = logging.getLogger(name) for key, conf in log_config.items(): diff --git a/paddlespeech/cls/utils/time.py b/paddleaudio/utils/time.py similarity index 100% rename from paddlespeech/cls/utils/time.py rename to paddleaudio/utils/time.py diff --git a/paddlespeech/cls/__init__.py b/paddlespeech/cls/__init__.py index 2685cf57c600b5931f17f5257a9443796662b916..185a92b8d94d3426d616c0624f0f2ee04339349e 100644 --- a/paddlespeech/cls/__init__.py +++ b/paddlespeech/cls/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .backends import * -from .features import * diff --git a/paddlespeech/cls/models/__init__.py b/paddlespeech/cls/exps/PANNs/__init__.py similarity index 100% rename from paddlespeech/cls/models/__init__.py rename to paddlespeech/cls/exps/PANNs/__init__.py diff --git a/examples/esc50/cls0/local/deploy/python/predict.py b/paddlespeech/cls/exps/PANNs/deploy/python/predict.py similarity index 100% rename from examples/esc50/cls0/local/deploy/python/predict.py rename to paddlespeech/cls/exps/PANNs/deploy/python/predict.py diff --git a/examples/esc50/cls0/local/export_model.py b/paddlespeech/cls/exps/PANNs/export_model.py similarity index 92% rename from examples/esc50/cls0/local/export_model.py rename to paddlespeech/cls/exps/PANNs/export_model.py index 87dd527c315f1d09421ab28aca9a4fdec7dfe727..4dac52376456b078dbbbb2a9cc816ecfbdf4afe7 100644 --- a/examples/esc50/cls0/local/export_model.py +++ b/paddlespeech/cls/exps/PANNs/export_model.py @@ -15,10 +15,10 @@ import argparse import os import paddle -from model import SoundClassifier -from paddlespeech.cls.datasets import ESC50 -from paddlespeech.cls.models.panns import cnn14 +from .model import SoundClassifier +from .panns import cnn14 +from paddleaudio.datasets import ESC50 # yapf: disable parser = argparse.ArgumentParser(__doc__) diff --git a/examples/esc50/cls0/local/model.py b/paddlespeech/cls/exps/PANNs/model.py similarity index 100% rename from examples/esc50/cls0/local/model.py rename to paddlespeech/cls/exps/PANNs/model.py diff --git a/paddlespeech/cls/models/panns.py b/paddlespeech/cls/exps/PANNs/panns.py similarity index 99% rename from paddlespeech/cls/models/panns.py rename to paddlespeech/cls/exps/PANNs/panns.py index 1c68f06f634747361d0929190afdc465ac75bbb1..6d2dac56ac23d9b3322e49703f98e15faf936fd0 100644 --- a/paddlespeech/cls/models/panns.py +++ b/paddlespeech/cls/exps/PANNs/panns.py @@ -16,8 +16,8 @@ import os import paddle.nn as nn import paddle.nn.functional as F -from ..utils.download import load_state_dict_from_url -from ..utils.env import MODEL_HOME +from paddleaudio.utils.download import load_state_dict_from_url +from paddleaudio.utils.env import MODEL_HOME __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6'] diff --git a/examples/esc50/cls0/local/predict.py b/paddlespeech/cls/exps/PANNs/predict.py similarity index 90% rename from examples/esc50/cls0/local/predict.py rename to paddlespeech/cls/exps/PANNs/predict.py index a6e38a35f0548ad8e0e27758c703b0de23a13118..2d97ab1b9b8bd2dd005fb352a949373e99ad61e5 100644 --- a/examples/esc50/cls0/local/predict.py +++ b/paddlespeech/cls/exps/PANNs/predict.py @@ -17,12 +17,12 @@ import numpy as np import paddle import paddle.nn.functional as F from model import SoundClassifier +from panns import cnn14 -from paddlespeech.cls.backends import load as load_audio -from paddlespeech.cls.datasets import ESC50 -from paddlespeech.cls.features import LogMelSpectrogram -from paddlespeech.cls.features import melspectrogram -from paddlespeech.cls.models.panns import cnn14 +from paddleaudio.backends import load as load_audio +from paddleaudio.datasets import ESC50 +from paddleaudio.features import LogMelSpectrogram +from paddleaudio.features import melspectrogram # yapf: disable parser = argparse.ArgumentParser(__doc__) diff --git a/examples/esc50/cls0/local/train.py b/paddlespeech/cls/exps/PANNs/train.py similarity index 96% rename from examples/esc50/cls0/local/train.py rename to paddlespeech/cls/exps/PANNs/train.py index 7a0301878ada4ba9ecd376c19d673ce5d544daad..a3fb01ef1fc427e9c872fec4863f8a6a4efc9f71 100644 --- a/examples/esc50/cls0/local/train.py +++ b/paddlespeech/cls/exps/PANNs/train.py @@ -16,12 +16,12 @@ import os import paddle from model import SoundClassifier +from panns import cnn14 -from paddlespeech.cls.datasets import ESC50 -from paddlespeech.cls.features import LogMelSpectrogram -from paddlespeech.cls.models.panns import cnn14 -from paddlespeech.cls.utils import logger -from paddlespeech.cls.utils import Timer +from paddleaudio.datasets import ESC50 +from paddleaudio.features import LogMelSpectrogram +from paddleaudio.utils import logger +from paddleaudio.utils import Timer # yapf: disable parser = argparse.ArgumentParser(__doc__) diff --git a/paddlespeech/cls/exps/__init__.py b/paddlespeech/cls/exps/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..185a92b8d94d3426d616c0624f0f2ee04339349e --- /dev/null +++ b/paddlespeech/cls/exps/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/requirements.txt b/requirements.txt index c6cb556c1fff0d3dbd57559af50b40613894b6dd..4456ccc29537767ea3bc0fc31f51538e96595842 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -colorlog ConfigArgParse coverage distro @@ -19,10 +18,10 @@ matplotlib nara_wpe nltk numba +paddleaudio paddlespeech_ctcdecoders paddlespeech_feat pandas -pathos phkit Pillow praatio~=4.1 diff --git a/setup.py b/setup.py index d07db7881d72371430e959eaa2237e4ba9d2d288..310eed1e75b1b6e489df467707dd60c959a1c4e1 100644 --- a/setup.py +++ b/setup.py @@ -173,7 +173,7 @@ setup_info = dict( # Package info packages=find_packages(exclude=('utils', 'tests', 'tests.*', 'examples*', - 'third_party*', 'tools*')), + 'paddleaudio*', 'third_party*', 'tools*')), zip_safe=True, classifiers=[ 'Development Status :: 3 - Alpha', diff --git a/setup_audio.py b/setup_audio.py new file mode 100644 index 0000000000000000000000000000000000000000..24c9bb9b98691bd7c9d8f0efe7e2cabdd4664399 --- /dev/null +++ b/setup_audio.py @@ -0,0 +1,41 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import setuptools + +# set the version here +version = '0.1.0a' + +setuptools.setup( + name="paddleaudio", + version=version, + author="", + author_email="", + description="PaddleAudio, in development", + long_description="", + long_description_content_type="text/markdown", + url="", + packages=setuptools.find_packages(include=['paddleaudio*']), + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + python_requires='>=3.6', + install_requires=[ + 'numpy >= 1.15.0', + 'scipy >= 1.0.0', + 'resampy >= 0.2.2', + 'soundfile >= 0.9.0', + 'colorlog', + ], )