Add paddlespeech.cls and esc50 example.

2c531d78 · KP · b12ae34e · 2c531d78 · 2c531d78 · 2c531d78
36 changed files
--- a/examples/esc50/cls0/local/infer.sh
+++ b/examples/esc50/cls0/local/infer.sh
+#!/bin/bash
+# Run inference on one audio file with ${BIN_DIR}/predict.py.
+#
+# Usage: infer.sh <device> <audio_file> <ckpt_dir> <feat_backend>
+#   device        forwarded to predict.py as --device
+#   audio_file    forwarded to predict.py as --wav
+#   ckpt_dir      directory that contains model.pdparams
+#   feat_backend  forwarded to predict.py as --feat_backend
+#
+# BIN_DIR must already be set in the environment (path.sh exports it).
+if [ $# -lt 4 ]; then
+    echo "Usage: $0 <device> <audio_file> <ckpt_dir> <feat_backend>" >&2
+    exit 1
+fi
+device=$1
+audio_file=$2
+ckpt_dir=$3
+feat_backend=$4
+# Every expansion is quoted so that paths containing spaces stay a single
+# argument and an empty value cannot make an option swallow the next flag.
+python3 "${BIN_DIR}/predict.py" \
+    --device "${device}" \
+    --wav "${audio_file}" \
+    --feat_backend "${feat_backend}" \
+    --top_k 10 \
+    --checkpoint "${ckpt_dir}/model.pdparams"
\ No newline at end of file
--- a/examples/esc50/cls0/local/train.sh
+++ b/examples/esc50/cls0/local/train.sh
+#!/bin/bash
+# Launch ${BIN_DIR}/train.py, either through paddle.distributed.launch
+# (ngpu > 1) or as a single process.
+#
+# Usage: train.sh <ngpu> <device> <feat_backend>
+#   ngpu          number of GPUs; values above 1 select the distributed launcher
+#   device        forwarded as --device in the single-process branch only
+#   feat_backend  forwarded to train.py as --feat_backend
+#
+# BIN_DIR must already be set in the environment (path.sh exports it).
+# The distributed branch also reads CUDA_VISIBLE_DEVICES.
+if [ $# -lt 3 ]; then
+    echo "Usage: $0 <ngpu> <device> <feat_backend>" >&2
+    exit 1
+fi
+ngpu=$1
+device=$2
+feat_backend=$3
+num_epochs=50
+batch_size=16
+ckpt_dir=./checkpoint
+save_freq=10
+# Expansions are quoted so an empty or space-containing value can neither
+# break the `[` test nor shift the arguments that follow it.
+if [ "${ngpu}" -gt 1 ]; then
+    python3 -m paddle.distributed.launch --gpus "${CUDA_VISIBLE_DEVICES}" "${BIN_DIR}/train.py" \
+    --epochs "${num_epochs}" \
+    --feat_backend "${feat_backend}" \
+    --batch_size "${batch_size}" \
+    --checkpoint_dir "${ckpt_dir}" \
+    --save_freq "${save_freq}"
+else
+    python3 "${BIN_DIR}/train.py" \
+    --device "${device}" \
+    --epochs "${num_epochs}" \
+    --feat_backend "${feat_backend}" \
+    --batch_size "${batch_size}" \
+    --checkpoint_dir "${ckpt_dir}" \
+    --save_freq "${save_freq}"
+fi
--- a/examples/esc50/cls0/path.sh
+++ b/examples/esc50/cls0/path.sh
+#!/bin/bash
 export MAIN_ROOT=`realpath ${PWD}/../../../`
 export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
@@ -8,4 +9,5 @@ export PYTHONDONTWRITEBYTECODE=1
 export PYTHONIOENCODING=UTF-8
 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/
+MODEL=PANNs  # name of the sub-directory under paddlespeech/cls/exps used below
+export BIN_DIR=${MAIN_ROOT}/paddlespeech/cls/exps/${MODEL}  # read by local/train.sh and local/infer.sh
\ No newline at end of file
--- a/examples/esc50/cls0/run.sh
+++ b/examples/esc50/cls0/run.sh
@@ -11,41 +11,17 @@ fi
 stage=$1
 stop_stage=100
-num_epochs=50
-batch_size=16
-ckpt_dir=./checkpoint
-save_freq=10
 feat_backend=numpy
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
-    if [ ${ngpu} -gt 1 ]; then
+    ./local/train.sh "${ngpu}" "${device}" "${feat_backend}" || exit -1  # quoted: an empty value must not shift the positional arguments
-        python -m paddle.distributed.launch --gpus $CUDA_VISIBLE_DEVICES local/train.py \
-        --epochs ${num_epochs} \
-        --feat_backend ${feat_backend} \
-        --batch_size ${batch_size} \
-        --checkpoint_dir ${ckpt_dir} \
-        --save_freq ${save_freq}
-    else
-        python local/train.py \
-        --device ${device} \
-        --epochs ${num_epochs} \
-        --feat_backend ${feat_backend} \
-        --batch_size ${batch_size} \
-        --checkpoint_dir ${ckpt_dir} \
-        --save_freq ${save_freq}
-    fi
 fi
 audio_file=~/cat.wav
-ckpt=./checkpoint/epoch_50/model.pdparams
+ckpt_dir=./checkpoint/epoch_50
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    python local/predict.py \
+    ./local/infer.sh "${device}" "${audio_file}" "${ckpt_dir}" "${feat_backend}" || exit -1  # quoted: paths with spaces stay one argument
-    --device ${device} \
-    --wav ${audio_file} \
-    --feat_backend ${feat_backend} \
-    --top_k 10 \
-    --checkpoint ${ckpt}
 fi
 exit 0
\ No newline at end of file
--- a/paddleaudio/__init__.py
+++ b/paddleaudio/__init__.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .backends import *
+from .features import *
--- a/paddlespeech/cls/backends/__init__.py
+++ b/paddlespeech/cls/backends/__init__.py
--- a/paddlespeech/cls/backends/audio.py
+++ b/paddlespeech/cls/backends/audio.py
--- a/paddlespeech/cls/datasets/__init__.py
+++ b/paddlespeech/cls/datasets/__init__.py
--- a/paddlespeech/cls/datasets/dataset.py
+++ b/paddlespeech/cls/datasets/dataset.py
--- a/paddlespeech/cls/datasets/esc50.py
+++ b/paddlespeech/cls/datasets/esc50.py
--- a/paddlespeech/cls/datasets/gtzan.py
+++ b/paddlespeech/cls/datasets/gtzan.py
--- a/paddlespeech/cls/datasets/tess.py
+++ b/paddlespeech/cls/datasets/tess.py
--- a/paddlespeech/cls/datasets/urban_sound.py
+++ b/paddlespeech/cls/datasets/urban_sound.py
--- a/paddlespeech/cls/features/__init__.py
+++ b/paddlespeech/cls/features/__init__.py
--- a/paddlespeech/cls/features/augment.py
+++ b/paddlespeech/cls/features/augment.py
--- a/paddlespeech/cls/features/core.py
+++ b/paddlespeech/cls/features/core.py
--- a/paddlespeech/cls/features/spectrum.py
+++ b/paddlespeech/cls/features/spectrum.py
--- a/paddlespeech/cls/features/window.py
+++ b/paddlespeech/cls/features/window.py
--- a/paddlespeech/cls/utils/__init__.py
+++ b/paddlespeech/cls/utils/__init__.py
--- a/paddlespeech/cls/utils/download.py
+++ b/paddlespeech/cls/utils/download.py
@@ -17,7 +17,6 @@ from typing import List
 from paddle.framework import load as load_state_dict
 from paddle.utils import download
-from pathos.multiprocessing import ProcessPool
 from .log import logger
@@ -32,27 +31,18 @@ def decompress(file: str):
    download._decompress(file)
-def download_and_decompress(archives: List[Dict[str, str]],
+def download_and_decompress(archives: List[Dict[str, str]], path: str):
-                            path: str,
-                            n_workers: int=0):
    """
    Download archieves and decompress to specific path.
    """
    if not os.path.isdir(path):
        os.makedirs(path)
-    if n_workers <= 0:
+    for archive in archives:  # every entry must carry both 'url' and 'md5' keys
-        for archive in archives:
+        assert 'url' in archive and 'md5' in archive, \
-            assert 'url' in archive and 'md5' in archive, \
+            f'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archive.keys())}'
-                'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archieve.keys())}'
-            download.get_path_from_url(archive['url'], path, archive['md5'])
+        download.get_path_from_url(archive['url'], path, archive['md5'])
-    else:
-        pool = ProcessPool(nodes=n_workers)
-        pool.imap(download.get_path_from_url, [_['url'] for _ in archives],
-                  [path] * len(archives), [_['md5'] for _ in archives])
-        pool.close()
-        pool.join()
 def load_state_dict_from_url(url: str, path: str, md5: str=None):

--- a/paddlespeech/cls/utils/env.py
+++ b/paddlespeech/cls/utils/env.py
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 '''
-This module is used to store environmental variables in PaddleSpeech.
+This module is used to store environmental variables in PaddleAudio.
-PPSPEECH_HOME     -->  the root directory for storing PaddleSpeech related data. Default to ~/.paddlespeech. Users can change the
+PPAUDIO_HOME     -->  the root directory for storing PaddleAudio related data. Default to ~/.paddleaudio. Users can change the
-├                            default value through the PPSPEECH_HOME environment variable.
+├                            default value through the PPAUDIO_HOME environment variable.
 ├─ MODEL_HOME    -->  Store model files.
 └─ DATA_HOME     -->  Store automatically downloaded datasets.
 '''
@@ -25,29 +25,29 @@ def _get_user_home():
    return os.path.expanduser('~')
-def _get_package_home():
+def _get_ppaudio_home():  # resolve the root directory used for PaddleAudio data
-    if 'PPSPEECH_HOME' in os.environ:
+    if 'PPAUDIO_HOME' in os.environ:  # an explicit environment override is checked first
-        home_path = os.environ['PPSPEECH_HOME']
+        home_path = os.environ['PPAUDIO_HOME']
        if os.path.exists(home_path):
            if os.path.isdir(home_path):
                return home_path
            else:
                raise RuntimeError(
-                    'The environment variable PPSPEECH_HOME {} is not a directory.'.
+                    'The environment variable PPAUDIO_HOME {} is not a directory.'.
                    format(home_path))
        else:
            return home_path
-    return os.path.join(_get_user_home(), '.paddlespeech')
+    return os.path.join(_get_user_home(), '.paddleaudio')  # fallback when PPAUDIO_HOME is not set
 def _get_sub_home(directory):
-    home = os.path.join(_get_package_home(), directory)
+    home = os.path.join(_get_ppaudio_home(), directory)
    if not os.path.exists(home):
        os.makedirs(home)
    return home
 USER_HOME = _get_user_home()
-PPSPEECH_HOME = _get_package_home()
+PPAUDIO_HOME = _get_ppaudio_home()
-MODEL_HOME = _get_sub_home('pretrained_models')
+MODEL_HOME = _get_sub_home('models')
 DATA_HOME = _get_sub_home('datasets')
--- a/paddlespeech/cls/utils/error.py
+++ b/paddlespeech/cls/utils/error.py
--- a/paddlespeech/cls/utils/log.py
+++ b/paddlespeech/cls/utils/log.py
@@ -55,13 +55,13 @@ log_config = {
 class Logger(object):
    '''
-    Deafult logger in PaddleSpeech
+    Default logger in PaddleAudio
    Args:
-        name(str) : Logger name, default is 'PaddleSpeech'
+        name(str) : Logger name, default is 'PaddleAudio'
    '''
    def __init__(self, name: str=None):
-        name = 'PaddleSpeech' if not name else name
+        name = 'PaddleAudio' if not name else name
        self.logger = logging.getLogger(name)
        for key, conf in log_config.items():

--- a/paddlespeech/cls/utils/time.py
+++ b/paddlespeech/cls/utils/time.py
--- a/paddlespeech/cls/__init__.py
+++ b/paddlespeech/cls/__init__.py
@@ -11,5 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .backends import *
-from .features import *
--- a/paddlespeech/cls/models/__init__.py
+++ b/paddlespeech/cls/models/__init__.py
--- a/examples/esc50/cls0/local/deploy/python/predict.py
+++ b/examples/esc50/cls0/local/deploy/python/predict.py
--- a/examples/esc50/cls0/local/export_model.py
+++ b/examples/esc50/cls0/local/export_model.py
@@ -15,10 +15,10 @@ import argparse
 import os
 import paddle
-from model import SoundClassifier
-from paddlespeech.cls.datasets import ESC50
+from model import SoundClassifier  # plain import, as in predict.py and train.py; a relative import fails when the file is executed directly
-from paddlespeech.cls.models.panns import cnn14
+from panns import cnn14  # plain import, as in predict.py and train.py
+from paddleaudio.datasets import ESC50
 # yapf: disable
 parser = argparse.ArgumentParser(__doc__)

--- a/examples/esc50/cls0/local/model.py
+++ b/examples/esc50/cls0/local/model.py
--- a/paddlespeech/cls/models/panns.py
+++ b/paddlespeech/cls/models/panns.py
@@ -16,8 +16,8 @@ import os
 import paddle.nn as nn
 import paddle.nn.functional as F
-from ..utils.download import load_state_dict_from_url
+from paddleaudio.utils.download import load_state_dict_from_url
-from ..utils.env import MODEL_HOME
+from paddleaudio.utils.env import MODEL_HOME
 __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6']

--- a/examples/esc50/cls0/local/predict.py
+++ b/examples/esc50/cls0/local/predict.py
@@ -17,12 +17,12 @@ import numpy as np
 import paddle
 import paddle.nn.functional as F
 from model import SoundClassifier
+from panns import cnn14
-from paddlespeech.cls.backends import load as load_audio
+from paddleaudio.backends import load as load_audio
-from paddlespeech.cls.datasets import ESC50
+from paddleaudio.datasets import ESC50
-from paddlespeech.cls.features import LogMelSpectrogram
+from paddleaudio.features import LogMelSpectrogram
-from paddlespeech.cls.features import melspectrogram
+from paddleaudio.features import melspectrogram
-from paddlespeech.cls.models.panns import cnn14
 # yapf: disable
 parser = argparse.ArgumentParser(__doc__)

--- a/examples/esc50/cls0/local/train.py
+++ b/examples/esc50/cls0/local/train.py
@@ -16,12 +16,12 @@ import os
 import paddle
 from model import SoundClassifier
+from panns import cnn14
-from paddlespeech.cls.datasets import ESC50
+from paddleaudio.datasets import ESC50
-from paddlespeech.cls.features import LogMelSpectrogram
+from paddleaudio.features import LogMelSpectrogram
-from paddlespeech.cls.models.panns import cnn14
+from paddleaudio.utils import logger
-from paddlespeech.cls.utils import logger
+from paddleaudio.utils import Timer
-from paddlespeech.cls.utils import Timer
 # yapf: disable
 parser = argparse.ArgumentParser(__doc__)

--- a/paddlespeech/cls/exps/__init__.py
+++ b/paddlespeech/cls/exps/__init__.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/requirements.txt
+++ b/requirements.txt
-colorlog
 ConfigArgParse
 coverage
 distro
@@ -19,10 +18,10 @@ matplotlib
 nara_wpe
 nltk
 numba
+paddleaudio
 paddlespeech_ctcdecoders
 paddlespeech_feat
 pandas
-pathos
 phkit
 Pillow
 praatio~=4.1

--- a/setup.py
+++ b/setup.py
@@ -173,7 +173,7 @@ setup_info = dict(
    # Package info
    packages=find_packages(exclude=('utils', 'tests', 'tests.*', 'examples*',
-                                    'third_party*', 'tools*')),
+                                    'paddleaudio*', 'third_party*', 'tools*')),
    zip_safe=True,
    classifiers=[
        'Development Status :: 3 - Alpha',

--- a/setup_audio.py
+++ b/setup_audio.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import setuptools
+# Packaging script for the standalone `paddleaudio` distribution.
+# Only packages matching 'paddleaudio*' are collected (see `packages=` below).
+# set the version here
+version = '0.1.0a'
+setuptools.setup(
+    name="paddleaudio",
+    version=version,
+    author="",
+    author_email="",
+    description="PaddleAudio, in development",
+    long_description="",
+    long_description_content_type="text/markdown",
+    url="",
+    packages=setuptools.find_packages(include=['paddleaudio*']),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        # Kept in sync with the Apache-2.0 license header at the top of this file.
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: OS Independent",
+    ],
+    python_requires='>=3.6',
+    # Runtime dependencies installed together with the package.
+    install_requires=[
+        'numpy >= 1.15.0',
+        'scipy >= 1.0.0',
+        'resampy >= 0.2.2',
+        'soundfile >= 0.9.0',
+        'colorlog',
+    ], )