diff --git a/examples/esc50/cls0/conf/panns.yaml b/examples/esc50/cls0/conf/panns.yaml index 1f0323f0d316aacb7697015082053f14b649a739..3a9d42aa5e52de31c6c461d471988bb2067e5cdf 100644 --- a/examples/esc50/cls0/conf/panns.yaml +++ b/examples/esc50/cls0/conf/panns.yaml @@ -1,5 +1,5 @@ data: - dataset: 'paddlespeech.audio.datasets:ESC50' + dataset: 'paddleaudio.datasets:ESC50' num_classes: 50 train: mode: 'train' diff --git a/examples/hey_snips/kws0/conf/mdtc.yaml b/examples/hey_snips/kws0/conf/mdtc.yaml index 54d05947289a32fe874e24381e201de580a671eb..857d36d465ad8cef0b088412c829f0fd4476808f 100644 --- a/examples/hey_snips/kws0/conf/mdtc.yaml +++ b/examples/hey_snips/kws0/conf/mdtc.yaml @@ -2,7 +2,7 @@ ########################################### # Data # ########################################### -dataset: 'paddlespeech.audio.datasets:HeySnips' +dataset: 'paddleaudio.datasets:HeySnips' data_dir: '../tests/hey_snips_research_6k_en_train_eval_clean_ter' ############################################ diff --git a/examples/voxceleb/sv0/local/data_prepare.py b/examples/voxceleb/sv0/local/data_prepare.py index e5a5dff7bb96fa773546447bc5956153d9619bed..b4486b6f00cb91957dd0b27dffbc56dfba086df5 100644 --- a/examples/voxceleb/sv0/local/data_prepare.py +++ b/examples/voxceleb/sv0/local/data_prepare.py @@ -14,9 +14,9 @@ import argparse import paddle +from paddleaudio.datasets.voxceleb import VoxCeleb from yacs.config import CfgNode -from paddlespeech.audio.datasets.voxceleb import VoxCeleb from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.training.seeding import seed_everything diff --git a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py index 233977bae49efae522de1a206a0a96ca588483ec..11908fe63bb59a5ab50730349335255ec399123f 100644 --- a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py +++ 
b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py @@ -21,9 +21,9 @@ import os from typing import List import tqdm +from paddleaudio.backends import soundfile_load as load_audio from yacs.config import CfgNode -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py index 49c234a431656e289fc5218c87a79c33064a98c2..ebeb598a45104f74f243e466d4a1f526c6aa03f9 100644 --- a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py @@ -22,9 +22,9 @@ import os import random import tqdm +from paddleaudio.backends import soundfile_load as load_audio from yacs.config import CfgNode -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index 5ace7fe0da6e17576a6e366447060fba5b425bc3..5e9b5acec936d2556787738b39b6b12eb818cb7c 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -25,8 +25,8 @@ import yaml from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio.soundfile_backend import soundfile_load as load -from paddlespeech.audio.features import LogMelSpectrogram +from paddleaudio.backends import soundfile_load as load +from paddleaudio.features import LogMelSpectrogram __all__ = ['CLSExecutor'] diff --git a/paddlespeech/cli/kws/infer.py b/paddlespeech/cli/kws/infer.py index bd15e80e6622c49b94d6e3e71d74f2004850c2a7..ce2f3f4611bec72f18af4bc3e5c985ce80116c3a 100644 --- a/paddlespeech/cli/kws/infer.py +++ b/paddlespeech/cli/kws/infer.py @@ 
-20,12 +20,12 @@ from typing import Union import paddle import yaml +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.kaldi import fbank as kaldi_fbank from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.kaldi import fbank as kaldi_fbank __all__ = ['KWSExecutor'] @@ -139,7 +139,7 @@ class KWSExecutor(BaseExecutor): Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet). """ assert os.path.isfile(audio_file) - waveform, _ = load(audio_file) + waveform, _ = load_audio(audio_file) if isinstance(audio_file, (str, os.PathLike)): logger.debug("Preprocessing audio_file:" + audio_file) diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index 5a66b4861ab47efd71da2ac05ae4e56b714e1f2c..57a78165604aa31368c112307a00e3ec4b570109 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -22,13 +22,13 @@ from typing import Union import paddle import soundfile +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.modules.sid_model import SpeakerIdetification diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py index 93eee74b9201cb4acba5d53fbb71fd2dd04fe231..ae46890bd670f261df4d6775706f91597eeb9ac6 100644 --- a/paddlespeech/cls/exps/panns/deploy/predict.py +++ b/paddlespeech/cls/exps/panns/deploy/predict.py @@ -16,12 +16,11 @@ 
import os import numpy as np from paddle import inference +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.datasets import ESC50 +from paddleaudio.features import melspectrogram from scipy.special import softmax -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.datasets import ESC50 -from paddlespeech.audio.features import melspectrogram - # yapf: disable parser = argparse.ArgumentParser() parser.add_argument("--model_dir", type=str, required=True, default="./export", help="The directory to static model.") diff --git a/paddlespeech/cls/exps/panns/export_model.py b/paddlespeech/cls/exps/panns/export_model.py index e62d58f02dd1d5f809d9b45de9a8eda07d2c5f69..63b22981adb62d213f0b2bd79ae9d4b180d06591 100644 --- a/paddlespeech/cls/exps/panns/export_model.py +++ b/paddlespeech/cls/exps/panns/export_model.py @@ -15,8 +15,8 @@ import argparse import os import paddle +from paddleaudio.datasets import ESC50 -from paddlespeech.audio.datasets import ESC50 from paddlespeech.cls.models import cnn14 from paddlespeech.cls.models import SoundClassifier diff --git a/paddlespeech/cls/exps/panns/predict.py b/paddlespeech/cls/exps/panns/predict.py index 97759a89d6dbc3f535dc1b4211b6166420291121..feeee24e3cb58cb9dddcb0e1aaa473598fddbee5 100644 --- a/paddlespeech/cls/exps/panns/predict.py +++ b/paddlespeech/cls/exps/panns/predict.py @@ -17,10 +17,10 @@ import os import paddle import paddle.nn.functional as F import yaml +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.features import LogMelSpectrogram +from paddleaudio.utils import logger -from paddlespeech.audio.backends import load as load_audio -from paddlespeech.audio.features import LogMelSpectrogram -from paddlespeech.audio.utils import logger from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/paddlespeech/cls/exps/panns/train.py 
b/paddlespeech/cls/exps/panns/train.py index fba38a01c7f74503f9f20cbada2b3a3c3ce11eed..9258ab516fc115a66c0ac60c4912b66413c6c662 100644 --- a/paddlespeech/cls/exps/panns/train.py +++ b/paddlespeech/cls/exps/panns/train.py @@ -17,9 +17,9 @@ import os import paddle import yaml -from paddlespeech.audio.features import LogMelSpectrogram -from paddlespeech.audio.utils import logger -from paddlespeech.audio.utils import Timer +from paddleaudio.features import LogMelSpectrogram +from paddleaudio.utils import logger +from paddleaudio.utils import Timer from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/paddlespeech/cls/models/panns/panns.py b/paddlespeech/cls/models/panns/panns.py index 37deae80c847757d716acf123f037b60d7fd3d91..6f9af9b5280e46949d5981e332c833cc0c3bb5d4 100644 --- a/paddlespeech/cls/models/panns/panns.py +++ b/paddlespeech/cls/models/panns/panns.py @@ -15,8 +15,8 @@ import os import paddle.nn as nn import paddle.nn.functional as F +from paddleaudio.utils.download import load_state_dict_from_url -from paddlespeech.audio.utils.download import load_state_dict_from_url from paddlespeech.utils.env import MODEL_HOME __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6'] diff --git a/paddlespeech/kws/exps/mdtc/train.py b/paddlespeech/kws/exps/mdtc/train.py index 94e45d59048b3a6b514a4585b31d3c2b34f33d85..5a9ca92d16ff3eada36840da6914397227005b49 100644 --- a/paddlespeech/kws/exps/mdtc/train.py +++ b/paddlespeech/kws/exps/mdtc/train.py @@ -14,10 +14,10 @@ import os import paddle +from paddleaudio.utils import logger +from paddleaudio.utils import Timer from yacs.config import CfgNode -from paddlespeech.audio.utils import logger -from paddlespeech.audio.utils import Timer from paddlespeech.kws.exps.mdtc.collate import collate_features from paddlespeech.kws.models.loss import max_pooling_loss from paddlespeech.kws.models.mdtc import KWSModel diff --git 
a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py index ac5720fd5e315f76ee4003886e996881369bcdba..22329d5e028ebed0a87af69c30eba1e6513d6226 100644 --- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py +++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py @@ -14,11 +14,10 @@ """Contains the audio featurizer class.""" import numpy as np import paddle +import paddleaudio.compliance.kaldi as kaldi from python_speech_features import delta from python_speech_features import mfcc -import paddlespeech.audio.compliance.kaldi as kaldi - class AudioFeaturizer(): """Audio featurizer, for extracting features from audio contents of diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py index 8a98494926fde029a19f7290e57e6da7347f59bd..80f187282bd09fd1b15de22a004b11d377bcd056 100644 --- a/paddlespeech/s2t/models/u2/u2.py +++ b/paddlespeech/s2t/models/u2/u2.py @@ -28,10 +28,10 @@ from typing import Tuple import paddle from paddle import jit from paddle import nn +from paddleaudio.utils.tensor_utils import add_sos_eos +from paddleaudio.utils.tensor_utils import pad_sequence +from paddleaudio.utils.tensor_utils import th_accuracy -from paddlespeech.audio.utils.tensor_utils import add_sos_eos -from paddlespeech.audio.utils.tensor_utils import pad_sequence -from paddlespeech.audio.utils.tensor_utils import th_accuracy from paddlespeech.s2t.decoders.scorers.ctc import CTCPrefixScorer from paddlespeech.s2t.frontend.utility import IGNORE_ID from paddlespeech.s2t.frontend.utility import load_cmvn diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py index e8b61bc0d47cb5296cfe410a490b40242e6b98d6..4b68c1ae1e52defb6a39611c21b29166bf34047e 100644 --- a/paddlespeech/s2t/models/u2_st/u2_st.py +++ b/paddlespeech/s2t/models/u2_st/u2_st.py @@ -24,9 +24,9 @@ from typing import Tuple import paddle from paddle import jit from paddle import nn +from 
paddleaudio.utils.tensor_utils import add_sos_eos +from paddleaudio.utils.tensor_utils import th_accuracy -from paddlespeech.audio.utils.tensor_utils import add_sos_eos -from paddlespeech.audio.utils.tensor_utils import th_accuracy from paddlespeech.s2t.frontend.utility import IGNORE_ID from paddlespeech.s2t.frontend.utility import load_cmvn from paddlespeech.s2t.modules.cmvn import GlobalCMVN diff --git a/paddlespeech/server/engine/vector/python/vector_engine.py b/paddlespeech/server/engine/vector/python/vector_engine.py index e617c3650b569e6c6e91167f4afe497ba6bbf826..7d86f3df76ffb3d56e1668ba06070c2001728805 100644 --- a/paddlespeech/server/engine/vector/python/vector_engine.py +++ b/paddlespeech/server/engine/vector/python/vector_engine.py @@ -16,9 +16,9 @@ from collections import OrderedDict import numpy as np import paddle +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.cli.log import logger from paddlespeech.cli.vector.infer import VectorExecutor from paddlespeech.server.engine.base_engine import BaseEngine diff --git a/paddlespeech/server/util.py b/paddlespeech/server/util.py index 32546a3300b0ce837c4f2491ef6fd3884d1eb913..6aa6fd589ad2f1f475768c93080856f879e083b2 100644 --- a/paddlespeech/server/util.py +++ b/paddlespeech/server/util.py @@ -24,11 +24,11 @@ from typing import Any from typing import Dict import paddle +import paddleaudio import requests import yaml from paddle.framework import load -import paddlespeech.audio from .entry import client_commands from .entry import server_commands from paddlespeech.cli import download @@ -289,7 +289,7 @@ def _note_one_stat(cls_name, params={}): if 'audio_file' in params: try: - _, sr = paddlespeech.audio.load(params['audio_file']) + _, sr = 
paddleaudio.backends.soundfile_load(params['audio_file']) except Exception: sr = -1 diff --git a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py index e9203ef99c0924a9ffcb07efa669399824373917..821b1deed86ef8f3de7f3f55fd7c1664ba1e4d4d 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py @@ -16,10 +16,10 @@ import os import time import paddle +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn diff --git a/paddlespeech/vector/exps/ecapa_tdnn/test.py b/paddlespeech/vector/exps/ecapa_tdnn/test.py index 6c87dbe7bd01849b978f2653b044cf6e1f287581..f15dbf9b7a111720de9481b9ce62104d47ea9e95 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/test.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/test.py @@ -18,10 +18,10 @@ import numpy as np import paddle from paddle.io import BatchSampler from paddle.io import DataLoader +from paddleaudio.metric import compute_eer from tqdm import tqdm from yacs.config import CfgNode -from paddlespeech.audio.metric import compute_eer from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import batch_feature_normalize from paddlespeech.vector.io.dataset import CSVDataset diff --git a/paddlespeech/vector/exps/ecapa_tdnn/train.py b/paddlespeech/vector/exps/ecapa_tdnn/train.py index 961b75e2934d314ac8ced2d0302e965f2e1cf8bb..bf014045d0a85d253d9ef6056cf402a2042988b9 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/train.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/train.py @@ -20,9 +20,9 @@ import paddle from 
paddle.io import BatchSampler from paddle.io import DataLoader from paddle.io import DistributedBatchSampler +from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.io.augment import waveform_augment diff --git a/paddlespeech/vector/io/dataset.py b/paddlespeech/vector/io/dataset.py index c9d56b5eae0a63c097c65e611239edb10656623f..dff8ad9fdc13185bf49a66c17716b03c07dc8bbe 100644 --- a/paddlespeech/vector/io/dataset.py +++ b/paddlespeech/vector/io/dataset.py @@ -15,10 +15,9 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio - -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log logger = Log(__name__).getlog() diff --git a/paddlespeech/vector/io/dataset_from_json.py b/paddlespeech/vector/io/dataset_from_json.py index 32960e45641715a3053fb492df0bd6b3fa65dd55..852f39a94c33d4e62e5d9d82bb64a026c8159771 100644 --- a/paddlespeech/vector/io/dataset_from_json.py +++ b/paddlespeech/vector/io/dataset_from_json.py @@ -16,10 +16,9 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset - -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.librosa import melspectrogram -from paddlespeech.audio.compliance.librosa import mfcc +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram +from paddleaudio.compliance.librosa import mfcc @dataclass