From b336ccfe8d80fb04b707e88c7e0eba47b9d58efb Mon Sep 17 00:00:00 2001 From: Yang Zhou Date: Sun, 18 Sep 2022 12:43:55 +0800 Subject: [PATCH] update audio api in apps --- examples/esc50/cls0/conf/panns.yaml | 2 +- examples/hey_snips/kws0/conf/mdtc.yaml | 2 +- examples/voxceleb/sv0/local/data_prepare.py | 2 +- .../sv0/local/make_rirs_noise_csv_dataset_from_json.py | 2 +- .../voxceleb/sv0/local/make_vox_csv_dataset_from_json.py | 2 +- paddlespeech/cli/cls/infer.py | 4 ++-- paddlespeech/cli/kws/infer.py | 6 +++--- paddlespeech/cli/vector/infer.py | 4 ++-- paddlespeech/cls/exps/panns/deploy/predict.py | 7 +++---- paddlespeech/cls/exps/panns/export_model.py | 2 +- paddlespeech/cls/exps/panns/predict.py | 6 +++--- paddlespeech/cls/exps/panns/train.py | 6 +++--- paddlespeech/cls/models/panns/panns.py | 2 +- paddlespeech/kws/exps/mdtc/train.py | 4 ++-- paddlespeech/s2t/frontend/featurizer/audio_featurizer.py | 3 +-- paddlespeech/s2t/models/u2/u2.py | 6 +++--- paddlespeech/s2t/models/u2_st/u2_st.py | 4 ++-- paddlespeech/server/engine/vector/python/vector_engine.py | 4 ++-- paddlespeech/server/util.py | 4 ++-- paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py | 4 ++-- paddlespeech/vector/exps/ecapa_tdnn/test.py | 2 +- paddlespeech/vector/exps/ecapa_tdnn/train.py | 2 +- paddlespeech/vector/io/dataset.py | 5 ++--- paddlespeech/vector/io/dataset_from_json.py | 7 +++---- 24 files changed, 44 insertions(+), 48 deletions(-) diff --git a/examples/esc50/cls0/conf/panns.yaml b/examples/esc50/cls0/conf/panns.yaml index 1f0323f0..3a9d42aa 100644 --- a/examples/esc50/cls0/conf/panns.yaml +++ b/examples/esc50/cls0/conf/panns.yaml @@ -1,5 +1,5 @@ data: - dataset: 'paddlespeech.audio.datasets:ESC50' + dataset: 'paddleaudio.datasets:ESC50' num_classes: 50 train: mode: 'train' diff --git a/examples/hey_snips/kws0/conf/mdtc.yaml b/examples/hey_snips/kws0/conf/mdtc.yaml index 54d05947..857d36d4 100644 --- a/examples/hey_snips/kws0/conf/mdtc.yaml +++ b/examples/hey_snips/kws0/conf/mdtc.yaml @@ 
-2,7 +2,7 @@ ########################################### # Data # ########################################### -dataset: 'paddlespeech.audio.datasets:HeySnips' +dataset: 'paddleaudio.datasets:HeySnips' data_dir: '../tests/hey_snips_research_6k_en_train_eval_clean_ter' ############################################ diff --git a/examples/voxceleb/sv0/local/data_prepare.py b/examples/voxceleb/sv0/local/data_prepare.py index e5a5dff7..b4486b6f 100644 --- a/examples/voxceleb/sv0/local/data_prepare.py +++ b/examples/voxceleb/sv0/local/data_prepare.py @@ -14,9 +14,9 @@ import argparse import paddle +from paddleaudio.datasets.voxceleb import VoxCeleb from yacs.config import CfgNode -from paddlespeech.audio.datasets.voxceleb import VoxCeleb from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.training.seeding import seed_everything diff --git a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py index 233977ba..11908fe6 100644 --- a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py @@ -21,9 +21,9 @@ import os from typing import List import tqdm +from paddleaudio.backends import soundfile_load as load_audio from yacs.config import CfgNode -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py index 49c234a4..ebeb598a 100644 --- a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py @@ -22,9 +22,9 @@ import os import random import tqdm +from paddleaudio.backends import 
soundfile_load as load_audio from yacs.config import CfgNode -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index 5ace7fe0..5e9b5ace 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -25,8 +25,8 @@ import yaml from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio.soundfile_backend import soundfile_load as load -from paddlespeech.audio.features import LogMelSpectrogram +from paddleaudio.backends import soundfile_load as load +from paddleaudio.features import LogMelSpectrogram __all__ = ['CLSExecutor'] diff --git a/paddlespeech/cli/kws/infer.py b/paddlespeech/cli/kws/infer.py index bd15e80e..ce2f3f46 100644 --- a/paddlespeech/cli/kws/infer.py +++ b/paddlespeech/cli/kws/infer.py @@ -20,12 +20,12 @@ from typing import Union import paddle import yaml +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.kaldi import fbank as kaldi_fbank from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.kaldi import fbank as kaldi_fbank __all__ = ['KWSExecutor'] @@ -139,7 +139,7 @@ class KWSExecutor(BaseExecutor): Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet). 
""" assert os.path.isfile(audio_file) - waveform, _ = load(audio_file) + waveform, _ = load_audio(audio_file) if isinstance(audio_file, (str, os.PathLike)): logger.debug("Preprocessing audio_file:" + audio_file) diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index 5a66b486..57a78165 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -22,13 +22,13 @@ from typing import Union import paddle import soundfile +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.modules.sid_model import SpeakerIdetification diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py index 93eee74b..ae46890b 100644 --- a/paddlespeech/cls/exps/panns/deploy/predict.py +++ b/paddlespeech/cls/exps/panns/deploy/predict.py @@ -16,12 +16,11 @@ import os import numpy as np from paddle import inference +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.datasets import ESC50 +from paddleaudio.features import melspectrogram from scipy.special import softmax -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.datasets import ESC50 -from paddlespeech.audio.features import melspectrogram - # yapf: disable parser = argparse.ArgumentParser() parser.add_argument("--model_dir", type=str, required=True, default="./export", help="The directory to static model.") diff --git a/paddlespeech/cls/exps/panns/export_model.py b/paddlespeech/cls/exps/panns/export_model.py index e62d58f0..63b22981 100644 
--- a/paddlespeech/cls/exps/panns/export_model.py +++ b/paddlespeech/cls/exps/panns/export_model.py @@ -15,8 +15,8 @@ import argparse import os import paddle +from paddleaudio.datasets import ESC50 -from paddlespeech.audio.datasets import ESC50 from paddlespeech.cls.models import cnn14 from paddlespeech.cls.models import SoundClassifier diff --git a/paddlespeech/cls/exps/panns/predict.py b/paddlespeech/cls/exps/panns/predict.py index 97759a89..feeee24e 100644 --- a/paddlespeech/cls/exps/panns/predict.py +++ b/paddlespeech/cls/exps/panns/predict.py @@ -17,10 +17,10 @@ import os import paddle import paddle.nn.functional as F import yaml +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.features import LogMelSpectrogram +from paddleaudio.utils import logger -from paddlespeech.audio.backends import load as load_audio -from paddlespeech.audio.features import LogMelSpectrogram -from paddlespeech.audio.utils import logger from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/paddlespeech/cls/exps/panns/train.py b/paddlespeech/cls/exps/panns/train.py index fba38a01..9258ab51 100644 --- a/paddlespeech/cls/exps/panns/train.py +++ b/paddlespeech/cls/exps/panns/train.py @@ -17,9 +17,9 @@ import os import paddle import yaml -from paddlespeech.audio.features import LogMelSpectrogram -from paddlespeech.audio.utils import logger -from paddlespeech.audio.utils import Timer +from paddleaudio.features import LogMelSpectrogram +from paddleaudio.utils import logger +from paddleaudio.utils import Timer from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/paddlespeech/cls/models/panns/panns.py b/paddlespeech/cls/models/panns/panns.py index 37deae80..6f9af9b5 100644 --- a/paddlespeech/cls/models/panns/panns.py +++ b/paddlespeech/cls/models/panns/panns.py @@ -15,8 +15,8 @@ import os import paddle.nn as nn import
paddle.nn.functional as F +from paddleaudio.utils.download import load_state_dict_from_url -from paddlespeech.audio.utils.download import load_state_dict_from_url from paddlespeech.utils.env import MODEL_HOME __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6'] diff --git a/paddlespeech/kws/exps/mdtc/train.py b/paddlespeech/kws/exps/mdtc/train.py index 94e45d59..5a9ca92d 100644 --- a/paddlespeech/kws/exps/mdtc/train.py +++ b/paddlespeech/kws/exps/mdtc/train.py @@ -14,10 +14,10 @@ import os import paddle +from paddleaudio.utils import logger +from paddleaudio.utils import Timer from yacs.config import CfgNode -from paddlespeech.audio.utils import logger -from paddlespeech.audio.utils import Timer from paddlespeech.kws.exps.mdtc.collate import collate_features from paddlespeech.kws.models.loss import max_pooling_loss from paddlespeech.kws.models.mdtc import KWSModel diff --git a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py index ac5720fd..22329d5e 100644 --- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py +++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py @@ -14,11 +14,10 @@ """Contains the audio featurizer class.""" import numpy as np import paddle +import paddleaudio.compliance.kaldi as kaldi from python_speech_features import delta from python_speech_features import mfcc -import paddlespeech.audio.compliance.kaldi as kaldi - class AudioFeaturizer(): """Audio featurizer, for extracting features from audio contents of diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py index 8a984949..80f18728 100644 --- a/paddlespeech/s2t/models/u2/u2.py +++ b/paddlespeech/s2t/models/u2/u2.py @@ -28,10 +28,10 @@ from typing import Tuple import paddle from paddle import jit from paddle import nn +from paddleaudio.utils.tensor_utils import add_sos_eos +from paddleaudio.utils.tensor_utils import pad_sequence +from paddleaudio.utils.tensor_utils import 
th_accuracy -from paddlespeech.audio.utils.tensor_utils import add_sos_eos -from paddlespeech.audio.utils.tensor_utils import pad_sequence -from paddlespeech.audio.utils.tensor_utils import th_accuracy from paddlespeech.s2t.decoders.scorers.ctc import CTCPrefixScorer from paddlespeech.s2t.frontend.utility import IGNORE_ID from paddlespeech.s2t.frontend.utility import load_cmvn diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py index e8b61bc0..4b68c1ae 100644 --- a/paddlespeech/s2t/models/u2_st/u2_st.py +++ b/paddlespeech/s2t/models/u2_st/u2_st.py @@ -24,9 +24,9 @@ from typing import Tuple import paddle from paddle import jit from paddle import nn +from paddleaudio.utils.tensor_utils import add_sos_eos +from paddleaudio.utils.tensor_utils import th_accuracy -from paddlespeech.audio.utils.tensor_utils import add_sos_eos -from paddlespeech.audio.utils.tensor_utils import th_accuracy from paddlespeech.s2t.frontend.utility import IGNORE_ID from paddlespeech.s2t.frontend.utility import load_cmvn from paddlespeech.s2t.modules.cmvn import GlobalCMVN diff --git a/paddlespeech/server/engine/vector/python/vector_engine.py b/paddlespeech/server/engine/vector/python/vector_engine.py index e617c365..7d86f3df 100644 --- a/paddlespeech/server/engine/vector/python/vector_engine.py +++ b/paddlespeech/server/engine/vector/python/vector_engine.py @@ -16,9 +16,9 @@ from collections import OrderedDict import numpy as np import paddle +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.cli.log import logger from paddlespeech.cli.vector.infer import VectorExecutor from paddlespeech.server.engine.base_engine import BaseEngine diff --git a/paddlespeech/server/util.py b/paddlespeech/server/util.py index 32546a33..6aa6fd58 
100644 --- a/paddlespeech/server/util.py +++ b/paddlespeech/server/util.py @@ -24,11 +24,11 @@ from typing import Any from typing import Dict import paddle +import paddleaudio import requests import yaml from paddle.framework import load -import paddlespeech.audio from .entry import client_commands from .entry import server_commands from paddlespeech.cli import download @@ -289,7 +289,7 @@ def _note_one_stat(cls_name, params={}): if 'audio_file' in params: try: - _, sr = paddlespeech.audio.load(params['audio_file']) + _, sr = paddleaudio.backends.soundfile_load(params['audio_file']) except Exception: sr = -1 diff --git a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py index e9203ef9..821b1dee 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py @@ -16,10 +16,10 @@ import os import time import paddle +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn diff --git a/paddlespeech/vector/exps/ecapa_tdnn/test.py b/paddlespeech/vector/exps/ecapa_tdnn/test.py index 6c87dbe7..f15dbf9b 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/test.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/test.py @@ -18,10 +18,10 @@ import numpy as np import paddle from paddle.io import BatchSampler from paddle.io import DataLoader +from paddleaudio.metric import compute_eer from tqdm import tqdm from yacs.config import CfgNode -from paddlespeech.audio.metric import compute_eer from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import 
batch_feature_normalize from paddlespeech.vector.io.dataset import CSVDataset diff --git a/paddlespeech/vector/exps/ecapa_tdnn/train.py b/paddlespeech/vector/exps/ecapa_tdnn/train.py index 961b75e2..bf014045 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/train.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/train.py @@ -20,9 +20,9 @@ import paddle from paddle.io import BatchSampler from paddle.io import DataLoader from paddle.io import DistributedBatchSampler +from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.io.augment import waveform_augment diff --git a/paddlespeech/vector/io/dataset.py b/paddlespeech/vector/io/dataset.py index c9d56b5e..dff8ad9f 100644 --- a/paddlespeech/vector/io/dataset.py +++ b/paddlespeech/vector/io/dataset.py @@ -15,10 +15,9 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio - -from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log logger = Log(__name__).getlog() diff --git a/paddlespeech/vector/io/dataset_from_json.py b/paddlespeech/vector/io/dataset_from_json.py index 32960e45..852f39a9 100644 --- a/paddlespeech/vector/io/dataset_from_json.py +++ b/paddlespeech/vector/io/dataset_from_json.py @@ -16,10 +16,9 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset - -from paddlespeech.audio.soundfile_backend import soundfile_load as load_audio -from paddlespeech.audio.compliance.librosa import melspectrogram -from paddlespeech.audio.compliance.librosa import mfcc +from 
paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram +from paddleaudio.compliance.librosa import mfcc @dataclass -- GitLab