Unverified commit 4f1462d7, authored by K KP, committed by GitHub

Update usage of new APIs (#5331)

Parent 6ec25e3c
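The change shared by the example scripts below is the move from the per-waveform melspectrogram function in paddleaudio.features to the batched LogMelSpectrogram transform in paddleaudio.transforms, plus the new get_logger helper. The following is a minimal sketch of the new feature-extraction usage (not part of the commit), assuming the parameter values shown in the diff; the audio path is a placeholder.

import paddle
from paddleaudio.backends import load as load_audio
from paddleaudio.transforms import LogMelSpectrogram

# Build the transform once and reuse it; parameter values mirror the diff below.
feature_extractor = LogMelSpectrogram(
    sr=16000, n_fft=512, hop_length=320, n_mels=64, f_min=50)

# Old API (removed in this commit): feats = melspectrogram(waveform, sr).transpose()
# New API: apply the transform to a batched paddle tensor of raw audio.
waveform, sr = load_audio('sample.wav')  # placeholder path
feats = feature_extractor(paddle.to_tensor(waveform).unsqueeze(0))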
@@ -20,9 +20,11 @@ from typing import List
import numpy as np
import paddle
from paddleaudio.backends import load as load_audio
from paddleaudio.features import melspectrogram
from paddleaudio.models.panns import cnn14
from paddleaudio.utils import logger
from paddleaudio.transforms import LogMelSpectrogram
from paddleaudio.utils import Timer, get_logger
logger = get_logger()
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
@@ -57,10 +59,7 @@ def batchify(data: List[List[float]], sample_rate: int, batch_size: int,
"""
Create batches from a list of waveform examples.
"""
examples = []
for waveform in data:
feats = melspectrogram(waveform, sample_rate, **kwargs).transpose()
examples.append(feats)
examples = data
# Separates data into batches.
one_batch = []
@@ -74,6 +73,7 @@ def predict(model,
def predict(model,
feature_extractor,
data: List[List[float]],
sample_rate: int,
batch_size: int = 1):
@@ -83,10 +83,8 @@ def predict(model,
batches = batchify(data, sample_rate, batch_size)
results = None
model.eval()
for batch in batches:
feats = paddle.to_tensor(batch).unsqueeze(1) \
# (batch_size, num_frames, num_melbins) -> (batch_size, 1, num_frames, num_melbins)
for waveforms in batches:
feats = feature_extractor(paddle.to_tensor(waveforms))
audioset_scores = model(feats)
if results is None:
results = audioset_scores.numpy()
@@ -98,11 +96,13 @@ def predict(model,
if __name__ == '__main__':
paddle.set_device(args.device)
feature_extractor = LogMelSpectrogram(
sr=16000, n_fft=512, hop_length=320, n_mels=64, f_min=50)
model = cnn14(pretrained=True, extract_embedding=False)
waveform, sr = load_audio(args.wav, sr=None)
time, data = split(waveform, int(args.sample_duration * sr),
int(args.hop_duration * sr))
results = predict(model, data, sr, batch_size=8)
results = predict(model, feature_extractor, data, sr, batch_size=8)
if not os.path.exists(args.output_dir):
os.makedirs(args.output_dir)
......
@@ -22,8 +22,8 @@ import paddle.nn.functional as F
from model import SoundClassifier
from paddleaudio.backends import load as load_audio
from paddleaudio.datasets import ESC50
from paddleaudio.features import melspectrogram
from paddleaudio.models.panns import cnn14
from paddleaudio.transforms import LogMelSpectrogram
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
@@ -34,24 +34,19 @@ parser.add_argument("--checkpoint", type=str, required=True, help="Checkpoint of
args = parser.parse_args()
# yapf: enable
def extract_features(file: str, **kwargs):
waveform, sr = load_audio(args.wav, sr=None)
feats = melspectrogram(waveform, sr, **kwargs).transpose()
return feats
if __name__ == '__main__':
paddle.set_device(args.device)
model = SoundClassifier(backbone=cnn14(pretrained=False,
extract_embedding=True),
num_class=len(ESC50.label_list))
feature_extractor = LogMelSpectrogram(
sr=16000, n_fft=512, hop_length=320, n_mels=64, f_min=50)
model = SoundClassifier(
backbone=cnn14(pretrained=False, extract_embedding=True),
num_class=len(ESC50.label_list))
model.set_state_dict(paddle.load(args.checkpoint))
model.eval()
feats = extract_features(args.wav)
feats = paddle.to_tensor(np.expand_dims(feats, 0))
waveform, sr = load_audio(args.wav)
feats = feature_extractor(paddle.to_tensor(waveform).unsqueeze(0))
logits = model(feats)
probs = F.softmax(logits, axis=1).numpy()
......
@@ -21,7 +21,10 @@ import paddle.nn.functional as F
from model import SoundClassifier
from paddleaudio.datasets import ESC50
from paddleaudio.models.panns import cnn14
from paddleaudio.utils import Timer, logger
from paddleaudio.transforms import LogMelSpectrogram
from paddleaudio.utils import Timer, get_logger
logger = get_logger()
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
@@ -41,14 +44,16 @@ if __name__ == "__main__":
nranks = paddle.distributed.get_world_size()
local_rank = paddle.distributed.get_rank()
feature_extractor = LogMelSpectrogram(
sr=16000, n_fft=512, hop_length=320, n_mels=64, f_min=50)
backbone = cnn14(pretrained=True, extract_embedding=True)
model = SoundClassifier(backbone, num_class=len(ESC50.label_list))
optimizer = paddle.optimizer.Adam(learning_rate=args.learning_rate,
parameters=model.parameters())
optimizer = paddle.optimizer.Adam(
learning_rate=args.learning_rate, parameters=model.parameters())
criterion = paddle.nn.loss.CrossEntropyLoss()
train_ds = ESC50(mode='train', feat_type='melspectrogram')
dev_ds = ESC50(mode='dev', feat_type='melspectrogram')
train_ds = ESC50(mode='train')
dev_ds = ESC50(mode='dev')
train_sampler = paddle.io.DistributedBatchSampler(
train_ds, batch_size=args.batch_size, shuffle=True, drop_last=False)
@@ -71,7 +76,8 @@ if __name__ == "__main__":
num_corrects = 0
num_samples = 0
for batch_idx, batch in enumerate(train_loader):
feats, labels = batch
waveforms, labels = batch
feats = feature_extractor(waveforms)
logits = model(feats)
loss = criterion(logits, labels)
@@ -110,10 +116,11 @@ if __name__ == "__main__":
num_samples = 0
if epoch % args.save_freq == 0 and batch_idx + 1 == steps_per_epoch and local_rank == 0:
dev_sampler = paddle.io.BatchSampler(dev_ds,
batch_size=args.batch_size,
shuffle=False,
drop_last=False)
dev_sampler = paddle.io.BatchSampler(
dev_ds,
batch_size=args.batch_size,
shuffle=False,
drop_last=False)
dev_loader = paddle.io.DataLoader(
dev_ds,
batch_sampler=dev_sampler,
@@ -126,7 +133,8 @@ if __name__ == "__main__":
num_samples = 0
with logger.processing('Evaluation on validation dataset'):
for batch_idx, batch in enumerate(dev_loader):
feats, labels = batch
waveforms, labels = batch
feats = feature_extractor(waveforms)
logits = model(feats)
preds = paddle.argmax(logits, axis=1)
......
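For reference, a trimmed, single-process sketch (not part of the commit) of the updated training data flow: ESC50 is now constructed without feat_type='melspectrogram', so the loader is assumed to yield raw waveforms, and log-mel features are computed per batch. Batch size and learning rate are placeholders for the script's CLI arguments, and the optimizer-update calls are the standard Paddle ones elided from the diff.

import paddle
from model import SoundClassifier  # model definition from the example
from paddleaudio.datasets import ESC50
from paddleaudio.models.panns import cnn14
from paddleaudio.transforms import LogMelSpectrogram

feature_extractor = LogMelSpectrogram(
    sr=16000, n_fft=512, hop_length=320, n_mels=64, f_min=50)
backbone = cnn14(pretrained=True, extract_embedding=True)
model = SoundClassifier(backbone, num_class=len(ESC50.label_list))
optimizer = paddle.optimizer.Adam(
    learning_rate=1e-4, parameters=model.parameters())  # placeholder LR
criterion = paddle.nn.loss.CrossEntropyLoss()

train_ds = ESC50(mode='train')  # assumed to return raw waveforms by default
train_loader = paddle.io.DataLoader(train_ds, batch_size=16, shuffle=True)

for batch_idx, batch in enumerate(train_loader):
    waveforms, labels = batch
    feats = feature_extractor(waveforms)  # per-batch log-mel extraction
    logits = model(feats)
    loss = criterion(logits, labels)
    loss.backward()
    optimizer.step()
    optimizer.clear_grad()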