Commit 62a887f7 authored by Corentin Jemine

Backup of the encoder (1M steps)

Parent 9692bea9
from synthesizer.datasets.audio import inv_mel_spectrogram
from synthesizer.audio import inv_mel_spectrogram
from synthesizer.hparams import hparams
from synthesizer import synthesizer
import sounddevice as sd
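For context, a minimal sketch of how the relocated helper is typically driven. This is not part of the commit: the mel file name is hypothetical, and it assumes inv_mel_spectrogram(mel, hparams) returns a float waveform at hparams.sample_rate.

# Sketch only: invert a saved mel prediction and play it back.
import numpy as np
mel = np.load("example_mel.npy")                  # hypothetical (num_mels, T) prediction
wav = inv_mel_spectrogram(mel, hparams)           # waveform as a float array
sd.play(wav, hparams.sample_rate, blocking=True)  # assumes hparams.sample_rate is defined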
......
@@ -3,11 +3,11 @@ import matplotlib.pyplot as plt
import librosa.display
import librosa
import numpy as np
import sounddevice
import webrtcvad
import struct
from encoder.params_data import *
int16_max = (2 ** 15) - 1
def load(fpath):
@@ -121,16 +121,16 @@ def plot_mel_filterbank(frames):
plt.tight_layout()
plt.show()
def play_wave(wav, blocking=False):
sounddevice.stop()
sounddevice.play(wav, sampling_rate, blocking=blocking)
def rec_wave(duration, blocking=True, verbose=True):
if verbose:
print("Recording %d seconds of audio" % duration)
wav = sounddevice.rec(duration * sampling_rate, sampling_rate, 1)
if blocking:
sounddevice.wait()
if verbose:
print("Done recording!")
return wav.squeeze()
# def play_wave(wav, blocking=False):
# sounddevice.stop()
# sounddevice.play(wav, sampling_rate, blocking=blocking)
#
# def rec_wave(duration, blocking=True, verbose=True):
# if verbose:
# print("Recording %d seconds of audio" % duration)
# wav = sounddevice.rec(duration * sampling_rate, sampling_rate, 1)
# if blocking:
# sounddevice.wait()
# if verbose:
# print("Done recording!")
# return wav.squeeze()
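Commenting out play_wave and rec_wave drops this module's hard dependency on sounddevice. If interactive playback and recording are still wanted, one hedged alternative (not part of this commit) is to keep the helpers but import sounddevice lazily, so the module still loads on machines without a working audio backend; sampling_rate is assumed to come from the wildcard import of encoder.params_data shown above.

# Sketch only: same helpers with a lazy sounddevice import.
def play_wave_lazy(wav, blocking=False):
    import sounddevice                      # imported only when playback is requested
    sounddevice.stop()
    sounddevice.play(wav, sampling_rate, blocking=blocking)

def rec_wave_lazy(duration, blocking=True, verbose=True):
    import sounddevice
    if verbose:
        print("Recording %d seconds of audio" % duration)
    # sounddevice.rec expects an integer frame count
    wav = sounddevice.rec(int(duration * sampling_rate), sampling_rate, 1)
    if blocking:
        sounddevice.wait()
    if verbose:
        print("Done recording!")
    return wav.squeeze()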
@@ -9,6 +9,7 @@ from matplotlib import cm
_model = None # type: SpeakerEncoder
_device = None # type: torch.device
def load_model(weights_fpath, device=None):
"""
Loads the model in memory. If this function is not explicitly called, it will be run on the
......
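For reference, the minimal way this loader is used downstream; the checkpoint path below is hypothetical, not taken from this diff.

# Sketch only: load the speaker-encoder weights once before computing any embeddings;
# load_model() populates the module-level _model and _device globals shown above.
from pathlib import Path
from encoder import inference

inference.load_model(Path("encoder_weights.pt"))   # hypothetical checkpoint path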
@@ -2,7 +2,7 @@ from synthesizer.datasets import audio
from multiprocessing.pool import Pool
from functools import partial
from itertools import chain
from encoder import inference as speaker_encoder
# from encoder import inference as speaker_encoder
from pathlib import Path
from tqdm import tqdm
import numpy as np
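Commenting out the speaker-encoder import decouples the synthesizer preprocessing from the encoder package. A hedged alternative (not in this commit) is to keep the import optional rather than disabled:

# Sketch only: fall back gracefully when the encoder package is unavailable.
try:
    from encoder import inference as speaker_encoder
except ImportError:
    speaker_encoder = None   # embeddings are skipped when the encoder cannot be imported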
@@ -32,7 +32,7 @@ def preprocess_librispeech(datasets_root: Path, out_dir: Path, wav_out_dir: Path
speaker_dirs = list(chain.from_iterable(input_dir.glob("*") for input_dir in input_dirs))
func = partial(preprocess_speaker, mel_out_dir=mel_out_dir, wav_out_dir=wav_out_dir,
skip_existing=skip_existing, hparams=hparams)
job = Pool().imap(func, speaker_dirs)
job = Pool(1).imap(func, speaker_dirs)
for speaker_metadata in tqdm(job, "LibriSpeech", len(speaker_dirs), unit="speakers"):
for metadatum in speaker_metadata:
metadata_file.write("|".join(str(x) for x in metadatum) + "\n")
......
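Switching Pool() to Pool(1) runs the LibriSpeech preprocessing in a single worker process, which simplifies debugging (especially with the speaker-encoder import disabled above) at the cost of parallel throughput. A hedged sketch of exposing that choice instead of hard-coding it; n_processes and map_speakers are hypothetical names, not in this diff.

# Sketch only: make the worker count a parameter so switching between serial and
# parallel preprocessing is a one-line change at the call site.
def map_speakers(func, speaker_dirs, n_processes=1):
    pool = Pool(n_processes)
    job = pool.imap(func, speaker_dirs)
    results = []
    for speaker_metadata in tqdm(job, "LibriSpeech", len(speaker_dirs), unit="speakers"):
        results.extend(speaker_metadata)
    pool.close()
    pool.join()
    return results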