From 6944770f678f0545ef503efd6ec87ac65db0a016 Mon Sep 17 00:00:00 2001 From: Matheus Fillipe Date: Wed, 8 Jul 2020 23:51:58 -0300 Subject: [PATCH] Export and replay generated wavs in toolbox (#402) --- requirements.txt | 1 + toolbox/__init__.py | 49 +++++++++++++++++++++++++++++++++++++++++++++ toolbox/ui.py | 37 ++++++++++++++++++++++++++++++++-- 3 files changed, 85 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index e513934..de7a59a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ numpy>=1.14.0 scipy>=1.0.0 tqdm sounddevice +SoundFile Unidecode inflect PyQt5 diff --git a/toolbox/__init__.py b/toolbox/__init__.py index 48f39b1..c2ccd2a 100644 --- a/toolbox/__init__.py +++ b/toolbox/__init__.py @@ -34,6 +34,9 @@ recognized_datasets = [ "VCTK-Corpus/wav48", ] +#Maximum of generated wavs to keep on memory +MAX_WAVES = 15 + class Toolbox: def __init__(self, datasets_root, enc_models_dir, syn_models_dir, voc_models_dir, low_mem): sys.excepthook = self.excepthook @@ -43,6 +46,10 @@ class Toolbox: self.current_generated = (None, None, None, None) # speaker_name, spec, breaks, wav self.synthesizer = None # type: Synthesizer + self.current_wav = None + self.waves_list = [] + self.waves_count = 0 + self.waves_namelist = [] # Initialize the events and the interface self.ui = UI() @@ -82,8 +89,17 @@ class Toolbox: self.ui.play_button.clicked.connect(func) self.ui.stop_button.clicked.connect(self.ui.stop) self.ui.record_button.clicked.connect(self.record) + + #Audio self.ui.setup_audio_devices(Synthesizer.sample_rate) + #Wav playback & save + func = lambda: self.replay_last_wav() + self.ui.replay_wav_button.clicked.connect(func) + func = lambda: self.export_current_wave() + self.ui.export_wav_button.clicked.connect(func) + self.ui.waves_cb.currentIndexChanged.connect(self.set_current_wav) + # Generation func = lambda: self.synthesize() or self.vocode() self.ui.generate_button.clicked.connect(func) @@ -93,6 +109,15 @@ class Toolbox: # UMAP legend self.ui.clear_button.clicked.connect(self.clear_utterances) + def set_current_wav(self, index): + self.current_wav = self.waves_list[index] + + def export_current_wave(self): + self.ui.save_audio_file(self.current_wav, Synthesizer.sample_rate) + + def replay_last_wav(self): + self.ui.play(self.current_wav, Synthesizer.sample_rate) + def reset_ui(self, encoder_models_dir, synthesizer_models_dir, vocoder_models_dir): self.ui.populate_browser(self.datasets_root, recognized_datasets, 0, True) self.ui.populate_models(encoder_models_dir, synthesizer_models_dir, vocoder_models_dir) @@ -212,6 +237,30 @@ class Toolbox: wav = wav / np.abs(wav).max() * 0.97 self.ui.play(wav, Synthesizer.sample_rate) + # Name it (history displayed in combobox) + # TODO better naming for the combobox items? + wav_name = str(self.waves_count + 1) + + #Update waves combobox + self.waves_count += 1 + if self.waves_count > MAX_WAVES: + self.waves_list.pop() + self.waves_namelist.pop() + self.waves_list.insert(0, wav) + self.waves_namelist.insert(0, wav_name) + + self.ui.waves_cb.disconnect() + self.ui.waves_cb_model.setStringList(self.waves_namelist) + self.ui.waves_cb.setCurrentIndex(0) + self.ui.waves_cb.currentIndexChanged.connect(self.set_current_wav) + + # Update current wav + self.set_current_wav(0) + + #Enable replay and save buttons: + self.ui.replay_wav_button.setDisabled(False) + self.ui.export_wav_button.setDisabled(False) + # Compute the embedding # TODO: this is problematic with different sampling rates, gotta fix it if not encoder.is_loaded(): diff --git a/toolbox/ui.py b/toolbox/ui.py index 831000b..a3ecd07 100644 --- a/toolbox/ui.py +++ b/toolbox/ui.py @@ -1,12 +1,13 @@ from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas from matplotlib.figure import Figure -from PyQt5.QtCore import Qt +from PyQt5.QtCore import Qt, QStringListModel from PyQt5.QtWidgets import * from encoder.inference import plot_embedding_as_heatmap from toolbox.utterance import Utterance from pathlib import Path from typing import List, Set import sounddevice as sd +import soundfile as sf import matplotlib.pyplot as plt import numpy as np # from sklearn.manifold import TSNE # You can try with TSNE if you like, I prefer UMAP @@ -137,7 +138,21 @@ class UI(QDialog): self.umap_ax.set_yticks([]) self.umap_ax.figure.canvas.draw() - def setup_audio_devices(self,sample_rate): + def save_audio_file(self, wav, sample_rate): + dialog = QFileDialog() + dialog.setDefaultSuffix(".wav") + fpath, _ = dialog.getSaveFileName( + parent=self, + caption="Select a path to save the audio file", + filter="Audio Files (*.flac *.wav)" + ) + if fpath: + #Default format is wav + if Path(fpath).suffix == "": + fpath += ".wav" + sf.write(fpath, wav, sample_rate) + + def setup_audio_devices(self, sample_rate): input_devices = [] output_devices = [] for device in sd.query_devices(): @@ -389,6 +404,8 @@ class UI(QDialog): self.generate_button.setDisabled(True) self.synthesize_button.setDisabled(True) self.vocode_button.setDisabled(True) + self.replay_wav_button.setDisabled(True) + self.export_wav_button.setDisabled(True) [self.log("") for _ in range(self.max_log_lines)] def __init__(self): @@ -537,6 +554,22 @@ class UI(QDialog): layout.addWidget(self.vocode_button) gen_layout.addLayout(layout) + + #Replay & Save Audio + layout2 = QHBoxLayout() + self.replay_wav_button = QPushButton("Replay") + self.replay_wav_button.setToolTip("Replay last generated vocoder") + layout2.addWidget(self.replay_wav_button) + self.export_wav_button = QPushButton("Export") + self.export_wav_button.setToolTip("Save last generated vocoder audio in filesystem as a wav file") + layout2.addWidget(self.export_wav_button) + self.waves_cb_model = QStringListModel() + self.waves_cb = QComboBox() + self.waves_cb.setModel(self.waves_cb_model) + self.waves_cb.setToolTip("Select one of the last generated waves in this section for replaying or exporting") + layout2.addWidget(self.waves_cb) + gen_layout.addLayout(layout2) + self.loading_bar = QProgressBar() gen_layout.addWidget(self.loading_bar) -- GitLab