Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
mrywhh
Real-Time-Voice-Cloning
提交
6944770f
R
Real-Time-Voice-Cloning
项目概览
mrywhh
/
Real-Time-Voice-Cloning
落后 Fork 源项目 12 个版本
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
R
Real-Time-Voice-Cloning
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
6944770f
编写于
7月 08, 2020
作者:
M
Matheus Fillipe
提交者:
GitHub
7月 08, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Export and replay generated wavs in toolbox (#402)
上级
f49f64f6
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
85 addition
and
2 deletion
+85
-2
requirements.txt
requirements.txt
+1
-0
toolbox/__init__.py
toolbox/__init__.py
+49
-0
toolbox/ui.py
toolbox/ui.py
+35
-2
未找到文件。
requirements.txt
浏览文件 @
6944770f
...
@@ -7,6 +7,7 @@ numpy>=1.14.0
...
@@ -7,6 +7,7 @@ numpy>=1.14.0
scipy
>=1.0.0
scipy
>=1.0.0
tqdm
tqdm
sounddevice
sounddevice
SoundFile
Unidecode
Unidecode
inflect
inflect
PyQt5
PyQt5
...
...
toolbox/__init__.py
浏览文件 @
6944770f
...
@@ -34,6 +34,9 @@ recognized_datasets = [
...
@@ -34,6 +34,9 @@ recognized_datasets = [
"VCTK-Corpus/wav48"
,
"VCTK-Corpus/wav48"
,
]
]
# Maximum number of generated wavs to keep in memory
MAX_WAVES
=
15
class
Toolbox
:
class
Toolbox
:
def
__init__
(
self
,
datasets_root
,
enc_models_dir
,
syn_models_dir
,
voc_models_dir
,
low_mem
):
def
__init__
(
self
,
datasets_root
,
enc_models_dir
,
syn_models_dir
,
voc_models_dir
,
low_mem
):
sys
.
excepthook
=
self
.
excepthook
sys
.
excepthook
=
self
.
excepthook
...
@@ -43,6 +46,10 @@ class Toolbox:
...
@@ -43,6 +46,10 @@ class Toolbox:
self
.
current_generated
=
(
None
,
None
,
None
,
None
)
# speaker_name, spec, breaks, wav
self
.
current_generated
=
(
None
,
None
,
None
,
None
)
# speaker_name, spec, breaks, wav
self
.
synthesizer
=
None
# type: Synthesizer
self
.
synthesizer
=
None
# type: Synthesizer
self
.
current_wav
=
None
self
.
waves_list
=
[]
self
.
waves_count
=
0
self
.
waves_namelist
=
[]
# Initialize the events and the interface
# Initialize the events and the interface
self
.
ui
=
UI
()
self
.
ui
=
UI
()
...
@@ -82,8 +89,17 @@ class Toolbox:
...
@@ -82,8 +89,17 @@ class Toolbox:
self
.
ui
.
play_button
.
clicked
.
connect
(
func
)
self
.
ui
.
play_button
.
clicked
.
connect
(
func
)
self
.
ui
.
stop_button
.
clicked
.
connect
(
self
.
ui
.
stop
)
self
.
ui
.
stop_button
.
clicked
.
connect
(
self
.
ui
.
stop
)
self
.
ui
.
record_button
.
clicked
.
connect
(
self
.
record
)
self
.
ui
.
record_button
.
clicked
.
connect
(
self
.
record
)
#Audio
self
.
ui
.
setup_audio_devices
(
Synthesizer
.
sample_rate
)
self
.
ui
.
setup_audio_devices
(
Synthesizer
.
sample_rate
)
#Wav playback & save
func
=
lambda
:
self
.
replay_last_wav
()
self
.
ui
.
replay_wav_button
.
clicked
.
connect
(
func
)
func
=
lambda
:
self
.
export_current_wave
()
self
.
ui
.
export_wav_button
.
clicked
.
connect
(
func
)
self
.
ui
.
waves_cb
.
currentIndexChanged
.
connect
(
self
.
set_current_wav
)
# Generation
# Generation
func
=
lambda
:
self
.
synthesize
()
or
self
.
vocode
()
func
=
lambda
:
self
.
synthesize
()
or
self
.
vocode
()
self
.
ui
.
generate_button
.
clicked
.
connect
(
func
)
self
.
ui
.
generate_button
.
clicked
.
connect
(
func
)
...
@@ -93,6 +109,15 @@ class Toolbox:
...
@@ -93,6 +109,15 @@ class Toolbox:
# UMAP legend
# UMAP legend
self
.
ui
.
clear_button
.
clicked
.
connect
(
self
.
clear_utterances
)
self
.
ui
.
clear_button
.
clicked
.
connect
(
self
.
clear_utterances
)
def set_current_wav(self, index):
    """Make the wave at position ``index`` of ``self.waves_list`` the current one.

    This is used as the slot for the waves combobox ``currentIndexChanged``
    signal and is also called directly after a new wave is generated.

    :param index: position in ``self.waves_list`` of the wave to select.
    """
    # Qt reports -1 when the combobox has no current item (e.g. while its
    # model is being reset). Python would treat -1 as "last element" and
    # silently pick the oldest wave, or raise on an empty list, so any index
    # outside the list is ignored and the previous selection is kept.
    if not 0 <= index < len(self.waves_list):
        return
    self.current_wav = self.waves_list[index]
def export_current_wave(self):
    """Ask the UI to save the currently selected generated wave to disk.

    The wave is handed to ``self.ui.save_audio_file`` together with the
    synthesizer sample rate. Nothing happens when no wave has been
    selected yet.
    """
    # Without a wave there is nothing to write; bail out before the save
    # dialog is shown instead of failing once the user has picked a path.
    if self.current_wav is None:
        return
    self.ui.save_audio_file(self.current_wav, Synthesizer.sample_rate)
def replay_last_wav(self):
    """Play back the currently selected generated wave.

    Despite the name, the wave played is ``self.current_wav`` (whatever
    ``set_current_wav`` last selected), which is not necessarily the most
    recently generated one. Nothing happens when no wave has been selected
    yet.
    """
    # Guard against being triggered before any wave exists.
    if self.current_wav is None:
        return
    self.ui.play(self.current_wav, Synthesizer.sample_rate)
def
reset_ui
(
self
,
encoder_models_dir
,
synthesizer_models_dir
,
vocoder_models_dir
):
def
reset_ui
(
self
,
encoder_models_dir
,
synthesizer_models_dir
,
vocoder_models_dir
):
self
.
ui
.
populate_browser
(
self
.
datasets_root
,
recognized_datasets
,
0
,
True
)
self
.
ui
.
populate_browser
(
self
.
datasets_root
,
recognized_datasets
,
0
,
True
)
self
.
ui
.
populate_models
(
encoder_models_dir
,
synthesizer_models_dir
,
vocoder_models_dir
)
self
.
ui
.
populate_models
(
encoder_models_dir
,
synthesizer_models_dir
,
vocoder_models_dir
)
...
@@ -212,6 +237,30 @@ class Toolbox:
...
@@ -212,6 +237,30 @@ class Toolbox:
wav
=
wav
/
np
.
abs
(
wav
).
max
()
*
0.97
wav
=
wav
/
np
.
abs
(
wav
).
max
()
*
0.97
self
.
ui
.
play
(
wav
,
Synthesizer
.
sample_rate
)
self
.
ui
.
play
(
wav
,
Synthesizer
.
sample_rate
)
# Name it (history displayed in combobox)
# TODO better naming for the combobox items?
wav_name
=
str
(
self
.
waves_count
+
1
)
#Update waves combobox
self
.
waves_count
+=
1
if
self
.
waves_count
>
MAX_WAVES
:
self
.
waves_list
.
pop
()
self
.
waves_namelist
.
pop
()
self
.
waves_list
.
insert
(
0
,
wav
)
self
.
waves_namelist
.
insert
(
0
,
wav_name
)
self
.
ui
.
waves_cb
.
disconnect
()
self
.
ui
.
waves_cb_model
.
setStringList
(
self
.
waves_namelist
)
self
.
ui
.
waves_cb
.
setCurrentIndex
(
0
)
self
.
ui
.
waves_cb
.
currentIndexChanged
.
connect
(
self
.
set_current_wav
)
# Update current wav
self
.
set_current_wav
(
0
)
#Enable replay and save buttons:
self
.
ui
.
replay_wav_button
.
setDisabled
(
False
)
self
.
ui
.
export_wav_button
.
setDisabled
(
False
)
# Compute the embedding
# Compute the embedding
# TODO: this is problematic with different sampling rates, gotta fix it
# TODO: this is problematic with different sampling rates, gotta fix it
if
not
encoder
.
is_loaded
():
if
not
encoder
.
is_loaded
():
...
...
toolbox/ui.py
浏览文件 @
6944770f
from
matplotlib.backends.backend_qt5agg
import
FigureCanvasQTAgg
as
FigureCanvas
from
matplotlib.backends.backend_qt5agg
import
FigureCanvasQTAgg
as
FigureCanvas
from
matplotlib.figure
import
Figure
from
matplotlib.figure
import
Figure
from
PyQt5.QtCore
import
Qt
from
PyQt5.QtCore
import
Qt
,
QStringListModel
from
PyQt5.QtWidgets
import
*
from
PyQt5.QtWidgets
import
*
from
encoder.inference
import
plot_embedding_as_heatmap
from
encoder.inference
import
plot_embedding_as_heatmap
from
toolbox.utterance
import
Utterance
from
toolbox.utterance
import
Utterance
from
pathlib
import
Path
from
pathlib
import
Path
from
typing
import
List
,
Set
from
typing
import
List
,
Set
import
sounddevice
as
sd
import
sounddevice
as
sd
import
soundfile
as
sf
import
matplotlib.pyplot
as
plt
import
matplotlib.pyplot
as
plt
import
numpy
as
np
import
numpy
as
np
# from sklearn.manifold import TSNE # You can try with TSNE if you like, I prefer UMAP
# from sklearn.manifold import TSNE # You can try with TSNE if you like, I prefer UMAP
...
@@ -137,7 +138,21 @@ class UI(QDialog):
...
@@ -137,7 +138,21 @@ class UI(QDialog):
self
.
umap_ax
.
set_yticks
([])
self
.
umap_ax
.
set_yticks
([])
self
.
umap_ax
.
figure
.
canvas
.
draw
()
self
.
umap_ax
.
figure
.
canvas
.
draw
()
def
setup_audio_devices
(
self
,
sample_rate
):
def save_audio_file(self, wav, sample_rate):
    """Prompt for a destination path and write ``wav`` to it.

    :param wav: audio data passed unchanged to ``sf.write``.
    :param sample_rate: sample rate passed unchanged to ``sf.write``.

    If the user cancels the dialog nothing is written. A path entered
    without any extension gets ``.wav`` appended.
    """
    # getSaveFileName is a static convenience function: it builds its own
    # dialog, so a separately constructed QFileDialog (and any default
    # suffix set on it) is never used. Call it on the class directly.
    fpath, _ = QFileDialog.getSaveFileName(
        parent=self,
        caption="Select a path to save the audio file",
        filter="Audio Files (*.flac *.wav)",
    )
    if not fpath:
        # Dialog was cancelled.
        return

    # Default format is wav
    if not Path(fpath).suffix:
        fpath += ".wav"
    sf.write(fpath, wav, sample_rate)
def
setup_audio_devices
(
self
,
sample_rate
):
input_devices
=
[]
input_devices
=
[]
output_devices
=
[]
output_devices
=
[]
for
device
in
sd
.
query_devices
():
for
device
in
sd
.
query_devices
():
...
@@ -389,6 +404,8 @@ class UI(QDialog):
...
@@ -389,6 +404,8 @@ class UI(QDialog):
self
.
generate_button
.
setDisabled
(
True
)
self
.
generate_button
.
setDisabled
(
True
)
self
.
synthesize_button
.
setDisabled
(
True
)
self
.
synthesize_button
.
setDisabled
(
True
)
self
.
vocode_button
.
setDisabled
(
True
)
self
.
vocode_button
.
setDisabled
(
True
)
self
.
replay_wav_button
.
setDisabled
(
True
)
self
.
export_wav_button
.
setDisabled
(
True
)
[
self
.
log
(
""
)
for
_
in
range
(
self
.
max_log_lines
)]
[
self
.
log
(
""
)
for
_
in
range
(
self
.
max_log_lines
)]
def
__init__
(
self
):
def
__init__
(
self
):
...
@@ -537,6 +554,22 @@ class UI(QDialog):
...
@@ -537,6 +554,22 @@ class UI(QDialog):
layout
.
addWidget
(
self
.
vocode_button
)
layout
.
addWidget
(
self
.
vocode_button
)
gen_layout
.
addLayout
(
layout
)
gen_layout
.
addLayout
(
layout
)
#Replay & Save Audio
layout2
=
QHBoxLayout
()
self
.
replay_wav_button
=
QPushButton
(
"Replay"
)
self
.
replay_wav_button
.
setToolTip
(
"Replay last generated vocoder"
)
layout2
.
addWidget
(
self
.
replay_wav_button
)
self
.
export_wav_button
=
QPushButton
(
"Export"
)
self
.
export_wav_button
.
setToolTip
(
"Save last generated vocoder audio in filesystem as a wav file"
)
layout2
.
addWidget
(
self
.
export_wav_button
)
self
.
waves_cb_model
=
QStringListModel
()
self
.
waves_cb
=
QComboBox
()
self
.
waves_cb
.
setModel
(
self
.
waves_cb_model
)
self
.
waves_cb
.
setToolTip
(
"Select one of the last generated waves in this section for replaying or exporting"
)
layout2
.
addWidget
(
self
.
waves_cb
)
gen_layout
.
addLayout
(
layout2
)
self
.
loading_bar
=
QProgressBar
()
self
.
loading_bar
=
QProgressBar
()
gen_layout
.
addWidget
(
self
.
loading_bar
)
gen_layout
.
addWidget
(
self
.
loading_bar
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录