Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
mrywhh
Real-Time-Voice-Cloning
提交
f8dc5fce
R
Real-Time-Voice-Cloning
项目概览
mrywhh
/
Real-Time-Voice-Cloning
落后 Fork 源项目 12 个版本
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
R
Real-Time-Voice-Cloning
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
f8dc5fce
编写于
2月 11, 2019
作者:
C
Corentin Jemine
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Included VC2 as a dataset for the speaker encoder
上级
5396a54a
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
63 additions
and
12 deletions
+63
-12
SV2TTS/audio.py
SV2TTS/audio.py
+1
-1
SV2TTS/config.py
SV2TTS/config.py
+2
-1
SV2TTS/debug.py
SV2TTS/debug.py
+2
-2
SV2TTS/model.py
SV2TTS/model.py
+1
-4
SV2TTS/preprocess.py
SV2TTS/preprocess.py
+56
-1
SV2TTS/train.py
SV2TTS/train.py
+1
-3
未找到文件。
SV2TTS/audio.py
浏览文件 @
f8dc5fce
...
...
@@ -8,7 +8,7 @@ import struct
from
scipy.ndimage.morphology
import
binary_dilation
from
params_data
import
*
int16_max
=
32768
int16_max
=
(
2
**
15
)
-
1
def
load
(
fpath
):
"""
...
...
SV2TTS/config.py
浏览文件 @
f8dc5fce
...
...
@@ -6,7 +6,8 @@ project_root = fileio.abspath(fileio.leafdir(__file__))
librispeech_root
=
"E://Datasets/LibriSpeech"
librispeech_datasets
=
[
"train-other-500"
]
voxceleb1_root
=
"E://Datasets/VoxCeleb1"
voxceleb_datasets
=
[
"voxceleb1"
]
voxceleb2_root
=
"E://Datasets/VoxCeleb2"
voxceleb_datasets
=
[
"voxceleb1"
,
"voxceleb2"
]
anglophone_nationalites
=
[
'australia'
,
'canada'
,
'ireland'
,
'uk'
,
'usa'
]
clean_data_root
=
"E://Datasets//SpeakerEncoder"
all_datasets
=
librispeech_datasets
+
voxceleb_datasets
...
...
SV2TTS/debug.py
浏览文件 @
f8dc5fce
...
...
@@ -7,8 +7,8 @@ from data_objects.speaker_verification_dataset import SpeakerVerificationDataset
from
config
import
*
if
__name__
==
'__main__'
:
dataset
=
SpeakerVerificationDataset
(
all_datasets
)
loader
=
SpeakerVerificationDataLoader
(
dataset
,
3
,
4
,
num_workers
=
3
)
dataset
=
SpeakerVerificationDataset
(
[
'voxceleb2'
]
)
loader
=
SpeakerVerificationDataLoader
(
dataset
,
4
,
5
,
num_workers
=
3
)
for
batch
in
loader
:
SpeakerMatrixUI
(
batch
.
speakers
,
batch
.
partial_utterances
)
\ No newline at end of file
SV2TTS/model.py
浏览文件 @
f8dc5fce
...
...
@@ -99,16 +99,13 @@ class SpeakerEncoder(nn.Module):
loss
=
self
.
loss_fn
(
sim_matrix
,
torch
.
from_numpy
(
ground_truth
).
long
())
# EER (not backpropagated)
sim_matrix
=
sim_matrix
.
detach
().
numpy
()
with
torch
.
no_grad
():
## Imbalanced EER
inv_argmax
=
lambda
i
:
np
.
eye
(
1
,
speakers_per_batch
,
i
,
dtype
=
np
.
int
)[
0
]
labels
=
np
.
array
([
inv_argmax
(
i
)
for
i
in
ground_truth
])
preds
=
sim_matrix
preds
=
sim_matrix
.
detach
().
numpy
()
# Snippet from https://yangcha.github.io/EER-ROC/
fpr
,
tpr
,
thresholds
=
roc_curve
(
labels
.
flatten
(),
preds
.
flatten
())
eer
=
brentq
(
lambda
x
:
1.
-
x
-
interp1d
(
fpr
,
tpr
)(
x
),
0.
,
1.
)
# thresh = interp1d(fpr, thresholds)(eer)
return
loss
,
eer
\ No newline at end of file
SV2TTS/preprocess.py
浏览文件 @
f8dc5fce
import
sys
from
vlibs.ui
import
console
from
vlibs
import
fileio
from
config
import
*
...
...
@@ -168,6 +169,60 @@ def preprocess_voxceleb1(n_speakers=None, n_utterances=None):
logger
.
finalize
()
def preprocess_voxceleb2(n_speakers=None, n_utterances=None):
    """
    Preprocess the VoxCeleb2 "dev/aac" audio into mel filterbank frames, one directory per speaker.

    Every .m4a file found under <voxceleb2_root>/dev/aac/<speaker_id> is loaded, passed through
    preprocess_wave, converted with audio.wave_to_mel_filterbank and saved as a .npy file in
    <clean_data_root>/voxceleb2/VoxCeleb2_<speaker_id>. A "sources.txt" file written in each
    speaker directory maps every saved file name to the input path it was computed from.
    Utterances that are empty after preprocessing, or that yield fewer than
    partial_utterance_length frames, are skipped.

    :param n_speakers: upper bound on the number of speaker directories to process (applied as a
    slice on the directory listing). None processes all of them.
    :param n_utterances: upper bound on the number of files processed per speaker (applied as a
    slice on the file listing). None processes all of them.
    """
    fileio.ensure_dir(clean_data_root)

    dataset_name = "voxceleb2"
    out_dir = fileio.ensure_dir(fileio.join(clean_data_root, dataset_name))
    logger = DatasetLog(clean_data_root, dataset_name)

    # Get the speaker directories
    speakers_root = fileio.join(voxceleb2_root, "dev", "aac")
    speaker_ids = fileio.listdir(speakers_root)[:n_speakers]
    print("Preprocessing data for %d speakers." % len(speaker_ids))

    # Function to preprocess utterances for one speaker
    def preprocess_speaker(speaker_id):
        print("Starting speaker %s" % speaker_id)
        speaker_name = "VoxCeleb2_%s" % speaker_id
        speaker_in_dir = fileio.join(speakers_root, speaker_id)
        speaker_out_dir = fileio.ensure_dir(fileio.join(out_dir, speaker_name))
        # NOTE(review): presumably clears any output left by a previous run -- confirm against
        # fileio.resetdir.
        fileio.resetdir(speaker_out_dir)

        # The context manager guarantees sources.txt is flushed and closed even if loading,
        # converting or saving one of the utterances raises.
        with open(fileio.join(speaker_out_dir, "sources.txt"), 'w') as sources_file:
            fpaths = fileio.get_files(speaker_in_dir, r"\.m4a", recursive=True)[:n_utterances]
            for in_fpath in fpaths:
                # Load and preprocess the waveform
                wave = audio.load(in_fpath)
                wave = preprocess_wave(wave)
                # Explicit length test: the truth value of an array-like waveform would be
                # ambiguous.
                if len(wave) == 0:
                    print('Warning: audio file %s is entirely silent after processing.' % in_fpath,
                          file=sys.stderr)
                    continue

                # Create and save the mel spectrogram
                frames = audio.wave_to_mel_filterbank(wave)
                if len(frames) < partial_utterance_length:
                    continue
                # Prefix the file name with the name of its parent directory (called video_id
                # here) so that files from different directories of one speaker do not collide.
                video_id = fileio.leaf(fileio.leafdir(in_fpath))
                fname = video_id + '_' + fileio.leaf(in_fpath).replace(".m4a", ".npy")
                out_fpath = fileio.join(speaker_out_dir, fname)
                np.save(out_fpath, frames)

                logger.add_sample(duration=len(wave) / sampling_rate)
                sources_file.write("%s %s\n" % (fname, in_fpath))

        print("Speaker %s done!" % speaker_id)

    # Process the utterances for each speaker
    with ThreadPool(8) as pool:
        # imap is lazy: wrapping it in list() forces every speaker to be processed before the
        # pool is torn down.
        list(pool.imap(preprocess_speaker, speaker_ids))
    logger.finalize()
if
__name__
==
'__main__'
:
# preprocess_librispeech()
preprocess_voxceleb1
()
# preprocess_voxceleb1()
preprocess_voxceleb2
()
SV2TTS/train.py
浏览文件 @
f8dc5fce
...
...
@@ -10,8 +10,8 @@ import torch
# Specify the run ID here. Note: visdom will group together run IDs starting with the same prefix
# followed by an underscore.
run_id
=
None
run_id
=
'first_debug'
run_id
=
'debug_eer2'
run_id
=
'all'
implementation_doc
=
{
'Lr decay'
:
None
,
...
...
@@ -36,7 +36,6 @@ if __name__ == '__main__':
# Create the model and the optimizer
model
=
SpeakerEncoder
()
optimizer
=
torch
.
optim
.
Adam
(
model
.
parameters
(),
lr
=
learning_rate_init
)
# scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, exponential_decay_beta)
init_step
=
1
# Load any existing model
...
...
@@ -72,7 +71,6 @@ if __name__ == '__main__':
loss
.
backward
()
model
.
do_gradient_ops
()
optimizer
.
step
()
# scheduler.step()
# Update visualizations
learning_rate
=
optimizer
.
param_groups
[
0
][
'lr'
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录