Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
mrywhh
Real-Time-Voice-Cloning
提交
60e9d6e4
R
Real-Time-Voice-Cloning
项目概览
mrywhh
/
Real-Time-Voice-Cloning
落后 Fork 源项目 12 个版本
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
R
Real-Time-Voice-Cloning
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
60e9d6e4
编写于
2月 14, 2019
作者:
C
Corentin Jemine
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fixed error cases of encoder inference
上级
4d21f6b0
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
32 additions
and
16 deletions
+32
-16
encoder/data_objects/speaker.py
encoder/data_objects/speaker.py
+1
-1
encoder/data_objects/speaker_batch.py
encoder/data_objects/speaker_batch.py
+1
-1
encoder/data_objects/speaker_verification_dataset.py
encoder/data_objects/speaker_verification_dataset.py
+2
-2
encoder/inference.py
encoder/inference.py
+27
-11
encoder/ui/speaker_matrix_ui.py
encoder/ui/speaker_matrix_ui.py
+1
-1
未找到文件。
encoder/data_objects/speaker.py
浏览文件 @
60e9d6e4
from
vlibs
import
fileio
from
vlibs.structs.random_cycler
import
RandomCycler
from
.
.data_objects.
utterance
import
Utterance
from
.utterance
import
Utterance
# Contains the set of utterances of a single speaker
class
Speaker
:
...
...
encoder/data_objects/speaker_batch.py
浏览文件 @
60e9d6e4
from
typing
import
List
import
numpy
as
np
from
.
.data_objects.
speaker
import
Speaker
from
.speaker
import
Speaker
from
..params_data
import
mel_n_channels
class
SpeakerBatch
:
...
...
encoder/data_objects/speaker_verification_dataset.py
浏览文件 @
60e9d6e4
...
...
@@ -4,8 +4,8 @@ from collections import OrderedDict
from
vlibs
import
fileio
import
numpy
as
np
import
random
from
.
.data_objects.
speaker_batch
import
SpeakerBatch
from
.
.data_objects.
speaker
import
Speaker
from
.speaker_batch
import
SpeakerBatch
from
.speaker
import
Speaker
from
..params_data
import
partial_utterance_n_frames
from
..config
import
*
...
...
encoder/inference.py
浏览文件 @
60e9d6e4
...
...
@@ -43,24 +43,40 @@ def embed_frames_batch(frames_batch):
def
compute_partial_splits
(
n_samples
,
partial_utterance_n_frames
=
partial_utterance_n_frames
,
min_pad_coverage
=
0.75
,
overlap
=
0.5
):
"""
Computes
:param n_samples:
:param partial_utterance_n_frames:
:param min_pad_coverage:
:param overlap:
:return:
Computes where to split an utterance waveform and its corresponding mel spectrogram to obtain
partial utterances of <partial_utterance_n_frames> each. Both the waveform and the mel
spectrogram splits are returned, so as to make each partial utterance waveform correspond to
its spectrogram. This function assumes that the mel spectrogram parameters used are those
defined in params_data.py.
The returned ranges may be indexing further than the length of the waveform. It is
recommended that you pad the waveform with zeros up to wave_splits[-1].stop.
:param n_samples: the number of samples in the waveform
:param partial_utterance_n_frames: the number of mel spectrogram frames in each partial
utterance
:param min_pad_coverage: when reaching the last partial utterance, it may or may not have
enough frames. If at least <min_pad_coverage> of <partial_utterance_n_frames> are present,
then the last partial utterance will be considered, as if we padded the audio. Otherwise,
it will be discarded, as if we trimmed the audio. If there aren't enough frames for 1 partial
utterance, this parameter is ignored so that the function always returns at least 1 split.
:param overlap: by how much the partial utterance should overlap. If set to 0, the partial
utterances are entirely disjoint.
:return: the waveform splits and mel spectrogram splits as lists of array slices. Index
respectively the waveform and the mel spectrogram with these slices to obtain the partial
utterances.
"""
assert
0
<=
overlap
<
1
assert
0
<
min_pad_coverage
<=
1
samples_per_frame
=
int
((
sampling_rate
*
mel_window_step
/
1000
))
n_frames
=
int
(
np
.
ceil
((
n_samples
+
1
)
/
samples_per_frame
))
frame_step
=
int
(
np
.
round
(
partial_utterance_n_frames
*
(
1
-
overlap
))
)
frame_step
=
max
(
int
(
np
.
round
(
partial_utterance_n_frames
*
(
1
-
overlap
))),
1
)
# Compute the splits
wave_splits
,
mel_splits
=
[],
[]
for
i
in
range
(
0
,
n_frames
-
partial_utterance_n_frames
+
frame_step
+
1
,
frame_step
):
steps
=
max
(
1
,
n_frames
-
partial_utterance_n_frames
+
frame_step
+
1
)
for
i
in
range
(
0
,
steps
,
frame_step
):
mel_range
=
np
.
array
([
i
,
i
+
partial_utterance_n_frames
])
wave_range
=
mel_range
*
samples_per_frame
mel_splits
.
append
(
slice
(
*
mel_range
))
...
...
@@ -69,7 +85,7 @@ def compute_partial_splits(n_samples, partial_utterance_n_frames=partial_utteran
# Evaluate whether extra padding is warranted or not
last_wave_range
=
wave_splits
[
-
1
]
coverage
=
(
n_samples
-
last_wave_range
.
start
)
/
(
last_wave_range
.
stop
-
last_wave_range
.
start
)
if
coverage
<
min_pad_coverage
:
if
coverage
<
min_pad_coverage
and
len
(
mel_splits
)
>
1
:
mel_splits
=
mel_splits
[:
-
1
]
wave_splits
=
wave_splits
[:
-
1
]
...
...
@@ -122,7 +138,7 @@ def embed_utterance(wave, using_partials=True, return_partial_embeds=False,
out
.
append
(
partial_embeds
)
if
return_wave_splits
:
out
.
append
(
wave_splits
)
return
tuple
(
out
)
return
out
[
0
]
if
len
(
out
)
==
1
else
tuple
(
out
)
def
embed_stream
(
stream
,
partial_utterance_n_frames
=
partial_utterance_n_frames
,
overlap
=
0.5
):
pass
...
...
encoder/ui/speaker_matrix_ui.py
浏览文件 @
60e9d6e4
...
...
@@ -5,9 +5,9 @@ from PyQt4 import QtGui
import
numpy
as
np
import
librosa
import
sys
from
..
import
audio
from
..params_data
import
sampling_rate
,
mel_window_step
from
..preprocess
import
preprocess_wave
from
..
import
audio
class
SpeakerMatrixUI
(
QtGui
.
QDialog
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录