Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
mrywhh
Real-Time-Voice-Cloning
提交
4cd60d96
R
Real-Time-Voice-Cloning
项目概览
mrywhh
/
Real-Time-Voice-Cloning
落后 Fork 源项目 12 个版本
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
R
Real-Time-Voice-Cloning
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
4cd60d96
编写于
3月 13, 2019
作者:
C
Corentin Jemine
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Setup the training configuration for ASR
上级
6ac1ff26
变更
8
隐藏空白更改
内联
并排
Showing
8 changed files
with
48 additions
and
57 deletions
+48
-57
.gitignore
.gitignore
+2
-1
tacotron2/hparams.py
tacotron2/hparams.py
+2
-2
tacotron2/preprocess.py
tacotron2/preprocess.py
+2
-2
tacotron2/synthesize.py
tacotron2/synthesize.py
+5
-32
tacotron2/tacotron/synthesize.py
tacotron2/tacotron/synthesize.py
+0
-3
tacotron2/tacotron/train.py
tacotron2/tacotron/train.py
+9
-15
tacotron2/temp.py
tacotron2/temp.py
+26
-0
tacotron2/train.py
tacotron2/train.py
+2
-2
未找到文件。
.gitignore
浏览文件 @
4cd60d96
...
...
@@ -17,4 +17,5 @@ encoder/saved_models/*_backups
tacotron2/logs-*
waveglow
torch-tacotron2
wave-rnn/checkpoints
\ No newline at end of file
wave-rnn/checkpoints
wave-rnn/model_outputs
\ No newline at end of file
tacotron2/hparams.py
浏览文件 @
4cd60d96
...
...
@@ -103,7 +103,7 @@ hparams = tf.contrib.training.HParams(
clip_mels_length
=
True
,
# For cases of OOM (Not really recommended, only use if facing unsolvable OOM errors,
# also consider clipping your samples to smaller chunks)
max_mel_frames
=
12
00
,
max_mel_frames
=
9
00
,
# Only relevant when clip_mels_length = True, please only use after trying output_per_steps=3
# and still getting OOM errors.
...
...
@@ -252,7 +252,7 @@ hparams = tf.contrib.training.HParams(
# major slowdowns! Only use when critical!)
# train/test split ratios, mini-batches sizes
tacotron_batch_size
=
19
,
# number of training samples on each training steps (was 32)
tacotron_batch_size
=
25
,
# number of training samples on each training steps (was 32)
# Tacotron Batch synthesis supports ~16x the training batch size (no gradients during
# testing).
# Training Tacotron with unmasked paddings makes it aware of them, which makes synthesis times
...
...
tacotron2/preprocess.py
浏览文件 @
4cd60d96
...
...
@@ -54,11 +54,11 @@ def main():
parser
.
add_argument
(
'--base_dir'
,
default
=
''
)
parser
.
add_argument
(
'--hparams'
,
default
=
''
,
help
=
'Hyperparameter overrides as a comma-separated list of name=value pairs'
)
parser
.
add_argument
(
'--output'
,
default
=
'Synthesizer
3
'
)
parser
.
add_argument
(
'--output'
,
default
=
'Synthesizer'
)
parser
.
add_argument
(
'--n_jobs'
,
type
=
int
,
default
=
cpu_count
())
# Name of the LibriSpeech sets to use, separated by spaces
# (e.g. "--sets train-other-500 train-clean-360). Defaults to using all the training sets
# (e.g. "--sets train-other-500 train-clean-360). Defaults to using all the
clean
training sets
# present in the LibriSpeech directory.
parser
.
add_argument
(
'--sets'
,
type
=
str
,
nargs
=
'+'
,
default
=
None
)
...
...
tacotron2/synthesize.py
浏览文件 @
4cd60d96
...
...
@@ -8,17 +8,16 @@ import tensorflow as tf
from
hparams
import
hparams
from
infolog
import
log
from
tacotron.synthesize
import
tacotron_synthesize
from
wavenet_vocoder.synthesize
import
wavenet_synthesize
def
prepare_run
(
args
):
modified_hp
=
hparams
.
parse
(
args
.
hparams
)
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
'2'
run_name
=
args
.
name
or
args
.
tacotron_name
or
args
.
model
run_name
=
args
.
name
taco_checkpoint
=
os
.
path
.
join
(
'logs-'
+
run_name
,
'taco_'
+
args
.
checkpoint
)
run_name
=
args
.
name
or
args
.
wavenet_name
or
args
.
model
run_name
=
args
.
name
wave_checkpoint
=
os
.
path
.
join
(
'logs-'
+
run_name
,
'wave_'
+
args
.
checkpoint
)
return
taco_checkpoint
,
wave_checkpoint
,
modified_hp
...
...
@@ -33,7 +32,7 @@ def get_sentences(args):
def
synthesize
(
args
,
hparams
,
taco_checkpoint
,
wave_checkpoint
,
sentences
):
log
(
'Running End-to-End TTS Evaluation. Model: {}'
.
format
(
args
.
name
or
args
.
model
))
log
(
'Running End-to-End TTS Evaluation. Model: {}'
.
format
(
args
.
name
))
log
(
'Synthesizing mel-spectrograms from text..'
)
wavenet_in_dir
=
tacotron_synthesize
(
args
,
hparams
,
taco_checkpoint
,
sentences
)
# Delete Tacotron model from graph
...
...
@@ -42,7 +41,7 @@ def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
# synthesizing
sleep
(
0.5
)
log
(
'Synthesizing audio from mel-spectrograms.. (This may take a while)'
)
wavenet_synthesize
(
args
,
hparams
,
wave_checkpoint
)
raise
NotImplemented
(
)
log
(
'Tacotron-2 TTS synthesis complete!'
)
...
...
@@ -80,43 +79,17 @@ def main():
'ids'
)
args
=
parser
.
parse_args
()
accepted_models
=
[
'Tacotron'
,
'WaveNet'
,
'Tacotron-2'
]
if
args
.
model
not
in
accepted_models
:
raise
ValueError
(
'please enter a valid model to synthesize with: {}'
.
format
(
accepted_models
))
if
args
.
mode
not
in
accepted_modes
:
raise
ValueError
(
'accepted modes are: {}, found {}'
.
format
(
accepted_modes
,
args
.
mode
))
if
args
.
mode
==
'live'
and
args
.
model
==
'Wavenet'
:
raise
RuntimeError
(
'Wavenet vocoder cannot be tested live due to its slow generation. Live only works '
'with Tacotron!'
)
if
args
.
GTA
not
in
(
'True'
,
'False'
):
raise
ValueError
(
'GTA option must be either True or False'
)
if
args
.
model
==
'Tacotron-2'
:
if
args
.
mode
==
'live'
:
warn
(
'Requested a live evaluation with Tacotron-2, Wavenet will not be used!'
)
if
args
.
mode
==
'synthesis'
:
raise
ValueError
(
'I don
\'
t recommend running WaveNet on entire dataset.. The world might end '
'before the synthesis :) (only eval allowed)'
)
taco_checkpoint
,
wave_checkpoint
,
hparams
=
prepare_run
(
args
)
sentences
=
get_sentences
(
args
)
if
args
.
model
==
'Tacotron'
:
_
=
tacotron_synthesize
(
args
,
hparams
,
taco_checkpoint
,
sentences
)
elif
args
.
model
==
'WaveNet'
:
wavenet_synthesize
(
args
,
hparams
,
wave_checkpoint
)
elif
args
.
model
==
'Tacotron-2'
:
synthesize
(
args
,
hparams
,
taco_checkpoint
,
wave_checkpoint
,
sentences
)
else
:
raise
ValueError
(
'Model provided {} unknown! {}'
.
format
(
args
.
model
,
accepted_models
))
_
=
tacotron_synthesize
(
args
,
hparams
,
taco_checkpoint
,
sentences
)
if
__name__
==
'__main__'
:
main
()
tacotron2/tacotron/synthesize.py
浏览文件 @
4cd60d96
...
...
@@ -42,9 +42,6 @@ def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
eval_dir
=
os
.
path
.
join
(
output_dir
,
'eval'
)
log_dir
=
os
.
path
.
join
(
output_dir
,
'logs-eval'
)
if
args
.
model
==
'Tacotron-2'
:
assert
os
.
path
.
normpath
(
eval_dir
)
==
os
.
path
.
normpath
(
args
.
mels_dir
)
#Create output path if it doesn't exist
os
.
makedirs
(
eval_dir
,
exist_ok
=
True
)
os
.
makedirs
(
log_dir
,
exist_ok
=
True
)
...
...
tacotron2/tacotron/train.py
浏览文件 @
4cd60d96
...
...
@@ -88,10 +88,7 @@ def time_string():
def
model_train_mode
(
args
,
feeder
,
hparams
,
global_step
):
with
tf
.
variable_scope
(
'Tacotron_model'
,
reuse
=
tf
.
AUTO_REUSE
)
as
scope
:
model_name
=
None
if
args
.
model
==
'Tacotron-2'
:
model_name
=
'Tacotron'
model
=
create_model
(
model_name
or
args
.
model
,
hparams
)
model
=
create_model
(
'Tacotron'
,
hparams
)
if
hparams
.
predict_linear
:
model
.
initialize
(
feeder
.
inputs
,
feeder
.
input_lengths
,
feeder
.
speaker_embeddings
,
feeder
.
mel_targets
,
feeder
.
token_targets
,
...
...
@@ -111,10 +108,7 @@ def model_train_mode(args, feeder, hparams, global_step):
def
model_test_mode
(
args
,
feeder
,
hparams
,
global_step
):
with
tf
.
variable_scope
(
'Tacotron_model'
,
reuse
=
tf
.
AUTO_REUSE
)
as
scope
:
model_name
=
None
if
args
.
model
==
'Tacotron-2'
:
model_name
=
'Tacotron'
model
=
create_model
(
model_name
or
args
.
model
,
hparams
)
model
=
create_model
(
'Tacotron'
,
hparams
)
if
hparams
.
predict_linear
:
model
.
initialize
(
feeder
.
eval_inputs
,
feeder
.
eval_input_lengths
,
feeder
.
speaker_embeddings
,
feeder
.
eval_mel_targets
,
...
...
@@ -161,7 +155,7 @@ def train(log_dir, args, hparams):
log
(
'Checkpoint path: {}'
.
format
(
checkpoint_path
))
log
(
'Loading training data from: {}'
.
format
(
input_path
))
log
(
'Using model:
{}'
.
format
(
args
.
model
)
)
log
(
'Using model:
Tacotron'
)
log
(
hparams_debug_string
())
# Start by setting a seed for repeatability
...
...
@@ -323,7 +317,7 @@ def train(log_dir, args, hparams):
plot
.
plot_alignment
(
align
,
os
.
path
.
join
(
eval_plot_dir
,
'step-{}-eval-align.png'
.
format
(
step
)),
title
=
'{}, {}, step={}, loss={:.5f}'
.
format
(
args
.
model
,
title
=
'{}, {}, step={}, loss={:.5f}'
.
format
(
'Tacotron'
,
time_string
(),
step
,
eval_loss
),
...
...
@@ -332,7 +326,7 @@ def train(log_dir, args, hparams):
'step-{'
'}-eval-mel-spectrogram.png'
.
format
(
step
)),
title
=
'{}, {}, step={}, loss={:.5f}'
.
format
(
args
.
model
,
title
=
'{}, {}, step={}, loss={:.5f}'
.
format
(
'Tacotron'
,
time_string
(),
step
,
eval_loss
),
...
...
@@ -344,7 +338,7 @@ def train(log_dir, args, hparams):
'step-{}-eval-linear-spectrogram.png'
.
format
(
step
)),
title
=
'{}, {}, step={}, loss={:.5f}'
.
format
(
args
.
model
,
time_string
(),
step
,
eval_loss
),
'Tacotron'
,
time_string
(),
step
,
eval_loss
),
target_spectrogram
=
lin_t
,
max_len
=
t_len
,
auto_aspect
=
True
)
...
...
@@ -387,7 +381,7 @@ def train(log_dir, args, hparams):
'step-{}-linear-spectrogram.png'
.
format
(
step
)),
title
=
'{}, {}, step={}, loss={:.5f}'
.
format
(
args
.
model
,
time_string
(),
step
,
loss
),
'Tacotron'
,
time_string
(),
step
,
loss
),
target_spectrogram
=
linear_target
,
max_len
=
target_length
,
auto_aspect
=
True
)
...
...
@@ -414,7 +408,7 @@ def train(log_dir, args, hparams):
# save alignment plot to disk (control purposes)
plot
.
plot_alignment
(
alignment
,
os
.
path
.
join
(
plot_dir
,
'step-{}-align.png'
.
format
(
step
)),
title
=
'{}, {}, step={}, loss={:.5f}'
.
format
(
args
.
model
,
title
=
'{}, {}, step={}, loss={:.5f}'
.
format
(
'Tacotron'
,
time_string
(),
step
,
loss
),
max_len
=
target_length
//
hparams
.
outputs_per_step
)
...
...
@@ -422,7 +416,7 @@ def train(log_dir, args, hparams):
plot
.
plot_spectrogram
(
mel_prediction
,
os
.
path
.
join
(
plot_dir
,
'step-{}-mel-spectrogram.png'
.
format
(
step
)),
title
=
'{}, {}, step={}, loss={:.5f}'
.
format
(
args
.
model
,
title
=
'{}, {}, step={}, loss={:.5f}'
.
format
(
'Tacotron'
,
time_string
(),
step
,
loss
),
target_spectrogram
=
target
,
...
...
tacotron2/temp.py
0 → 100644
浏览文件 @
4cd60d96
"""One-off cleanup script: prune training samples whose mel is too long.

Reads ``train.txt`` under ``root``, removes the audio, mel and embedding
files of every sample whose loaded mel array has a first-axis length greater
than ``MAX_MEL_FRAMES``, and writes the metadata lines of the remaining
samples to ``train2.txt``.

WARNING: this script is destructive -- files are removed as soon as it runs.
"""
from vlibs import fileio
import numpy as np

# Largest first-axis length a mel array may have for its sample to be kept.
# Same value as ``max_mel_frames`` in tacotron2/hparams.py; keep them in sync.
MAX_MEL_FRAMES = 900

root = r'E:\Datasets\Synthesizer'
lines = fileio.read_all_lines(fileio.join(root, "train.txt"))

out = []     # metadata lines of the samples that are kept
pruned = 0   # number of samples whose files were removed
intact = 0   # number of samples kept
for line in lines:
    line = line.rstrip()
    if not line:
        # A blank line carries no sample; unpacking it below would raise
        # ValueError because fewer than three fields are present.
        continue

    # Fields are '|'-separated; only the first three are used here.
    audio_fname, mel_fname, embed_fname, *_ = line.split('|')

    # NOTE(review): assumes the first axis of the saved array is the frame
    # axis, so len(mel) is the number of mel frames -- confirm.
    mel = np.load(fileio.join(root, "mels", mel_fname))
    if len(mel) > MAX_MEL_FRAMES:
        fileio.remove(fileio.join(root, "audio", audio_fname))
        fileio.remove(fileio.join(root, "mels", mel_fname))
        fileio.remove(fileio.join(root, "embed", embed_fname))
        pruned += 1
    else:
        intact += 1
        out.append(line)
        # Report progress once per 100 kept samples.
        if intact % 100 == 0:
            print("%d / %d" % (intact, pruned))

# Trailing empty entry -- presumably so the written file ends with a newline;
# this depends on how fileio.write_all_lines joins the lines (TODO confirm).
out.append('')
fileio.write_all_lines(fileio.join(root, "train2.txt"), out)
print("%d / %d" % (intact, pruned))
tacotron2/train.py
浏览文件 @
4cd60d96
...
...
@@ -34,7 +34,7 @@ def read_seq(file):
def
prepare_run
(
args
):
modified_hp
=
hparams
.
parse
(
args
.
hparams
)
os
.
environ
[
'TF_CPP_MIN_LOG_LEVEL'
]
=
str
(
args
.
tf_log_level
)
run_name
=
args
.
name
or
args
.
model
run_name
=
args
.
name
log_dir
=
os
.
path
.
join
(
args
.
base_dir
,
'logs-{}'
.
format
(
run_name
))
os
.
makedirs
(
log_dir
,
exist_ok
=
True
)
infolog
.
init
(
os
.
path
.
join
(
log_dir
,
'Terminal_train_log'
),
run_name
,
args
.
slack_url
)
...
...
@@ -109,7 +109,7 @@ def main():
help
=
'Steps between writing checkpoints'
)
parser
.
add_argument
(
'--eval_interval'
,
type
=
int
,
default
=
10000
,
help
=
'Steps between eval on test data'
)
parser
.
add_argument
(
'--tacotron_train_steps'
,
type
=
int
,
default
=
2
00000
,
# Was 100000
parser
.
add_argument
(
'--tacotron_train_steps'
,
type
=
int
,
default
=
5
00000
,
# Was 100000
help
=
'total number of tacotron training steps'
)
parser
.
add_argument
(
'--tf_log_level'
,
type
=
int
,
default
=
1
,
help
=
'Tensorflow C++ log level.'
)
parser
.
add_argument
(
'--slack_url'
,
default
=
None
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录