提交 4cd60d96 编写于 作者: C Corentin Jemine

Setup the training configuration for ASR

上级 6ac1ff26
......@@ -17,4 +17,5 @@ encoder/saved_models/*_backups
tacotron2/logs-*
waveglow
torch-tacotron2
wave-rnn/checkpoints
\ No newline at end of file
wave-rnn/checkpoints
wave-rnn/model_outputs
\ No newline at end of file
......@@ -103,7 +103,7 @@ hparams = tf.contrib.training.HParams(
clip_mels_length=True,
# For cases of OOM (Not really recommended, only use if facing unsolvable OOM errors,
# also consider clipping your samples to smaller chunks)
max_mel_frames=1200,
max_mel_frames=900,
# Only relevant when clip_mels_length = True, please only use after trying outputs_per_step=3
# and still getting OOM errors.
......@@ -252,7 +252,7 @@ hparams = tf.contrib.training.HParams(
# major slowdowns! Only use when critical!)
# train/test split ratios, mini-batches sizes
tacotron_batch_size=19, # number of training samples on each training steps (was 32)
tacotron_batch_size=25, # number of training samples on each training steps (was 32)
# Tacotron Batch synthesis supports ~16x the training batch size (no gradients during
# testing).
# Training Tacotron with unmasked paddings makes it aware of them, which makes synthesis times
......
......@@ -54,11 +54,11 @@ def main():
parser.add_argument('--base_dir', default='')
parser.add_argument('--hparams', default='',
help='Hyperparameter overrides as a comma-separated list of name=value pairs')
parser.add_argument('--output', default='Synthesizer3')
parser.add_argument('--output', default='Synthesizer')
parser.add_argument('--n_jobs', type=int, default=cpu_count())
# Name of the LibriSpeech sets to use, separated by spaces
# (e.g. "--sets train-other-500 train-clean-360). Defaults to using all the training sets
# (e.g. "--sets train-other-500 train-clean-360"). Defaults to using all the clean training sets
# present in the LibriSpeech directory.
parser.add_argument('--sets', type=str, nargs='+', default=None)
......
......@@ -8,17 +8,16 @@ import tensorflow as tf
from hparams import hparams
from infolog import log
from tacotron.synthesize import tacotron_synthesize
from wavenet_vocoder.synthesize import wavenet_synthesize
def prepare_run(args):
modified_hp = hparams.parse(args.hparams)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
run_name = args.name or args.tacotron_name or args.model
run_name = args.name
taco_checkpoint = os.path.join('logs-' + run_name, 'taco_' + args.checkpoint)
run_name = args.name or args.wavenet_name or args.model
run_name = args.name
wave_checkpoint = os.path.join('logs-' + run_name, 'wave_' + args.checkpoint)
return taco_checkpoint, wave_checkpoint, modified_hp
......@@ -33,7 +32,7 @@ def get_sentences(args):
def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
log('Running End-to-End TTS Evaluation. Model: {}'.format(args.name or args.model))
log('Running End-to-End TTS Evaluation. Model: {}'.format(args.name))
log('Synthesizing mel-spectrograms from text..')
wavenet_in_dir = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
# Delete Tacotron model from graph
......@@ -42,7 +41,7 @@ def synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences):
# synthesizing
sleep(0.5)
log('Synthesizing audio from mel-spectrograms.. (This may take a while)')
wavenet_synthesize(args, hparams, wave_checkpoint)
raise NotImplemented()
log('Tacotron-2 TTS synthesis complete!')
......@@ -80,43 +79,17 @@ def main():
'ids')
args = parser.parse_args()
accepted_models = ['Tacotron', 'WaveNet', 'Tacotron-2']
if args.model not in accepted_models:
raise ValueError(
'please enter a valid model to synthesize with: {}'.format(accepted_models))
if args.mode not in accepted_modes:
raise ValueError('accepted modes are: {}, found {}'.format(accepted_modes, args.mode))
if args.mode == 'live' and args.model == 'Wavenet':
raise RuntimeError(
'Wavenet vocoder cannot be tested live due to its slow generation. Live only works '
'with Tacotron!')
if args.GTA not in ('True', 'False'):
raise ValueError('GTA option must be either True or False')
if args.model == 'Tacotron-2':
if args.mode == 'live':
warn('Requested a live evaluation with Tacotron-2, Wavenet will not be used!')
if args.mode == 'synthesis':
raise ValueError(
'I don\'t recommend running WaveNet on entire dataset.. The world might end '
'before the synthesis :) (only eval allowed)')
taco_checkpoint, wave_checkpoint, hparams = prepare_run(args)
sentences = get_sentences(args)
if args.model == 'Tacotron':
_ = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
elif args.model == 'WaveNet':
wavenet_synthesize(args, hparams, wave_checkpoint)
elif args.model == 'Tacotron-2':
synthesize(args, hparams, taco_checkpoint, wave_checkpoint, sentences)
else:
raise ValueError('Model provided {} unknown! {}'.format(args.model, accepted_models))
_ = tacotron_synthesize(args, hparams, taco_checkpoint, sentences)
if __name__ == '__main__':
main()
......@@ -42,9 +42,6 @@ def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
eval_dir = os.path.join(output_dir, 'eval')
log_dir = os.path.join(output_dir, 'logs-eval')
if args.model == 'Tacotron-2':
assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)
#Create output path if it doesn't exist
os.makedirs(eval_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)
......
......@@ -88,10 +88,7 @@ def time_string():
def model_train_mode(args, feeder, hparams, global_step):
with tf.variable_scope('Tacotron_model', reuse=tf.AUTO_REUSE) as scope:
model_name = None
if args.model == 'Tacotron-2':
model_name = 'Tacotron'
model = create_model(model_name or args.model, hparams)
model = create_model('Tacotron', hparams)
if hparams.predict_linear:
model.initialize(feeder.inputs, feeder.input_lengths, feeder.speaker_embeddings,
feeder.mel_targets, feeder.token_targets,
......@@ -111,10 +108,7 @@ def model_train_mode(args, feeder, hparams, global_step):
def model_test_mode(args, feeder, hparams, global_step):
with tf.variable_scope('Tacotron_model', reuse=tf.AUTO_REUSE) as scope:
model_name = None
if args.model == 'Tacotron-2':
model_name = 'Tacotron'
model = create_model(model_name or args.model, hparams)
model = create_model('Tacotron', hparams)
if hparams.predict_linear:
model.initialize(feeder.eval_inputs, feeder.eval_input_lengths,
feeder.speaker_embeddings, feeder.eval_mel_targets,
......@@ -161,7 +155,7 @@ def train(log_dir, args, hparams):
log('Checkpoint path: {}'.format(checkpoint_path))
log('Loading training data from: {}'.format(input_path))
log('Using model: {}'.format(args.model))
log('Using model: Tacotron')
log(hparams_debug_string())
# Start by setting a seed for repeatability
......@@ -323,7 +317,7 @@ def train(log_dir, args, hparams):
plot.plot_alignment(align, os.path.join(eval_plot_dir,
'step-{}-eval-align.png'.format(step)),
title='{}, {}, step={}, loss={:.5f}'.format(args.model,
title='{}, {}, step={}, loss={:.5f}'.format('Tacotron',
time_string(),
step,
eval_loss),
......@@ -332,7 +326,7 @@ def train(log_dir, args, hparams):
'step-{'
'}-eval-mel-spectrogram.png'.format(
step)),
title='{}, {}, step={}, loss={:.5f}'.format(args.model,
title='{}, {}, step={}, loss={:.5f}'.format('Tacotron',
time_string(),
step,
eval_loss),
......@@ -344,7 +338,7 @@ def train(log_dir, args, hparams):
'step-{}-eval-linear-spectrogram.png'.format(
step)),
title='{}, {}, step={}, loss={:.5f}'.format(
args.model, time_string(), step, eval_loss),
'Tacotron', time_string(), step, eval_loss),
target_spectrogram=lin_t,
max_len=t_len, auto_aspect=True)
......@@ -387,7 +381,7 @@ def train(log_dir, args, hparams):
'step-{}-linear-spectrogram.png'.format(
step)),
title='{}, {}, step={}, loss={:.5f}'.format(
args.model, time_string(), step, loss),
'Tacotron', time_string(), step, loss),
target_spectrogram=linear_target,
max_len=target_length, auto_aspect=True)
......@@ -414,7 +408,7 @@ def train(log_dir, args, hparams):
# save alignment plot to disk (control purposes)
plot.plot_alignment(alignment,
os.path.join(plot_dir, 'step-{}-align.png'.format(step)),
title='{}, {}, step={}, loss={:.5f}'.format(args.model,
title='{}, {}, step={}, loss={:.5f}'.format('Tacotron',
time_string(),
step, loss),
max_len=target_length // hparams.outputs_per_step)
......@@ -422,7 +416,7 @@ def train(log_dir, args, hparams):
plot.plot_spectrogram(mel_prediction, os.path.join(plot_dir,
'step-{}-mel-spectrogram.png'.format(
step)),
title='{}, {}, step={}, loss={:.5f}'.format(args.model,
title='{}, {}, step={}, loss={:.5f}'.format('Tacotron',
time_string(),
step, loss),
target_spectrogram=target,
......
from vlibs import fileio
import numpy as np
# Prune over-long utterances from the synthesizer training set.
#
# Reads <root>/train.txt, where each line is a '|'-separated record whose first
# three fields are the audio, mel and embedding file names. Every record whose
# mel array is longer than `max_mel_len` is dropped, the surviving records are
# written to <root>/train2.txt, and only then are the files of the dropped
# records deleted from disk.
root = r'E:\Datasets\Synthesizer'

# Records whose mel array has more than this many entries along its first axis
# are pruned.
# NOTE(review): presumably meant to stay in sync with hparams.max_mel_frames
# (900) -- confirm.
max_mel_len = 900

lines = fileio.read_all_lines(fileio.join(root, "train.txt"))
out = []        # metadata lines of the records that are kept
to_remove = []  # paths of the files belonging to pruned records
pruned = 0
intact = 0
for line in lines:
    line = line.rstrip()
    if not line:
        # A blank line (e.g. a trailing one) holds no record; unpacking it
        # below would raise ValueError.
        continue
    audio_fname, mel_fname, embed_fname, *_ = line.split('|')
    mel = np.load(fileio.join(root, "mels", mel_fname))
    if len(mel) > max_mel_len:
        # Only remember what to delete for now: deleting right away would
        # leave the dataset without matching metadata if the script failed
        # before train2.txt is written.
        to_remove.append(fileio.join(root, "audio", audio_fname))
        to_remove.append(fileio.join(root, "mels", mel_fname))
        to_remove.append(fileio.join(root, "embed", embed_fname))
        pruned += 1
    else:
        intact += 1
        out.append(line)
        # Progress report every 100 kept records: "<kept> / <pruned>".
        if intact % 100 == 0:
            print("%d / %d" % (intact, pruned))

# Presumably makes the written file end with a newline -- depends on how
# fileio.write_all_lines joins the lines; TODO confirm.
out.append('')
fileio.write_all_lines(fileio.join(root, "train2.txt"), out)

# The new metadata file is safely on disk; now the pruned files can go.
for path in to_remove:
    fileio.remove(path)

print("%d / %d" % (intact, pruned))
......@@ -34,7 +34,7 @@ def read_seq(file):
def prepare_run(args):
modified_hp = hparams.parse(args.hparams)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_log_level)
run_name = args.name or args.model
run_name = args.name
log_dir = os.path.join(args.base_dir, 'logs-{}'.format(run_name))
os.makedirs(log_dir, exist_ok=True)
infolog.init(os.path.join(log_dir, 'Terminal_train_log'), run_name, args.slack_url)
......@@ -109,7 +109,7 @@ def main():
help='Steps between writing checkpoints')
parser.add_argument('--eval_interval', type=int, default=10000,
help='Steps between eval on test data')
parser.add_argument('--tacotron_train_steps', type=int, default=200000, # Was 100000
parser.add_argument('--tacotron_train_steps', type=int, default=500000, # Was 100000
help='total number of tacotron training steps')
parser.add_argument('--tf_log_level', type=int, default=1, help='Tensorflow C++ log level.')
parser.add_argument('--slack_url', default=None,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册