From 9a253bc0918391b9fb1daedc615095fe00d1af37 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Mon, 30 May 2022 11:58:37 +0000 Subject: [PATCH] gen lexicon with tone in mfa, test=tts --- examples/other/mfa/local/reorganize_baker.py | 3 --- examples/other/mfa/run.sh | 6 ++---- 2 files changed, 2 insertions(+), 7 deletions(-) mode change 100644 => 100755 examples/other/mfa/run.sh diff --git a/examples/other/mfa/local/reorganize_baker.py b/examples/other/mfa/local/reorganize_baker.py index 8adad834..153e01d1 100644 --- a/examples/other/mfa/local/reorganize_baker.py +++ b/examples/other/mfa/local/reorganize_baker.py @@ -42,9 +42,6 @@ def get_transcripts(path: Union[str, Path]): for i in range(0, len(lines), 2): sentence_id = lines[i].split()[0] transcription = lines[i + 1].strip() - # tones are dropped here - # since the lexicon does not consider tones, too - transcription = " ".join([item[:-1] for item in transcription.split()]) transcripts[sentence_id] = transcription return transcripts diff --git a/examples/other/mfa/run.sh b/examples/other/mfa/run.sh old mode 100644 new mode 100755 index 1fef58b4..29dacc9b --- a/examples/other/mfa/run.sh +++ b/examples/other/mfa/run.sh @@ -4,7 +4,7 @@ mkdir -p $EXP_DIR LEXICON_NAME='simple' if [ ! -f "$EXP_DIR/$LEXICON_NAME.lexicon" ]; then echo "generating lexicon..." - python local/generate_lexicon.py "$EXP_DIR/$LEXICON_NAME" --with-r + python local/generate_lexicon.py "$EXP_DIR/$LEXICON_NAME" --with-r --with-tone echo "lexicon done" fi @@ -16,6 +16,7 @@ if [ ! -d $EXP_DIR/baker_corpus ]; then echo "transcription for each audio file is saved with the same namd in $EXP_DIR/baker_corpus " fi + echo "detecting oov..." python local/detect_oov.py $EXP_DIR/baker_corpus $EXP_DIR/"$LEXICON_NAME.lexicon" echo "detecting oov done. you may consider regenerate lexicon if there is unexpected OOVs." @@ -44,6 +45,3 @@ if [ ! -d "$EXP_DIR/baker_alignment" ]; then echo "model: $EXP_DIR/baker_model" fi - - - -- GitLab