diff --git a/paddlespeech/t2s/exps/fastspeech2/preprocess.py b/paddlespeech/t2s/exps/fastspeech2/preprocess.py index 5bda75451b071321e681adceb598f29162b5fb8c..db1842b2e89fe3044e96ca4babb07c1796d06da3 100644 --- a/paddlespeech/t2s/exps/fastspeech2/preprocess.py +++ b/paddlespeech/t2s/exps/fastspeech2/preprocess.py @@ -86,6 +86,9 @@ def process_sentence(config: Dict[str, Any], logmel = mel_extractor.get_log_mel_fbank(wav) # change duration according to mel_length compare_duration_and_mel_length(sentences, utt_id, logmel) + # utt_id may be popped in compare_duration_and_mel_length + if utt_id not in sentences: + return None phones = sentences[utt_id][0] durations = sentences[utt_id][1] num_frames = logmel.shape[0] diff --git a/paddlespeech/t2s/exps/speedyspeech/preprocess.py b/paddlespeech/t2s/exps/speedyspeech/preprocess.py index 3f81c4e14753d19e51db5d23f5e75440c67de34b..e833d13940530f293842a842b65f33cf6d03d9bd 100644 --- a/paddlespeech/t2s/exps/speedyspeech/preprocess.py +++ b/paddlespeech/t2s/exps/speedyspeech/preprocess.py @@ -79,6 +79,9 @@ def process_sentence(config: Dict[str, Any], logmel = mel_extractor.get_log_mel_fbank(wav) # change duration according to mel_length compare_duration_and_mel_length(sentences, utt_id, logmel) + # utt_id may be popped in compare_duration_and_mel_length + if utt_id not in sentences: + return None labels = sentences[utt_id][0] # extract phone and duration phones = [] diff --git a/paddlespeech/t2s/exps/tacotron2/preprocess.py b/paddlespeech/t2s/exps/tacotron2/preprocess.py index 7f41089ebf9d71b336d082b065e8b50c541f7edd..14a0d7eae227f5650a716bc656f3d0c32ee077e3 100644 --- a/paddlespeech/t2s/exps/tacotron2/preprocess.py +++ b/paddlespeech/t2s/exps/tacotron2/preprocess.py @@ -82,6 +82,9 @@ def process_sentence(config: Dict[str, Any], logmel = mel_extractor.get_log_mel_fbank(wav) # change duration according to mel_length compare_duration_and_mel_length(sentences, utt_id, logmel) + # utt_id may be popped in compare_duration_and_mel_length + if utt_id not in sentences: + return None phones = sentences[utt_id][0] durations = sentences[utt_id][1] num_frames = logmel.shape[0]