From a7858551b735594e8c418de5c4807b47cdcfa5cf Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 19 Nov 2021 09:49:38 +0000 Subject: [PATCH] add utt2spk for all dataset --- .../dataset/aidatatang_200zh/aidatatang_200zh.py | 3 +++ .../dataset/mini_librispeech/mini_librispeech.py | 13 +++++++------ examples/dataset/thchs30/thchs30.py | 3 +++ examples/dataset/timit/timit.py | 4 ++-- .../dataset/timit/timit_kaldi_standard_split.py | 8 ++++++++ examples/dataset/voxforge/voxforge.py | 5 ++++- 6 files changed, 27 insertions(+), 9 deletions(-) diff --git a/examples/dataset/aidatatang_200zh/aidatatang_200zh.py b/examples/dataset/aidatatang_200zh/aidatatang_200zh.py index e32f619e..85f478c2 100644 --- a/examples/dataset/aidatatang_200zh/aidatatang_200zh.py +++ b/examples/dataset/aidatatang_200zh/aidatatang_200zh.py @@ -22,6 +22,7 @@ import argparse import codecs import json import os +from pathlib import Path import soundfile @@ -79,6 +80,7 @@ def create_manifest(data_dir, manifest_path_prefix): audio_path = os.path.abspath(os.path.join(subfolder, fname)) audio_id = os.path.basename(fname)[:-4] + utt2spk = Path(audio_path).parent.name audio_data, samplerate = soundfile.read(audio_path) duration = float(len(audio_data) / samplerate) @@ -87,6 +89,7 @@ def create_manifest(data_dir, manifest_path_prefix): json.dumps( { 'utt': audio_id, + 'utt2spk': str(utt2spk), 'feat': audio_path, 'feat_shape': (duration, ), # second 'text': text, diff --git a/examples/dataset/mini_librispeech/mini_librispeech.py b/examples/dataset/mini_librispeech/mini_librispeech.py index 65fee81a..730c73a8 100644 --- a/examples/dataset/mini_librispeech/mini_librispeech.py +++ b/examples/dataset/mini_librispeech/mini_librispeech.py @@ -74,15 +74,16 @@ def create_manifest(data_dir, manifest_path): audio_filepath = os.path.join(subfolder, segments[0] + '.flac') audio_data, samplerate = soundfile.read(audio_filepath) duration = float(len(audio_data)) / samplerate + + utt = 
os.path.splitext(os.path.basename(audio_filepath))[0] + utt2spk = '-'.join(utt.split('-')[:2]) json_lines.append( json.dumps({ - 'utt': - os.path.splitext(os.path.basename(audio_filepath))[0], - 'feat': - audio_filepath, + 'utt': utt, + 'utt2spk': utt2spk, + 'feat': audio_filepath, 'feat_shape': (duration, ), #second - 'text': - text + 'text': text, })) total_sec += duration diff --git a/examples/dataset/thchs30/thchs30.py b/examples/dataset/thchs30/thchs30.py index 77a264cb..2ec4ddab 100644 --- a/examples/dataset/thchs30/thchs30.py +++ b/examples/dataset/thchs30/thchs30.py @@ -113,6 +113,8 @@ def create_manifest(data_dir, manifest_path_prefix): assert os.path.exists(audio_path) and os.path.exists(text_path) audio_id = os.path.basename(audio_path)[:-4] + spk = audio_id.split('_')[0] + word_text, syllable_text, phone_text = read_trn(text_path) audio_data, samplerate = soundfile.read(audio_path) duration = float(len(audio_data) / samplerate) @@ -122,6 +124,7 @@ def create_manifest(data_dir, manifest_path_prefix): json.dumps( { 'utt': audio_id, + 'utt2spk': spk, 'feat': audio_path, 'feat_shape': (duration, ), # second 'text': word_text, # charactor diff --git a/examples/dataset/timit/timit.py b/examples/dataset/timit/timit.py index 311d445c..c4a9f066 100644 --- a/examples/dataset/timit/timit.py +++ b/examples/dataset/timit/timit.py @@ -180,12 +180,12 @@ def create_manifest(data_dir, manifest_path_prefix): json.dumps( { 'utt': utt_id, + 'utt2spk': spk, + 'utt2gender': gender, 'feat': str(audio_path), 'feat_shape': (duration, ), # second 'text': word_text, # word 'phone': phone_text, - 'spk': spk, - 'gender': gender, }, ensure_ascii=False)) diff --git a/examples/dataset/timit/timit_kaldi_standard_split.py b/examples/dataset/timit/timit_kaldi_standard_split.py index 2b494c06..26aa76c7 100644 --- a/examples/dataset/timit/timit_kaldi_standard_split.py +++ b/examples/dataset/timit/timit_kaldi_standard_split.py @@ -24,6 +24,7 @@ import json import os import soundfile +from 
pathlib import Path parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -67,10 +68,17 @@ def create_manifest(data_dir, manifest_path_prefix): audio_data, samplerate = soundfile.read(audio_path) duration = float(len(audio_data) / samplerate) text = phn_dict[audio_id] + + gender_spk = str(Path(audio_path).parent.stem) + spk = gender_spk[1:] + gender = gender_spk[0] + utt_id = '_'.join([spk, gender, audio_id]) json_lines.append( json.dumps( { 'utt': audio_id, + 'utt2spk': spk, + 'utt2gender': gender, 'feat': audio_path, 'feat_shape': (duration, ), # second 'text': text diff --git a/examples/dataset/voxforge/voxforge.py b/examples/dataset/voxforge/voxforge.py index 36282bd6..373791bf 100644 --- a/examples/dataset/voxforge/voxforge.py +++ b/examples/dataset/voxforge/voxforge.py @@ -175,9 +175,12 @@ def generate_manifest(data_dir, manifest_path): audio_data, samplerate = soundfile.read(u) duration = float(len(audio_data)) / samplerate + + utt = os.path.splitext(os.path.basename(u))[0] json_lines.append( json.dumps({ - 'utt': os.path.splitext(os.path.basename(u))[0], + 'utt': utt, + 'utt2spk': speaker, 'feat': u, 'feat_shape': (duration, ), #second 'text': trans.lower() -- GitLab