提交 a7858551 编写于 作者: H Hui Zhang

Add utt2spk for all datasets

上级 b9790d03
...@@ -22,6 +22,7 @@ import argparse ...@@ -22,6 +22,7 @@ import argparse
import codecs import codecs
import json import json
import os import os
from pathlib import Path
import soundfile import soundfile
...@@ -79,6 +80,7 @@ def create_manifest(data_dir, manifest_path_prefix): ...@@ -79,6 +80,7 @@ def create_manifest(data_dir, manifest_path_prefix):
audio_path = os.path.abspath(os.path.join(subfolder, fname)) audio_path = os.path.abspath(os.path.join(subfolder, fname))
audio_id = os.path.basename(fname)[:-4] audio_id = os.path.basename(fname)[:-4]
utt2spk = Path(audio_path).parent.name
audio_data, samplerate = soundfile.read(audio_path) audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate) duration = float(len(audio_data) / samplerate)
...@@ -87,6 +89,7 @@ def create_manifest(data_dir, manifest_path_prefix): ...@@ -87,6 +89,7 @@ def create_manifest(data_dir, manifest_path_prefix):
json.dumps( json.dumps(
{ {
'utt': audio_id, 'utt': audio_id,
'utt2spk': str(utt2spk),
'feat': audio_path, 'feat': audio_path,
'feat_shape': (duration, ), # second 'feat_shape': (duration, ), # second
'text': text, 'text': text,
......
...@@ -74,15 +74,16 @@ def create_manifest(data_dir, manifest_path): ...@@ -74,15 +74,16 @@ def create_manifest(data_dir, manifest_path):
audio_filepath = os.path.join(subfolder, segments[0] + '.flac') audio_filepath = os.path.join(subfolder, segments[0] + '.flac')
audio_data, samplerate = soundfile.read(audio_filepath) audio_data, samplerate = soundfile.read(audio_filepath)
duration = float(len(audio_data)) / samplerate duration = float(len(audio_data)) / samplerate
utt = os.path.splitext(os.path.basename(audio_filepath))[0]
utt2spk = '-'.join(utt.split('-')[:2])
json_lines.append( json_lines.append(
json.dumps({ json.dumps({
'utt': 'utt': utt,
os.path.splitext(os.path.basename(audio_filepath))[0], 'utt2spk': utt2spk,
'feat': 'feat': audio_filepath,
audio_filepath,
'feat_shape': (duration, ), #second 'feat_shape': (duration, ), #second
'text': 'text': text,
text
})) }))
total_sec += duration total_sec += duration
......
...@@ -113,6 +113,8 @@ def create_manifest(data_dir, manifest_path_prefix): ...@@ -113,6 +113,8 @@ def create_manifest(data_dir, manifest_path_prefix):
assert os.path.exists(audio_path) and os.path.exists(text_path) assert os.path.exists(audio_path) and os.path.exists(text_path)
audio_id = os.path.basename(audio_path)[:-4] audio_id = os.path.basename(audio_path)[:-4]
spk = audio_id.split('_')[0]
word_text, syllable_text, phone_text = read_trn(text_path) word_text, syllable_text, phone_text = read_trn(text_path)
audio_data, samplerate = soundfile.read(audio_path) audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate) duration = float(len(audio_data) / samplerate)
...@@ -122,6 +124,7 @@ def create_manifest(data_dir, manifest_path_prefix): ...@@ -122,6 +124,7 @@ def create_manifest(data_dir, manifest_path_prefix):
json.dumps( json.dumps(
{ {
'utt': audio_id, 'utt': audio_id,
'utt2spk': spk,
'feat': audio_path, 'feat': audio_path,
'feat_shape': (duration, ), # second 'feat_shape': (duration, ), # second
'text': word_text, # character 'text': word_text, # character
......
...@@ -180,12 +180,12 @@ def create_manifest(data_dir, manifest_path_prefix): ...@@ -180,12 +180,12 @@ def create_manifest(data_dir, manifest_path_prefix):
json.dumps( json.dumps(
{ {
'utt': utt_id, 'utt': utt_id,
'utt2spk': spk,
'utt2gender': gender,
'feat': str(audio_path), 'feat': str(audio_path),
'feat_shape': (duration, ), # second 'feat_shape': (duration, ), # second
'text': word_text, # word 'text': word_text, # word
'phone': phone_text, 'phone': phone_text,
'spk': spk,
'gender': gender,
}, },
ensure_ascii=False)) ensure_ascii=False))
......
...@@ -24,6 +24,7 @@ import json ...@@ -24,6 +24,7 @@ import json
import os import os
import soundfile import soundfile
from pathlib import Path
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument( parser.add_argument(
...@@ -67,10 +68,17 @@ def create_manifest(data_dir, manifest_path_prefix): ...@@ -67,10 +68,17 @@ def create_manifest(data_dir, manifest_path_prefix):
audio_data, samplerate = soundfile.read(audio_path) audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate) duration = float(len(audio_data) / samplerate)
text = phn_dict[audio_id] text = phn_dict[audio_id]
gender_spk = str(Path(audio_path).parent.stem)
spk = gender_spk[1:]
gender = gender_spk[0]
utt_id = '_'.join([spk, gender, audio_id])
json_lines.append( json_lines.append(
json.dumps( json.dumps(
{ {
'utt': audio_id, 'utt': audio_id,
'utt2spk': spk,
'utt2gender': gender,
'feat': audio_path, 'feat': audio_path,
'feat_shape': (duration, ), # second 'feat_shape': (duration, ), # second
'text': text 'text': text
......
...@@ -175,9 +175,12 @@ def generate_manifest(data_dir, manifest_path): ...@@ -175,9 +175,12 @@ def generate_manifest(data_dir, manifest_path):
audio_data, samplerate = soundfile.read(u) audio_data, samplerate = soundfile.read(u)
duration = float(len(audio_data)) / samplerate duration = float(len(audio_data)) / samplerate
utt = os.path.splitext(os.path.basename(u))[0]
json_lines.append( json_lines.append(
json.dumps({ json.dumps({
'utt': os.path.splitext(os.path.basename(u))[0], 'utt': utt,
'utt2spk': speaker,
'feat': u, 'feat': u,
'feat_shape': (duration, ), #second 'feat_shape': (duration, ), #second
'text': trans.lower() 'text': trans.lower()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册