提交 a7858551 编写于 作者: H Hui Zhang

Add utt2spk for all datasets

上级 b9790d03
......@@ -22,6 +22,7 @@ import argparse
import codecs
import json
import os
from pathlib import Path
import soundfile
......@@ -79,6 +80,7 @@ def create_manifest(data_dir, manifest_path_prefix):
audio_path = os.path.abspath(os.path.join(subfolder, fname))
audio_id = os.path.basename(fname)[:-4]
utt2spk = Path(audio_path).parent.name
audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate)
......@@ -87,6 +89,7 @@ def create_manifest(data_dir, manifest_path_prefix):
json.dumps(
{
'utt': audio_id,
'utt2spk': str(utt2spk),
'feat': audio_path,
'feat_shape': (duration, ), # second
'text': text,
......
......@@ -74,15 +74,16 @@ def create_manifest(data_dir, manifest_path):
audio_filepath = os.path.join(subfolder, segments[0] + '.flac')
audio_data, samplerate = soundfile.read(audio_filepath)
duration = float(len(audio_data)) / samplerate
utt = os.path.splitext(os.path.basename(audio_filepath))[0]
utt2spk = '-'.join(utt.split('-')[:2])
json_lines.append(
json.dumps({
'utt':
os.path.splitext(os.path.basename(audio_filepath))[0],
'feat':
audio_filepath,
'utt': utt,
'utt2spk': utt2spk,
'feat': audio_filepath,
'feat_shape': (duration, ), #second
'text':
text
'text': text,
}))
total_sec += duration
......
......@@ -113,6 +113,8 @@ def create_manifest(data_dir, manifest_path_prefix):
assert os.path.exists(audio_path) and os.path.exists(text_path)
audio_id = os.path.basename(audio_path)[:-4]
spk = audio_id.split('_')[0]
word_text, syllable_text, phone_text = read_trn(text_path)
audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate)
......@@ -122,6 +124,7 @@ def create_manifest(data_dir, manifest_path_prefix):
json.dumps(
{
'utt': audio_id,
'utt2spk': spk,
'feat': audio_path,
'feat_shape': (duration, ), # second
'text': word_text, # character
......
......@@ -180,12 +180,12 @@ def create_manifest(data_dir, manifest_path_prefix):
json.dumps(
{
'utt': utt_id,
'utt2spk': spk,
'utt2gender': gender,
'feat': str(audio_path),
'feat_shape': (duration, ), # second
'text': word_text, # word
'phone': phone_text,
'spk': spk,
'gender': gender,
},
ensure_ascii=False))
......
......@@ -24,6 +24,7 @@ import json
import os
import soundfile
from pathlib import Path
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
......@@ -67,10 +68,17 @@ def create_manifest(data_dir, manifest_path_prefix):
audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate)
text = phn_dict[audio_id]
gender_spk = str(Path(audio_path).parent.stem)
spk = gender_spk[1:]
gender = gender_spk[0]
utt_id = '_'.join([spk, gender, audio_id])
json_lines.append(
json.dumps(
{
'utt': audio_id,
'utt2spk': spk,
'utt2gender': gender,
'feat': audio_path,
'feat_shape': (duration, ), # second
'text': text
......
......@@ -175,9 +175,12 @@ def generate_manifest(data_dir, manifest_path):
audio_data, samplerate = soundfile.read(u)
duration = float(len(audio_data)) / samplerate
utt = os.path.splitext(os.path.basename(u))[0]
json_lines.append(
json.dumps({
'utt': os.path.splitext(os.path.basename(u))[0],
'utt': utt,
'utt2spk': speaker,
'feat': u,
'feat_shape': (duration, ), #second
'text': trans.lower()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册