Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
mrywhh
Real-Time-Voice-Cloning
提交
054f16ec
R
Real-Time-Voice-Cloning
项目概览
mrywhh
/
Real-Time-Voice-Cloning
落后 Fork 源项目 12 个版本
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
R
Real-Time-Voice-Cloning
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
054f16ec
编写于
7月 23, 2020
作者:
B
blue-fish
提交者:
GitHub
7月 23, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add synthesizer preprocessing support for other datasets (#441)
Co-authored-by:
N
Corentin Jemine
<
corentin.jemine@gmail.com
>
上级
eaf5ec44
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
71 additions
and
34 deletions
+71
-34
synthesizer/preprocess.py
synthesizer/preprocess.py
+62
-32
synthesizer_preprocess_audio.py
synthesizer_preprocess_audio.py
+9
-2
未找到文件。
synthesizer/preprocess.py
浏览文件 @
054f16ec
...
...
@@ -10,12 +10,12 @@ import numpy as np
import
librosa
def
preprocess_librispeech
(
datasets_root
:
Path
,
out_dir
:
Path
,
n_processes
:
int
,
skip_existing
:
bool
,
hparams
):
def
preprocess_dataset
(
datasets_root
:
Path
,
out_dir
:
Path
,
n_processes
:
int
,
skip_existing
:
bool
,
hparams
,
no_alignments
:
bool
,
datasets_name
:
str
,
subfolders
:
str
):
# Gather the input directories
dataset_root
=
datasets_root
.
joinpath
(
"LibriSpeech"
)
input_dirs
=
[
dataset_root
.
joinpath
(
"train-clean-100"
),
dataset_root
.
joinpath
(
"train-clean-360"
)]
dataset_root
=
datasets_root
.
joinpath
(
datasets_name
)
input_dirs
=
[
dataset_root
.
joinpath
(
subfolder
.
strip
())
for
subfolder
in
subfolders
.
split
(
","
)]
print
(
"
\n
"
.
join
(
map
(
str
,
[
"Using data from:"
]
+
input_dirs
)))
assert
all
(
input_dir
.
exists
()
for
input_dir
in
input_dirs
)
...
...
@@ -30,9 +30,9 @@ def preprocess_librispeech(datasets_root: Path, out_dir: Path, n_processes: int,
# Preprocess the dataset
speaker_dirs
=
list
(
chain
.
from_iterable
(
input_dir
.
glob
(
"*"
)
for
input_dir
in
input_dirs
))
func
=
partial
(
preprocess_speaker
,
out_dir
=
out_dir
,
skip_existing
=
skip_existing
,
hparams
=
hparams
)
hparams
=
hparams
,
no_alignments
=
no_alignments
)
job
=
Pool
(
n_processes
).
imap
(
func
,
speaker_dirs
)
for
speaker_metadata
in
tqdm
(
job
,
"LibriSpeech"
,
len
(
speaker_dirs
),
unit
=
"speakers"
):
for
speaker_metadata
in
tqdm
(
job
,
datasets_name
,
len
(
speaker_dirs
),
unit
=
"speakers"
):
for
metadatum
in
speaker_metadata
:
metadata_file
.
write
(
"|"
.
join
(
str
(
x
)
for
x
in
metadatum
)
+
"
\n
"
)
metadata_file
.
close
()
...
...
@@ -51,32 +51,62 @@ def preprocess_librispeech(datasets_root: Path, out_dir: Path, n_processes: int,
print
(
"Max audio timesteps length: %d"
%
max
(
int
(
m
[
3
])
for
m
in
metadata
))
def preprocess_speaker(speaker_dir, out_dir: Path, skip_existing: bool, hparams,
                       no_alignments: bool):
    """Preprocess every utterance belonging to one speaker directory.

    Walks each "book" subdirectory of *speaker_dir* and converts its audio into
    synthesizer training examples via ``process_utterance``.

    Args:
        speaker_dir: directory containing one subdirectory per book/chapter.
        out_dir: destination directory handed through to ``process_utterance``.
        skip_existing: if True, utterances already present in *out_dir* are skipped.
        hparams: hyperparameter object; ``sample_rate``, ``rescale`` and
            ``rescaling_max`` are read here (the rest is forwarded).
        no_alignments: when True, the dataset has no ``*.alignment.txt`` files
            (e.g. LibriTTS); each audio file is loaded whole and paired with a
            sibling ``.txt`` / ``.normalized.txt`` transcript. When False, the
            LibriSpeech alignment files are used to split long audio files into
            sub-utterances with ``split_on_silences``.

    Returns:
        A list of metadata tuples (``None`` entries from skipped/failed
        utterances are filtered out).
    """
    metadata = []
    for book_dir in speaker_dir.glob("*"):
        if no_alignments:
            # Gather the utterance audios and texts.
            # LibriTTS uses .wav but we will include extensions for
            # compatibility with other datasets.
            extensions = ["*.wav", "*.flac", "*.mp3"]
            for extension in extensions:
                wav_fpaths = book_dir.glob(extension)

                for wav_fpath in wav_fpaths:
                    # Load the audio waveform at the synthesizer's sample rate
                    wav, _ = librosa.load(str(wav_fpath), hparams.sample_rate)
                    if hparams.rescale:
                        # Normalize peak amplitude to hparams.rescaling_max
                        wav = wav / np.abs(wav).max() * hparams.rescaling_max

                    # Get the corresponding text.
                    # Check for .txt (for compatibility with other datasets).
                    text_fpath = wav_fpath.with_suffix(".txt")
                    if not text_fpath.exists():
                        # Check for .normalized.txt (LibriTTS)
                        text_fpath = wav_fpath.with_suffix(".normalized.txt")
                        assert text_fpath.exists()
                    with text_fpath.open("r") as text_file:
                        text = "".join([line for line in text_file])
                        text = text.replace("\"", "")
                        text = text.strip()

                    # Process the utterance; basename is the audio file name
                    # without its extension.
                    metadata.append(process_utterance(wav, text, out_dir,
                                                      str(wav_fpath.with_suffix("").name),
                                                      skip_existing, hparams))
        else:
            # Process alignment file (LibriSpeech support).
            # Gather the utterance audios and texts.
            try:
                alignments_fpath = next(book_dir.glob("*.alignment.txt"))
                with alignments_fpath.open("r") as alignments_file:
                    alignments = [line.rstrip().split(" ") for line in alignments_file]
            except StopIteration:
                # A few alignment files will be missing
                continue

            # Iterate over each entry in the alignments file
            for wav_fname, words, end_times in alignments:
                wav_fpath = book_dir.joinpath(wav_fname + ".flac")
                assert wav_fpath.exists()
                # Words and end times are stored as quoted comma-separated lists
                words = words.replace("\"", "").split(",")
                end_times = list(map(float, end_times.replace("\"", "").split(",")))

                # Process each sub-utterance produced by splitting on silences
                wavs, texts = split_on_silences(wav_fpath, words, end_times, hparams)
                for i, (wav, text) in enumerate(zip(wavs, texts)):
                    sub_basename = "%s_%02d" % (wav_fname, i)
                    metadata.append(process_utterance(wav, text, out_dir, sub_basename,
                                                      skip_existing, hparams))

    return [m for m in metadata if m is not None]
...
...
@@ -222,4 +252,4 @@ def create_embeddings(synthesizer_root: Path, encoder_model_fpath: Path, n_proce
func
=
partial
(
embed_utterance
,
encoder_model_fpath
=
encoder_model_fpath
)
job
=
Pool
(
n_processes
).
imap
(
func
,
fpaths
)
list
(
tqdm
(
job
,
"Embedding"
,
len
(
fpaths
),
unit
=
"utterances"
))
\ No newline at end of file
synthesizer_preprocess_audio.py
浏览文件 @
054f16ec
from
synthesizer.preprocess
import
preprocess_
librispeech
from
synthesizer.preprocess
import
preprocess_
dataset
from
synthesizer.hparams
import
hparams
from
utils.argutils
import
print_args
from
pathlib
import
Path
...
...
@@ -26,6 +26,13 @@ if __name__ == "__main__":
"Hyperparameter overrides as a comma-separated list of name-value pairs"
)
parser
.
add_argument
(
"--no_trim"
,
action
=
"store_true"
,
help
=
\
"Preprocess audio without trimming silences (not recommended)."
)
parser
.
add_argument
(
"--no_alignments"
,
action
=
"store_true"
,
help
=
\
"Use this option when dataset does not include alignments
\
(these are used to split long audio files into sub-utterances.)"
)
parser
.
add_argument
(
"--datasets_name"
,
type
=
str
,
default
=
"LibriSpeech"
,
help
=
\
"Name of the dataset directory to process."
)
parser
.
add_argument
(
"--subfolders"
,
type
=
str
,
default
=
"train-clean-100, train-clean-360"
,
help
=
\
"Comma-separated list of subfolders to process inside your dataset directory"
)
args
=
parser
.
parse_args
()
# Process the arguments
...
...
@@ -49,4 +56,4 @@ if __name__ == "__main__":
# Preprocess the dataset
print_args
(
args
,
parser
)
args
.
hparams
=
hparams
.
parse
(
args
.
hparams
)
preprocess_
librispeech
(
**
vars
(
args
))
preprocess_
dataset
(
**
vars
(
args
))
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录