Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Parakeet
提交
ec99a42d
P
Parakeet
项目概览
PaddlePaddle
/
Parakeet
通知
8
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Parakeet
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ec99a42d
编写于
11月 24, 2019
作者:
C
chenfeiyu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add vctk
上级
34bd1e98
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
98 additions
and
2 deletions
+98
-2
.gitignore
.gitignore
+3
-0
parakeet/datasets/vctk.py
parakeet/datasets/vctk.py
+80
-0
setup.py
setup.py
+3
-1
tests/test_ljspeech.py
tests/test_ljspeech.py
+1
-1
tests/test_vctk.py
tests/test_vctk.py
+11
-0
未找到文件。
.gitignore
浏览文件 @
ec99a42d
...
...
@@ -3,6 +3,9 @@ __pycache__/
*.py[cod]
*$py.class
# vscode
.vscode
# C extensions
*.so
...
...
parakeet/datasets/vctk.py
0 → 100644
浏览文件 @
ec99a42d
from
pathlib
import
Path
import
pandas
as
pd
from
ruamel.yaml
import
YAML
import
io
import
librosa
import
numpy
as
np
from
parakeet.g2p.en
import
text_to_sequence
from
parakeet.data.dataset
import
Dataset
from
parakeet.data.datacargo
import
DataCargo
from
parakeet.data.batch
import
TextIDBatcher
,
WavBatcher
class VCTK(Dataset):
    """Dataset over a VCTK corpus directory.

    The constructor expects ``root`` to contain a ``txt`` folder (one
    sub-folder per speaker holding transcription files) and a ``wav48``
    folder laid out the same way. Two cache files are kept directly under
    ``root``:

    * ``metadata.csv`` -- ``|``-separated table with the columns
      ``wave_file``, ``speaker`` and ``text``;
    * ``speaker_indices.yaml`` -- mapping from speaker folder name to an
      integer id.

    Both are generated on first use and simply re-loaded afterwards.
    """

    def __init__(self, root: Path):
        """Open the corpus at ``root``, building the cache files if needed.

        Args:
            root: corpus directory; a plain string path is accepted as well.
        """
        # Coerce so that callers passing a str do not fail on ``joinpath``.
        self.root = Path(root)
        self.text_root = self.root.joinpath("txt")
        self.wav_root = self.root.joinpath("wav48")

        metadata_path = self.root.joinpath("metadata.csv")
        speakers_path = self.root.joinpath("speaker_indices.yaml")
        # Regenerate both files together if either one is missing.
        if not (metadata_path.exists() and speakers_path.exists()):
            self._prepare_metadata()
        self.speaker_indices, self.metadata = self._load_metadata()

    def _load_metadata(self):
        """Load the cached speaker mapping and metadata table.

        Returns:
            A ``(speaker_indices, metadata)`` tuple: the object parsed from
            ``speaker_indices.yaml`` and a ``pandas.DataFrame`` read from
            ``metadata.csv``.
        """
        yaml = YAML(typ='safe')
        speaker_indices = yaml.load(self.root.joinpath("speaker_indices.yaml"))
        # ``_prepare_metadata`` writes the column names on the first line, so
        # the header is row 0. Reading with ``header=1`` would throw the real
        # header away and swallow the first utterance as column names.
        # ``quoting=3`` is ``csv.QUOTE_NONE``, matching how the file is written.
        metadata = pd.read_csv(
            self.root.joinpath("metadata.csv"),
            sep="|",
            quoting=3,
            header=0)
        return speaker_indices, metadata

    def _prepare_metadata(self):
        """Scan ``txt/`` and write ``speaker_indices.yaml`` and ``metadata.csv``.

        Every sub-directory of ``txt/`` is treated as one speaker and every
        regular file inside it as one transcription. The matching audio file
        name is derived by swapping the file suffix for ``.wav``.
        """
        # Sort and keep directories only: speaker ids are then contiguous
        # (0..n-1) and identical across machines, instead of depending on the
        # arbitrary ``iterdir`` order and on stray files inside ``txt/``.
        # An already existing ``speaker_indices.yaml`` is never rewritten by
        # this change; it is only used when the cache has to be (re)built.
        speaker_folders = sorted(
            entry for entry in self.text_root.iterdir() if entry.is_dir())

        records = []
        speaker_to_index = {}
        for speaker_id, speaker_folder in enumerate(speaker_folders):
            speaker_to_index[speaker_folder.name] = speaker_id
            for text_file in sorted(speaker_folder.iterdir()):
                if not text_file.is_file():
                    continue
                with io.open(str(text_file)) as f:
                    transcription = f.read().strip()
                wav_name = text_file.with_suffix(".wav").name
                records.append((wav_name, speaker_folder.name, transcription))

        metadata = pd.DataFrame.from_records(
            records, columns=["wave_file", "speaker", "text"])

        # save them
        yaml = YAML(typ='safe')
        yaml.dump(speaker_to_index, self.root.joinpath("speaker_indices.yaml"))
        metadata.to_csv(
            self.root.joinpath("metadata.csv"),
            sep="|",
            quoting=3,
            index=False)

    def _get_example(self, metadatum):
        """Turn one metadata row into a training example.

        Args:
            metadatum: a row that unpacks into ``(wave_file, speaker, text)``.

        Returns:
            ``(wav, speaker_index, phoneme_seq)`` where ``wav`` is the audio
            returned by ``librosa.load``, ``speaker_index`` is looked up in
            ``self.speaker_indices`` and ``phoneme_seq`` is a numpy array built
            from ``text_to_sequence(text)``.
        """
        wave_file, speaker, text = metadatum
        wav_path = self.wav_root.joinpath(speaker, wave_file)
        # sr=None asks librosa to keep the file's native sampling rate; the
        # rate itself is not needed here.
        wav, _ = librosa.load(str(wav_path), sr=None)
        phoneme_seq = np.array(text_to_sequence(text))
        return wav, self.speaker_indices[speaker], phoneme_seq

    def __getitem__(self, index):
        """Return the example built from the metadata row at position ``index``."""
        metadatum = self.metadata.iloc[index]
        example = self._get_example(metadatum)
        return example

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return len(self.metadata)

    def _batch_examples(self, minibatch):
        """Collate a list of examples into three batch objects.

        Args:
            minibatch: iterable of ``(wav, speaker_id, phoneme_seq)`` tuples
                as produced by ``_get_example``.

        Returns:
            ``(wav_batch, speaker_batch, phoneme_batch)``. The speaker ids are
            stacked into a numpy array; the other two are whatever
            ``WavBatcher(pad_value=0.)`` and ``TextIDBatcher(pad_id=0)``
            return (presumably padded arrays -- confirm against
            ``parakeet.data.batch``).
        """
        wav_batch, speaker_batch, phoneme_batch = [], [], []
        for wav, speaker_id, phoneme_seq in minibatch:
            wav_batch.append(wav)
            speaker_batch.append(speaker_id)
            phoneme_batch.append(phoneme_seq)
        wav_batch = WavBatcher(pad_value=0.)(wav_batch)
        speaker_batch = np.array(speaker_batch)
        phoneme_batch = TextIDBatcher(pad_id=0)(phoneme_batch)
        return wav_batch, speaker_batch, phoneme_batch
\ No newline at end of file
setup.py
浏览文件 @
ec99a42d
...
...
@@ -34,7 +34,9 @@ setup_info = dict(
license
=
'Apache 2'
,
install_requires
=
[
'numpy'
,
'nltk'
,
'inflect'
,
'librosa'
,
'unidecode'
,
'numba'
,
'tqdm'
,
'matplotlib'
,
'tensorboardX'
,
'tensorboard'
,
'scipy'
,
'tqdm'
,
'numpy'
,
'nltk'
,
'inflect'
,
'librosa'
,
'unidecode'
,
'numba'
,
'tqdm'
,
'matplotlib'
,
'tensorboardX'
,
'tensorboard'
,
'scipy'
,
'ruamel.yaml'
,
'pandas'
,
],
# Package info
...
...
tests/test_ljspeech.py
浏览文件 @
ec99a42d
...
...
@@ -3,7 +3,7 @@ from parakeet.data.datacargo import DataCargo
from
pathlib
import
Path
LJSPEECH_ROOT
=
Path
(
"/
Users/chenfeiyu/projec
ts/LJSpeech-1.1"
)
LJSPEECH_ROOT
=
Path
(
"/
workspace/datase
ts/LJSpeech-1.1"
)
ljspeech
=
LJSpeech
(
LJSPEECH_ROOT
)
ljspeech_cargo
=
DataCargo
(
ljspeech
,
batch_size
=
16
,
shuffle
=
True
)
for
i
,
batch
in
enumerate
(
ljspeech_cargo
):
...
...
tests/test_vctk.py
0 → 100644
浏览文件 @
ec99a42d
from
parakeet.datasets
import
vctk
from
pathlib
import
Path
from
parakeet.data.datacargo
import
DataCargo
# Smoke test: iterate once over the whole VCTK corpus in shuffled batches.
# The corpus location is specific to the machine this script is run on.
root = Path("/workspace/datasets/VCTK-Corpus")
vctk_dataset = vctk.VCTK(root)

# Keep the loader settings together so they are easy to tweak.
cargo_options = dict(batch_size=16, shuffle=True, drop_last=True)
vctk_cargo = DataCargo(vctk_dataset, **cargo_options)

# Only the running batch index is printed; the batch content is not inspected.
for i, batch in enumerate(vctk_cargo):
    print(i)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录