Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
e3d73acd
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e3d73acd
编写于
8月 17, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix io; add test
上级
4b5410ee
变更
4
展开全部
隐藏空白更改
内联
并排
Showing
4 changed file
with
1173 addition
and
6 deletion
+1173
-6
.bashrc
.bashrc
+10
-0
.notebook/espnet_dataloader.ipynb
.notebook/espnet_dataloader.ipynb
+1157
-0
deepspeech/io/batchfy.py
deepspeech/io/batchfy.py
+5
-5
deepspeech/io/dataset.py
deepspeech/io/dataset.py
+1
-1
未找到文件。
.bashrc
0 → 100755
浏览文件 @
e3d73acd
# Locales
export
LC_ALL
=
en_US.UTF-8
export
LANG
=
en_US.UTF-8
export
LANGUAGE
=
en_US.UTF-8
# Aliases
alias
nvs
=
"nvidia-smi"
alias
rsync
=
"rsync --progress -raz"
alias
his
=
"history"
.notebook/espnet_dataloader.ipynb
0 → 100644
浏览文件 @
e3d73acd
此差异已折叠。
点击以展开。
deepspeech/io/batchfy.py
浏览文件 @
e3d73acd
...
@@ -347,7 +347,7 @@ def make_batchset(
...
@@ -347,7 +347,7 @@ def make_batchset(
Note that if any utts doesn't have "category",
Note that if any utts doesn't have "category",
perform as same as batchfy_by_{count}
perform as same as batchfy_by_{count}
:param
Dict[str,
Dict[str, Any]] data: dictionary loaded from data.json
:param
List[
Dict[str, Any]] data: dictionary loaded from data.json
:param int batch_size: maximum number of sequences in a minibatch.
:param int batch_size: maximum number of sequences in a minibatch.
:param int batch_bins: maximum number of bins (frames x dim) in a minibatch.
:param int batch_bins: maximum number of bins (frames x dim) in a minibatch.
:param int batch_frames_in: maximum number of input frames in a minibatch.
:param int batch_frames_in: maximum number of input frames in a minibatch.
...
@@ -374,7 +374,6 @@ def make_batchset(
...
@@ -374,7 +374,6 @@ def make_batchset(
reserved for future research, -1 means all axis.)
reserved for future research, -1 means all axis.)
:return: List[List[Tuple[str, dict]]] list of batches
:return: List[List[Tuple[str, dict]]] list of batches
"""
"""
# check args
# check args
if
count
not
in
BATCH_COUNT_CHOICES
:
if
count
not
in
BATCH_COUNT_CHOICES
:
raise
ValueError
(
raise
ValueError
(
...
@@ -386,7 +385,6 @@ def make_batchset(
...
@@ -386,7 +385,6 @@ def make_batchset(
ikey
=
"input"
ikey
=
"input"
okey
=
"output"
okey
=
"output"
batch_sort_axis
=
0
# index of list
batch_sort_axis
=
0
# index of list
if
count
==
"auto"
:
if
count
==
"auto"
:
if
batch_size
!=
0
:
if
batch_size
!=
0
:
count
=
"seq"
count
=
"seq"
...
@@ -405,7 +403,8 @@ def make_batchset(
...
@@ -405,7 +403,8 @@ def make_batchset(
"batch_sort_key=shuffle is only available if batch_count=seq"
)
"batch_sort_key=shuffle is only available if batch_count=seq"
)
category2data
=
{}
# Dict[str, dict]
category2data
=
{}
# Dict[str, dict]
for
k
,
v
in
data
.
items
():
for
v
in
data
:
k
=
v
[
'utt'
]
category2data
.
setdefault
(
v
.
get
(
"category"
),
{})[
k
]
=
v
category2data
.
setdefault
(
v
.
get
(
"category"
),
{})[
k
]
=
v
batches_list
=
[]
# List[List[List[Tuple[str, dict]]]]
batches_list
=
[]
# List[List[List[Tuple[str, dict]]]]
...
@@ -422,6 +421,7 @@ def make_batchset(
...
@@ -422,6 +421,7 @@ def make_batchset(
key
=
lambda
data
:
int
(
data
[
1
][
batch_sort_key
][
batch_sort_axis
][
"shape"
][
0
]),
key
=
lambda
data
:
int
(
data
[
1
][
batch_sort_key
][
batch_sort_axis
][
"shape"
][
0
]),
reverse
=
not
shortest_first
,
)
reverse
=
not
shortest_first
,
)
logger
.
info
(
"# utts: "
+
str
(
len
(
sorted_data
)))
logger
.
info
(
"# utts: "
+
str
(
len
(
sorted_data
)))
if
count
==
"seq"
:
if
count
==
"seq"
:
batches
=
batchfy_by_seq
(
batches
=
batchfy_by_seq
(
sorted_data
,
sorted_data
,
...
@@ -466,4 +466,4 @@ def make_batchset(
...
@@ -466,4 +466,4 @@ def make_batchset(
logger
.
info
(
"# minibatches: "
+
str
(
len
(
batches
)))
logger
.
info
(
"# minibatches: "
+
str
(
len
(
batches
)))
# batch: List[List[Tuple[str, dict]]]
# batch: List[List[Tuple[str, dict]]]
return
batches
return
batches
\ No newline at end of file
deepspeech/io/dataset.py
浏览文件 @
e3d73acd
...
@@ -16,7 +16,7 @@ from typing import Optional
...
@@ -16,7 +16,7 @@ from typing import Optional
from
paddle.io
import
Dataset
from
paddle.io
import
Dataset
from
yacs.config
import
CfgNode
from
yacs.config
import
CfgNode
from
deepspeech.frontend.utility
import
read_manifest
from
deepspeech.utils.log
import
Log
from
deepspeech.utils.log
import
Log
__all__
=
[
"ManifestDataset"
,
"TripletManifestDataset"
,
"TransformDataset"
]
__all__
=
[
"ManifestDataset"
,
"TripletManifestDataset"
,
"TransformDataset"
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录