Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
d1420d12
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
1 年多 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d1420d12
编写于
9月 19, 2017
作者:
Y
yangyaming
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Extract common utility functions.
上级
69e0d86d
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
28 addition
and
28 deletion
+28
-28
data/librispeech/librispeech.py
data/librispeech/librispeech.py
+1
-28
data_utils/utility.py
data_utils/utility.py
+27
-0
未找到文件。
data/librispeech/librispeech.py
浏览文件 @
d1420d12
...
...
@@ -12,12 +12,12 @@ from __future__ import print_function
import
distutils.util
import
os
import
sys
import
tarfile
import
argparse
import
soundfile
import
json
import
codecs
from
paddle.v2.dataset.common
import
md5file
from
data_utils.utility
import
download
,
unpack
DATA_HOME
=
os
.
path
.
expanduser
(
'~/.cache/paddle/dataset/speech'
)
...
...
@@ -59,33 +59,6 @@ parser.add_argument(
args
=
parser
.
parse_args
()
def download(url, md5sum, target_dir):
    """Download file from url to target_dir, and check md5sum.

    The local file name is the last "/"-separated component of ``url``.
    When a file with that name already exists in ``target_dir`` and
    ``md5file`` reports the expected checksum, nothing is downloaded.

    :param url: URL of the file to fetch.
    :param md5sum: Expected checksum, compared against ``md5file(filepath)``.
    :param target_dir: Directory to store the file in; created if missing.
    :return: Path of the file inside ``target_dir``.
    :raises RuntimeError: If the checksum does not match after downloading.
    """
    # Imported locally so this function does not depend on a file-level
    # import being present.
    import subprocess

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    filepath = os.path.join(target_dir, url.split("/")[-1])

    if os.path.exists(filepath) and md5file(filepath) == md5sum:
        print("File exists, skip downloading. (%s)" % filepath)
        return filepath

    print("Downloading %s ..." % url)
    # Pass an argument list instead of a concatenated shell string so that
    # spaces or shell metacharacters in url/target_dir cannot alter the
    # command. As before, wget's exit status is not inspected; the checksum
    # comparison below decides whether the download succeeded.
    subprocess.call(["wget", "-c", url, "-P", target_dir])
    print("\nMD5 Checksum %s ..." % filepath)
    if md5file(filepath) != md5sum:
        raise RuntimeError("MD5 checksum failed.")
    return filepath
def unpack(filepath, target_dir):
    """Unpack the file to the target_dir.

    :param filepath: Path of the tar archive to extract.
    :param target_dir: Directory the archive members are extracted into.
    """
    print("Unpacking %s ..." % filepath)
    tar = tarfile.open(filepath)
    try:
        # NOTE(review): extractall() trusts member paths inside the archive;
        # only use this on archives from a trusted, verified source.
        tar.extractall(target_dir)
    finally:
        # Close the archive even when extraction fails, so the file handle
        # is not leaked.
        tar.close()
def
create_manifest
(
data_dir
,
manifest_path
):
"""
Create a manifest json file summarizing the data set, with each line
...
...
data_utils/utility.py
浏览文件 @
d1420d12
...
...
@@ -5,6 +5,8 @@ from __future__ import print_function
import
json
import
codecs
import
os
import
tarfile
def
read_manifest
(
manifest_path
,
max_duration
=
float
(
'inf'
),
min_duration
=
0.0
):
...
...
@@ -33,3 +35,28 @@ def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0):
json_data
[
"duration"
]
>=
min_duration
):
manifest
.
append
(
json_data
)
return
manifest
def _md5_of_file(path):
    """Return the hexadecimal MD5 digest of the file at ``path``."""
    import hashlib

    digest = hashlib.md5()
    with open(path, "rb") as f:
        # Read in fixed-size chunks so large archives are not loaded into
        # memory at once.
        for chunk in iter(lambda: f.read(4096), b""):
            digest.update(chunk)
    return digest.hexdigest()


def download(url, md5sum, target_dir):
    """Download file from url to target_dir, and check md5sum.

    The local file name is the last "/"-separated component of ``url``.
    When a file with that name already exists in ``target_dir`` and its MD5
    hex digest equals ``md5sum``, nothing is downloaded.

    :param url: URL of the file to fetch.
    :param md5sum: Expected MD5 hex digest of the file.
    :param target_dir: Directory to store the file in; created if missing.
    :return: Path of the file inside ``target_dir``.
    :raises RuntimeError: If the checksum does not match after downloading.
    """
    # Imported locally so this function does not depend on a file-level
    # import being present.
    import subprocess

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    filepath = os.path.join(target_dir, url.split("/")[-1])

    # The checksum is computed by the private helper above rather than an
    # external md5file(), which is not among this module's visible imports.
    if os.path.exists(filepath) and _md5_of_file(filepath) == md5sum:
        print("File exists, skip downloading. (%s)" % filepath)
        return filepath

    print("Downloading %s ..." % url)
    # Pass an argument list instead of a concatenated shell string so that
    # spaces or shell metacharacters in url/target_dir cannot alter the
    # command. As before, wget's exit status is not inspected; the checksum
    # comparison below decides whether the download succeeded.
    subprocess.call(["wget", "-c", url, "-P", target_dir])
    print("\nMD5 Checksum %s ..." % filepath)
    if _md5_of_file(filepath) != md5sum:
        raise RuntimeError("MD5 checksum failed.")
    return filepath
def unpack(filepath, target_dir, rm_tar=False):
    """Unpack the file to the target_dir.

    :param filepath: Path of the tar archive to extract.
    :param target_dir: Directory the archive members are extracted into.
    :param rm_tar: When true, delete the archive after a successful
                   extraction.
    """
    print("Unpacking %s ..." % filepath)
    tar = tarfile.open(filepath)
    try:
        # NOTE(review): extractall() trusts member paths inside the archive;
        # only use this on archives from a trusted, verified source.
        tar.extractall(target_dir)
    finally:
        # Close the archive even when extraction fails, so the file handle
        # is not leaked.
        tar.close()
    # Reached only when extraction succeeded, so a failed unpack never
    # deletes the archive.
    if rm_tar:
        os.remove(filepath)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录