Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
91115ab6
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
91115ab6
编写于
2月 28, 2017
作者:
Y
Yi Wang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Use module name and raw data filename as the local filename
上级
37e2b920
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
25 additions
and
29 deletions
+25
-29
python/paddle/v2/dataset/common.py
python/paddle/v2/dataset/common.py
+16
-26
python/paddle/v2/dataset/tests/common_test.py
python/paddle/v2/dataset/tests/common_test.py
+9
-3
未找到文件。
python/paddle/v2/dataset/common.py
浏览文件 @
91115ab6
import
requests
import
hashlib
import
os
import
shutil
import
urllib2
__all__
=
[
'DATA_HOME'
,
'download'
,
'md5file'
]
...
...
@@ -11,31 +11,6 @@ if not os.path.exists(DATA_HOME):
os
.
makedirs
(
DATA_HOME
)
def download(url, package_name, md5):
    """Fetch ``url`` into ``DATA_HOME/<md5>/`` and return the local file path.

    The local file name is the last path component of ``url``.  An existing
    copy is reused when its MD5 hex digest equals ``md5``; otherwise the URL
    is fetched again, and this repeats until the digest matches.

    :param url: address of the file to download.
    :param package_name: not used by this function.
    :param md5: expected MD5 hex digest; also used as the cache directory name.
    :return: path of the local file whose digest matched ``md5``.
    :raises AssertionError: if ``DATA_HOME`` is ``None``.
    """
    filename = os.path.split(url)[-1]
    assert DATA_HOME is not None
    filepath = os.path.join(DATA_HOME, md5)
    if not os.path.exists(filepath):
        os.makedirs(filepath)
    full_file = os.path.join(filepath, filename)

    def file_ok():
        # True only when the file exists and its MD5 digest equals ``md5``.
        if not os.path.exists(full_file):
            return False
        md5_hash = hashlib.md5()
        with open(full_file, 'rb') as f:
            # Hash in 4 KiB chunks so large files are not read into memory.
            for chunk in iter(lambda: f.read(4096), b""):
                md5_hash.update(chunk)
        return md5_hash.hexdigest() == md5

    # NOTE(review): there is no retry limit; a digest that can never match
    # keeps this loop downloading forever.
    while not file_ok():
        response = urllib2.urlopen(url)
        try:
            with open(full_file, mode='wb') as of:
                shutil.copyfileobj(fsrc=response, fdst=of)
        finally:
            # Close the response on every attempt so retries do not pile up
            # open connections.
            response.close()
    return full_file
def
md5file
(
fname
):
hash_md5
=
hashlib
.
md5
()
f
=
open
(
fname
,
"rb"
)
...
...
@@ -43,3 +18,18 @@ def md5file(fname):
hash_md5
.
update
(
chunk
)
f
.
close
()
return
hash_md5
.
hexdigest
()
def download(url, module_name, md5sum):
    """Download ``url`` into ``DATA_HOME/<module_name>/`` and return its path.

    The local file name is the text after the last ``/`` in ``url``.  The
    download is skipped when that file already exists and ``md5file`` of it
    equals ``md5sum``.  The freshly downloaded file is not re-verified.

    :param url: address of the file to download.
    :param module_name: sub-directory of ``DATA_HOME`` that holds the file.
    :param md5sum: expected MD5 hex digest of the file.
    :return: path of the local file.
    """
    dirname = os.path.join(DATA_HOME, module_name)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    filename = os.path.join(dirname, url.split('/')[-1])
    if not (os.path.exists(filename) and md5file(filename) == md5sum):
        # If file doesn't exist or MD5 doesn't match, then download.
        r = requests.get(url, stream=True)
        try:
            # The raw response body is bytes, so the target must be opened in
            # binary mode; text mode would translate newlines on Windows.
            with open(filename, 'wb') as f:
                shutil.copyfileobj(r.raw, f)
        finally:
            # A streamed response keeps its connection until it is closed.
            r.close()

    return filename
python/paddle/v2/dataset/tests/common_test.py
浏览文件 @
91115ab6
...
...
@@ -5,12 +5,18 @@ import tempfile
class TestCommon(unittest.TestCase):
    """Tests for the helpers in ``paddle.v2.dataset.common``."""

    def test_md5file(self):
        """``md5file`` returns the MD5 hex digest of a small known file."""
        # Local import: this file's import block is not visible from here.
        import os

        fd, temp_path = tempfile.mkstemp()
        try:
            # mkstemp() hands back an open descriptor; writing through it and
            # closing it here avoids leaking the descriptor.
            with os.fdopen(fd, 'w') as f:
                f.write("Hello\n")
            self.assertEqual('09f7e02f1290be211da707a266f153b3',
                             paddle.v2.dataset.common.md5file(temp_path))
        finally:
            # mkstemp() never deletes the file itself.
            os.remove(temp_path)

    def test_download(self):
        """``download`` returns ``DATA_HOME/<module>/<last URL component>``."""
        # NOTE(review): this test calls download() on a live URL, so it
        # presumably needs network access - confirm before running offline.
        yi_avatar = 'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460'
        self.assertEqual(
            paddle.v2.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460',
            paddle.v2.dataset.common.download(
                yi_avatar, 'test', 'f75287202d6622414c706c36c16f8e0d'))


if __name__ == '__main__':
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录