PaddlePaddle / PaddleHub

Commit de95bb4f
Authored Mar 14, 2019 by wuzewu

update downloader

Parent: cb09f5f4
Showing 2 changed files with 81 additions and 65 deletions (+81 -65)

paddle_hub/module/module.py        +3  -2
paddle_hub/tools/downloader.py     +78 -63
paddle_hub/module/module.py

@@ -17,7 +17,7 @@ from __future__ import division
 from __future__ import print_function
 from paddle_hub.tools import utils
 from paddle_hub.tools.logger import logger
-from paddle_hub.tools import downloader
+from paddle_hub.tools.downloader import default_downloader
 from paddle_hub.tools import paddle_helper
 from paddle_hub.module import module_desc_pb2
 from paddle_hub.module.signature import Signature, create_signature

@@ -116,7 +116,8 @@ class Module:
     def _init_with_url(self, url):
         utils.check_url_valid(url)
-        module_dir = downloader.download_and_uncompress(module_url)
+        module_dir = default_downloader.download_file_and_uncompress(
+            module_url, save_path=".")
         self._init_with_module_file(module_dir)

     def _dump_processor(self):
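The practical effect of the new call site is that a module fetched by URL is now downloaded through the shared `default_downloader` instance with an explicit `save_path`, rather than through the old module-level `download_and_uncompress` helper. A minimal sketch of the updated path follows; the URL is a hypothetical placeholder, not something taken from this commit:

from paddle_hub.tools.downloader import default_downloader

# Hypothetical module archive URL, used only to illustrate the new call.
module_url = "http://example.com/modules/my_module.tar.gz"

# save_path="." downloads the archive into the current directory,
# extracts it there, and returns the extracted module directory.
module_dir = default_downloader.download_file_and_uncompress(
    module_url, save_path=".")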
paddle_hub/tools/downloader.py

@@ -23,11 +23,13 @@ import hashlib
 import requests
 import tempfile
 import tarfile
+from paddle_hub.tools import utils
+from paddle_hub.tools.logger import logger

-__all__ = ['MODULE_HOME', 'download', 'md5file', 'download_and_uncompress']
+__all__ = ['MODULE_HOME', 'downloader', 'md5file', 'Downloader']
 # TODO(ZeyuChen) add environment varialble to set MODULE_HOME
-MODULE_HOME = os.path.expanduser('~/.cache/paddle/module')
+MODULE_HOME = os.path.expanduser('~/.hub/module')

 # When running unit tests, there could be multiple processes that

@@ -53,64 +55,77 @@ def md5file(fname):
     return hash_md5.hexdigest()


-def download_and_uncompress(url, save_name=None):
-    module_name = url.split("/")[-2]
-    dirname = os.path.join(MODULE_HOME, module_name)
-    if not os.path.exists(dirname):
-        os.makedirs(dirname)
-    #TODO(ZeyuChen) add download md5 file to verify file completeness
-    file_name = os.path.join(
-        dirname,
-        url.split('/')[-1] if save_name is None else save_name)
-
-    retry = 0
-    retry_limit = 3
-    while not (os.path.exists(file_name)):
-        if os.path.exists(file_name):
-            print("file md5", md5file(file_name))
-        if retry < retry_limit:
-            retry += 1
-        else:
-            raise RuntimeError(
-                "Cannot download {0} within retry limit {1}".format(
-                    url, retry_limit))
-        print("Cache file %s not found, downloading %s" % (file_name, url))
-        r = requests.get(url, stream=True)
-        total_length = r.headers.get('content-length')
-
-        if total_length is None:
-            with open(file_name, 'wb') as f:
-                shutil.copyfileobj(r.raw, f)
-        else:
-            #TODO(ZeyuChen) upgrade to tqdm process
-            with open(file_name, 'wb') as f:
-                dl = 0
-                total_length = int(total_length)
-                for data in r.iter_content(chunk_size=4096):
-                    dl += len(data)
-                    f.write(data)
-                    done = int(50 * dl / total_length)
-                    sys.stdout.write(
-                        "\r[%s%s]" % ('=' * done, ' ' * (50 - done)))
-                    sys.stdout.flush()
-
-    print("file download completed!", file_name)
-    #TODO(ZeyuChen) add md5 check error and file incompleted error, then raise
-    # them and catch them
-    with tarfile.open(file_name, "r:gz") as tar:
-        file_names = tar.getnames()
-        print(file_names)
-        module_dir = os.path.join(dirname, file_names[0])
-        for file_name in file_names:
-            tar.extract(file_name, dirname)
-
-    return module_dir
-
-
-if __name__ == "__main__":
-    # TODO(ZeyuChen) add unit test
-    link = "http://paddlehub.bj.bcebos.com/word2vec/word2vec-dim16-simple-example-1.tar.gz"
-    module_path = download_and_uncompress(link)
-    print("module path", module_path)
+class Downloader:
+    def __init__(self, module_home=None):
+        self.module_home = module_home if module_home else MODULE_HOME
+
+    def download_file(self, url, save_path=None, save_name=None,
+                      retry_limit=3):
+        module_name = url.split("/")[-2]
+        save_path = self.module_home if save_path is None else save_path
+        if not os.path.exists(save_path):
+            utils.mkdir(save_path)
+        save_name = url.split('/')[-1] if save_name is None else save_name
+        file_name = os.path.join(save_path, save_name)
+        retry_times = 0
+        while not (os.path.exists(file_name)):
+            if os.path.exists(file_name):
+                logger.info("file md5", md5file(file_name))
+            if retry_times < retry_limit:
+                retry_times += 1
+            else:
+                raise RuntimeError(
+                    "Cannot download {0} within retry limit {1}".format(
+                        url, retry_limit))
+            logger.info(
+                "Cache file %s not found, downloading %s" % (file_name, url))
+            r = requests.get(url, stream=True)
+            total_length = r.headers.get('content-length')
+
+            if total_length is None:
+                with open(file_name, 'wb') as f:
+                    shutil.copyfileobj(r.raw, f)
+            else:
+                #TODO(ZeyuChen) upgrade to tqdm process
+                with open(file_name, 'wb') as f:
+                    dl = 0
+                    total_length = int(total_length)
+                    for data in r.iter_content(chunk_size=4096):
+                        dl += len(data)
+                        f.write(data)
+                        done = int(50 * dl / total_length)
+                        sys.stdout.write(
+                            "\r[%s%s]" % ('=' * done, ' ' * (50 - done)))
+                        sys.stdout.flush()
+
+        logger.info("file %s download completed!" % (file_name))
+        return file_name
+
+    def uncompress(self, file, dirname=None, delete_file=False):
+        dirname = os.path.dirname(file) if dirname is None else dirname
+        with tarfile.open(file, "r:gz") as tar:
+            file_names = tar.getnames()
+            logger.info(file_names)
+            module_dir = os.path.join(dirname, file_names[0])
+            for file_name in file_names:
+                tar.extract(file_name, dirname)
+
+        if delete_file:
+            os.remove(file)
+
+        return module_dir
+
+    def download_file_and_uncompress(self,
+                                     url,
+                                     save_path=None,
+                                     save_name=None,
+                                     retry_limit=3,
+                                     delete_file=True):
+        file = self.download_file(
+            url=url,
+            save_path=save_path,
+            save_name=save_name,
+            retry_limit=retry_limit)
+        return self.uncompress(file, delete_file=delete_file)
+
+
+default_downloader = Downloader()
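As a quick orientation to the refactored API, the sketch below shows how the class added in this commit can be used, either through the shared `default_downloader` instance or with a custom cache directory. It is an illustration based on the method signatures in the diff, not code from the repository; the word2vec link is the same example URL that the removed `__main__` block downloaded, and "/tmp/hub_modules" is an arbitrary path chosen for illustration.

from paddle_hub.tools.downloader import Downloader, default_downloader

# Same example archive that the removed __main__ block used.
link = "http://paddlehub.bj.bcebos.com/word2vec/word2vec-dim16-simple-example-1.tar.gz"

# Download into the default MODULE_HOME (~/.hub/module) and extract;
# delete_file=True removes the .tar.gz once extraction succeeds.
module_path = default_downloader.download_file_and_uncompress(link)

# Or run the two steps separately with a custom cache directory.
custom = Downloader(module_home="/tmp/hub_modules")
archive = custom.download_file(link, retry_limit=3)
module_path = custom.uncompress(archive, delete_file=True)
print("module path", module_path)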