Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
c948e498
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c948e498
编写于
5月 07, 2020
作者:
Y
yaoxuefeng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add tools for dataset download
上级
11360f6c
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
133 addition
and
0 deletion
+133
-0
tools/tools.py
tools/tools.py
+133
-0
未找到文件。
tools/tools.py
0 → 100644
浏览文件 @
c948e498
import
os
import
time
import
shutil
import
requests
import
sys
import
tarfile
import
zipfile
import
platform
import
functools
# Timestamp (seconds since the epoch) of the most recent progress write.
# progress() reads and rebinds this global to rate-limit console output.
lasttime = time.time()
# Minimum number of seconds that must elapse between two progress writes.
FLUSH_INTERVAL = 0.1

# Absolute path of the directory that contains this file.
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
def get_platform():
    """Return the platform identification string from ``platform.platform()``."""
    platform_name = platform.platform()
    return platform_name
def is_windows():
    """Tell whether the platform string starts with "windows" (case-insensitive)."""
    lowered = get_platform().lower()
    return lowered.startswith("windows")
def progress(str, end=False):
    """Write a single-line, rate-limited progress message to stdout.

    The message is prefixed with a carriage return so that successive calls
    overwrite each other on the same console line. Writes are skipped when
    less than ``FLUSH_INTERVAL`` seconds have passed since the previous one.

    Args:
        str: The text to display.
        end: When true, a newline is appended to the text and the rate limit
            is bypassed so the final message is always written.
    """
    global lasttime
    if end:
        str = str + "\n"
        # Resetting the timestamp guarantees the interval check below passes.
        lasttime = 0

    elapsed = time.time() - lasttime
    if elapsed < FLUSH_INTERVAL:
        return

    sys.stdout.write("\r%s" % str)
    lasttime = time.time()
    sys.stdout.flush()
def download_file(url, savepath, print_progress):
    """Download ``url`` into the local file ``savepath``.

    When the server reports a ``content-length`` header the body is streamed
    in 4096-byte chunks and, if requested, a 50-character progress bar is
    drawn through ``progress``. Without that header the raw response stream
    is copied to disk in one go and no progress is shown.

    Args:
        url: Address of the resource to fetch.
        savepath: Path of the file to create (opened in ``'wb'`` mode).
        print_progress: When true, print the file name and a progress bar.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    r = requests.get(url, stream=True)
    try:
        # Fail before touching the disk: otherwise an HTTP error page would be
        # saved under the archive name and only blow up later when unpacking.
        r.raise_for_status()
        total_length = r.headers.get('content-length')

        if total_length is None:
            # Unknown size: no percentage can be computed, copy the raw stream.
            with open(savepath, 'wb') as f:
                shutil.copyfileobj(r.raw, f)
            return

        # Parse the header before opening the file so a malformed value does
        # not leave an empty file behind.
        total_length = int(total_length)
        with open(savepath, 'wb') as f:
            dl = 0
            if print_progress:
                print("Downloading %s" % os.path.basename(savepath))
            for data in r.iter_content(chunk_size=4096):
                dl += len(data)
                f.write(data)
                if print_progress:
                    done = int(50 * dl / total_length)
                    progress("[%-50s] %.2f%%" %
                             ('=' * done, float(100 * dl) / total_length))
        if print_progress:
            # Force a final, complete bar regardless of the rate limit.
            progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)
    finally:
        # A streamed response keeps its connection open until it is closed.
        r.close()
def _uncompress_file(filepath, extrapath, delete_file, print_progress):
    """Unpack the archive ``filepath`` into ``extrapath``.

    The extraction routine is picked from the file name: names ending in
    ``"zip"`` use the zip handler, names ending in ``"tgz"`` use the tar
    handler with its default mode, and everything else uses the tar handler
    with ``mode="r"``.

    Args:
        filepath: Path of the archive to unpack.
        extrapath: Directory that receives the extracted entries.
        delete_file: When true, remove the archive after extraction.
        print_progress: When true, print the archive name and a progress bar.

    Returns:
        The ``rootpath`` value carried by the handler's last yielded tuple.
    """
    if print_progress:
        print("Uncompress %s" % os.path.basename(filepath))

    if filepath.endswith("zip"):
        extractor = _uncompress_file_zip
    elif filepath.endswith("tgz"):
        extractor = _uncompress_file_tar
    else:
        extractor = functools.partial(_uncompress_file_tar, mode="r")

    for count, position, rootpath in extractor(filepath, extrapath):
        if not print_progress:
            continue
        filled = int(50 * float(position) / count)
        percent = float(100 * position) / count
        progress("[%-50s] %.2f%%" % ('=' * filled, percent))

    if print_progress:
        # Draw the completed bar and terminate the progress line.
        progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)

    if delete_file:
        os.remove(filepath)

    return rootpath
def
_uncompress_file_zip
(
filepath
,
extrapath
):
files
=
zipfile
.
ZipFile
(
filepath
,
'r'
)
filelist
=
files
.
namelist
()
rootpath
=
filelist
[
0
]
total_num
=
len
(
filelist
)
for
index
,
file
in
enumerate
(
filelist
):
files
.
extract
(
file
,
extrapath
)
yield
total_num
,
index
,
rootpath
files
.
close
()
yield
total_num
,
index
,
rootpath
def
_uncompress_file_tar
(
filepath
,
extrapath
,
mode
=
"r:gz"
):
files
=
tarfile
.
open
(
filepath
,
mode
)
filelist
=
files
.
getnames
()
total_num
=
len
(
filelist
)
rootpath
=
filelist
[
0
]
for
index
,
file
in
enumerate
(
filelist
):
files
.
extract
(
file
,
extrapath
)
yield
total_num
,
index
,
rootpath
files
.
close
()
yield
total_num
,
index
,
rootpath
def download_file_and_uncompress(url,
                                 savepath=None,
                                 savename=None,
                                 extrapath=None,
                                 print_progress=True,
                                 cover=False,
                                 delete_file=False):
    """Download an archive from ``url`` (unless already present) and unpack it.

    Args:
        url: Address of the archive to fetch.
        savepath: Directory in which the archive is stored; defaults to ``"."``.
        savename: File name for the archive; defaults to the last ``/``-separated
            component of ``url``.
        extrapath: Directory that receives the extracted content; defaults to
            ``"."``.
        print_progress: When true, show download and extraction progress.
        cover: When true, discard an existing copy of the archive so that it
            is downloaded again.
        delete_file: When true, remove the archive after extraction.
    """
    if savepath is None:
        savepath = "."

    if extrapath is None:
        extrapath = "."

    if savename is None:
        savename = url.split("/")[-1]
    savepath = os.path.join(savepath, savename)

    if cover and os.path.exists(savepath):
        # savepath normally names the downloaded archive, a regular file;
        # shutil.rmtree only accepts real directories and would raise here.
        if os.path.isdir(savepath) and not os.path.islink(savepath):
            shutil.rmtree(savepath)
        else:
            os.remove(savepath)

    if not os.path.exists(savepath):
        download_file(url, savepath, print_progress)
    _uncompress_file(savepath, extrapath, delete_file, print_progress)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录