Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PALM
提交
82874d8f
P
PALM
项目概览
PaddlePaddle
/
PALM
通知
4
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
10
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PALM
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
10
Issue
10
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
82874d8f
编写于
4月 15, 2020
作者:
X
Xiaoyao Xi
提交者:
GitHub
4月 15, 2020
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #78 from wangxiao1021/api
update downloaders
上级
4dac1032
589a5f77
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
99 addition
and
163 deletion
+99
-163
examples/classification/download.py
examples/classification/download.py
+15
-23
examples/matching/download.py
examples/matching/download.py
+15
-19
examples/mrc/download.py
examples/mrc/download.py
+15
-22
examples/multi-task/download.py
examples/multi-task/download.py
+15
-22
examples/predict/download.py
examples/predict/download.py
+15
-23
examples/tagging/download.py
examples/tagging/download.py
+15
-23
paddlepalm/_downloader.py
paddlepalm/_downloader.py
+9
-31
未找到文件。
examples/classification/download.py
浏览文件 @
82874d8f
# -*- coding: utf-8 -*-
from
__future__
import
print_function
import
os
import
requests
import
tarfile
import
shutil
from
tqdm
import
tqdm
import
sys
import
urllib
URLLIB
=
urllib
if
sys
.
version_info
>=
(
3
,
0
):
import
urllib.request
URLLIB
=
urllib
.
request
def
download
(
src
,
url
):
file_size
=
int
(
requests
.
head
(
url
).
headers
[
'Content-Length'
])
header
=
{
'user-agent'
:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'70.0.3538.67 Safari/537.36'
}
pbar
=
tqdm
(
total
=
file_size
)
resp
=
requests
.
get
(
url
,
headers
=
header
,
stream
=
True
)
with
open
(
src
,
'ab'
)
as
f
:
for
chunk
in
resp
.
iter_content
(
chunk_size
=
1024
):
if
chunk
:
f
.
write
(
chunk
)
pbar
.
update
(
1024
)
pbar
.
close
()
return
file_size
def
_reporthook
(
count
,
chunk_size
,
total_size
):
bytes_so_far
=
count
*
chunk_size
percent
=
float
(
bytes_so_far
)
/
float
(
total_size
)
if
percent
>
1
:
percent
=
1
print
(
'
\r
>> Downloading... {:.1%}'
.
format
(
percent
),
end
=
""
)
URLLIB
.
urlretrieve
(
url
,
src
,
reporthook
=
_reporthook
)
abs_path
=
os
.
path
.
abspath
(
__file__
)
download_url
=
"https://ernie.bj.bcebos.com/task_data_zh.tgz"
...
...
@@ -46,5 +39,4 @@ for file in os.listdir(os.path.join(target_dir, 'task_data', 'chnsenticorp')):
shutil
.
move
(
os
.
path
.
join
(
target_dir
,
'task_data'
,
'chnsenticorp'
,
file
),
dst_dir
)
shutil
.
rmtree
(
os
.
path
.
join
(
target_dir
,
'task_data'
))
print
(
" done!"
)
examples/matching/download.py
浏览文件 @
82874d8f
# -*- coding: utf-8 -*-
from
__future__
import
print_function
import
os
import
requests
from
tqdm
import
tqdm
import
sys
import
urllib
URLLIB
=
urllib
if
sys
.
version_info
>=
(
3
,
0
):
import
urllib.request
URLLIB
=
urllib
.
request
def
download
(
src
,
url
):
file_size
=
int
(
requests
.
head
(
url
).
headers
[
'Content-Length'
])
header
=
{
'user-agent'
:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'70.0.3538.67 Safari/537.36'
}
pbar
=
tqdm
(
total
=
file_size
)
resp
=
requests
.
get
(
url
,
headers
=
header
,
stream
=
True
)
with
open
(
src
,
'ab'
)
as
f
:
for
chunk
in
resp
.
iter_content
(
chunk_size
=
1024
):
if
chunk
:
f
.
write
(
chunk
)
pbar
.
update
(
1024
)
def
_reporthook
(
count
,
chunk_size
,
total_size
):
bytes_so_far
=
count
*
chunk_size
percent
=
float
(
bytes_so_far
)
/
float
(
total_size
)
if
percent
>
1
:
percent
=
1
print
(
'
\r
>> Downloading... {:.1%}'
.
format
(
percent
),
end
=
""
)
pbar
.
close
()
return
file_size
URLLIB
.
urlretrieve
(
url
,
src
,
reporthook
=
_reporthook
)
abs_path
=
os
.
path
.
abspath
(
__file__
)
...
...
@@ -32,3 +27,4 @@ if not os.path.exists(data_dir) or not os.path.isdir(data_dir):
download_url
=
"http://qim.fs.quoracdn.net/quora_duplicate_questions.tsv"
downlaod_path
=
os
.
path
.
join
(
data_dir
,
"quora_duplicate_questions.tsv"
)
download
(
downlaod_path
,
download_url
)
print
(
" done!"
)
examples/mrc/download.py
浏览文件 @
82874d8f
# -*- coding: utf-8 -*-
from
__future__
import
print_function
import
os
import
requests
import
tarfile
import
shutil
from
tqdm
import
tqdm
import
sys
import
urllib
URLLIB
=
urllib
if
sys
.
version_info
>=
(
3
,
0
):
import
urllib.request
URLLIB
=
urllib
.
request
def
download
(
src
,
url
):
file_size
=
int
(
requests
.
head
(
url
).
headers
[
'Content-Length'
])
header
=
{
'user-agent'
:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'70.0.3538.67 Safari/537.36'
}
pbar
=
tqdm
(
total
=
file_size
)
resp
=
requests
.
get
(
url
,
headers
=
header
,
stream
=
True
)
with
open
(
src
,
'ab'
)
as
f
:
for
chunk
in
resp
.
iter_content
(
chunk_size
=
1024
):
if
chunk
:
f
.
write
(
chunk
)
pbar
.
update
(
1024
)
pbar
.
close
()
return
file_size
def
_reporthook
(
count
,
chunk_size
,
total_size
):
bytes_so_far
=
count
*
chunk_size
percent
=
float
(
bytes_so_far
)
/
float
(
total_size
)
if
percent
>
1
:
percent
=
1
print
(
'
\r
>> Downloading... {:.1%}'
.
format
(
percent
),
end
=
""
)
URLLIB
.
urlretrieve
(
url
,
src
,
reporthook
=
_reporthook
)
abs_path
=
os
.
path
.
abspath
(
__file__
)
download_url
=
"https://ernie.bj.bcebos.com/task_data_zh.tgz"
...
...
@@ -46,5 +39,5 @@ for file in os.listdir(os.path.join(target_dir, 'task_data', 'cmrc2018')):
shutil
.
move
(
os
.
path
.
join
(
target_dir
,
'task_data'
,
'cmrc2018'
,
file
),
dst_dir
)
shutil
.
rmtree
(
os
.
path
.
join
(
target_dir
,
'task_data'
))
print
(
" done!"
)
examples/multi-task/download.py
浏览文件 @
82874d8f
# -*- coding: utf-8 -*-
from
__future__
import
print_function
import
os
import
requests
import
tarfile
import
shutil
from
tqdm
import
tqdm
import
sys
import
urllib
URLLIB
=
urllib
if
sys
.
version_info
>=
(
3
,
0
):
import
urllib.request
URLLIB
=
urllib
.
request
def
download
(
src
,
url
):
file_size
=
int
(
requests
.
head
(
url
).
headers
[
'Content-Length'
])
header
=
{
'user-agent'
:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'70.0.3538.67 Safari/537.36'
}
pbar
=
tqdm
(
total
=
file_size
)
resp
=
requests
.
get
(
url
,
headers
=
header
,
stream
=
True
)
with
open
(
src
,
'ab'
)
as
f
:
for
chunk
in
resp
.
iter_content
(
chunk_size
=
1024
):
if
chunk
:
f
.
write
(
chunk
)
pbar
.
update
(
1024
)
pbar
.
close
()
return
file_size
def
_reporthook
(
count
,
chunk_size
,
total_size
):
bytes_so_far
=
count
*
chunk_size
percent
=
float
(
bytes_so_far
)
/
float
(
total_size
)
if
percent
>
1
:
percent
=
1
print
(
'
\r
>> Downloading... {:.1%}'
.
format
(
percent
),
end
=
""
)
URLLIB
.
urlretrieve
(
url
,
src
,
reporthook
=
_reporthook
)
abs_path
=
os
.
path
.
abspath
(
__file__
)
download_url
=
"https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz"
...
...
@@ -42,4 +35,4 @@ shutil.rmtree(os.path.join(target_dir, 'data/mrda/'))
shutil
.
rmtree
(
os
.
path
.
join
(
target_dir
,
'data/multi-woz/'
))
shutil
.
rmtree
(
os
.
path
.
join
(
target_dir
,
'data/swda/'
))
shutil
.
rmtree
(
os
.
path
.
join
(
target_dir
,
'data/udc/'
))
print
(
" done!"
)
examples/predict/download.py
浏览文件 @
82874d8f
# -*- coding: utf-8 -*-
from
__future__
import
print_function
import
os
import
requests
import
tarfile
import
shutil
from
tqdm
import
tqdm
import
sys
import
urllib
URLLIB
=
urllib
if
sys
.
version_info
>=
(
3
,
0
):
import
urllib.request
URLLIB
=
urllib
.
request
def
download
(
src
,
url
):
file_size
=
int
(
requests
.
head
(
url
).
headers
[
'Content-Length'
])
header
=
{
'user-agent'
:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'70.0.3538.67 Safari/537.36'
}
pbar
=
tqdm
(
total
=
file_size
)
resp
=
requests
.
get
(
url
,
headers
=
header
,
stream
=
True
)
with
open
(
src
,
'ab'
)
as
f
:
for
chunk
in
resp
.
iter_content
(
chunk_size
=
1024
):
if
chunk
:
f
.
write
(
chunk
)
pbar
.
update
(
1024
)
pbar
.
close
()
return
file_size
def
_reporthook
(
count
,
chunk_size
,
total_size
):
bytes_so_far
=
count
*
chunk_size
percent
=
float
(
bytes_so_far
)
/
float
(
total_size
)
if
percent
>
1
:
percent
=
1
print
(
'
\r
>> Downloading... {:.1%}'
.
format
(
percent
),
end
=
""
)
URLLIB
.
urlretrieve
(
url
,
src
,
reporthook
=
_reporthook
)
abs_path
=
os
.
path
.
abspath
(
__file__
)
download_url
=
"https://ernie.bj.bcebos.com/task_data_zh.tgz"
...
...
@@ -46,5 +39,4 @@ for file in os.listdir(os.path.join(target_dir, 'task_data', 'chnsenticorp')):
shutil
.
move
(
os
.
path
.
join
(
target_dir
,
'task_data'
,
'chnsenticorp'
,
file
),
dst_dir
)
shutil
.
rmtree
(
os
.
path
.
join
(
target_dir
,
'task_data'
))
print
(
" done!"
)
examples/tagging/download.py
浏览文件 @
82874d8f
# -*- coding: utf-8 -*-
from
__future__
import
print_function
import
os
import
requests
import
tarfile
import
shutil
from
tqdm
import
tqdm
import
sys
import
urllib
URLLIB
=
urllib
if
sys
.
version_info
>=
(
3
,
0
):
import
urllib.request
URLLIB
=
urllib
.
request
def
download
(
src
,
url
):
file_size
=
int
(
requests
.
head
(
url
).
headers
[
'Content-Length'
])
header
=
{
'user-agent'
:
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'70.0.3538.67 Safari/537.36'
}
pbar
=
tqdm
(
total
=
file_size
)
resp
=
requests
.
get
(
url
,
headers
=
header
,
stream
=
True
)
with
open
(
src
,
'ab'
)
as
f
:
for
chunk
in
resp
.
iter_content
(
chunk_size
=
1024
):
if
chunk
:
f
.
write
(
chunk
)
pbar
.
update
(
1024
)
pbar
.
close
()
return
file_size
def
_reporthook
(
count
,
chunk_size
,
total_size
):
bytes_so_far
=
count
*
chunk_size
percent
=
float
(
bytes_so_far
)
/
float
(
total_size
)
if
percent
>
1
:
percent
=
1
print
(
'
\r
>> Downloading... {:.1%}'
.
format
(
percent
),
end
=
""
)
URLLIB
.
urlretrieve
(
url
,
src
,
reporthook
=
_reporthook
)
abs_path
=
os
.
path
.
abspath
(
__file__
)
download_url
=
"https://ernie.bj.bcebos.com/task_data_zh.tgz"
...
...
@@ -46,5 +39,4 @@ for file in os.listdir(os.path.join(target_dir, 'task_data', 'msra_ner')):
shutil
.
move
(
os
.
path
.
join
(
target_dir
,
'task_data'
,
'msra_ner'
,
file
),
dst_dir
)
shutil
.
rmtree
(
os
.
path
.
join
(
target_dir
,
'task_data'
))
print
(
" done!"
)
paddlepalm/_downloader.py
浏览文件 @
82874d8f
...
...
@@ -15,23 +15,18 @@
from
__future__
import
print_function
import
os
import
requests
import
tarfile
import
shutil
try
:
from
urllib.request
import
urlopen
# Python 3
except
ImportError
:
from
urllib2
import
urlopen
# Python 2
from
collections
import
OrderedDict
import
ssl
import
sys
import
urllib
URLLIB
=
urllib
if
sys
.
version_info
>=
(
3
,
0
):
import
urllib.request
URLLIB
=
urllib
.
request
__all__
=
[
"download"
,
"ls"
]
# for https
ssl
.
_create_default_https_context
=
ssl
.
_create_unverified_context
_pretrain
=
((
'RoBERTa-zh-base'
,
'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_ext_L-12_H-768_A-12.tar.gz'
),
(
'RoBERTa-zh-large'
,
'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.tar.gz'
),
(
'ERNIE-v2-en-base'
,
'https://ernie.bj.bcebos.com/ERNIE_Base_en_stable-2.0.0.tar.gz'
),
...
...
@@ -76,32 +71,15 @@ def _download(item, scope, path, silent=False, convert=False):
filename
=
data_dir
+
'/'
+
data_name
# print process
def
_chunk_report
(
bytes_so_far
,
total_size
):
def
_reporthook
(
count
,
chunk_size
,
total_size
):
bytes_so_far
=
count
*
chunk_size
percent
=
float
(
bytes_so_far
)
/
float
(
total_size
)
if
percent
>
1
:
percent
=
1
if
not
silent
:
print
(
'
\r
>> Downloading... {:.1%}'
.
format
(
percent
),
end
=
""
)
# copy to local
def
_chunk_read
(
response
,
url
,
chunk_size
=
16
*
1024
,
report_hook
=
None
):
total_size
=
int
(
requests
.
head
(
url
).
headers
[
'Content-Length'
])
bytes_so_far
=
0
with
open
(
"%s"
%
filename
,
"wb"
)
as
f
:
while
1
:
chunk
=
response
.
read
(
chunk_size
)
f
.
write
(
chunk
)
f
.
flush
()
bytes_so_far
+=
len
(
chunk
)
if
not
chunk
:
break
if
report_hook
:
report_hook
(
bytes_so_far
,
total_size
)
return
bytes_so_far
response
=
urlopen
(
data_url
)
_chunk_read
(
response
,
data_url
,
report_hook
=
_chunk_report
)
URLLIB
.
urlretrieve
(
data_url
,
filename
,
reporthook
=
_reporthook
)
if
not
silent
:
print
(
' done!'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录