PaddlePaddle / ERNIE
Commit fdb59529 (unverified)
Authored May 24, 2021 by nbcc; committed by GitHub on May 24, 2021

Merge pull request #673 from Meiyim/multihead-download

Multihead download

Parents: 689b9beb, a898bb29

Showing 5 changed files with 38 additions and 33 deletions (+38 -33)

ernie/__init__.py           +10  -0
ernie/file_utils.py         +24  -30
ernie/modeling_ernie.py      +1  -1
ernie/tokenizing_ernie.py    +1  -1
requirements.txt             +2  -1
ernie/__init__.py (view file @ fdb59529)

@@ -17,6 +17,9 @@ from __future__ import absolute_import
 from __future__ import print_function
 from __future__ import unicode_literals
+import sys
+import logging
 import paddle
 
 if paddle.__version__ != '0.0.0' and paddle.__version__ < '2.0.0':
     raise RuntimeError('propeller 0.2 requires paddle 2.0+, got %s' %
@@ -28,3 +31,10 @@ from ernie.modeling_ernie import (
     ErnieModelForQuestionAnswering, ErnieModelForPretraining)
 from ernie.tokenizing_ernie import ErnieTokenizer, ErnieTinyTokenizer
+
+log = logging.getLogger(__name__)
+formatter = logging.Formatter(fmt='[%(levelname)s] %(asctime)s [%(filename)12s:%(lineno)5d]: %(message)s')
+stream_hdl = logging.StreamHandler(stream=sys.stderr)
+stream_hdl.setFormatter(formatter)
+log.addHandler(stream_hdl)
+log.propagate = False
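The new lines in ernie/__init__.py attach a stderr handler with the format string above to the package logger and switch off propagation to the root logger. Below is a minimal sketch of the same wiring and of how a caller might surface the package's debug output; the child logger name 'ernie.file_utils' assumes that module also calls logging.getLogger(__name__), which this diff does not show.

    import logging
    import sys

    # Same handler wiring as the lines added to ernie/__init__.py (sketch).
    log = logging.getLogger('ernie')
    formatter = logging.Formatter(
        fmt='[%(levelname)s] %(asctime)s [%(filename)12s:%(lineno)5d]: %(message)s')
    stream_hdl = logging.StreamHandler(stream=sys.stderr)
    stream_hdl.setFormatter(formatter)
    log.addHandler(stream_hdl)
    log.propagate = False          # records stop here instead of reaching the root logger

    # Child loggers inherit the effective level from 'ernie', so raising it
    # makes debug messages from submodules visible on stderr.
    log.setLevel(logging.DEBUG)
    logging.getLogger('ernie.file_utils').debug('visible on stderr')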
ernie/file_utils.py (view file @ fdb59529)

@@ -21,7 +21,6 @@ import logging
 from tqdm import tqdm
 from pathlib import Path
 import six
+import paddle as P
 import time
 if six.PY2:
     from pathlib2 import Path
@@ -35,8 +34,6 @@ def _fetch_from_remote(url,
                        force_download=False,
                        cached_dir='~/.paddle-ernie-cache'):
     import hashlib, tempfile, requests, tarfile
+    env = P.distributed.ParallelEnv()
     sig = hashlib.md5(url.encode('utf8')).hexdigest()
     cached_dir = Path(cached_dir).expanduser()
     try:
@@ -44,34 +41,31 @@ def _fetch_from_remote(url,
     except OSError:
         pass
     cached_dir_model = cached_dir / sig
-    donefile = cached_dir_model / 'done'
-    if (not force_download) and donefile.exists():
-        log.debug('%s cached in %s' % (url, cached_dir_model))
-        return cached_dir_model
-    cached_dir_model.mkdir(exist_ok=True)
-    tmpfile = cached_dir_model / 'tmp'
-    with tmpfile.open('wb') as f:
-        r = requests.get(url, stream=True)
-        total_len = int(r.headers.get('content-length'))
-        for chunk in tqdm(
-                r.iter_content(chunk_size=1024),
-                total=total_len // 1024,
-                desc='downloading %s' % url,
-                unit='KB'):
-            if chunk:
-                f.write(chunk)
-                f.flush()
-        log.debug('extacting... to %s' % tmpfile)
-        with tarfile.open(tmpfile.as_posix()) as tf:
-            tf.extractall(path=str(cached_dir_model))
-        donefile.touch()
-    os.remove(tmpfile.as_posix())
+    done_file = cached_dir_model / 'fetch_done'
+    from filelock import FileLock
+    if force_download or not done_file.exists():
+        with FileLock(str(cached_dir_model) + '.lock'):
+            if env.dev_id == 0:
+                cached_dir_model.mkdir()
+                tmpfile = cached_dir_model / 'tmp'
+                with tmpfile.open('wb') as f:
+                    #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz'
+                    r = requests.get(url, stream=True)
+                    total_len = int(r.headers.get('content-length'))
+                    for chunk in tqdm(
+                            r.iter_content(chunk_size=1024),
+                            total=total_len // 1024,
+                            desc='downloading %s' % url,
+                            unit='KB'):
+                        if chunk:
+                            f.write(chunk)
+                            f.flush()
+                    log.debug('extacting... to %s' % tmpfile)
+                    with tarfile.open(tmpfile.as_posix()) as tf:
+                        tf.extractall(path=cached_dir_model.as_posix())
+                    os.remove(tmpfile.as_posix())
+                    f = done_file.open('wb')
+                    f.close()
+            else:
+                while not done_file.exists():
+                    time.sleep(1)
+    log.debug('%s cached in %s' % (url, cached_dir))
     return cached_dir_model
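Taken together, these changes let several distributed workers hit the weight cache at the same time: a filelock.FileLock serializes access to the per-model cache directory, only the worker with dev_id 0 downloads and extracts, and it then creates the fetch_done marker that the other workers poll for. Below is a simplified, self-contained sketch of that coordination pattern, assuming one worker process per device; fetch_payload and worker_rank are illustrative stand-ins, not part of the repo's API.

    import time
    from pathlib import Path

    from filelock import FileLock   # dependency added to requirements.txt in this commit


    def fetch_once(cache_dir, worker_rank, fetch_payload):
        """One worker downloads; the rest wait for a 'fetch_done' marker."""
        cache_dir = Path(cache_dir)
        done_file = cache_dir / 'fetch_done'      # marker name used in the diff
        if worker_rank == 0:
            # Serialize concurrent invocations on the same machine.
            with FileLock(str(cache_dir) + '.lock'):
                if not done_file.exists():
                    cache_dir.mkdir(exist_ok=True)
                    fetch_payload(cache_dir)      # hypothetical download helper
                    done_file.touch()             # publish completion
        else:
            while not done_file.exists():         # non-zero ranks just poll
                time.sleep(1)
        return cache_dir

Unlike the diff, this sketch keeps the waiting loop outside the lock, so a waiting worker can never hold the lock while rank 0 still needs it.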
ernie/modeling_ernie.py (view file @ fdb59529)

@@ -272,7 +272,7 @@ class PretrainedModel(object):
         pretrain_dir = Path(pretrain_dir_or_url)
         if not pretrain_dir.exists():
-            raise ValueError('pretrain dir not found: %s' % pretrain_dir)
+            raise ValueError('pretrain dir not found: %s, optional: %s' % (pretrain_dir, cls.resource_map.keys()))
         state_dict_path = pretrain_dir / 'saved_weights.pdparams'
         config_path = pretrain_dir / 'ernie_config.json'
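With this one-line change, a pretrain_dir_or_url that is neither a key in cls.resource_map nor an existing local directory fails with a message that also lists the valid pretrained names. A hedged usage sketch; ErnieModel.from_pretrained and the deliberately bogus path are illustrative, and the message wording comes from the diff.

    from ernie.modeling_ernie import ErnieModel

    try:
        model = ErnieModel.from_pretrained('./no/such/dir')
    except ValueError as err:
        # Roughly: "pretrain dir not found: no/such/dir, optional: dict_keys([...])"
        print(err)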
ernie/tokenizing_ernie.py (view file @ fdb59529)

@@ -107,7 +107,7 @@ class ErnieTokenizer(object):
                 (pretrain_dir_or_url, repr(cls.resource_map)))
         pretrain_dir = Path(pretrain_dir_or_url)
         if not pretrain_dir.exists():
-            raise ValueError('pretrain dir not found: %s' % pretrain_dir)
+            raise ValueError('pretrain dir not found: %s, optional: %s' % (pretrain_dir, cls.resource_map.keys()))
         vocab_path = pretrain_dir / 'vocab.txt'
         if not vocab_path.exists():
             raise ValueError('no vocab file in pretrain dir: %s' %
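The tokenizer-side change mirrors the model-side one, so a bad directory passed to ErnieTokenizer.from_pretrained now reports the known tokenizer names as well. A matching sketch, under the same assumptions as the previous example.

    from ernie.tokenizing_ernie import ErnieTokenizer

    try:
        tokenizer = ErnieTokenizer.from_pretrained('./no/such/dir')
    except ValueError as err:
        print(err)   # "pretrain dir not found: ..., optional: dict_keys([...])"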
requirements.txt (view file @ fdb59529)

 numpy
 pyzmq==18.0.2
-six==1.11.0
+six>=1.11.0
 sklearn==0.0
 sentencepiece==0.1.8
 jieba==0.39
 visualdl>=2.0.0b7
 pathlib2>=2.3.2
+filelock>=3.0.0
 tqdm>=4.32.2