Commit cd47f552
Authored May 20, 2021 by zhanghan17
Parent: 4f2bfe3c

    change the path of ernie-gram and fix mrc
Showing 20 changed files with 65 additions and 31 deletions (+65 −31).
ernie-gram/.meta/ernie-gram.jpeg                 +0 −0
ernie-gram/README.en.md                          +1 −1
ernie-gram/README.md                             +0 −0
ernie-gram/README.zh.md                          +1 −1
ernie-gram/__init__.py                           +0 −0
ernie-gram/finetune_classifier_distributed.py    +4 −2
ernie-gram/finetune_mrc.py                       +42 −14
ernie-gram/finetune_ner.py                       +4 −2
ernie-gram/mrc/__init__.py                       +0 −0
ernie-gram/mrc/mrc_metrics.py                    +4 −1
ernie-gram/mrc/mrc_reader.py                     +0 −0
ernie-gram/optimization.py                       +3 −1
ernie-gram/run_cls.sh                            +1 −1
ernie-gram/run_mrc.sh                            +1 −1
ernie-gram/run_ner.sh                            +1 −1
ernie-gram/task_configs/cmrc_conf                +0 −0
ernie-gram/task_configs/msra_ner_conf            +0 −0
ernie-gram/task_configs/xnli_conf                +0 −0
ernie-gram/utils.py                              +0 −0
ernie/file_utils.py                              +3 −6
ernie_gram/.meta/ernie-gram.jpeg → ernie-gram/.meta/ernie-gram.jpeg
File moved.
ernie_gram/README.en.md → ernie-gram/README.en.md

@@ -36,7 +36,7 @@ wget https://ernie-github.cdn.bcebos.com/data-xnli.tar.gz
 tar xf data-xnli.tar.gz
 cd ..
 #demo for NLI task
-sh ernie_gram/run_cls.sh ernie_gram/task_configs/xnli_conf
+sh run_cls.sh task_configs/xnli_conf
 ```
 ### Setup
ernie_gram/README.md → ernie-gram/README.md
File moved.
ernie_gram/README.zh.md → ernie-gram/README.zh.md

@@ -38,7 +38,7 @@ wget https://ernie-github.cdn.bcebos.com/data-xnli.tar.gz
 tar xf data-xnli.tar.gz
 cd ..
 #demo for NLI task
-sh ernie_gram/run_cls.sh ernie_gram/task_configs/xnli_conf
+sh run_cls.sh task_configs/xnli_conf
 ```
ernie_gram/__init__.py → ernie-gram/__init__.py
File moved.
ernie_gram/finetune_classifier_distributed.py → ernie-gram/finetune_classifier_distributed.py

@@ -23,6 +23,8 @@ from functools import reduce, partial
 import numpy as np
 import logging
 #from visualdl import LogWriter
+import sys
+sys.path.append("../")
 
 from pathlib import Path
 import paddle as P
@@ -32,8 +34,8 @@ import propeller.paddle as propeller
 #from model.bert import BertConfig, BertModelLayer
 from ernie.modeling_ernie import ErnieModel, ErnieModelForSequenceClassification
 from ernie.tokenizing_ernie import ErnieTokenizer, ErnieTinyTokenizer
-from ernie_gram.optimization import AdamW
-from ernie_gram.utils import create_if_not_exists, get_warmup_and_linear_decay
+from optimization import AdamW
+from utils import create_if_not_exists, get_warmup_and_linear_decay
 
 log.setLevel(logging.DEBUG)
 logging.getLogger().setLevel(logging.DEBUG)
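Note: renaming the directory from ernie_gram to ernie-gram makes it unimportable as a Python package (a dash is not a valid identifier), which is why the scripts in this commit drop the `ernie_gram.` import prefix and append the parent directory to `sys.path` instead. A minimal, self-contained sketch of the resulting import mechanics; the throwaway files and module bodies below are illustrative, not from the repo:

```python
# Sketch only: simulate the post-rename layout in a temp dir and show why
# `from utils import ...` works once the script runs from inside ernie-gram/
# with the parent dir appended to sys.path. Nothing here touches the real repo.
import os
import sys
import tempfile
from pathlib import Path

repo = Path(tempfile.mkdtemp())
(repo / "ernie-gram").mkdir()
(repo / "ernie").mkdir()
(repo / "ernie-gram" / "utils.py").write_text(
    "def create_if_not_exists(p):\n    return p\n")
(repo / "ernie" / "__init__.py").write_text("")

os.chdir(repo / "ernie-gram")  # the run_*.sh scripts now assume this cwd
sys.path.insert(0, ".")        # current dir: resolves `from utils import ...`
sys.path.append("../")         # parent dir: resolves `import ernie`

# `import ernie_gram` would fail here: "ernie-gram" is not a valid module name.
from utils import create_if_not_exists
import ernie

print(create_if_not_exists("ok"), ernie.__name__)  # -> ok ernie
```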
ernie_gram/finetune_mrc.py → ernie-gram/finetune_mrc.py

@@ -30,6 +30,8 @@ import pickle
 import argparse
 from functools import partial
 from io import open
+import sys
+sys.path.append("../")
 
 import numpy as np
 import logging
@@ -38,22 +40,23 @@ import paddle as P
 from propeller import log
 import propeller.paddle as propeller
-from ernie_gram.optimization import AdamW
+from optimization import AdamW
 from ernie.modeling_ernie import ErnieModel, ErnieModelForQuestionAnswering
 from ernie.tokenizing_ernie import ErnieTokenizer, ErnieTinyTokenizer
 #from ernie.optimization import AdamW, LinearDecay
-from ernie_gram.mrc import mrc_reader
-from ernie_gram.mrc import mrc_metrics
-from ernie_gram.utils import create_if_not_exists, get_warmup_and_linear_decay
+from mrc import mrc_reader
+from mrc import mrc_metrics
+from utils import create_if_not_exists, get_warmup_and_linear_decay
 
 log.setLevel(logging.DEBUG)
 logging.getLogger().setLevel(logging.DEBUG)
 
-def evaluate(model, ds, all_examples, all_features, tokenizer, args):
-    dev_file = json.loads(open(args.dev_file, encoding='utf8').read())
+def evaluate(model, ds, all_examples, all_features, tokenizer, args, is_test=False):
+    dev_file = args.dev_file if not is_test else args.test_file
+    dev_file = json.loads(open(dev_file, encoding='utf8').read())
     with P.no_grad():
         log.debug('start eval')
         model.eval()
@@ -84,8 +87,8 @@ def evaluate(model, ds, all_examples, all_features, tokenizer, args):
     return f1, em
 
 def train(model, train_dataset, dev_dataset, dev_examples, dev_features,
-          tokenizer, args):
+          tokenizer, args, test_dataset=None, test_examples=None, test_features=None, do_test=False):
     model = P.DataParallel(model)
     max_steps = args.max_steps
@@ -142,10 +145,14 @@ def train(model, train_dataset, dev_dataset, dev_examples, dev_features,
             log.debug(msg)
         if env.dev_id == 0 and step % 100 == 0 and step:
-            print(step)
             f1, em = evaluate(model, dev_dataset, dev_examples,
                               dev_features, tokenizer, args)
-            log.debug('[step %d] eval result: f1 %.5f em %.5f' %
+            log.debug('[step %d] dev eval result: f1 %.5f em %.5f' %
                       (step, f1, em))
+            if do_test:
+                f1, em = evaluate(model, test_dataset, test_examples,
+                                  test_features, tokenizer, args, True)
+                log.debug('[step %d] test eval result: f1 %.5f em %.5f' %
+                          (step, f1, em))
         if env.dev_id == 0 and args.save_dir is not None:
             P.save(model.state_dict(), args.save_dir / 'ckpt.bin')
@@ -177,7 +184,12 @@ if __name__ == "__main__":
         type=str,
         required=True,
         help='data directory includes train / develop data')
-    parser.add_argument('--warmup_proportion', type=float, default=0.0)
+    parser.add_argument(
+        '--test_file',
+        type=str,
+        default=None,
+        help='data directory includes train / develop data')
+    parser.add_argument('--warmup_proportion', type=float, default=0.1)
     parser.add_argument('--lr', type=float, default=3e-5, help='learning rate')
     parser.add_argument(
         '--save_dir', type=Path, required=True, help='model output directory')
@@ -216,6 +228,10 @@ if __name__ == "__main__":
     dev_examples = mrc_reader.read_files(args.dev_file, is_training=False)
     dev_features = mrc_reader.convert_example_to_features(
         dev_examples, args.max_seqlen, tokenizer, is_training=False)
+    if args.test_file:
+        test_examples = mrc_reader.read_files(args.test_file, is_training=False)
+        test_features = mrc_reader.convert_example_to_features(
+            test_examples, args.max_seqlen, tokenizer, is_training=False)
 
     log.info('train examples: %d, features: %d' %
             (len(train_examples), len(train_features)))
@@ -235,16 +251,28 @@ if __name__ == "__main__":
     dev_dataset = propeller.data.Dataset.from_list(dev_features).map(
         map_fn).padded_batch(args.bsz)
 
     model = ErnieModelForQuestionAnswering.from_pretrained(
         args.from_pretrained, name='')
 
-    train(model, train_dataset, dev_dataset, dev_examples, dev_features,
-          tokenizer, args)
+    if args.test_file:
+        test_dataset = propeller.data.Dataset.from_list(test_features).map(
+            map_fn).padded_batch(args.bsz)
+        train(model, train_dataset, dev_dataset, dev_examples, dev_features,
+              tokenizer, args, test_dataset, test_examples, test_features, True)
+    else:
+        train(model, train_dataset, dev_dataset, dev_examples, dev_features,
+              tokenizer, args)
+
     if env.dev_id == 0:
         f1, em = evaluate(model, dev_dataset, dev_examples, dev_features,
                           tokenizer, args)
-        log.debug('final eval result: f1 %.5f em %.5f' % (f1, em))
+        log.debug('final dev eval result: f1 %.5f em %.5f' % (f1, em))
+        if args.test_file:
+            f1, em = evaluate(model, test_dataset, test_examples,
+                              test_features, tokenizer, args, True)
+            log.debug('final test eval result: f1 %.5f em %.5f' % (f1, em))
     if env.dev_id == 0 and args.save_dir is not None:
         P.save(model.state_dict(), args.save_dir / 'ckpt.bin')
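Note: the bulk of this file's change threads an optional test set through the existing dev-evaluation path: `evaluate()` picks the input file via `is_test`, and `train()` gains `test_*` arguments plus a `do_test` flag. A toy, runnable distillation of that control flow (placeholder metrics and tiny stand-in files; none of the model, dataset, or propeller machinery is real here):

```python
# Toy distillation of the new dev/test control flow in finetune_mrc.py.
# evaluate() selects dev or test input via is_test, mirroring the diff;
# the constant metrics and tiny JSON files are placeholders.
import json, os, tempfile

def _tiny_json_file():
    fd, path = tempfile.mkstemp(suffix=".json")
    with os.fdopen(fd, "w", encoding="utf8") as f:
        json.dump({"data": []}, f)
    return path

def evaluate(args, is_test=False):
    path = args["dev_file"] if not is_test else args["test_file"]
    data = json.loads(open(path, encoding="utf8").read())  # same read pattern as the diff
    return 0.5, 0.5  # placeholder (f1, em)

def train(args, do_test=False):
    for step in range(1, args["max_steps"] + 1):
        if step % 100 == 0:
            f1, em = evaluate(args)
            print("[step %d] dev eval result: f1 %.5f em %.5f" % (step, f1, em))
            if do_test:  # extra evaluation only when a test set was supplied
                f1, em = evaluate(args, is_test=True)
                print("[step %d] test eval result: f1 %.5f em %.5f" % (step, f1, em))

args = {"dev_file": _tiny_json_file(), "test_file": _tiny_json_file(), "max_steps": 200}
train(args, do_test=args["test_file"] is not None)
```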
ernie_gram/finetune_ner.py → ernie-gram/finetune_ner.py

@@ -29,6 +29,8 @@ import numpy as np
 import multiprocessing
 import pickle
 import logging
+import sys
+sys.path.append("../")
 
 from sklearn.metrics import f1_score
 import paddle as P
@@ -39,10 +41,10 @@ import propeller.paddle as propeller
 log.setLevel(logging.DEBUG)
 logging.getLogger().setLevel(logging.DEBUG)
 
-from ernie_gram.utils import create_if_not_exists, get_warmup_and_linear_decay
+from utils import create_if_not_exists, get_warmup_and_linear_decay
 from ernie.modeling_ernie import ErnieModel, ErnieModelForSequenceClassification, ErnieModelForTokenClassification
 from ernie.tokenizing_ernie import ErnieTokenizer
-from ernie_gram.optimization import AdamW
+from optimization import AdamW
 
 parser = propeller.ArgumentParser('NER model with ERNIE')
 parser.add_argument('--max_seqlen', type=int, default=256)
ernie_gram/mrc/__init__.py → ernie-gram/mrc/__init__.py
File moved.
ernie_gram/mrc/mrc_metrics.py → ernie-gram/mrc/mrc_metrics.py

@@ -349,7 +349,10 @@ def make_results(vocab, all_examples, all_features, all_results, n_best_size,
     unique_id_to_result = {}
     for result in all_results:
-        unique_id_to_result[result.unique_id] = result
+        try:
+            unique_id_to_result[result.unique_id] = result
+        except:
+            continue
 
     _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
         "PrelimPrediction", [
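Note: the fix above wraps the dict assignment in a bare `except: continue`, which skips malformed results but also swallows any other error. A hedged alternative with the same skip semantics but a narrower net (the `RawResult` tuple and input list below are synthetic, for illustration only):

```python
# Same skip-bad-entries behavior as the diff, but catching only the failure
# a malformed result can actually raise here; the input list is synthetic.
import collections

RawResult = collections.namedtuple("RawResult", ["unique_id"])

all_results = [RawResult(11), None, RawResult(22)]  # None stands in for a bad entry
unique_id_to_result = {}
for result in all_results:
    try:
        unique_id_to_result[result.unique_id] = result
    except AttributeError:  # result lacks unique_id (e.g. None)
        continue
print(sorted(unique_id_to_result))  # [11, 22]
```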
ernie_gram/mrc/mrc_reader.py → ernie-gram/mrc/mrc_reader.py
File moved.
ernie_gram/optimization.py → ernie-gram/optimization.py

@@ -25,11 +25,13 @@ from paddle.fluid.framework import Variable, default_main_program
 import numpy as np
 import paddle as P
 import paddle.distributed.fleet as fleet
+import sys
+sys.path.append("../")
 from propeller.paddle.train.hooks import RunHook
 import paddle.fluid as F
 
 log = logging.getLogger(__name__)
 
-from ernie_gram.utils import create_if_not_exists, get_warmup_and_linear_decay
+from utils import create_if_not_exists, get_warmup_and_linear_decay
 
 class AdamW(P.optimizer.AdamW):
     """AdamW object for dygraph"""
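Note: optimization.py keeps importing `get_warmup_and_linear_decay` from the local utils module; together with the `--warmup_proportion` default moving from 0.0 to 0.1 in finetune_mrc.py, it drives a linear-warmup-then-linear-decay learning-rate schedule. The closure below is a guess at the shape of that schedule written from the call sites, not the repo's actual implementation:

```python
# Assumed shape of a warmup-then-linear-decay LR multiplier; illustrative only.
def get_warmup_and_linear_decay(max_steps, warmup_steps):
    def multiplier(step):
        if warmup_steps and step < warmup_steps:
            return step / warmup_steps  # linear ramp 0 -> 1
        # linear decay 1 -> 0 over the remaining steps
        return max(0.0, (max_steps - step) / max(1, max_steps - warmup_steps))
    return multiplier

max_steps, warmup_proportion = 1000, 0.1   # 0.1 matches the new default
sched = get_warmup_and_linear_decay(max_steps, int(max_steps * warmup_proportion))
print([round(sched(s), 2) for s in (0, 50, 100, 550, 1000)])  # [0.0, 0.5, 1.0, 0.5, 0.0]
```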
ernie_gram/run_cls.sh → ernie-gram/run_cls.sh

 source $1
-python3 -m paddle.distributed.launch ./ernie_gram/finetune_classifier_distributed.py \
+python3 -m paddle.distributed.launch ./finetune_classifier_distributed.py \
     --data_dir $data_dir \
     --max_steps $max_steps \
     --bsz $bsz \
ernie_gram/run_mrc.sh → ernie-gram/run_mrc.sh

 source $1
 export CUDA_VISIBLE_DEVICES=0
-python3 -m paddle.distributed.launch ./ernie_gram/finetune_mrc.py \
+python3 -m paddle.distributed.launch ./finetune_mrc.py \
     --train_file $train_file \
     --dev_file $dev_file \
     --max_steps $max_steps \
ernie_gram/run_ner.sh → ernie-gram/run_ner.sh

 source $1
-python3 -m paddle.distributed.launch ./ernie_gram/finetune_ner.py \
+python3 -m paddle.distributed.launch ./finetune_ner.py \
     --data_dir $data_dir \
     --max_steps $max_steps \
     --epoch $epoch \
ernie_gram/task_configs/cmrc_conf → ernie-gram/task_configs/cmrc_conf
File moved.
ernie_gram/task_configs/msra_ner_conf → ernie-gram/task_configs/msra_ner_conf
File moved.
ernie_gram/task_configs/xnli_conf → ernie-gram/task_configs/xnli_conf
File moved.
ernie_gram/utils.py → ernie-gram/utils.py
File moved.
ernie/file_utils.py

@@ -68,12 +68,9 @@ def _fetch_from_remote(url,
             f = done_file.open('wb')
             f.close()
         else:
-            while True:
-                if done_file.exists():
-                    break
-                else:
-                    time.sleep(1)
+            while not done_file.exists():
+                time.sleep(1)
 
     log.debug('%s cached in %s' % (url, cached_dir))
     return cached_dir_model
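Note: the file_utils.py change collapses the `while True / break / else` ladder into a single-condition poll; the behavior is unchanged (block until another worker creates the done-file marker). A standalone sketch of the pattern; the timeout guard is this example's addition, not part of the commit:

```python
# Poll-until-exists, as in the rewritten loop; the timeout guard is an
# assumption added here so a missing done-file cannot hang forever.
import os
import tempfile
import time
from pathlib import Path

def wait_for_done_file(done_file: Path, timeout_s: float = 60.0) -> None:
    deadline = time.monotonic() + timeout_s
    while not done_file.exists():       # same condition as the new code
        if time.monotonic() > deadline:
            raise TimeoutError("gave up waiting for %s" % done_file)
        time.sleep(1)                   # one-second poll, as in the original

# Example: returns immediately once the marker exists.
fd, path = tempfile.mkstemp()
os.close(fd)
marker = Path(path)
wait_for_done_file(marker)
marker.unlink()
```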