Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
ERNIE
提交
e5d95cb9
E
ERNIE
项目概览
PaddlePaddle
/
ERNIE
大约 1 年 前同步成功
通知
109
Star
5997
Fork
1270
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
29
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
E
ERNIE
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
29
Issue
29
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e5d95cb9
编写于
11月 04, 2019
作者:
C
chenxuyi
提交者:
Meiyim
1月 16, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
1.6: DataLoader, ernie_encode + santiy check
上级
a3e96ed4
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
78 addition
and
95 deletion
+78
-95
ernie/ernie_encoder.py
ernie/ernie_encoder.py
+16
-13
ernie/finetune/classifier.py
ernie/finetune/classifier.py
+14
-26
ernie/finetune/mrc.py
ernie/finetune/mrc.py
+12
-14
ernie/finetune/sequence_label.py
ernie/finetune/sequence_label.py
+11
-14
ernie/run_classifier.py
ernie/run_classifier.py
+4
-4
ernie/run_mrc.py
ernie/run_mrc.py
+5
-5
ernie/run_sequence_labeling.py
ernie/run_sequence_labeling.py
+3
-3
ernie/train.py
ernie/train.py
+13
-16
未找到文件。
ernie/ernie_encoder.py
浏览文件 @
e5d95cb9
...
...
@@ -52,18 +52,16 @@ run_type_g.add_arg("use_cuda", bool, True, "If set, use G
def
create_model
(
args
,
pyreader_name
,
ernie_config
):
pyreader
=
fluid
.
layers
.
py_reader
(
capacity
=
50
,
shapes
=
[[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
]],
dtypes
=
[
'int64'
,
'int64'
,
'int64'
,
'int64'
,
'float'
,
'int64'
],
lod_levels
=
[
0
,
0
,
0
,
0
,
0
,
0
],
name
=
pyreader_name
,
use_double_buffer
=
True
)
(
src_ids
,
sent_ids
,
pos_ids
,
task_ids
,
input_mask
,
seq_lens
)
=
fluid
.
layers
.
read_file
(
pyreader
)
src_ids
=
fluid
.
layers
.
data
(
name
=
'1'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
sent_ids
=
fluid
.
layers
.
data
(
name
=
'2'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
pos_ids
=
fluid
.
layers
.
data
(
name
=
'3'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
task_ids
=
fluid
.
layers
.
data
(
name
=
'4'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
input_mask
=
fluid
.
layers
.
data
(
name
=
'5'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'float32'
)
seq_lens
=
fluid
.
layers
.
data
(
name
=
'8'
,
shape
=
[
-
1
],
dtype
=
'int64'
)
pyreader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
[
src_ids
,
sent_ids
,
pos_ids
,
task_ids
,
input_mask
,
seq_lens
],
capacity
=
70
,
iterable
=
False
)
ernie
=
ErnieModel
(
src_ids
=
src_ids
,
...
...
@@ -143,7 +141,7 @@ def main(args):
exec_strategy
=
fluid
.
ExecutionStrategy
()
exec_strategy
.
num_threads
=
dev_count
pyreader
.
decorate_tensor_provide
r
(
data_generator
)
pyreader
.
set_batch_generato
r
(
data_generator
)
pyreader
.
start
()
total_cls_emb
=
[]
...
...
@@ -167,6 +165,11 @@ def main(args):
total_cls_emb
=
np
.
concatenate
(
total_cls_emb
)
total_top_layer_emb
=
np
.
concatenate
(
total_top_layer_emb
)
if
not
os
.
path
.
exists
(
args
.
output_dir
):
os
.
mkdir
(
args
.
output_dir
)
else
:
raise
RuntimeError
(
'output dir exists: %s'
%
args
.
output_dir
)
with
open
(
os
.
path
.
join
(
args
.
output_dir
,
"cls_emb.npy"
),
"wb"
)
as
cls_emb_file
:
np
.
save
(
cls_emb_file
,
total_cls_emb
)
...
...
ernie/finetune/classifier.py
浏览文件 @
e5d95cb9
...
...
@@ -39,34 +39,22 @@ def create_model(args,
is_classify
=
False
,
is_regression
=
False
,
ernie_version
=
"1.0"
):
src_ids
=
fluid
.
layers
.
data
(
name
=
'1'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
sent_ids
=
fluid
.
layers
.
data
(
name
=
'2'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
pos_ids
=
fluid
.
layers
.
data
(
name
=
'3'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
task_ids
=
fluid
.
layers
.
data
(
name
=
'4'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
input_mask
=
fluid
.
layers
.
data
(
name
=
'5'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'float32'
)
qids
=
fluid
.
layers
.
data
(
name
=
'7'
,
shape
=
[
-
1
,
1
],
dtype
=
'int64'
)
if
is_classify
:
pyreader
=
fluid
.
layers
.
py_reader
(
capacity
=
50
,
shapes
=
[[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
1
],
[
-
1
,
1
]],
dtypes
=
[
'int64'
,
'int64'
,
'int64'
,
'int64'
,
'float32'
,
'int64'
,
'int64'
],
lod_levels
=
[
0
,
0
,
0
,
0
,
0
,
0
,
0
],
name
=
task_name
+
"_"
+
pyreader_name
,
use_double_buffer
=
True
)
labels
=
fluid
.
layers
.
data
(
name
=
'6'
,
shape
=
[
-
1
,
1
],
dtype
=
'int64'
)
elif
is_regression
:
pyreader
=
fluid
.
layers
.
py_reader
(
capacity
=
50
,
shapes
=
[[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
1
],
[
-
1
,
1
]],
dtypes
=
[
'int64'
,
'int64'
,
'int64'
,
'int64'
,
'float32'
,
'float32'
,
'int64'
],
lod_levels
=
[
0
,
0
,
0
,
0
,
0
,
0
,
0
],
name
=
task_name
+
"_"
+
pyreader_name
,
use_double_buffer
=
True
)
(
src_ids
,
sent_ids
,
pos_ids
,
task_ids
,
input_mask
,
labels
,
qids
)
=
fluid
.
layers
.
read_file
(
pyreader
)
labels
=
fluid
.
layers
.
data
(
name
=
'6'
,
shape
=
[
-
1
,
1
],
dtype
=
'float32'
)
pyreader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
[
src_ids
,
sent_ids
,
pos_ids
,
task_ids
,
input_mask
,
qids
],
capacity
=
70
,
iterable
=
False
)
ernie
=
ErnieModel
(
src_ids
=
src_ids
,
...
...
ernie/finetune/mrc.py
浏览文件 @
e5d95cb9
...
...
@@ -40,20 +40,18 @@ import tokenization
log
=
logging
.
getLogger
(
__name__
)
def
create_model
(
args
,
pyreader_name
,
ernie_config
,
is_training
):
pyreader
=
fluid
.
layers
.
py_reader
(
capacity
=
50
,
shapes
=
[[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
1
],
[
-
1
,
1
],
[
-
1
,
1
]],
dtypes
=
[
'int64'
,
'int64'
,
'int64'
,
'int64'
,
'float32'
,
'int64'
,
'int64'
,
'int64'
],
lod_levels
=
[
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
],
name
=
pyreader_name
,
use_double_buffer
=
True
)
(
src_ids
,
sent_ids
,
pos_ids
,
task_ids
,
input_mask
,
start_positions
,
end_positions
,
unique_id
)
=
fluid
.
layers
.
read_file
(
pyreader
)
src_ids
=
fluid
.
layers
.
data
(
name
=
'1'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
pos_ids
=
fluid
.
layers
.
data
(
name
=
'2'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
sent_ids
=
fluid
.
layers
.
data
(
name
=
'3'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
task_ids
=
fluid
.
layers
.
data
(
name
=
'4'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
input_mask
=
fluid
.
layers
.
data
(
name
=
'5'
,
shape
=
[
-
1
,
1
],
dtype
=
'float32'
)
start_positions
=
fluid
.
layers
.
data
(
name
=
'6'
,
shape
=
[
-
1
,
1
],
dtype
=
'int64'
)
end_positions
=
fluid
.
layers
.
data
(
name
=
'7'
,
shape
=
[
-
1
,
1
],
dtype
=
'int64'
)
unique_id
=
fluid
.
layers
.
data
(
name
=
'8'
,
shape
=
[
-
1
,
1
],
dtype
=
'int64'
)
pyreader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
[
src_ids
,
sent_ids
,
pos_ids
,
task_ids
,
input_mask
,
start_positions
,
end_positions
,
unique_id
],
capacity
=
50
,
iterable
=
False
)
ernie
=
ErnieModel
(
src_ids
=
src_ids
,
...
...
ernie/finetune/sequence_label.py
浏览文件 @
e5d95cb9
...
...
@@ -36,20 +36,17 @@ from model.ernie import ErnieModel
log
=
logging
.
getLogger
(
__name__
)
def
create_model
(
args
,
pyreader_name
,
ernie_config
,
is_prediction
=
False
):
pyreader
=
fluid
.
layers
.
py_reader
(
capacity
=
50
,
shapes
=
[[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
]],
dtypes
=
[
'int64'
,
'int64'
,
'int64'
,
'int64'
,
'float32'
,
'int64'
,
'int64'
],
lod_levels
=
[
0
,
0
,
0
,
0
,
0
,
0
,
0
],
name
=
pyreader_name
,
use_double_buffer
=
True
)
(
src_ids
,
sent_ids
,
pos_ids
,
task_ids
,
input_mask
,
labels
,
seq_lens
)
=
fluid
.
layers
.
read_file
(
pyreader
)
src_ids
=
fluid
.
layers
.
data
(
name
=
'1'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
sent_ids
=
fluid
.
layers
.
data
(
name
=
'2'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
pos_ids
=
fluid
.
layers
.
data
(
name
=
'3'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
task_ids
=
fluid
.
layers
.
data
(
name
=
'4'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
input_mask
=
fluid
.
layers
.
data
(
name
=
'5'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'float32'
)
labels
=
fluid
.
layers
.
data
(
name
=
'7'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
seq_lens
=
fluid
.
layers
.
data
(
name
=
'8'
,
shape
=
[
-
1
],
dtype
=
'int64'
)
pyreader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
[
src_ids
,
sent_ids
,
pos_ids
,
task_ids
,
input_mask
,
labels
,
seq_lens
],
capacity
=
70
,
iterable
=
False
)
ernie
=
ErnieModel
(
src_ids
=
src_ids
,
...
...
ernie/run_classifier.py
浏览文件 @
e5d95cb9
...
...
@@ -228,7 +228,7 @@ def main(args):
num_trainers
=
nccl2_num_trainers
,
trainer_id
=
nccl2_trainer_id
)
train_pyreader
.
decorate_tensor_provide
r
(
train_data_generator
)
train_pyreader
.
set_batch_generato
r
(
train_data_generator
)
else
:
train_exe
=
None
...
...
@@ -349,7 +349,7 @@ def main(args):
# final eval on dianostic, hack for glue-ax
if
args
.
diagnostic
:
test_pyreader
.
decorate_tensor_provide
r
(
test_pyreader
.
set_batch_generato
r
(
reader
.
data_generator
(
args
.
diagnostic
,
batch_size
=
args
.
batch_size
,
...
...
@@ -380,7 +380,7 @@ def evaluate_wrapper(args, reader, exe, test_prog, test_pyreader, graph_vars,
# evaluate dev set
batch_size
=
args
.
batch_size
if
args
.
predict_batch_size
is
None
else
args
.
predict_batch_size
for
ds
in
args
.
dev_set
.
split
(
','
):
test_pyreader
.
decorate_tensor_provide
r
(
test_pyreader
.
set_batch_generato
r
(
reader
.
data_generator
(
ds
,
batch_size
=
batch_size
,
...
...
@@ -409,7 +409,7 @@ def predict_wrapper(args, reader, exe, test_prog, test_pyreader, graph_vars,
batch_size
=
args
.
batch_size
if
args
.
predict_batch_size
is
None
else
args
.
predict_batch_size
for
test_f
,
save_f
in
zip
(
test_sets
,
save_dirs
):
test_pyreader
.
decorate_tensor_provide
r
(
test_pyreader
.
set_batch_generato
r
(
reader
.
data_generator
(
test_f
,
batch_size
=
batch_size
,
...
...
ernie/run_mrc.py
浏览文件 @
e5d95cb9
...
...
@@ -228,7 +228,7 @@ def main(args):
num_trainers
=
nccl2_num_trainers
,
trainer_id
=
nccl2_trainer_id
)
train_pyreader
.
decorate_tensor_provide
r
(
train_data_generator
)
train_pyreader
.
set_batch_generato
r
(
train_data_generator
)
else
:
train_exe
=
None
...
...
@@ -272,7 +272,7 @@ def main(args):
if
steps
%
args
.
validation_steps
==
0
:
if
args
.
do_val
:
test_pyreader
.
decorate_tensor_provide
r
(
test_pyreader
.
set_batch_generato
r
(
reader
.
data_generator
(
args
.
dev_set
,
batch_size
=
args
.
batch_size
,
...
...
@@ -291,7 +291,7 @@ def main(args):
args
=
args
)
if
args
.
do_test
:
test_pyreader
.
decorate_tensor_provide
r
(
test_pyreader
.
set_batch_generato
r
(
reader
.
data_generator
(
args
.
test_set
,
batch_size
=
args
.
batch_size
,
...
...
@@ -318,7 +318,7 @@ def main(args):
# final eval on dev set
if
args
.
do_val
:
log
.
info
(
"Final validation result:"
)
test_pyreader
.
decorate_tensor_provide
r
(
test_pyreader
.
set_batch_generato
r
(
reader
.
data_generator
(
args
.
dev_set
,
batch_size
=
args
.
batch_size
,
...
...
@@ -339,7 +339,7 @@ def main(args):
# final eval on test set
if
args
.
do_test
:
log
.
info
(
"Final test result:"
)
test_pyreader
.
decorate_tensor_provide
r
(
test_pyreader
.
set_batch_generato
r
(
reader
.
data_generator
(
args
.
test_set
,
batch_size
=
args
.
batch_size
,
...
...
ernie/run_sequence_labeling.py
浏览文件 @
e5d95cb9
...
...
@@ -217,7 +217,7 @@ def main(args):
num_trainers
=
nccl2_num_trainers
,
trainer_id
=
nccl2_trainer_id
)
train_pyreader
.
decorate_tensor_provide
r
(
train_data_generator
)
train_pyreader
.
set_batch_generato
r
(
train_data_generator
)
else
:
train_exe
=
None
...
...
@@ -302,7 +302,7 @@ def evaluate_wrapper(reader, exe, test_prog, test_pyreader, graph_vars,
# evaluate dev set
batch_size
=
args
.
batch_size
if
args
.
predict_batch_size
is
None
else
args
.
predict_batch_size
for
ds
in
args
.
dev_set
.
split
(
','
):
#single card eval
test_pyreader
.
decorate_tensor_provide
r
(
test_pyreader
.
set_batch_generato
r
(
reader
.
data_generator
(
ds
,
batch_size
=
batch_size
,
...
...
@@ -324,7 +324,7 @@ def predict_wrapper(reader, exe, test_prog, test_pyreader, graph_vars,
batch_size
=
args
.
batch_size
if
args
.
predict_batch_size
is
None
else
args
.
predict_batch_size
for
test_f
,
save_f
in
zip
(
test_sets
,
save_dirs
):
test_pyreader
.
decorate_tensor_provide
r
(
reader
.
data_generator
(
test_pyreader
.
set_batch_generato
r
(
reader
.
data_generator
(
test_f
,
batch_size
=
batch_size
,
epoch
=
1
,
...
...
ernie/train.py
浏览文件 @
e5d95cb9
...
...
@@ -41,20 +41,17 @@ args = parser.parse_args()
def
create_model
(
pyreader_name
,
ernie_config
):
pyreader
=
fluid
.
layers
.
py_reader
(
capacity
=
70
,
shapes
=
[[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
args
.
max_seq_len
,
1
],
[
-
1
,
1
],
[
-
1
,
1
],
[
-
1
,
1
]],
dtypes
=
[
'int64'
,
'int64'
,
'int64'
,
'float32'
,
'int64'
,
'int64'
,
'int64'
],
lod_levels
=
[
0
,
0
,
0
,
0
,
0
,
0
,
0
],
name
=
pyreader_name
,
use_double_buffer
=
True
)
(
src_ids
,
pos_ids
,
sent_ids
,
input_mask
,
mask_label
,
mask_pos
,
labels
)
=
fluid
.
layers
.
read_file
(
pyreader
)
src_ids
=
fluid
.
layers
.
data
(
name
=
'1'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
pos_ids
=
fluid
.
layers
.
data
(
name
=
'2'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
sent_ids
=
fluid
.
layers
.
data
(
name
=
'3'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'int64'
)
input_mask
=
fluid
.
layers
.
data
(
name
=
'4'
,
shape
=
[
-
1
,
args
.
max_seq_len
,
1
],
dtype
=
'float32'
)
mask_label
=
fluid
.
layers
.
data
(
name
=
'5'
,
shape
=
[
-
1
,
1
],
dtype
=
'int64'
)
mask_pos
=
fluid
.
layers
.
data
(
name
=
'6'
,
shape
=
[
-
1
,
1
],
dtype
=
'int64'
)
labels
=
fluid
.
layers
.
data
(
name
=
'r'
,
shape
=
[
-
1
,
1
],
dtype
=
'int64'
)
pyreader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
[
src_ids
,
pos_ids
,
sent_ids
,
input_mask
,
mask_label
,
mask_pos
,
labels
],
capacity
=
70
,
iterable
=
False
)
ernie
=
ErnieModel
(
src_ids
=
src_ids
,
...
...
@@ -97,7 +94,7 @@ def predict_wrapper(args,
def
predict
(
exe
=
exe
,
pyreader
=
pyreader
):
pyreader
.
decorate_tensor_provide
r
(
data_reader
.
data_generator
())
pyreader
.
set_batch_generato
r
(
data_reader
.
data_generator
())
pyreader
.
start
()
cost
=
0
...
...
@@ -285,7 +282,7 @@ def train(args):
next_sent_acc
.
name
,
mask_lm_loss
.
name
,
total_loss
.
name
])
train_pyreader
.
decorate_tensor_provide
r
(
data_reader
.
data_generator
())
train_pyreader
.
set_batch_generato
r
(
data_reader
.
data_generator
())
train_pyreader
.
start
()
steps
=
0
cost
=
[]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录