Commit 8b7d837b (unverified)

Update bert to 1.6 api (#3849)

Authored by Yibing Liu on Oct 31, 2019; committed via GitHub on Oct 31, 2019.
Parent: f39f5776

The commit migrates the BERT scripts from the Paddle Fluid 1.5 data-feeding and embedding APIs to their 1.6 replacements (fluid.data, fluid.embedding, fluid.io.DataLoader.from_generator).

Showing 11 changed files with 136 additions and 139 deletions:

    PaddleNLP/PaddleLARK/BERT/README.md              +1   -1
    PaddleNLP/PaddleLARK/BERT/batching.py            +2   -2
    PaddleNLP/PaddleLARK/BERT/model/bert.py          +7   -3
    PaddleNLP/PaddleLARK/BERT/model/classifier.py    +9   -7
    PaddleNLP/PaddleLARK/BERT/optimization.py        +18  -5
    PaddleNLP/PaddleLARK/BERT/predict_classifier.py  +10  -4
    PaddleNLP/PaddleLARK/BERT/reader/squad.py        +19  -18
    PaddleNLP/PaddleLARK/BERT/run_classifier.py      +20  -29
    PaddleNLP/PaddleLARK/BERT/run_squad.py           +23  -34
    PaddleNLP/PaddleLARK/BERT/train.py               +23  -33
    PaddleNLP/PaddleLARK/BERT/utils/fp16.py          +4   -3
PaddleNLP/PaddleLARK/BERT/README.md

```diff
@@ -70,7 +70,7 @@
 ## Installation
-This project depends on Paddle Fluid **1.5.1** or later; please refer to the [installation guide](http://www.paddlepaddle.org/#quick-start) to install it. Converting TensorFlow model parameters to Paddle Fluid additionally requires TensorFlow 1.12.
+This project depends on Paddle Fluid **1.6.0** or later; please refer to the [installation guide](http://www.paddlepaddle.org/#quick-start) to install it. Converting TensorFlow model parameters to Paddle Fluid additionally requires TensorFlow 1.12.
 ## Pre-training
```
PaddleNLP/PaddleLARK/BERT/batching.py

```diff
@@ -155,7 +155,7 @@ def pad_batch_data(insts,
     inst_data = np.array(
         [list(inst) + list([pad_idx] * (max_len - len(inst)))
          for inst in insts])
-    return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
+    return_list += [inst_data.astype("int64").reshape([-1, max_len])]

     # position data
     if return_pos:
@@ -164,7 +164,7 @@ def pad_batch_data(insts,
              for inst in insts])
-        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_pos.astype("int64").reshape([-1, max_len])]

     if return_input_mask:
         # This is used to avoid attention on paddings.
```
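The only change here is dropping the trailing `1` from the reshape: under the 1.6 `fluid.data` inputs declared as `[None, None]` elsewhere in this commit, padded id batches are fed as plain `[batch, max_len]` int64 arrays. A minimal numpy sketch of the resulting shapes (the `insts`/`pad_idx` names follow the diff; everything else is illustrative):

```python
import numpy as np

def pad_ids(insts, pad_idx=0):
    """Pad variable-length id lists to a dense [batch, max_len] int64 array.

    Sketch of what pad_batch_data now returns for id inputs; before this
    commit the result carried an extra trailing dimension, i.e. the shape
    was [batch, max_len, 1].
    """
    max_len = max(len(inst) for inst in insts)
    data = np.array(
        [list(inst) + [pad_idx] * (max_len - len(inst)) for inst in insts])
    return data.astype("int64").reshape([-1, max_len])

batch = pad_ids([[101, 2023, 102], [101, 102]])
print(batch.shape)  # (2, 3) -- no trailing 1 under the 1.6 API
```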
PaddleNLP/PaddleLARK/BERT/model/bert.py

```diff
@@ -82,21 +82,21 @@ class BertModel(object):
     def _build_model(self, src_ids, position_ids, sentence_ids, input_mask):
         # padding id in vocabulary must be set to 0
-        emb_out = fluid.layers.embedding(
+        emb_out = fluid.embedding(
             input=src_ids,
             size=[self._voc_size, self._emb_size],
             dtype=self._dtype,
             param_attr=fluid.ParamAttr(
                 name=self._word_emb_name, initializer=self._param_initializer),
             is_sparse=False)
-        position_emb_out = fluid.layers.embedding(
+        position_emb_out = fluid.embedding(
             input=position_ids,
             size=[self._max_position_seq_len, self._emb_size],
             dtype=self._dtype,
             param_attr=fluid.ParamAttr(
                 name=self._pos_emb_name, initializer=self._param_initializer))
-        sent_emb_out = fluid.layers.embedding(
+        sent_emb_out = fluid.embedding(
             sentence_ids,
             size=[self._sent_types, self._emb_size],
             dtype=self._dtype,
@@ -148,6 +148,7 @@ class BertModel(object):
             input=self._enc_out, axes=[1], starts=[0], ends=[1])
         next_sent_feat = fluid.layers.fc(
             input=next_sent_feat,
+            num_flatten_dims=2,
             size=self._emb_size,
             act="tanh",
             param_attr=fluid.ParamAttr(
@@ -209,11 +210,14 @@ class BertModel(object):
         next_sent_fc_out = fluid.layers.fc(
             input=next_sent_feat,
+            num_flatten_dims=2,
             size=2,
             param_attr=fluid.ParamAttr(
                 name="next_sent_fc.w_0", initializer=self._param_initializer),
             bias_attr="next_sent_fc.b_0")

+        next_sent_fc_out = fluid.layers.reshape(
+            next_sent_fc_out, [-1, 2], inplace=True)
+
         next_sent_loss, next_sent_softmax = fluid.layers.softmax_with_cross_entropy(
             logits=next_sent_fc_out, label=labels, return_softmax=True)
```
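In 1.6, `fluid.embedding` replaces `fluid.layers.embedding` and consumes plain `[batch, seq_len]` id tensors, matching the reshape change in batching.py; since the lookup output then keeps a real sequence dimension, the downstream `fc` calls gain `num_flatten_dims=2` and a final reshape restores 2-D logits. A minimal standalone sketch of the migrated lookup (the `word_emb` name and the sizes are illustrative, not from the repo):

```python
import paddle.fluid as fluid

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    # 1.6-style input: no trailing [, 1] dimension on the id tensor.
    src_ids = fluid.data(name="src_ids", shape=[None, None], dtype="int64")
    emb_out = fluid.embedding(
        input=src_ids,
        size=[30522, 768],  # illustrative vocab / hidden sizes
        dtype="float32",
        param_attr=fluid.ParamAttr(name="word_emb"))
    # emb_out has shape [batch, seq_len, 768]; fc layers applied on top
    # need num_flatten_dims=2 to keep the sequence dimension intact.
```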
PaddleNLP/PaddleLARK/BERT/model/classifier.py

```diff
@@ -25,15 +25,14 @@ from model.bert import BertModel
 def create_model(args, bert_config, num_labels, is_prediction=False):
     input_fields = {
         'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels'],
-        'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                   [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                   [-1, 1]],
+        'shapes': [[None, None], [None, None], [None, None],
+                   [None, args.max_seq_len, 1], [None, 1]],
         'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'],
         'lod_levels': [0, 0, 0, 0, 0],
     }

     inputs = [
-        fluid.layers.data(
+        fluid.data(
             name=input_fields['names'][i],
             shape=input_fields['shapes'][i],
             dtype=input_fields['dtypes'][i],
@@ -42,7 +41,8 @@ def create_model(args, bert_config, num_labels, is_prediction=False):
     ]
     (src_ids, pos_ids, sent_ids, input_mask, labels) = inputs

-    pyreader = fluid.io.PyReader(feed_list=inputs, capacity=50, iterable=False)
+    data_loader = fluid.io.DataLoader.from_generator(
+        feed_list=inputs, capacity=50, iterable=False)

     bert = BertModel(
         src_ids=src_ids,
@@ -59,6 +59,7 @@ def create_model(args, bert_config, num_labels, is_prediction=False):
         dropout_implementation="upscale_in_train")
     logits = fluid.layers.fc(
         input=cls_feats,
+        num_flatten_dims=2,
         size=num_labels,
         param_attr=fluid.ParamAttr(
             name="cls_out_w",
@@ -71,8 +72,9 @@ def create_model(args, bert_config, num_labels, is_prediction=False):
         feed_targets_name = [
             src_ids.name, pos_ids.name, sent_ids.name, input_mask.name
         ]
-        return pyreader, probs, feed_targets_name
+        return data_loader, probs, feed_targets_name

+    logits = fluid.layers.reshape(logits, [-1, num_labels], inplace=True)
     ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
         logits=logits, label=labels, return_softmax=True)
     loss = fluid.layers.mean(x=ce_loss)
@@ -80,4 +82,4 @@ def create_model(args, bert_config, num_labels, is_prediction=False):
     num_seqs = fluid.layers.create_tensor(dtype='int64')
     accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)

-    return pyreader, loss, probs, accuracy, num_seqs
+    return data_loader, loss, probs, accuracy, num_seqs
```
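`fluid.io.PyReader` is superseded by `fluid.io.DataLoader.from_generator` in 1.6, with `decorate_batch_generator` renamed to `set_batch_generator`; the `feed_list`/`capacity`/`iterable` semantics carry over, as does the `start()`/`EOFException`/`reset()` loop used throughout these scripts. A minimal runnable sketch of the new wiring (the toy generator and the single `labels` input are illustrative stand-ins for `processor.data_generator(...)` and the BERT feed list):

```python
import numpy as np
import paddle.fluid as fluid

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    labels = fluid.data(name="labels", shape=[None, 1], dtype="int64")
    # 1.6 replacement for fluid.io.PyReader(feed_list=..., capacity=50,
    # iterable=False): same arguments, new constructor.
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[labels], capacity=50, iterable=False)
    mean_label = fluid.layers.mean(fluid.layers.cast(labels, "float32"))

def toy_batches():  # illustrative stand-in for processor.data_generator(...)
    for _ in range(3):
        yield [np.random.randint(0, 2, size=(8, 1)).astype("int64")]

loader.set_batch_generator(toy_batches)  # was decorate_batch_generator

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
loader.start()
while True:  # non-iterable loaders still signal exhaustion via EOFException
    try:
        exe.run(main_prog, fetch_list=[mean_label.name])
    except fluid.core.EOFException:
        loader.reset()
        break
```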
PaddleNLP/PaddleLARK/BERT/optimization.py

```diff
@@ -73,9 +73,10 @@ def optimization(loss,
                 .noam_decay(1 / (warmup_steps * (learning_rate**2)),
                             warmup_steps)
         else:
-            printf(
-                "WARNING: noam decay should have postive warmup steps, using "
-                "constant learning rate instead!")
+            print(
+                "WARNING: noam decay of learning rate should have postive warmup "
+                "steps but given {}, using constant learning rate instead!"
+                .format(warmup_steps))
             scheduled_lr = fluid.layers.create_global_var(
                 name=fluid.unique_name.generate("learning_rate"),
                 shape=[1],
@@ -83,8 +84,20 @@ def optimization(loss,
                 dtype='float32',
                 persistable=True)
     elif scheduler == 'linear_warmup_decay':
-        scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
-                                           num_train_steps)
+        if warmup_steps > 0:
+            scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
+                                               num_train_steps)
+        else:
+            print(
+                "WARNING: linear warmup decay of learning rate should have "
+                "postive warmup steps but given {}, use constant learning rate "
+                "instead!".format(warmup_steps))
+            scheduled_lr = fluid.layers.create_global_var(
+                name=fluid.unique_name.generate("learning_rate"),
+                shape=[1],
+                value=learning_rate,
+                dtype='float32',
+                persistable=True)
     else:
         raise ValueError("Unkown learning rate scheduler, should be "
                          "'noam_decay' or 'linear_warmup_decay'")
```
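Two fixes in one: the old warning called a non-existent `printf` (a NameError waiting to happen), and the `linear_warmup_decay` branch did not guard against `warmup_steps <= 0` at all; both schedulers now fall back to a constant learning rate with a warning. A plain-Python sketch of the guarded branch's behavior, with a hypothetical warmup-then-linear-decay shape standing in for the repo's graph-building `linear_warmup_decay` (the exact decay curve there may differ in detail):

```python
def scheduled_lr(step, learning_rate, warmup_steps, num_train_steps):
    """Sketch of the guarded linear_warmup_decay branch as a pure function."""
    if warmup_steps > 0:
        if step < warmup_steps:
            # linear ramp from 0 up to learning_rate over the warmup phase
            return learning_rate * float(step) / warmup_steps
        # hypothetical linear decay to zero over the whole run
        return learning_rate * (1.0 - float(step) / num_train_steps)
    # New fallback: warn and keep the learning rate constant.
    print("WARNING: linear warmup decay of learning rate should have "
          "postive warmup steps but given {}, use constant learning rate "
          "instead!".format(warmup_steps))
    return learning_rate

print(scheduled_lr(50, 5e-5, 100, 1000))   # mid-warmup:  2.5e-05
print(scheduled_lr(550, 5e-5, 100, 1000))  # mid-decay:   2.25e-05
print(scheduled_lr(550, 5e-5, 0, 1000))    # no warmup -> constant 5e-05
```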
PaddleNLP/PaddleLARK/BERT/predict_classifier.py

```diff
@@ -17,6 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import six
+import sys
+if six.PY2:
+    reload(sys)
+    sys.setdefaultencoding('utf8')
+
 import os
 import time
 import argparse
@@ -82,7 +88,7 @@ def main(args):
     predict_startup = fluid.Program()
     with fluid.program_guard(predict_prog, predict_startup):
         with fluid.unique_name.guard():
-            predict_pyreader, probs, feed_target_names = create_model(
+            predict_data_loader, probs, feed_target_names = create_model(
                 args,
                 bert_config=bert_config,
                 num_labels=num_labels,
@@ -112,11 +118,11 @@ def main(args):
     predict_exe = fluid.ParallelExecutor(
         use_cuda=args.use_cuda, main_program=predict_prog)

-    predict_pyreader.decorate_batch_generator(
+    predict_data_loader.set_batch_generator(
         processor.data_generator(
             batch_size=args.batch_size, phase='test', epoch=1, shuffle=False))

-    predict_pyreader.start()
+    predict_data_loader.start()
     all_results = []
     time_begin = time.time()
     while True:
@@ -124,7 +130,7 @@ def main(args):
             results = predict_exe.run(fetch_list=[probs.name])
             all_results.extend(results[0])
         except fluid.core.EOFException:
-            predict_pyreader.reset()
+            predict_data_loader.reset()
             break
     time_end = time.time()
```
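The new import block also illustrates the Python 2/3 hygiene applied across the scripts: `reload` is a builtin only on Python 2, so the `setdefaultencoding` hack is now gated behind `six.PY2` instead of crashing with a NameError on Python 3. The guard in isolation:

```python
from __future__ import print_function

import six
import sys

# Only Python 2 has the reload builtin and the setdefaultencoding hook;
# on Python 3 (UTF-8 default) this block is skipped entirely.
if six.PY2:
    reload(sys)  # noqa: F821 -- builtin on PY2 only
    sys.setdefaultencoding('utf8')

print("running under Python %d" % (2 if six.PY2 else 3))
```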
PaddleNLP/PaddleLARK/BERT/reader/squad.py

```diff
@@ -307,32 +307,33 @@ def convert_examples_to_features(
             end_position = 0

         if example_index < 3:
-            print("*** Example ***")
-            print("unique_id: %s" % (unique_id))
-            print("example_index: %s" % (example_index))
-            print("doc_span_index: %s" % (doc_span_index))
-            print("tokens: %s" % " ".join(
+            print(u"*** Example ***")
+            print(u"unique_id: %s" % (unique_id))
+            print(u"example_index: %s" % (example_index))
+            print(u"doc_span_index: %s" % (doc_span_index))
+            print(u"tokens: %s" % " ".join(
                 [tokenization.printable_text(x) for x in tokens]))
-            print("token_to_orig_map: %s" % " ".join([
+            print(u"token_to_orig_map: %s" % " ".join([
                 "%d:%d" % (x, y) for (x, y) in six.iteritems(token_to_orig_map)
             ]))
-            print("token_is_max_context: %s" % " ".join([
+            print(u"token_is_max_context: %s" % " ".join([
                 "%d:%s" % (x, y)
                 for (x, y) in six.iteritems(token_is_max_context)
             ]))
-            print("input_ids: %s" % " ".join([str(x) for x in input_ids]))
-            print("input_mask: %s" % " ".join([str(x) for x in input_mask]))
-            print("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
+            print(u"input_ids: %s" % " ".join([str(x) for x in input_ids]))
+            print(u"input_mask: %s" % " ".join([str(x) for x in input_mask]))
+            print(u"segment_ids: %s" % " ".join([str(x) for x in segment_ids]))

             if is_training and example.is_impossible:
-                print("impossible example")
+                print(u"impossible example")
             if is_training and not example.is_impossible:
                 answer_text = " ".join(tokens[start_position:(end_position + 1)])
-                print("start_position: %d" % (start_position))
-                print("end_position: %d" % (end_position))
-                print("answer: %s" %
+                print(u"start_position: %d" % (start_position))
+                print(u"end_position: %d" % (end_position))
+                print(u"answer: %s" %
                       (tokenization.printable_text(answer_text)))

         feature = InputFeatures(
@@ -825,7 +826,7 @@ def get_final_text(pred_text, orig_text, do_lower_case, verbose):
     start_position = tok_text.find(pred_text)
     if start_position == -1:
         if verbose:
-            print("Unable to find text: '%s' in '%s'" % (pred_text, orig_text))
+            print(u"Unable to find text: '%s' in '%s'" % (pred_text, orig_text))
         return orig_text
     end_position = start_position + len(pred_text) - 1
@@ -834,7 +835,7 @@ def get_final_text(pred_text, orig_text, do_lower_case, verbose):
     if len(orig_ns_text) != len(tok_ns_text):
         if verbose:
-            print("Length not equal after stripping spaces: '%s' vs '%s'",
+            print(u"Length not equal after stripping spaces: '%s' vs '%s'",
                   orig_ns_text, tok_ns_text)
         return orig_text
@@ -852,7 +853,7 @@ def get_final_text(pred_text, orig_text, do_lower_case, verbose):
     if orig_start_position is None:
         if verbose:
-            print("Couldn't map start position")
+            print(u"Couldn't map start position")
         return orig_text

     orig_end_position = None
@@ -863,7 +864,7 @@ def get_final_text(pred_text, orig_text, do_lower_case, verbose):
     if orig_end_position is None:
         if verbose:
-            print("Couldn't map end position")
+            print(u"Couldn't map end position")
         return orig_text

     output_text = orig_text[orig_start_position:(orig_end_position + 1)]
```
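Every debug `print` here gains a `u` prefix so the `%`-formatting happens in unicode on Python 2; interpolating unicode SQuAD text (e.g. from `tokenization.printable_text`) into a byte-string format is a classic source of `UnicodeDecodeError` there, while on Python 3 the prefix is a no-op. A tiny self-contained illustration (the token list is made up):

```python
# -*- coding: utf-8 -*-
from __future__ import print_function

tokens = [u"[CLS]", u"北", u"京", u"[SEP]"]  # illustrative tokens

# With a u"" format string the interpolation stays in unicode on Python 2;
# a plain "tokens: %s" byte string could raise UnicodeDecodeError there.
print(u"tokens: %s" % u" ".join(tokens))
```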
PaddleNLP/PaddleLARK/BERT/run_classifier.py

```diff
@@ -17,9 +17,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import six
 import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
+if six.PY2:
+    reload(sys)
+    sys.setdefaultencoding('utf8')

 import os
 import time
@@ -107,8 +109,8 @@ args = parser.parse_args()
 # yapf: enable.


-def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase):
-    test_pyreader.start()
+def evaluate(exe, test_program, test_data_loader, fetch_list, eval_phase):
+    test_data_loader.start()
     total_cost, total_acc, total_num_seqs = [], [], []
     time_begin = time.time()
     while True:
@@ -119,7 +121,7 @@ def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase):
             total_acc.extend(np_acc * np_num_seqs)
             total_num_seqs.extend(np_num_seqs)
         except fluid.core.EOFException:
-            test_pyreader.reset()
+            test_data_loader.reset()
             break
     time_end = time.time()
     print("[%s evaluation] ave loss: %f, ave acc: %f, elapsed time: %f s" %
@@ -203,7 +205,7 @@ def main(args):
         with fluid.program_guard(train_program, startup_prog):
             with fluid.unique_name.guard():
-                train_pyreader, loss, probs, accuracy, num_seqs = create_model(
+                train_data_loader, loss, probs, accuracy, num_seqs = create_model(
                     args,
                     bert_config=bert_config,
                     num_labels=num_labels)
@@ -224,28 +226,17 @@ def main(args):
                 incr_ratio=args.incr_ratio,
                 decr_ratio=args.decr_ratio)

-        if args.verbose:
-            if args.in_tokens:
-                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                    program=train_program,
-                    batch_size=args.batch_size // args.max_seq_len)
-            else:
-                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                    program=train_program, batch_size=args.batch_size)
-            print("Theoretical memory usage in training: %.3f - %.3f %s" %
-                  (lower_mem, upper_mem, unit))
-
     if args.do_val:
         dev_prog = fluid.Program()
         with fluid.program_guard(dev_prog, startup_prog):
             with fluid.unique_name.guard():
-                dev_pyreader, loss, probs, accuracy, num_seqs = create_model(
+                dev_data_loader, loss, probs, accuracy, num_seqs = create_model(
                     args,
                     bert_config=bert_config,
                     num_labels=num_labels)

         dev_prog = dev_prog.clone(for_test=True)
-        dev_pyreader.decorate_batch_generator(
+        dev_data_loader.set_batch_generator(
             processor.data_generator(
                 batch_size=args.batch_size,
                 phase='dev',
@@ -257,13 +248,13 @@ def main(args):
         test_prog = fluid.Program()
         with fluid.program_guard(test_prog, startup_prog):
             with fluid.unique_name.guard():
-                test_pyreader, loss, probs, accuracy, num_seqs = create_model(
+                test_data_loader, loss, probs, accuracy, num_seqs = create_model(
                     args,
                     bert_config=bert_config,
                     num_labels=num_labels)

         test_prog = test_prog.clone(for_test=True)
-        test_pyreader.decorate_batch_generator(
+        test_data_loader.set_batch_generator(
             processor.data_generator(
                 batch_size=args.batch_size,
                 phase='test',
@@ -316,11 +307,11 @@ def main(args):
         train_compiled_program = fluid.CompiledProgram(
             train_program).with_data_parallel(
                 loss_name=loss.name, build_strategy=build_strategy)
-        train_pyreader.decorate_batch_generator(train_data_generator, place)
+        train_data_loader.set_batch_generator(train_data_generator, place)

     if args.do_train:
-        train_pyreader.start()
+        train_data_loader.start()
         steps = 0
         total_cost, total_acc, total_num_seqs = [], [], []
         time_begin = time.time()
@@ -350,7 +341,7 @@ def main(args):
                     total_num_seqs.extend(np_num_seqs)

                     if args.verbose:
-                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
+                        verbose = "train data_loader queue size: %d, " % train_data_loader.queue.size(
                         )
                         verbose += "learning rate: %f" % np_lr[0]
                         if args.use_fp16:
@@ -386,18 +377,18 @@ def main(args):
                         throughput = []
                     # evaluate dev set
                     if args.do_val:
-                        evaluate(exe, dev_prog, dev_pyreader,
+                        evaluate(exe, dev_prog, dev_data_loader,
                                  [loss.name, accuracy.name, num_seqs.name],
                                  "dev")
                     # evaluate test set
                     if args.do_test:
-                        evaluate(exe, test_prog, test_pyreader,
+                        evaluate(exe, test_prog, test_data_loader,
                                  [loss.name, accuracy.name, num_seqs.name],
                                  "test")
             except fluid.core.EOFException:
                 save_path = os.path.join(args.checkpoints, "step_" + str(steps))
                 fluid.io.save_persistables(exe, save_path, train_program)
-                train_pyreader.reset()
+                train_data_loader.reset()
                 break
     if args.enable_ce:
         card_num = get_cards()
@@ -421,13 +412,13 @@ def main(args):
     # final eval on dev set
     if args.do_val:
         print("Final validation result:")
-        evaluate(exe, dev_prog, dev_pyreader,
+        evaluate(exe, dev_prog, dev_data_loader,
                  [loss.name, accuracy.name, num_seqs.name], "dev")

     # final eval on test set
     if args.do_test:
         print("Final test result:")
-        evaluate(exe, test_prog, test_pyreader,
+        evaluate(exe, test_prog, test_data_loader,
                  [loss.name, accuracy.name, num_seqs.name], "test")
```
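Beyond the renames, this file (and run_squad.py and train.py below) also deletes the `fluid.contrib.memory_usage` reporting block, presumably because that contrib helper did not survive the 1.6 reorganization. If such an estimate were still wanted across versions, a defensive sketch could feature-detect it instead of calling it blindly (the placeholder program and batch size are illustrative):

```python
import paddle.fluid as fluid

train_program = fluid.Program()  # placeholder program for the sketch
batch_size = 32

# Look the helper up at runtime; on 1.6+ this resolves to None and the
# report is simply skipped rather than raising AttributeError.
memory_usage = getattr(fluid.contrib, "memory_usage", None)
if memory_usage is not None:  # pre-1.6 Paddle only
    lower_mem, upper_mem, unit = memory_usage(
        program=train_program, batch_size=batch_size)
    print("Theoretical memory usage in training: %.3f - %.3f %s" %
          (lower_mem, upper_mem, unit))
```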
PaddleNLP/PaddleLARK/BERT/run_squad.py

```diff
@@ -17,9 +17,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import six
 import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
+if six.PY2:
+    reload(sys)
+    sys.setdefaultencoding('utf8')

 import argparse
 import collections
@@ -108,9 +110,8 @@ def create_model(bert_config, is_training=False):
     if is_training:
         input_fields = {
             'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask',
                       'start_positions', 'end_positions'],
-            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                       [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                       [-1, 1], [-1, 1]],
+            'shapes': [[None, None], [None, None], [None, None],
+                       [None, args.max_seq_len, 1], [None, 1], [None, 1]],
             'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
             'lod_levels': [0, 0, 0, 0, 0, 0],
@@ -118,20 +119,19 @@ def create_model(bert_config, is_training=False):
     else:
         input_fields = {
             'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask',
                       'unique_id'],
-            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                       [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                       [-1, 1]],
+            'shapes': [[None, None], [None, None], [None, None],
+                       [None, args.max_seq_len, 1], [None, 1]],
             'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'],
             'lod_levels': [0, 0, 0, 0, 0],
         }

     inputs = [
-        fluid.layers.data(
+        fluid.data(
             name=input_fields['names'][i],
             shape=input_fields['shapes'][i],
             dtype=input_fields['dtypes'][i],
             lod_level=input_fields['lod_levels'][i])
         for i in range(len(input_fields['names']))]

-    pyreader = fluid.io.PyReader(feed_list=inputs, capacity=50, iterable=False)
+    data_loader = fluid.io.DataLoader.from_generator(
+        feed_list=inputs, capacity=50, iterable=False)

     if is_training:
         (src_ids, pos_ids, sent_ids, input_mask, start_positions,
          end_positions) = inputs
@@ -176,23 +176,23 @@ def create_model(bert_config, is_training=False):
         start_loss = compute_loss(start_logits, start_positions)
         end_loss = compute_loss(end_logits, end_positions)
         total_loss = (start_loss + end_loss) / 2.0
-        return pyreader, total_loss, num_seqs
+        return data_loader, total_loss, num_seqs
     else:
-        return pyreader, unique_id, start_logits, end_logits, num_seqs
+        return data_loader, unique_id, start_logits, end_logits, num_seqs


 RawResult = collections.namedtuple("RawResult",
                                    ["unique_id", "start_logits", "end_logits"])


-def predict(test_exe, test_program, test_pyreader, fetch_list, processor):
+def predict(test_exe, test_program, test_data_loader, fetch_list, processor):
     if not os.path.exists(args.checkpoints):
         os.makedirs(args.checkpoints)
     output_prediction_file = os.path.join(args.checkpoints, "predictions.json")
     output_nbest_file = os.path.join(args.checkpoints, "nbest_predictions.json")
     output_null_log_odds_file = os.path.join(args.checkpoints, "null_odds.json")

-    test_pyreader.start()
+    test_data_loader.start()
     all_results = []
     time_begin = time.time()
     while True:
@@ -211,7 +211,7 @@ def predict(test_exe, test_program, test_pyreader, fetch_list, processor):
                         start_logits=start_logits,
                         end_logits=end_logits))
         except fluid.core.EOFException:
-            test_pyreader.reset()
+            test_data_loader.reset()
             break
     time_end = time.time()
@@ -279,7 +279,7 @@ def train(args):
     train_program = fluid.Program()
     with fluid.program_guard(train_program, startup_prog):
         with fluid.unique_name.guard():
-            train_pyreader, loss, num_seqs = create_model(
+            train_data_loader, loss, num_seqs = create_model(
                 bert_config=bert_config,
                 is_training=True)
@@ -300,22 +300,11 @@ def train(args):
                 incr_ratio=args.incr_ratio,
                 decr_ratio=args.decr_ratio)

-        if args.verbose:
-            if args.in_tokens:
-                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                    program=train_program,
-                    batch_size=args.batch_size // args.max_seq_len)
-            else:
-                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                    program=train_program, batch_size=args.batch_size)
-            print("Theoretical memory usage in training: %.3f - %.3f %s" %
-                  (lower_mem, upper_mem, unit))
-
     if args.do_predict:
         test_prog = fluid.Program()
         with fluid.program_guard(test_prog, startup_prog):
             with fluid.unique_name.guard():
-                test_pyreader, unique_ids, start_logits, end_logits, num_seqs = create_model(
+                test_data_loader, unique_ids, start_logits, end_logits, num_seqs = create_model(
                     bert_config=bert_config,
                     is_training=False)
@@ -359,9 +348,9 @@ def train(args):
         train_compiled_program = fluid.CompiledProgram(
             train_program).with_data_parallel(
                 loss_name=loss.name, exec_strategy=exec_strategy)
-        train_pyreader.decorate_batch_generator(train_data_generator, place)
+        train_data_loader.set_batch_generator(train_data_generator, place)

-        train_pyreader.start()
+        train_data_loader.start()
         steps = 0
         total_cost, total_num_seqs = [], []
         time_begin = time.time()
@@ -387,7 +376,7 @@ def train(args):
                     total_num_seqs.extend(np_num_seqs)

                     if args.verbose:
-                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
+                        verbose = "train data_loader queue size: %d, " % train_data_loader.queue.size(
                         )
                         verbose += "learning rate: %f " % np_lr[0]
                         if args.use_fp16:
@@ -414,11 +403,11 @@ def train(args):
                 save_path = os.path.join(args.checkpoints,
                                          "step_" + str(steps) + "_final")
                 fluid.io.save_persistables(exe, save_path, train_program)
-                train_pyreader.reset()
+                train_data_loader.reset()
                 break
     if args.do_predict:
-        test_pyreader.decorate_batch_generator(
+        test_data_loader.set_batch_generator(
             processor.data_generator(
                 data_path=args.predict_file,
                 batch_size=args.batch_size,
@@ -427,7 +416,7 @@ def train(args):
                 dev_count=1,
                 epoch=1),
             place)

-        predict(exe, test_prog, test_pyreader, [
+        predict(exe, test_prog, test_data_loader, [
             unique_ids.name, start_logits.name, end_logits.name, num_seqs.name
         ], processor)
```
PaddleNLP/PaddleLARK/BERT/train.py

```diff
@@ -17,13 +17,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import six
 import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
+if six.PY2:
+    reload(sys)
+    sys.setdefaultencoding('utf8')

 import os
 import time
 import sys
 import argparse
 import numpy as np
 import multiprocessing
@@ -98,21 +99,20 @@ args = parser.parse_args()
 def create_model(bert_config):
     input_fields = {
         'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask',
                   'mask_label', 'mask_pos', 'labels'],
-        'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                   [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
-                   [-1, 1], [-1, 1], [-1, 1]],
+        'shapes': [[None, None], [None, None], [None, None],
+                   [None, None, 1], [None, 1], [None, 1], [None, 1]],
         'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64', 'int64',
                    'int64'],
         'lod_levels': [0, 0, 0, 0, 0, 0, 0],
     }

     inputs = [
-        fluid.layers.data(
+        fluid.data(
             name=input_fields['names'][i],
             shape=input_fields['shapes'][i],
             dtype=input_fields['dtypes'][i],
             lod_level=input_fields['lod_levels'][i])
         for i in range(len(input_fields['names']))]

     (src_ids, pos_ids, sent_ids, input_mask, mask_label, mask_pos,
      labels) = inputs

-    pyreader = fluid.io.PyReader(feed_list=inputs, capacity=50, iterable=False)
+    data_loader = fluid.io.DataLoader.from_generator(
+        feed_list=inputs, capacity=50, iterable=False)

     bert = BertModel(
         src_ids=src_ids,
@@ -126,14 +126,14 @@ def create_model(bert_config):
     next_sent_acc, mask_lm_loss, total_loss = bert.get_pretraining_output(
         mask_label, mask_pos, labels)

-    return pyreader, next_sent_acc, mask_lm_loss, total_loss
+    return data_loader, next_sent_acc, mask_lm_loss, total_loss


 def predict_wrapper(args,
                     exe,
                     bert_config,
                     test_prog=None,
-                    pyreader=None,
+                    data_loader=None,
                     fetch_list=None):
     # Context to do validation.
     data_path = args.test_set_dir if args.do_test else args.validation_set_dir
@@ -148,7 +148,7 @@ def predict_wrapper(args,
         max_seq_len=args.max_seq_len,
         is_test=True)

-    pyreader.decorate_batch_generator(data_reader.data_generator())
+    data_loader.set_batch_generator(data_reader.data_generator())

     if args.do_test:
         assert args.init_checkpoint is not None, "[FATAL] Please use --init_checkpoint '/path/to/checkpoints' \
@@ -156,8 +156,8 @@ def predict_wrapper(args,
     init_pretraining_params(exe, args.init_checkpoint, test_prog)

-    def predict(exe=exe, pyreader=pyreader):
-        pyreader.start()
+    def predict(exe=exe, data_loader=data_loader):
+        data_loader.start()

         cost = 0
         lm_cost = 0
@@ -176,7 +176,7 @@ def predict_wrapper(args,
                     print("[test_set] steps: %d" % steps)
         except fluid.core.EOFException:
-            pyreader.reset()
+            data_loader.reset()
             break
         used_time = time.time() - time_begin
@@ -193,7 +193,7 @@ def test(args):
     test_startup = fluid.Program()
     with fluid.program_guard(test_prog, test_startup):
         with fluid.unique_name.guard():
-            test_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
+            test_data_loader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                 bert_config=bert_config)
     test_prog = test_prog.clone(for_test=True)
@@ -207,7 +207,7 @@ def test(args):
         exe,
         bert_config,
         test_prog=test_prog,
-        pyreader=test_pyreader,
+        data_loader=test_data_loader,
         fetch_list=[next_sent_acc.name, mask_lm_loss.name, total_loss.name])
     print("test begin")
@@ -228,7 +228,7 @@ def train(args):
     startup_prog = fluid.Program()
     with fluid.program_guard(train_program, startup_prog):
         with fluid.unique_name.guard():
-            train_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
+            train_data_loader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                 bert_config=bert_config)
             scheduled_lr, loss_scaling = optimization(
                 loss=total_loss,
@@ -250,7 +250,7 @@ def train(args):
     test_prog = fluid.Program()
     with fluid.program_guard(test_prog, startup_prog):
         with fluid.unique_name.guard():
-            test_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
+            test_data_loader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                 bert_config=bert_config)

     test_prog = test_prog.clone(for_test=True)
@@ -263,16 +263,6 @@ def train(args):
         dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
     print("Device count %d" % dev_count)

-    if args.verbose:
-        if args.in_tokens:
-            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                program=train_program,
-                batch_size=args.batch_size // args.max_seq_len)
-        else:
-            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
-                program=train_program, batch_size=args.batch_size)
-        print("Theoretical memory usage in training: %.3f - %.3f %s" %
-              (lower_mem, upper_mem, unit))
-
     nccl2_num_trainers = 1
     nccl2_trainer_id = 0
@@ -345,13 +335,13 @@ def train(args):
             exe,
             bert_config,
             test_prog=test_prog,
-            pyreader=test_pyreader,
+            data_loader=test_data_loader,
             fetch_list=[
                 next_sent_acc.name, mask_lm_loss.name, total_loss.name
             ])

-    train_pyreader.decorate_batch_generator(data_reader.data_generator())
-    train_pyreader.start()
+    train_data_loader.set_batch_generator(data_reader.data_generator())
+    train_data_loader.start()
     steps = 0
     cost = []
     lm_cost = []
@@ -402,7 +392,7 @@ def train(args):
             epoch, current_file_index, total_file, current_file = data_reader.get_progress(
             )
             if args.verbose:
-                verbose = "feed_queue size: %d, " % train_pyreader.queue.size()
+                verbose = "feed_queue size: %d, " % train_data_loader.queue.size()
                 verbose += "current learning_rate: %f, " % np_lr[0]
                 if args.use_fp16:
                     verbose += "loss scaling: %f" % np_scaling[0]
@@ -437,7 +427,7 @@ def train(args):
                    np.mean(np.array(vali_acc) / vali_steps), vali_speed))
         except fluid.core.EOFException:
-            train_pyreader.reset()
+            train_data_loader.reset()
             break


 if __name__ == '__main__':
```
PaddleNLP/PaddleLARK/BERT/utils/fp16.py

```diff
@@ -119,10 +119,11 @@ def create_master_params_grads(params_grads, main_prog, startup_prog,
 def master_param_to_train_param(master_params_grads, params_grads, main_prog):
     for idx, m_p_g in enumerate(master_params_grads):
         train_p, _ = params_grads[idx]
-        if train_p.name.find("layer_norm") > -1:
-            continue
         with main_prog._optimized_guard([m_p_g[0], m_p_g[1]]):
-            append_cast_op(m_p_g[0], train_p, main_prog)
+            if train_p.name.find("layer_norm") > -1:
+                fluid.layers.assign(m_p_g[0], train_p)
+            else:
+                append_cast_op(m_p_g[0], train_p, main_prog)


 def update_loss_scaling(is_overall_finite, prev_loss_scaling, num_good_steps,
```
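Before this commit, `master_param_to_train_param` skipped every parameter whose name contains `layer_norm`, so those weights were never copied back from their fp32 master copies; now they are copied with `assign` (no cast needed, both sides stay fp32) while everything else still goes through `append_cast_op`. A standalone sketch of the branching, with made-up parameter names and `fluid.layers.cast` standing in for the repo's `append_cast_op` helper:

```python
import paddle.fluid as fluid

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    # Illustrative master (fp32) and train copies of one layer_norm
    # parameter; only the "layer_norm" substring in the name matters.
    master = fluid.layers.create_global_var(
        [4], 0.0, "float32", name="layer_norm_scale.master", persistable=True)
    train = fluid.layers.create_global_var(
        [4], 0.0, "float32", name="layer_norm_scale", persistable=True)

    if train.name.find("layer_norm") > -1:
        # New behaviour: copy the fp32 master value back with assign
        # instead of skipping the parameter as the old code did.
        fluid.layers.assign(master, train)
    else:
        # Non-layer_norm params are cast back to the fp16 train copy
        # (append_cast_op in the repo; cast is a simplified stand-in).
        fluid.layers.cast(master, "float16")
```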