PaddlePaddle / PaddleHub
Commit 72e1fb1f
Authored on Apr 04, 2019 by Zeyu Chen
add comments for ernie classification
Parent: 761572f7
Showing 4 changed files with 22 additions and 190 deletions (+22 −190)
demo/ernie-classification/finetune_with_hub.py   +4  −0
demo/ernie-seq-labeling/finetune_with_hub.py     +4  −0
paddlehub/finetune/finetune.py                   +13 −189
paddlehub/finetune/task.py                       +1  −1
demo/ernie-classification/finetune_with_hub.py

@@ -40,11 +40,14 @@ args = parser.parse_args()
 # yapf: enable.

 if __name__ == '__main__':
+    # Select a finetune strategy
     strategy = hub.BERTFinetuneStrategy(
         weight_decay=args.weight_decay,
         learning_rate=args.learning_rate,
         warmup_strategy="linear_warmup_decay",
     )
+
+    # Setup runing config for PaddleHub Finetune API
     config = hub.RunConfig(
         use_cuda=True,
         num_epoch=args.num_epoch,
@@ -54,6 +57,7 @@ if __name__ == '__main__':
     # loading Paddlehub BERT
     module = hub.Module(name="ernie")

+    # Sentence classification dataset reader
     reader = hub.reader.ClassifyReader(
         dataset=hub.dataset.ChnSentiCorp(),  # download chnsenticorp dataset
         vocab_path=module.get_vocab_path(),
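For orientation, here is a minimal sketch of how the pieces this demo configures fit together under the v0.x Finetune API. Everything after the `config` line is not visible in this diff: `module.context(...)`, `hub.create_text_cls_task(...)`, `max_seq_len`, the `strategy` hookup, and the feed-list wiring are assumptions about the elided demo code, and are labeled as such in the comments.

import paddlehub as hub

module = hub.Module(name="ernie")                 # pretrained ERNIE module
reader = hub.reader.ClassifyReader(
    dataset=hub.dataset.ChnSentiCorp(),           # downloads ChnSentiCorp
    vocab_path=module.get_vocab_path(),
    max_seq_len=128)                              # assumed parameter

strategy = hub.BERTFinetuneStrategy(
    weight_decay=0.01,
    learning_rate=5e-5,
    warmup_strategy="linear_warmup_decay")
config = hub.RunConfig(
    use_cuda=True, num_epoch=3,
    strategy=strategy)                            # strategy hookup assumed

# Assumed wiring (elided by the diff viewer): fetch the pretrained program,
# put a classifier head on its pooled output, and hand it to the Finetune API.
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)
task = hub.create_text_cls_task(
    feature=outputs["pooled_output"], num_classes=2)
feed_list = [inputs["input_ids"].name, task.variable("label").name]
hub.finetune_and_eval(task, data_reader=reader, feed_list=feed_list, config=config)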
demo/ernie-seq-labeling/finetune_with_hub.py

@@ -40,11 +40,14 @@ args = parser.parse_args()
 # yapf: enable.

 if __name__ == '__main__':
+    # Select a finetune strategy
     strategy = hub.BERTFinetuneStrategy(
         weight_decay=args.weight_decay,
         learning_rate=args.learning_rate,
         warmup_strategy="linear_warmup_decay",
     )
+
+    # Setup runing config for PaddleHub Finetune API
     config = hub.RunConfig(
         eval_interval=100,
         use_cuda=True,
@@ -55,6 +58,7 @@ if __name__ == '__main__':
     # loading Paddlehub ERNIE
     module = hub.Module(name="ernie")

+    # Sequence Label dataset reader
     reader = hub.reader.SequenceLabelReader(
         dataset=hub.dataset.MSRA_NER(),
         vocab_path=module.get_vocab_path(),
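One detail worth noting for the sequence-labeling path: hub.dataset.MSRA_NER labels PER, ORG and LOC entities in the BIO scheme, giving seven tag ids in total. That is the hard-coded `7` passed to `chunk_eval` in paddlehub/finetune/finetune.py below, where `tag // 2` recovers the entity type, `tag % 2` distinguishes B from I, and the last id is the "O" tag. The exact id order in this sketch is an assumption chosen to match that arithmetic, not something the diff states:

# Assumed MSRA_NER tag layout (B tags even, I tags odd, "O" last), chosen to
# match chunk_eval's tag // 2 and tag % 2 decoding with tag_num = 7.
MSRA_NER_TAG_IDS = {
    "B-PER": 0, "I-PER": 1,
    "B-ORG": 2, "I-ORG": 3,
    "B-LOC": 4, "I-LOC": 5,
    "O": 6,  # null_index = tag_num - 1
}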
paddlehub/finetune/finetune.py

@@ -27,6 +27,8 @@ import numpy as np
 from paddlehub.common.logger import logger
 from paddlehub.finetune.strategy import BERTFinetuneStrategy, DefaultStrategy
 from paddlehub.finetune.checkpoint import load_checkpoint, save_checkpoint
+from paddlehub.finetune.evaluate import evaluate_cls_task, evaluate_seq_labeling_task
 from visualdl import LogWriter

 import paddlehub as hub
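This import is the crux of the refactor: the evaluation helpers deleted further down now live in paddlehub.finetune.evaluate. A sketch of the module boundary this creates; the signatures and return values are inferred from the call sites and deleted bodies in this file, not copied from evaluate.py:

# paddlehub/finetune/evaluate.py (interface sketch, inferred from call sites)
def evaluate_cls_task(task, data_reader, feed_list, phase="test", config=None):
    """Average loss/accuracy over one split; returns (avg_loss, avg_acc, eval_speed)."""
    ...

def evaluate_seq_labeling_task(task, data_reader, feed_list, phase="test", config=None):
    """Chunk-level precision/recall/F1 for BIO sequence labeling."""
    ...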
@@ -135,17 +137,17 @@ def _finetune_seq_label_task(task,
                 exe=exe)

             if do_eval and global_step % config.eval_interval == 0:
-                evaluate_seq_label(
+                evaluate_seq_label_task(
                     task,
                     data_reader,
                     feed_list,
-                    phase="dev",
+                    phase="test",
                     config=config)
-                evaluate_seq_label(
+                evaluate_seq_label_task(
                     task,
                     data_reader,
                     feed_list,
-                    phase="test",
+                    phase="dev",
                     config=config)
             # NOTE: current saved checkpoint machanism is not completed, it can't
@@ -157,52 +159,13 @@ def _finetune_seq_label_task(task,
         exe=exe)

     if do_eval:
-        evaluate_seq_label(
+        evaluate_seq_label_task(
             task, data_reader, feed_list, phase="dev", config=config)
+        evaluate_seq_label_task(
+            task, data_reader, feed_list, phase="test", config=config)
     logger.info("PaddleHub finetune finished.")


-def evaluate_seq_label(task, data_reader, feed_list, phase="test", config=None):
-    fetch_list = [
-        task.variable("labels").name,
-        task.variable("infers").name,
-        task.variable("seq_len").name,
-        task.variable("loss").name
-    ]
-    logger.info("Evaluation on {} dataset start".format(phase))
-    inference_program = task.inference_program()
-    batch_size = config.batch_size
-    place, dev_count = _get_running_device_info(config)
-    exe = fluid.Executor(place=place)
-    with fluid.program_guard(inference_program):
-        data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
-        num_eval_examples = acc_sum = loss_sum = 0
-        test_reader = data_reader.data_generator(
-            batch_size=batch_size, phase=phase)
-        eval_time_begin = time.time()
-        eval_step = 0
-        total_label, total_infer, total_correct = 0.0, 0.0, 0.0
-        for batch in test_reader():
-            num_batch_examples = len(batch)
-            eval_step += 1
-            np_labels, np_infers, np_lens, _ = exe.run(
-                feed=data_feeder.feed(batch), fetch_list=fetch_list)
-            label_num, infer_num, correct_num = chunk_eval(
-                np_labels, np_infers, np_lens, 7, dev_count)
-            total_infer += infer_num
-            total_label += label_num
-            total_correct += correct_num
-
-        precision, recall, f1 = calculate_f1(total_label, total_infer,
-                                             total_correct)
-        eval_time_used = time.time() - eval_time_begin
-        eval_speed = eval_step / eval_time_used
-    logger.info(
-        "[%s evaluation] F1-Score=%f, precision=%f, recall=%f [step/sec: %.2f]"
-        % (phase, f1, precision, recall, eval_speed))


 def _finetune_cls_task(task, data_reader, feed_list, config=None,
                        do_eval=False):
     main_program = task.main_program()
@@ -287,7 +250,7 @@ def _finetune_cls_task(task, data_reader, feed_list, config=None,
                     exe=exe)

             if do_eval and global_step % config.eval_interval == 0:
-                eval_loss, eval_acc, eval_perf = evaluate(
+                eval_loss, eval_acc, eval_perf = evaluate_cls_task(
                     task,
                     data_reader,
                     feed_list,
@@ -313,7 +276,8 @@ def _finetune_cls_task(task, data_reader, feed_list, config=None,
         exe=exe)

     if do_eval:
-        evaluate(task, data_reader, feed_list, phase="test", config=config)
+        evaluate_cls_task(
+            task, data_reader, feed_list, phase="test", config=config)
     logger.info("PaddleHub finetune finished.")
@@ -321,150 +285,10 @@ def finetune_and_eval(task, data_reader, feed_list, config=None):
     if task.task_type == "sequence_labeling":
         _finetune_seq_label_task(
             task, data_reader, feed_list, config, do_eval=True)
     # if it's image_classification and text classificaiton
     else:
         _finetune_cls_task(task, data_reader, feed_list, config, do_eval=True)


 def finetune(task, data_reader, feed_list, config=None):
     _finetune_cls_task(task, data_reader, feed_list, config, do_eval=False)


-def evaluate(task, data_reader, feed_list, phase="test", config=None):
-    logger.info("Evaluation on {} dataset start".format(phase))
-    inference_program = task.inference_program()
-    main_program = task.main_program()
-    loss = task.variable("loss")
-    accuracy = task.variable("accuracy")
-    batch_size = config.batch_size
-    place, dev_count = _get_running_device_info(config)
-    exe = fluid.Executor(place=place)
-    with fluid.program_guard(inference_program):
-        data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
-        num_eval_examples = acc_sum = loss_sum = 0
-        test_reader = data_reader.data_generator(
-            batch_size=batch_size, phase=phase)
-        eval_time_begin = time.time()
-        eval_step = 0
-        for batch in test_reader():
-            num_batch_examples = len(batch)
-            eval_step += 1
-            loss_v, accuracy_v = exe.run(
-                feed=data_feeder.feed(batch),
-                fetch_list=[loss.name, accuracy.name])
-            num_eval_examples += num_batch_examples
-            acc_sum += accuracy_v * num_batch_examples
-            loss_sum += loss_v * num_batch_examples
-        eval_time_used = time.time() - eval_time_begin
-
-        avg_loss = loss_sum / num_eval_examples
-        avg_acc = acc_sum / num_eval_examples
-        eval_speed = eval_step / eval_time_used
-    logger.info(
-        "[%s dataset evaluation result] loss=%.5f acc=%.5f [step/sec: %.2f]"
-        % (phase, avg_loss, avg_acc, eval_speed))
-
-    return avg_loss, avg_acc, eval_speed
-
-
-# Sequence label evaluation functions
-def chunk_eval(np_labels, np_infers, np_lens, tag_num, dev_count=1):
-    def extract_bio_chunk(seq):
-        chunks = []
-        cur_chunk = None
-        null_index = tag_num - 1
-        for index in range(len(seq)):
-            tag = seq[index]
-            tag_type = tag // 2
-            tag_pos = tag % 2
-
-            if tag == null_index:
-                if cur_chunk is not None:
-                    chunks.append(cur_chunk)
-                    cur_chunk = None
-                continue
-
-            if tag_pos == 0:
-                if cur_chunk is not None:
-                    chunks.append(cur_chunk)
-                    cur_chunk = {}
-                cur_chunk = {"st": index, "en": index + 1, "type": tag_type}
-            else:
-                if cur_chunk is None:
-                    cur_chunk = {"st": index, "en": index + 1, "type": tag_type}
-                    continue
-
-                if cur_chunk["type"] == tag_type:
-                    cur_chunk["en"] = index + 1
-                else:
-                    chunks.append(cur_chunk)
-                    cur_chunk = {"st": index, "en": index + 1, "type": tag_type}
-
-        if cur_chunk is not None:
-            chunks.append(cur_chunk)
-        return chunks
-
-    null_index = tag_num - 1
-    num_label = 0
-    num_infer = 0
-    num_correct = 0
-    labels = np_labels.reshape([-1]).astype(np.int32).tolist()
-    infers = np_infers.reshape([-1]).astype(np.int32).tolist()
-    all_lens = np_lens.reshape([dev_count, -1]).astype(np.int32).tolist()
-
-    base_index = 0
-    for dev_index in range(dev_count):
-        lens = all_lens[dev_index]
-        max_len = 0
-        for l in lens:
-            max_len = max(max_len, l)
-
-        for i in range(len(lens)):
-            seq_st = base_index + i * max_len + 1
-            seq_en = seq_st + (lens[i] - 2)
-            infer_chunks = extract_bio_chunk(infers[seq_st:seq_en])
-            label_chunks = extract_bio_chunk(labels[seq_st:seq_en])
-            num_infer += len(infer_chunks)
-            num_label += len(label_chunks)
-
-            infer_index = 0
-            label_index = 0
-            while label_index < len(label_chunks) \
-                    and infer_index < len(infer_chunks):
-                if infer_chunks[infer_index]["st"] \
-                        < label_chunks[label_index]["st"]:
-                    infer_index += 1
-                elif infer_chunks[infer_index]["st"] \
-                        > label_chunks[label_index]["st"]:
-                    label_index += 1
-                else:
-                    if infer_chunks[infer_index]["en"] \
-                            == label_chunks[label_index]["en"] \
-                            and infer_chunks[infer_index]["type"] \
-                            == label_chunks[label_index]["type"]:
-                        num_correct += 1
-                    infer_index += 1
-                    label_index += 1
-
-        base_index += max_len * len(lens)
-
-    return num_label, num_infer, num_correct
-
-
-def calculate_f1(num_label, num_infer, num_correct):
-    if num_infer == 0:
-        precision = 0.0
-    else:
-        precision = num_correct * 1.0 / num_infer
-
-    if num_label == 0:
-        recall = 0.0
-    else:
-        recall = num_correct * 1.0 / num_label
-
-    if num_correct == 0:
-        f1 = 0.0
-    else:
-        f1 = 2 * precision * recall / (precision + recall)
-    return precision, recall, f1
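The chunk-matching logic removed here (and relocated to paddlehub.finetune.evaluate) is easiest to sanity-check in isolation. The snippet below is a condensed, standalone re-statement of the deleted `extract_bio_chunk` rules plus the `calculate_f1` arithmetic, run on a toy pair of tag sequences; it mirrors the deleted helpers rather than importing the relocated ones, and uses the assumed 7-tag MSRA_NER layout from above:

def extract_bio_chunk(seq, tag_num=7):
    # Same rules as the deleted helper: even ids open a chunk (B-),
    # odd ids extend one (I-), and the last id (tag_num - 1) is "O".
    chunks, cur = [], None
    null_index = tag_num - 1
    for i, tag in enumerate(seq):
        tag_type, tag_pos = tag // 2, tag % 2
        if tag == null_index:                  # "O" closes any open chunk
            if cur is not None:
                chunks.append(cur)
                cur = None
            continue
        if tag_pos == 0:                       # B- tag starts a new chunk
            if cur is not None:
                chunks.append(cur)
            cur = {"st": i, "en": i + 1, "type": tag_type}
        elif cur is None or cur["type"] != tag_type:
            if cur is not None:                # dangling I- of a different type
                chunks.append(cur)
            cur = {"st": i, "en": i + 1, "type": tag_type}
        else:                                  # I- tag extends the open chunk
            cur["en"] = i + 1
    if cur is not None:
        chunks.append(cur)
    return chunks

# Gold: [B-PER, I-PER, O, B-LOC, O] -> two chunks
# Pred: [B-PER, I-PER, O, B-ORG, O] -> two chunks, one exact span+type match
gold = extract_bio_chunk([0, 1, 6, 4, 6])
pred = extract_bio_chunk([0, 1, 6, 2, 6])
num_correct = sum(1 for c in pred if c in gold)      # exact span+type match
precision = num_correct / len(pred)                  # 1/2
recall = num_correct / len(gold)                     # 1/2
f1 = 2 * precision * recall / (precision + recall)   # 0.5
print(precision, recall, f1)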
paddlehub/finetune/task.py

@@ -148,7 +148,7 @@ def create_img_classification_task(feature,
     return task


-def create_seq_labeling_task(feature, labels, seq_len, num_classes=None):
+def create_seq_labeling_task(feature, labels, seq_len, num_classes):
     logits = fluid.layers.fc(
         input=feature,
         size=num_classes,
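Making `num_classes` a required argument is a small API hardening: with the old default of `None`, a caller that omitted it would only fail later inside `fluid.layers.fc(..., size=None)` with an opaque shape error, whereas now the mistake surfaces as a TypeError at the call site. A sketch of the intended call, with argument values assumed for illustration (the 7 BIO tags of MSRA_NER); `sequence_output`, `labels`, and `seq_len` stand in for the demo's elided `module.context(...)` outputs:

# Hypothetical call; inputs come from demo code not shown in this diff.
task = create_seq_labeling_task(
    feature=sequence_output,   # assumed name for ERNIE's per-token features
    labels=labels,
    seq_len=seq_len,
    num_classes=7)             # MSRA_NER: B/I for PER, ORG, LOC plus "O"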