PaddlePaddle / PaddleHub

Commit ca3e9774
Authored on Dec 25, 2019 by kinghuin; committed by wuzewu on Dec 25, 2019.
Kinghuin optimpred (#280)
* finish coding predict
* optimize predict pr
* optimize _postprocessing
* optim namedtuple
Parent: 6ee63f7e
Showing 6 changed files with 93 additions and 75 deletions (+93, -75).
demo/multi-label-classification/predict.py             +12 -10
paddlehub/finetune/task/base_task.py                   +10 -1
paddlehub/finetune/task/classifier_task.py             +29 -0
paddlehub/finetune/task/reading_comprehension_task.py  +16 -64
paddlehub/finetune/task/regression_task.py             +7  -0
paddlehub/finetune/task/sequence_task.py               +19 -0
demo/multi-label-classification/predict.py
@@ -41,7 +41,7 @@ args = parser.parse_args()
 if __name__ == '__main__':
     # Load Paddlehub BERT pretrained model
-    module = hub.Module(name="ernie_eng_base.hub_module")
+    module = hub.Module(name="ernie_v2_eng_base")
     inputs, outputs, program = module.context(
         trainable=True, max_seq_len=args.max_seq_len)
@@ -97,12 +97,14 @@ if __name__ == '__main__':
-    index = 0
     run_states = multi_label_cls_task.predict(data=data)
-    results = [run_state.run_results for run_state in run_states]
-    for result in results:
-        label_ids = []
-        for i in range(dataset.num_labels):
-            label_val = np.argmax(result[i])
-            label_ids.append(label_val)
-        print("%s\tpredict=%s" % (data[index][0], label_ids))
-        index += 1
+    all_result = []
+    # get predict index
+    for batch_state in run_states:
+        batch_result = batch_state.run_results
+        for sample_id in range(len(batch_result[0])):
+            sample_result = []
+            for category_id in range(dataset.num_labels):
+                sample_category_prob = batch_result[category_id][sample_id]
+                sample_result.append(np.argmax(sample_category_prob))
+            all_result.append(sample_result)
+    print(all_result)
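For reference, the rewritten loop can be exercised outside PaddleHub on fabricated data. A toy reproduction with invented probabilities (a real batch_result is one run_state.run_results: one probability array per label head, one row per sample):

import numpy as np

# Fake fetches for one batch: three binary label heads, two samples.
batch_result = [
    np.array([[0.9, 0.1], [0.2, 0.8]]),  # head 0: probs for samples 0 and 1
    np.array([[0.3, 0.7], [0.6, 0.4]]),  # head 1
    np.array([[0.5, 0.5], [0.1, 0.9]]),  # head 2
]
num_labels = len(batch_result)  # stands in for dataset.num_labels

all_result = []
for sample_id in range(len(batch_result[0])):
    sample_result = []
    for category_id in range(num_labels):
        # pick the most probable class of this head for this sample
        sample_category_prob = batch_result[category_id][sample_id]
        sample_result.append(int(np.argmax(sample_category_prob)))
    all_result.append(sample_result)

print(all_result)  # [[0, 1, 0], [1, 0, 1]]

The output is grouped per sample rather than per label head, which is what the old result[i] indexing got wrong.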
paddlehub/finetune/task/base_task.py
@@ -767,7 +767,7 @@ class BaseTask(object):
         self._eval_end_event(run_states)
         return run_states

-    def predict(self, data, load_best_model=True):
+    def predict(self, data, load_best_model=True, return_result=False):
         with self.phase_guard(phase="predict"):
             if load_best_model:
                 self.init_if_load_best_model()
@@ -778,8 +778,17 @@ class BaseTask(object):
             run_states = self._run()
             self._predict_end_event(run_states)
             self._predict_data = None
+            if return_result:
+                return self._postprocessing(run_states)
             return run_states

+    def _postprocessing(self, run_states):
+        results = []
+        for batch_state in run_states:
+            batch_result = batch_state.run_results[0]
+            results += [result[0] for result in batch_result]
+        return results
+
     def _run(self, do_eval=False):
         with fluid.program_guard(self.main_program, self.startup_program):
             if self.config.use_pyreader:
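A minimal sketch of the control flow predict() gains here. RunState and DemoTask are illustrative stand-ins, not PaddleHub classes, and the fetched numbers are invented:

from collections import namedtuple

RunState = namedtuple("RunState", ["run_results"])

class DemoTask:
    def _run(self):
        # Pretend two batches were fetched; each batch's first fetch is a
        # list of per-sample one-element results, as in the base task.
        return [RunState(run_results=[[[0.12], [0.97]]]),
                RunState(run_results=[[[0.55]]])]

    def predict(self, load_best_model=True, return_result=False):
        run_states = self._run()
        if return_result:
            return self._postprocessing(run_states)
        return run_states

    def _postprocessing(self, run_states):
        # Mirrors the new BaseTask._postprocessing: flatten each batch's
        # first fetch, keeping each sample's first element.
        results = []
        for batch_state in run_states:
            batch_result = batch_state.run_results[0]
            results += [result[0] for result in batch_result]
        return results

task = DemoTask()
print(task.predict(return_result=True))  # [0.12, 0.97, 0.55]
print(len(task.predict()))               # 2 raw run states, as before

Since return_result defaults to False, existing callers that consume raw run states are unaffected; subclasses override _postprocessing to return task-appropriate results.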
paddlehub/finetune/task/classifier_task.py
@@ -134,6 +134,22 @@ class ClassifierTask(BaseTask):
         return scores, avg_loss, run_speed

+    def _postprocessing(self, run_states):
+        try:
+            id2label = {
+                val: key
+                for key, val in self._base_data_reader.label_map.items()
+            }
+        except:
+            raise Exception(
+                "image-classification does not support return_result now")
+        results = []
+        for batch_state in run_states:
+            batch_result = batch_state.run_results
+            batch_infer = np.argmax(batch_result, axis=2)[0]
+            results += [id2label[sample_infer] for sample_infer in batch_infer]
+        return results
+

 ImageClassifierTask = ClassifierTask
@@ -301,3 +317,16 @@ class MultiLabelClassifierTask(ClassifierTask):
         if self.is_train_phase or self.is_test_phase:
             return [metric.name for metric in self.metrics] + [self.loss.name]
         return self.outputs
+
+    def _postprocessing(self, run_states):
+        results = []
+        for batch_state in run_states:
+            batch_result = batch_state.run_results
+            for sample_id in range(len(batch_result[0])):
+                sample_result = []
+                for category_id in range(
+                        self._base_data_reader.dataset.num_labels):
+                    sample_category_prob = batch_result[category_id][sample_id]
+                    sample_result.append(np.argmax(sample_category_prob))
+                results.append(sample_result)
+        return results
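A toy walk-through of the new ClassifierTask._postprocessing, assuming a two-class label_map of the kind an NLP reader carries (the probabilities are made up):

import numpy as np

label_map = {"negative": 0, "positive": 1}  # label -> id, reader-style
id2label = {val: key for key, val in label_map.items()}

# One batch's run_results: shape (1, num_samples, num_classes).
batch_result = np.array([[[0.2, 0.8], [0.7, 0.3]]])
batch_infer = np.argmax(batch_result, axis=2)[0]  # -> array([1, 0])
print([id2label[i] for i in batch_infer])         # ['positive', 'negative']

The try/except in the diff presumably exists because image-classification readers carry no label_map, hence the explicit "not supported" error.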
paddlehub/finetune/task/reading_comprehension_task.py
@@ -171,18 +171,15 @@ def get_final_text(pred_text, orig_text, do_lower_case, is_english):
     return output_text


-def write_predictions(all_examples, all_features, all_results, n_best_size,
-                      max_answer_length, do_lower_case, output_prediction_file,
-                      output_nbest_file, output_null_log_odds_file,
-                      version_2_with_negative, null_score_diff_threshold,
-                      is_english):
+def get_predictions(all_examples, all_features, all_results, n_best_size,
+                    max_answer_length, do_lower_case, version_2_with_negative,
+                    null_score_diff_threshold, is_english):
     _PrelimPrediction = collections.namedtuple("PrelimPrediction", [
         "feature_index", "start_index", "end_index", "start_logit", "end_logit"
     ])
     _NbestPrediction = collections.namedtuple(
         "NbestPrediction", ["text", "start_logit", "end_logit"])

     example_index_to_features = collections.defaultdict(list)
     for feature in all_features:
         example_index_to_features[feature.example_index].append(feature)
@@ -363,25 +360,8 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
             all_predictions[example.qas_id] = best_non_null_entry.text
         all_nbest_json[example.qas_id] = nbest_json

-    """Write final predictions to the json file and log-odds of null if needed."""
-    with open(output_prediction_file, "w") as writer:
-        logger.info("Writing predictions to: %s" % (output_prediction_file))
-        writer.write(
-            json.dumps(all_predictions, indent=4, ensure_ascii=is_english) +
-            "\n")
-    with open(output_nbest_file, "w") as writer:
-        logger.info("Writing nbest to: %s" % (output_nbest_file))
-        writer.write(
-            json.dumps(all_nbest_json, indent=4, ensure_ascii=is_english) +
-            "\n")
-    if version_2_with_negative:
-        logger.info("Writing null_log_odds to: %s" % (output_nbest_file))
-        with open(output_null_log_odds_file, "w") as writer:
-            writer.write(
-                json.dumps(scores_diff_json, indent=4, ensure_ascii=is_english)
-                + "\n")
+    return all_predictions, all_nbest_json, scores_diff_json


 class ReadingComprehensionTask(BaseTask):
@@ -419,6 +399,8 @@ class ReadingComprehensionTask(BaseTask):
         self.null_score_diff_threshold = null_score_diff_threshold
         self.n_best_size = n_best_size
         self.max_answer_length = max_answer_length
-        self.RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])
+        self.RawResult = collections.namedtuple(
+            "RawResult", ["unique_id", "start_logits", "end_logits"])
@@ -522,24 +504,15 @@ class ReadingComprehensionTask(BaseTask):
         scores = OrderedDict()
         # If none of metrics has been implemented, loss will be used to evaluate.
         if self.is_test_phase:
-            output_prediction_file = os.path.join(self.config.checkpoint_dir,
-                                                  "predictions.json")
-            output_nbest_file = os.path.join(self.config.checkpoint_dir,
-                                             "nbest_predictions.json")
-            output_null_log_odds_file = os.path.join(self.config.checkpoint_dir,
-                                                     "null_odds.json")
             all_examples = self.data_reader.all_examples[self.phase]
             all_features = self.data_reader.all_features[self.phase]
-            write_predictions(
+            all_predictions, all_nbest_json, scores_diff_json = get_predictions(
                 all_examples=all_examples,
                 all_features=all_features,
                 all_results=all_results,
                 n_best_size=self.n_best_size,
                 max_answer_length=self.max_answer_length,
                 do_lower_case=True,
-                output_prediction_file=output_prediction_file,
-                output_nbest_file=output_nbest_file,
-                output_null_log_odds_file=output_null_log_odds_file,
                 version_2_with_negative=self.version_2_with_negative,
                 null_score_diff_threshold=self.null_score_diff_threshold,
                 is_english=self.is_english)
@@ -558,25 +531,17 @@ class ReadingComprehensionTask(BaseTask):
         else:
             raise Exception("Error phase: %s when runing _calculate_metrics"
                             % self.phase)
-        with open(output_prediction_file, 'r',
-                  encoding="utf8") as prediction_file:
-            predictions = json.load(prediction_file)
         if self.sub_task == "squad":
-            scores = squad1_evaluate.evaluate(dataset, predictions)
+            scores = squad1_evaluate.evaluate(dataset, all_predictions)
         elif self.sub_task == "squad2.0":
-            with open(
-                    output_null_log_odds_file, 'r',
-                    encoding="utf8") as odds_file:
-                na_probs = json.load(odds_file)
-            scores = squad2_evaluate.evaluate(dataset, predictions, na_probs)
+            scores = squad2_evaluate.evaluate(dataset, all_predictions,
+                                              scores_diff_json)
         elif self.sub_task in ["cmrc2018", "drcd"]:
-            scores = cmrc2018_evaluate.get_eval(dataset, predictions)
+            scores = cmrc2018_evaluate.get_eval(dataset, all_predictions)
         return scores, avg_loss, run_speed

-    def _default_predict_end_event(self, run_states):
+    def _postprocessing(self, run_states):
         all_results = []
         for run_state in run_states:
             np_unique_ids = run_state.run_results[0]
@@ -591,29 +556,16 @@ class ReadingComprehensionTask(BaseTask):
                     unique_id=unique_id,
                     start_logits=start_logits,
                     end_logits=end_logits))
-        # If none of metrics has been implemented, loss will be used to evaluate.
-        output_prediction_file = os.path.join(self.config.checkpoint_dir,
-                                              "predict_predictions.json")
-        output_nbest_file = os.path.join(self.config.checkpoint_dir,
-                                         "predict_nbest_predictions.json")
-        output_null_log_odds_file = os.path.join(self.config.checkpoint_dir,
-                                                 "predict_null_odds.json")
         all_examples = self.data_reader.all_examples[self.phase]
         all_features = self.data_reader.all_features[self.phase]
-        write_predictions(
+        all_predictions, all_nbest_json, scores_diff_json = get_predictions(
             all_examples=all_examples,
             all_features=all_features,
             all_results=all_results,
             n_best_size=self.n_best_size,
             max_answer_length=self.max_answer_length,
             do_lower_case=True,
-            output_prediction_file=output_prediction_file,
-            output_nbest_file=output_nbest_file,
-            output_null_log_odds_file=output_null_log_odds_file,
             version_2_with_negative=self.version_2_with_negative,
             null_score_diff_threshold=self.null_score_diff_threshold,
             is_english=self.is_english)
-        logger.info("PaddleHub predict finished.")
-        logger.info("You can see the prediction in %s" % output_prediction_file)
+        return all_predictions
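Since get_predictions() now returns (all_predictions, all_nbest_json, scores_diff_json) instead of writing JSON files itself, a caller that still wants the old on-disk artifacts can dump the returned objects directly. A sketch, assuming the same ensure_ascii=is_english convention as the removed write_predictions() code:

import json

def dump_predictions(all_predictions, output_prediction_file, is_english=True):
    # ensure_ascii=is_english keeps non-English answers human-readable,
    # mirroring the removed writer code.
    with open(output_prediction_file, "w") as writer:
        writer.write(
            json.dumps(all_predictions, indent=4, ensure_ascii=is_english)
            + "\n")

dump_predictions({"qid-1": "an answer"}, "predictions.json")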
paddlehub/finetune/task/regression_task.py
@@ -120,3 +120,10 @@ class RegressionTask(BaseTask):
         else:
             raise ValueError("Not Support Metric: \"%s\"" % metric)
         return scores, avg_loss, run_speed
+
+    def _postprocessing(self, run_states):
+        results = []
+        for batch_state in run_states:
+            batch_result = batch_state.run_results[0]
+            results += [result[0] for result in batch_result]
+        return results
paddlehub/finetune/task/sequence_task.py
@@ -216,3 +216,22 @@ class SequenceLabelTask(BaseTask):
         elif self.is_predict_phase:
             return [self.ret_infers.name] + [self.seq_len.name]
         return [output.name for output in self.outputs]
+
+    def _postprocessing(self, run_states):
+        id2label = {
+            val: key
+            for key, val in self._base_data_reader.label_map.items()
+        }
+        results = []
+        for batch_states in run_states:
+            batch_results = batch_states.run_results
+            batch_infers = batch_results[0].reshape([-1]).astype(
+                np.int32).tolist()
+            seq_lens = batch_results[1].reshape([-1]).astype(np.int32).tolist()
+            current_id = 0
+            for length in seq_lens:
+                seq_infers = batch_infers[current_id:current_id + length]
+                seq_result = list(map(id2label.get, seq_infers[1:-1]))
+                current_id += int(length)
+                results.append(seq_result)
+        return results
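To see what SequenceLabelTask._postprocessing produces, here is a self-contained version with an invented BIO label_map and predictions; seq_infers[1:-1] drops the [CLS]/[SEP] positions at either end of each sequence:

import numpy as np

label_map = {"B-PER": 0, "I-PER": 1, "O": 2}  # label -> id, reader-style
id2label = {val: key for key, val in label_map.items()}

# Flat per-token predictions for two sequences of lengths 5 and 4
# (including [CLS]/[SEP]), as batch_results[0] looks after reshape([-1]).
batch_infers = np.array([2, 0, 1, 2, 2, 2, 2, 0, 2]).tolist()
seq_lens = [5, 4]

results, current_id = [], 0
for length in seq_lens:
    seq_infers = batch_infers[current_id:current_id + length]
    results.append(list(map(id2label.get, seq_infers[1:-1])))
    current_id += int(length)

print(results)  # [['B-PER', 'I-PER', 'O'], ['O', 'B-PER']]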