Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
b3b8cb0f
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
大约 2 年 前同步成功
通知
285
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b3b8cb0f
编写于
5月 10, 2020
作者:
S
Steffy-zxf
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add preset network (biltsm, bow, cnn, dpcnn, gru, lstm) for text classification task
上级
ae9edc1c
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
74 addition
and
49 deletion
+74
-49
paddlehub/__init__.py
paddlehub/__init__.py
+1
-0
paddlehub/finetune/__init__.py
paddlehub/finetune/__init__.py
+2
-2
paddlehub/finetune/task/base_task.py
paddlehub/finetune/task/base_task.py
+9
-32
paddlehub/finetune/task/classifier_task.py
paddlehub/finetune/task/classifier_task.py
+55
-9
paddlehub/reader/nlp_reader.py
paddlehub/reader/nlp_reader.py
+7
-6
未找到文件。
paddlehub/__init__.py
浏览文件 @
b3b8cb0f
...
@@ -28,6 +28,7 @@ from . import io
...
@@ -28,6 +28,7 @@ from . import io
from
.
import
dataset
from
.
import
dataset
from
.
import
finetune
from
.
import
finetune
from
.
import
reader
from
.
import
reader
from
.
import
network
from
.common.dir
import
USER_HOME
from
.common.dir
import
USER_HOME
from
.common.dir
import
HUB_HOME
from
.common.dir
import
HUB_HOME
...
...
paddlehub/finetune/__init__.py
浏览文件 @
b3b8cb0f
#coding:utf-8
#
coding:utf-8
#
Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License"
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
...
...
paddlehub/finetune/task/base_task.py
浏览文件 @
b3b8cb0f
...
@@ -344,10 +344,6 @@ class BaseTask(object):
...
@@ -344,10 +344,6 @@ class BaseTask(object):
# set default phase
# set default phase
self
.
enter_phase
(
"train"
)
self
.
enter_phase
(
"train"
)
@
property
def
base_main_program
(
self
):
return
self
.
_base_main_program
@
contextlib
.
contextmanager
@
contextlib
.
contextmanager
def
phase_guard
(
self
,
phase
):
def
phase_guard
(
self
,
phase
):
self
.
enter_phase
(
phase
)
self
.
enter_phase
(
phase
)
...
@@ -397,7 +393,7 @@ class BaseTask(object):
...
@@ -397,7 +393,7 @@ class BaseTask(object):
self
.
_build_env_start_event
()
self
.
_build_env_start_event
()
self
.
env
.
is_inititalized
=
True
self
.
env
.
is_inititalized
=
True
self
.
env
.
main_program
=
clone_program
(
self
.
env
.
main_program
=
clone_program
(
self
.
base_main_program
,
for_test
=
False
)
self
.
_
base_main_program
,
for_test
=
False
)
self
.
env
.
startup_program
=
fluid
.
Program
()
self
.
env
.
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
self
.
env
.
main_program
,
with
fluid
.
program_guard
(
self
.
env
.
main_program
,
...
@@ -410,7 +406,6 @@ class BaseTask(object):
...
@@ -410,7 +406,6 @@ class BaseTask(object):
self
.
env
.
metrics
=
self
.
_add_metrics
()
self
.
env
.
metrics
=
self
.
_add_metrics
()
if
self
.
is_predict_phase
or
self
.
is_test_phase
:
if
self
.
is_predict_phase
or
self
.
is_test_phase
:
# Todo: paddle.fluid.core_avx.EnforceNotMet: Getting 'tensor_desc' is not supported by the type of var kCUDNNFwdAlgoCache. at
self
.
env
.
main_program
=
clone_program
(
self
.
env
.
main_program
=
clone_program
(
self
.
env
.
main_program
,
for_test
=
True
)
self
.
env
.
main_program
,
for_test
=
True
)
hub
.
common
.
paddle_helper
.
set_op_attr
(
hub
.
common
.
paddle_helper
.
set_op_attr
(
...
@@ -1063,10 +1058,8 @@ class BaseTask(object):
...
@@ -1063,10 +1058,8 @@ class BaseTask(object):
capacity
=
64
,
capacity
=
64
,
use_double_buffer
=
True
,
use_double_buffer
=
True
,
iterable
=
True
)
iterable
=
True
)
data_reader
=
data_loader
.
set_sample_list_generator
(
data_reader
=
data_loader
.
set_batch_generator
(
self
.
reader
,
self
.
places
)
self
.
reader
,
places
=
self
.
places
)
# data_reader = data_loader.set_batch_generator(
# self.reader, places=self.places)
else
:
else
:
data_feeder
=
fluid
.
DataFeeder
(
data_feeder
=
fluid
.
DataFeeder
(
feed_list
=
self
.
feed_list
,
place
=
self
.
place
)
feed_list
=
self
.
feed_list
,
place
=
self
.
place
)
...
@@ -1083,28 +1076,12 @@ class BaseTask(object):
...
@@ -1083,28 +1076,12 @@ class BaseTask(object):
step_run_state
.
run_step
=
1
step_run_state
.
run_step
=
1
num_batch_examples
=
len
(
batch
)
num_batch_examples
=
len
(
batch
)
if
self
.
return_numpy
==
2
:
fetch_result
=
self
.
exe
.
run
(
fetch_result
=
self
.
exe
.
run
(
self
.
main_program_to_be_run
,
self
.
main_program_to_be_run
,
feed
=
batch
,
feed
=
batch
,
fetch_list
=
self
.
fetch_list
,
fetch_list
=
self
.
fetch_list
,
return_numpy
=
self
.
return_numpy
)
return_numpy
=
False
)
if
not
self
.
return_numpy
:
# fetch_result = [x if isinstance(x,fluid.LoDTensor) else np.array(x) for x in fetch_result]
fetch_result
=
[
x
if
hasattr
(
x
,
'recursive_sequence_lengths'
)
else
np
.
array
(
x
)
for
x
in
fetch_result
]
elif
self
.
return_numpy
:
fetch_result
=
self
.
exe
.
run
(
self
.
main_program_to_be_run
,
feed
=
batch
,
fetch_list
=
self
.
fetch_list
)
else
:
fetch_result
=
self
.
exe
.
run
(
self
.
main_program_to_be_run
,
feed
=
batch
,
fetch_list
=
self
.
fetch_list
,
return_numpy
=
False
)
fetch_result
=
[
np
.
array
(
x
)
for
x
in
fetch_result
]
fetch_result
=
[
np
.
array
(
x
)
for
x
in
fetch_result
]
for
index
,
result
in
enumerate
(
fetch_result
):
for
index
,
result
in
enumerate
(
fetch_result
):
...
...
paddlehub/finetune/task/classifier_task.py
浏览文件 @
b3b8cb0f
...
@@ -20,9 +20,12 @@ from __future__ import print_function
...
@@ -20,9 +20,12 @@ from __future__ import print_function
import
time
import
time
from
collections
import
OrderedDict
from
collections
import
OrderedDict
import
numpy
as
np
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddlehub.finetune.evaluate
import
calculate_f1_np
,
matthews_corrcoef
from
paddlehub.finetune.evaluate
import
calculate_f1_np
,
matthews_corrcoef
from
paddlehub.common.utils
import
version_compare
import
paddlehub.network
as
net
from
.base_task
import
BaseTask
from
.base_task
import
BaseTask
...
@@ -104,7 +107,7 @@ class ClassifierTask(BaseTask):
...
@@ -104,7 +107,7 @@ class ClassifierTask(BaseTask):
run_examples
+=
run_state
.
run_examples
run_examples
+=
run_state
.
run_examples
run_step
+=
run_state
.
run_step
run_step
+=
run_state
.
run_step
loss_sum
+=
np
.
mean
(
loss_sum
+=
np
.
mean
(
run_state
.
run_results
[
-
1
])
*
run_state
.
run_examples
run_state
.
run_results
[
-
2
])
*
run_state
.
run_examples
acc_sum
+=
np
.
mean
(
acc_sum
+=
np
.
mean
(
run_state
.
run_results
[
2
])
*
run_state
.
run_examples
run_state
.
run_results
[
2
])
*
run_state
.
run_examples
np_labels
=
run_state
.
run_results
[
0
]
np_labels
=
run_state
.
run_results
[
0
]
...
@@ -161,6 +164,7 @@ class TextClassifierTask(ClassifierTask):
...
@@ -161,6 +164,7 @@ class TextClassifierTask(ClassifierTask):
num_classes
,
num_classes
,
feed_list
,
feed_list
,
data_reader
,
data_reader
,
network
=
None
,
startup_program
=
None
,
startup_program
=
None
,
config
=
None
,
config
=
None
,
hidden_units
=
None
,
hidden_units
=
None
,
...
@@ -168,6 +172,7 @@ class TextClassifierTask(ClassifierTask):
...
@@ -168,6 +172,7 @@ class TextClassifierTask(ClassifierTask):
if
metrics_choices
==
"default"
:
if
metrics_choices
==
"default"
:
metrics_choices
=
[
"acc"
]
metrics_choices
=
[
"acc"
]
self
.
network
=
network
super
(
TextClassifierTask
,
self
).
__init__
(
super
(
TextClassifierTask
,
self
).
__init__
(
data_reader
=
data_reader
,
data_reader
=
data_reader
,
feature
=
feature
,
feature
=
feature
,
...
@@ -177,17 +182,42 @@ class TextClassifierTask(ClassifierTask):
...
@@ -177,17 +182,42 @@ class TextClassifierTask(ClassifierTask):
config
=
config
,
config
=
config
,
hidden_units
=
hidden_units
,
hidden_units
=
hidden_units
,
metrics_choices
=
metrics_choices
)
metrics_choices
=
metrics_choices
)
if
self
.
network
:
assert
self
.
network
in
[
'bilstm'
,
'bow'
,
'cnn'
,
'dpcnn'
,
'gru'
,
'lstm'
],
'network choice must be one of bilstm, bow, cnn, dpcnn, gru, lstm!'
assert
len
(
self
.
feature
.
shape
)
==
3
,
'The sequnece_output must be choosed rather than pooled_output of Transformer Model (ERNIE, BERT, RoBERTa and ELECTRA)!'
def
_build_net
(
self
):
def
_build_net
(
self
):
cls_feats
=
fluid
.
layers
.
dropout
(
self
.
seq_len
=
fluid
.
layers
.
data
(
x
=
self
.
feature
,
name
=
"seq_len"
,
shape
=
[
1
],
dtype
=
'int64'
,
lod_level
=
0
)
dropout_prob
=
0.1
,
dropout_implementation
=
"upscale_in_train"
)
if
self
.
hidden_units
is
not
None
:
if
version_compare
(
paddle
.
__version__
,
"1.6"
):
for
n_hidden
in
self
.
hidden_units
:
self
.
seq_len_used
=
fluid
.
layers
.
squeeze
(
self
.
seq_len
,
axes
=
[
1
])
cls_feats
=
fluid
.
layers
.
fc
(
else
:
input
=
cls_feats
,
size
=
n_hidden
,
act
=
"relu"
)
self
.
seq_len_used
=
self
.
seq_len
unpad_feature
=
fluid
.
layers
.
sequence_unpad
(
self
.
feature
,
length
=
self
.
seq_len_used
)
if
self
.
network
:
net_func
=
getattr
(
net
.
classification
,
self
.
network
)
if
self
.
network
==
'dpcnn'
:
cls_feats
=
net_func
(
self
.
feature
)
else
:
cls_feats
=
net_func
(
unpad_feature
)
else
:
cls_feats
=
fluid
.
layers
.
dropout
(
x
=
self
.
feature
,
dropout_prob
=
0.1
,
dropout_implementation
=
"upscale_in_train"
)
if
self
.
hidden_units
is
not
None
:
for
n_hidden
in
self
.
hidden_units
:
cls_feats
=
fluid
.
layers
.
fc
(
input
=
cls_feats
,
size
=
n_hidden
,
act
=
"relu"
)
logits
=
fluid
.
layers
.
fc
(
logits
=
fluid
.
layers
.
fc
(
input
=
cls_feats
,
input
=
cls_feats
,
...
@@ -204,6 +234,22 @@ class TextClassifierTask(ClassifierTask):
...
@@ -204,6 +234,22 @@ class TextClassifierTask(ClassifierTask):
return
[
logits
]
return
[
logits
]
@
property
def
feed_list
(
self
):
feed_list
=
self
.
_base_feed_list
+
[
self
.
seq_len
.
name
]
if
self
.
is_train_phase
or
self
.
is_test_phase
:
feed_list
+=
[
self
.
labels
[
0
].
name
]
return
feed_list
@
property
def
fetch_list
(
self
):
if
self
.
is_train_phase
or
self
.
is_test_phase
:
return
[
self
.
labels
[
0
].
name
,
self
.
ret_infers
.
name
,
self
.
metrics
[
0
].
name
,
self
.
loss
.
name
,
self
.
seq_len
.
name
]
return
[
self
.
outputs
[
0
].
name
,
self
.
seq_len
.
name
]
class
MultiLabelClassifierTask
(
ClassifierTask
):
class
MultiLabelClassifierTask
(
ClassifierTask
):
def
__init__
(
self
,
def
__init__
(
self
,
...
...
paddlehub/reader/nlp_reader.py
浏览文件 @
b3b8cb0f
...
@@ -272,11 +272,12 @@ class ClassifyReader(BaseNLPReader):
...
@@ -272,11 +272,12 @@ class ClassifyReader(BaseNLPReader):
batch_text_type_ids
=
[
record
.
text_type_ids
for
record
in
batch_records
]
batch_text_type_ids
=
[
record
.
text_type_ids
for
record
in
batch_records
]
batch_position_ids
=
[
record
.
position_ids
for
record
in
batch_records
]
batch_position_ids
=
[
record
.
position_ids
for
record
in
batch_records
]
padded_token_ids
,
input_mask
=
pad_batch_data
(
padded_token_ids
,
input_mask
,
batch_seq_lens
=
pad_batch_data
(
batch_token_ids
,
batch_token_ids
,
max_seq_len
=
self
.
max_seq_len
,
max_seq_len
=
self
.
max_seq_len
,
pad_idx
=
self
.
pad_id
,
pad_idx
=
self
.
pad_id
,
return_input_mask
=
True
)
return_input_mask
=
True
,
return_seq_lens
=
True
)
padded_text_type_ids
=
pad_batch_data
(
padded_text_type_ids
=
pad_batch_data
(
batch_text_type_ids
,
batch_text_type_ids
,
max_seq_len
=
self
.
max_seq_len
,
max_seq_len
=
self
.
max_seq_len
,
...
@@ -293,7 +294,7 @@ class ClassifyReader(BaseNLPReader):
...
@@ -293,7 +294,7 @@ class ClassifyReader(BaseNLPReader):
return_list
=
[
return_list
=
[
padded_token_ids
,
padded_position_ids
,
padded_text_type_ids
,
padded_token_ids
,
padded_position_ids
,
padded_text_type_ids
,
input_mask
,
batch_labels
input_mask
,
batch_
seq_lens
,
batch_
labels
]
]
if
self
.
use_task_id
:
if
self
.
use_task_id
:
...
@@ -301,12 +302,12 @@ class ClassifyReader(BaseNLPReader):
...
@@ -301,12 +302,12 @@ class ClassifyReader(BaseNLPReader):
padded_token_ids
,
dtype
=
"int64"
)
*
self
.
task_id
padded_token_ids
,
dtype
=
"int64"
)
*
self
.
task_id
return_list
=
[
return_list
=
[
padded_token_ids
,
padded_position_ids
,
padded_text_type_ids
,
padded_token_ids
,
padded_position_ids
,
padded_text_type_ids
,
input_mask
,
padded_task_ids
,
batch_labels
input_mask
,
padded_task_ids
,
batch_
seq_lens
,
batch_
labels
]
]
else
:
else
:
return_list
=
[
return_list
=
[
padded_token_ids
,
padded_position_ids
,
padded_text_type_ids
,
padded_token_ids
,
padded_position_ids
,
padded_text_type_ids
,
input_mask
input_mask
,
batch_seq_lens
]
]
if
self
.
use_task_id
:
if
self
.
use_task_id
:
...
@@ -314,7 +315,7 @@ class ClassifyReader(BaseNLPReader):
...
@@ -314,7 +315,7 @@ class ClassifyReader(BaseNLPReader):
padded_token_ids
,
dtype
=
"int64"
)
*
self
.
task_id
padded_token_ids
,
dtype
=
"int64"
)
*
self
.
task_id
return_list
=
[
return_list
=
[
padded_token_ids
,
padded_position_ids
,
padded_text_type_ids
,
padded_token_ids
,
padded_position_ids
,
padded_text_type_ids
,
input_mask
,
padded_task_ids
input_mask
,
padded_task_ids
,
batch_seq_lens
]
]
return
return_list
return
return_list
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录