Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
b3b8cb0f
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
大约 1 年 前同步成功
通知
280
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
b3b8cb0f
编写于
5月 10, 2020
作者:
S
Steffy-zxf
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add preset network (biltsm, bow, cnn, dpcnn, gru, lstm) for text classification task
上级
ae9edc1c
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
74 addition
and
49 deletion
+74
-49
paddlehub/__init__.py
paddlehub/__init__.py
+1
-0
paddlehub/finetune/__init__.py
paddlehub/finetune/__init__.py
+2
-2
paddlehub/finetune/task/base_task.py
paddlehub/finetune/task/base_task.py
+9
-32
paddlehub/finetune/task/classifier_task.py
paddlehub/finetune/task/classifier_task.py
+55
-9
paddlehub/reader/nlp_reader.py
paddlehub/reader/nlp_reader.py
+7
-6
未找到文件。
paddlehub/__init__.py
浏览文件 @
b3b8cb0f
...
...
@@ -28,6 +28,7 @@ from . import io
from
.
import
dataset
from
.
import
finetune
from
.
import
reader
from
.
import
network
from
.common.dir
import
USER_HOME
from
.common.dir
import
HUB_HOME
...
...
paddlehub/finetune/__init__.py
浏览文件 @
b3b8cb0f
#coding:utf-8
#
Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
...
...
paddlehub/finetune/task/base_task.py
浏览文件 @
b3b8cb0f
...
...
@@ -344,10 +344,6 @@ class BaseTask(object):
# set default phase
self
.
enter_phase
(
"train"
)
@
property
def
base_main_program
(
self
):
return
self
.
_base_main_program
@
contextlib
.
contextmanager
def
phase_guard
(
self
,
phase
):
self
.
enter_phase
(
phase
)
...
...
@@ -397,7 +393,7 @@ class BaseTask(object):
self
.
_build_env_start_event
()
self
.
env
.
is_inititalized
=
True
self
.
env
.
main_program
=
clone_program
(
self
.
base_main_program
,
for_test
=
False
)
self
.
_
base_main_program
,
for_test
=
False
)
self
.
env
.
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
self
.
env
.
main_program
,
...
...
@@ -410,7 +406,6 @@ class BaseTask(object):
self
.
env
.
metrics
=
self
.
_add_metrics
()
if
self
.
is_predict_phase
or
self
.
is_test_phase
:
# Todo: paddle.fluid.core_avx.EnforceNotMet: Getting 'tensor_desc' is not supported by the type of var kCUDNNFwdAlgoCache. at
self
.
env
.
main_program
=
clone_program
(
self
.
env
.
main_program
,
for_test
=
True
)
hub
.
common
.
paddle_helper
.
set_op_attr
(
...
...
@@ -1063,10 +1058,8 @@ class BaseTask(object):
capacity
=
64
,
use_double_buffer
=
True
,
iterable
=
True
)
data_reader
=
data_loader
.
set_sample_list_generator
(
self
.
reader
,
self
.
places
)
# data_reader = data_loader.set_batch_generator(
# self.reader, places=self.places)
data_reader
=
data_loader
.
set_batch_generator
(
self
.
reader
,
places
=
self
.
places
)
else
:
data_feeder
=
fluid
.
DataFeeder
(
feed_list
=
self
.
feed_list
,
place
=
self
.
place
)
...
...
@@ -1083,28 +1076,12 @@ class BaseTask(object):
step_run_state
.
run_step
=
1
num_batch_examples
=
len
(
batch
)
if
self
.
return_numpy
==
2
:
fetch_result
=
self
.
exe
.
run
(
self
.
main_program_to_be_run
,
feed
=
batch
,
fetch_list
=
self
.
fetch_list
,
return_numpy
=
False
)
# fetch_result = [x if isinstance(x,fluid.LoDTensor) else np.array(x) for x in fetch_result]
fetch_result
=
[
x
if
hasattr
(
x
,
'recursive_sequence_lengths'
)
else
np
.
array
(
x
)
for
x
in
fetch_result
]
elif
self
.
return_numpy
:
fetch_result
=
self
.
exe
.
run
(
self
.
main_program_to_be_run
,
feed
=
batch
,
fetch_list
=
self
.
fetch_list
)
else
:
fetch_result
=
self
.
exe
.
run
(
self
.
main_program_to_be_run
,
feed
=
batch
,
fetch_list
=
self
.
fetch_list
,
return_numpy
=
False
)
fetch_result
=
self
.
exe
.
run
(
self
.
main_program_to_be_run
,
feed
=
batch
,
fetch_list
=
self
.
fetch_list
,
return_numpy
=
self
.
return_numpy
)
if
not
self
.
return_numpy
:
fetch_result
=
[
np
.
array
(
x
)
for
x
in
fetch_result
]
for
index
,
result
in
enumerate
(
fetch_result
):
...
...
paddlehub/finetune/task/classifier_task.py
浏览文件 @
b3b8cb0f
...
...
@@ -20,9 +20,12 @@ from __future__ import print_function
import
time
from
collections
import
OrderedDict
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
from
paddlehub.finetune.evaluate
import
calculate_f1_np
,
matthews_corrcoef
from
paddlehub.common.utils
import
version_compare
import
paddlehub.network
as
net
from
.base_task
import
BaseTask
...
...
@@ -104,7 +107,7 @@ class ClassifierTask(BaseTask):
run_examples
+=
run_state
.
run_examples
run_step
+=
run_state
.
run_step
loss_sum
+=
np
.
mean
(
run_state
.
run_results
[
-
1
])
*
run_state
.
run_examples
run_state
.
run_results
[
-
2
])
*
run_state
.
run_examples
acc_sum
+=
np
.
mean
(
run_state
.
run_results
[
2
])
*
run_state
.
run_examples
np_labels
=
run_state
.
run_results
[
0
]
...
...
@@ -161,6 +164,7 @@ class TextClassifierTask(ClassifierTask):
num_classes
,
feed_list
,
data_reader
,
network
=
None
,
startup_program
=
None
,
config
=
None
,
hidden_units
=
None
,
...
...
@@ -168,6 +172,7 @@ class TextClassifierTask(ClassifierTask):
if
metrics_choices
==
"default"
:
metrics_choices
=
[
"acc"
]
self
.
network
=
network
super
(
TextClassifierTask
,
self
).
__init__
(
data_reader
=
data_reader
,
feature
=
feature
,
...
...
@@ -177,17 +182,42 @@ class TextClassifierTask(ClassifierTask):
config
=
config
,
hidden_units
=
hidden_units
,
metrics_choices
=
metrics_choices
)
if
self
.
network
:
assert
self
.
network
in
[
'bilstm'
,
'bow'
,
'cnn'
,
'dpcnn'
,
'gru'
,
'lstm'
],
'network choice must be one of bilstm, bow, cnn, dpcnn, gru, lstm!'
assert
len
(
self
.
feature
.
shape
)
==
3
,
'The sequnece_output must be choosed rather than pooled_output of Transformer Model (ERNIE, BERT, RoBERTa and ELECTRA)!'
def
_build_net
(
self
):
cls_feats
=
fluid
.
layers
.
dropout
(
x
=
self
.
feature
,
dropout_prob
=
0.1
,
dropout_implementation
=
"upscale_in_train"
)
self
.
seq_len
=
fluid
.
layers
.
data
(
name
=
"seq_len"
,
shape
=
[
1
],
dtype
=
'int64'
,
lod_level
=
0
)
if
self
.
hidden_units
is
not
None
:
for
n_hidden
in
self
.
hidden_units
:
cls_feats
=
fluid
.
layers
.
fc
(
input
=
cls_feats
,
size
=
n_hidden
,
act
=
"relu"
)
if
version_compare
(
paddle
.
__version__
,
"1.6"
):
self
.
seq_len_used
=
fluid
.
layers
.
squeeze
(
self
.
seq_len
,
axes
=
[
1
])
else
:
self
.
seq_len_used
=
self
.
seq_len
unpad_feature
=
fluid
.
layers
.
sequence_unpad
(
self
.
feature
,
length
=
self
.
seq_len_used
)
if
self
.
network
:
net_func
=
getattr
(
net
.
classification
,
self
.
network
)
if
self
.
network
==
'dpcnn'
:
cls_feats
=
net_func
(
self
.
feature
)
else
:
cls_feats
=
net_func
(
unpad_feature
)
else
:
cls_feats
=
fluid
.
layers
.
dropout
(
x
=
self
.
feature
,
dropout_prob
=
0.1
,
dropout_implementation
=
"upscale_in_train"
)
if
self
.
hidden_units
is
not
None
:
for
n_hidden
in
self
.
hidden_units
:
cls_feats
=
fluid
.
layers
.
fc
(
input
=
cls_feats
,
size
=
n_hidden
,
act
=
"relu"
)
logits
=
fluid
.
layers
.
fc
(
input
=
cls_feats
,
...
...
@@ -204,6 +234,22 @@ class TextClassifierTask(ClassifierTask):
return
[
logits
]
@
property
def
feed_list
(
self
):
feed_list
=
self
.
_base_feed_list
+
[
self
.
seq_len
.
name
]
if
self
.
is_train_phase
or
self
.
is_test_phase
:
feed_list
+=
[
self
.
labels
[
0
].
name
]
return
feed_list
@
property
def
fetch_list
(
self
):
if
self
.
is_train_phase
or
self
.
is_test_phase
:
return
[
self
.
labels
[
0
].
name
,
self
.
ret_infers
.
name
,
self
.
metrics
[
0
].
name
,
self
.
loss
.
name
,
self
.
seq_len
.
name
]
return
[
self
.
outputs
[
0
].
name
,
self
.
seq_len
.
name
]
class
MultiLabelClassifierTask
(
ClassifierTask
):
def
__init__
(
self
,
...
...
paddlehub/reader/nlp_reader.py
浏览文件 @
b3b8cb0f
...
...
@@ -272,11 +272,12 @@ class ClassifyReader(BaseNLPReader):
batch_text_type_ids
=
[
record
.
text_type_ids
for
record
in
batch_records
]
batch_position_ids
=
[
record
.
position_ids
for
record
in
batch_records
]
padded_token_ids
,
input_mask
=
pad_batch_data
(
padded_token_ids
,
input_mask
,
batch_seq_lens
=
pad_batch_data
(
batch_token_ids
,
max_seq_len
=
self
.
max_seq_len
,
pad_idx
=
self
.
pad_id
,
return_input_mask
=
True
)
return_input_mask
=
True
,
return_seq_lens
=
True
)
padded_text_type_ids
=
pad_batch_data
(
batch_text_type_ids
,
max_seq_len
=
self
.
max_seq_len
,
...
...
@@ -293,7 +294,7 @@ class ClassifyReader(BaseNLPReader):
return_list
=
[
padded_token_ids
,
padded_position_ids
,
padded_text_type_ids
,
input_mask
,
batch_labels
input_mask
,
batch_
seq_lens
,
batch_
labels
]
if
self
.
use_task_id
:
...
...
@@ -301,12 +302,12 @@ class ClassifyReader(BaseNLPReader):
padded_token_ids
,
dtype
=
"int64"
)
*
self
.
task_id
return_list
=
[
padded_token_ids
,
padded_position_ids
,
padded_text_type_ids
,
input_mask
,
padded_task_ids
,
batch_labels
input_mask
,
padded_task_ids
,
batch_
seq_lens
,
batch_
labels
]
else
:
return_list
=
[
padded_token_ids
,
padded_position_ids
,
padded_text_type_ids
,
input_mask
input_mask
,
batch_seq_lens
]
if
self
.
use_task_id
:
...
...
@@ -314,7 +315,7 @@ class ClassifyReader(BaseNLPReader):
padded_token_ids
,
dtype
=
"int64"
)
*
self
.
task_id
return_list
=
[
padded_token_ids
,
padded_position_ids
,
padded_text_type_ids
,
input_mask
,
padded_task_ids
input_mask
,
padded_task_ids
,
batch_seq_lens
]
return
return_list
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录