Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
a271703a
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
1 年多 前同步成功
通知
283
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a271703a
编写于
6月 17, 2019
作者:
S
Steffy-zxf
提交者:
wuzewu
6月 17, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Update the senta demo (#56)
上级
3c85dfe5
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
46 addition
and
45 deletion
+46
-45
demo/senta/predict.py
demo/senta/predict.py
+33
-35
demo/senta/run_finetune.sh
demo/senta/run_finetune.sh
+1
-1
demo/senta/senta_finetune.py
demo/senta/senta_finetune.py
+12
-9
未找到文件。
demo/senta/predict.py
浏览文件 @
a271703a
...
...
@@ -15,8 +15,9 @@ import paddlehub as hub
# yapf: disable
parser
=
argparse
.
ArgumentParser
(
__doc__
)
parser
.
add_argument
(
"--checkpoint_dir"
,
type
=
str
,
default
=
None
,
help
=
"Directory to model checkpoint"
)
parser
.
add_argument
(
"--use_gpu"
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"Whether use GPU for finetuning, input should be True or False"
)
parser
.
add_argument
(
"--checkpoint_dir"
,
type
=
str
,
default
=
None
,
help
=
"Directory to model checkpoint"
)
parser
.
add_argument
(
"--use_gpu"
,
type
=
ast
.
literal_eval
,
default
=
True
,
help
=
"Whether use GPU for finetuning, input should be True or False"
)
parser
.
add_argument
(
"--use_pyreader"
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"Whether use pyreader to feed data."
)
args
=
parser
.
parse_args
()
# yapf: enable.
...
...
@@ -30,42 +31,39 @@ if __name__ == '__main__':
reader
=
hub
.
reader
.
LACClassifyReader
(
dataset
=
dataset
,
vocab_path
=
module
.
get_vocab_path
())
place
=
fluid
.
CUDAPlace
(
0
)
if
args
.
use_gpu
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
with
fluid
.
program_guard
(
program
):
sent_feature
=
outputs
[
"sentence_feature"
]
strategy
=
hub
.
AdamWeightDecayStrategy
(
weight_decay
=
0.01
,
warmup_proportion
=
0.1
,
learning_rate
=
5e-5
,
lr_scheduler
=
"linear_decay"
,
optimizer_name
=
"adam"
)
# Define a classification fine-tune task using PaddleHub's API
cls_task
=
hub
.
create_text_cls_task
(
feature
=
sent_feature
,
num_classes
=
dataset
.
num_labels
)
config
=
hub
.
RunConfig
(
use_data_parallel
=
False
,
use_pyreader
=
args
.
use_pyreader
,
use_cuda
=
args
.
use_gpu
,
batch_size
=
1
,
enable_memory_optim
=
False
,
checkpoint_dir
=
args
.
checkpoint_dir
,
strategy
=
strategy
)
# Setup feed list for data feeder
# Must feed all the tensors that the senta module needs
feed_list
=
[
inputs
[
"words"
].
name
,
cls_task
.
variable
(
'label'
).
name
]
sent_feature
=
outputs
[
"sentence_feature"
]
# classification probability tensor
probs
=
cls_task
.
variable
(
"probs"
)
feed_list
=
[
inputs
[
"words"
].
name
]
pred
=
fluid
.
layers
.
argmax
(
probs
,
axis
=
1
)
cls_task
=
hub
.
TextClassifierTask
(
data_reader
=
reader
,
feature
=
sent_feature
,
feed_list
=
feed_list
,
num_classes
=
dataset
.
num_labels
,
config
=
config
)
# load best model checkpoint
fluid
.
io
.
load_persistables
(
exe
,
args
.
checkpoint_dir
)
data
=
[
"这家餐厅很好吃"
,
"这部电影真的很差劲"
]
inference_program
=
program
.
clone
(
for_test
=
True
)
data_feeder
=
fluid
.
DataFeeder
(
feed_list
=
feed_list
,
place
=
place
)
test_reader
=
reader
.
data_generator
(
phase
=
'test'
,
shuffle
=
False
)
test_examples
=
dataset
.
get_test_examples
()
total
=
0
correct
=
0
for
index
,
batch
in
enumerate
(
test_reader
()):
pred_v
=
exe
.
run
(
feed
=
data_feeder
.
feed
(
batch
),
fetch_list
=
[
pred
.
name
],
program
=
inference_program
)
total
+=
1
if
(
pred_v
[
0
][
0
]
==
int
(
test_examples
[
index
].
label
)):
correct
+=
1
acc
=
1.0
*
correct
/
total
print
(
"%s
\t
predict=%s"
%
(
test_examples
[
index
],
pred_v
[
0
][
0
]))
print
(
"accuracy = %f"
%
acc
)
results
=
cls_task
.
predict
(
data
=
data
)
index
=
0
for
batch_result
in
results
:
batch_result
=
np
.
argmax
(
batch_result
,
axis
=
2
)[
0
]
for
result
in
batch_result
:
print
(
"%s
\t
predict=%s"
%
(
data
[
index
],
result
))
index
+=
1
demo/senta/run_finetune.sh
浏览文件 @
a271703a
export
CUDA_VISIBLE_DEVICES
=
5
export
CUDA_VISIBLE_DEVICES
=
0
DATASET
=
"chnsenticorp"
CKPT_DIR
=
"./ckpt_
${
DATASET
}
"
...
...
demo/senta/senta_finetune.py
浏览文件 @
a271703a
...
...
@@ -8,7 +8,7 @@ import paddlehub as hub
# yapf: disable
parser
=
argparse
.
ArgumentParser
(
__doc__
)
parser
.
add_argument
(
"--num_epoch"
,
type
=
int
,
default
=
3
,
help
=
"Number of epoches for fine-tuning."
)
parser
.
add_argument
(
"--use_gpu"
,
type
=
ast
.
literal_eval
,
default
=
Fals
e
,
help
=
"Whether use GPU for finetuning, input should be True or False"
)
parser
.
add_argument
(
"--use_gpu"
,
type
=
ast
.
literal_eval
,
default
=
Tru
e
,
help
=
"Whether use GPU for finetuning, input should be True or False"
)
parser
.
add_argument
(
"--checkpoint_dir"
,
type
=
str
,
default
=
None
,
help
=
"Directory to model checkpoint"
)
parser
.
add_argument
(
"--batch_size"
,
type
=
int
,
default
=
32
,
help
=
"Total examples' number in batch for training."
)
args
=
parser
.
parse_args
()
...
...
@@ -19,7 +19,7 @@ if __name__ == '__main__':
module
=
hub
.
Module
(
name
=
"senta_bilstm"
)
inputs
,
outputs
,
program
=
module
.
context
(
trainable
=
True
)
# Step2: Download dataset and use
TextClassification
Reader to read dataset
# Step2: Download dataset and use
LACClassify
Reader to read dataset
dataset
=
hub
.
dataset
.
ChnSentiCorp
()
reader
=
hub
.
reader
.
LACClassifyReader
(
...
...
@@ -27,13 +27,9 @@ if __name__ == '__main__':
sent_feature
=
outputs
[
"sentence_feature"
]
# Define a classification fine-tune task using PaddleHub's API
cls_task
=
hub
.
create_text_cls_task
(
feature
=
sent_feature
,
num_classes
=
dataset
.
num_labels
)
# Setup feed list for data feeder
# Must feed all the tensors that the senta module needs
feed_list
=
[
inputs
[
"words"
].
name
,
cls_task
.
variable
(
'label'
).
name
]
feed_list
=
[
inputs
[
"words"
].
name
]
strategy
=
hub
.
finetune
.
strategy
.
AdamWeightDecayStrategy
(
learning_rate
=
1e-4
,
weight_decay
=
0.01
,
warmup_proportion
=
0.05
)
...
...
@@ -45,7 +41,14 @@ if __name__ == '__main__':
checkpoint_dir
=
args
.
checkpoint_dir
,
strategy
=
strategy
)
# Define a classification fine-tune task using PaddleHub's API
cls_task
=
hub
.
TextClassifierTask
(
data_reader
=
reader
,
feature
=
sent_feature
,
feed_list
=
feed_list
,
num_classes
=
dataset
.
num_labels
,
config
=
config
)
# Finetune and evaluate by PaddleHub's API
# This runs training, evaluation, and testing, and saves the model automatically
hub
.
finetune_and_eval
(
task
=
cls_task
,
data_reader
=
reader
,
feed_list
=
feed_list
,
config
=
config
)
cls_task
.
finetune_and_eval
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录