PaddlePaddle / PaddleHub
Commit 6c1b67bc
Authored Apr 14, 2019 by Zeyu Chen
Parent 416c374c

add sequence label README.md
Showing 8 changed files with 32 additions and 28 deletions (+32 / -28)
demo/sequence-labeling/README.md              +1   -0
demo/sequence-labeling/run_sequence_label.sh  +2   -2
demo/sequence-labeling/sequence_label.py      +1   -0
demo/text-classification/README.md            +1   -1
demo/text-classification/run_predict.sh       +1   -1
demo/text-classification/simple_demo.py       +10  -8
paddlehub/finetune/evaluate.py                +6   -5
paddlehub/finetune/finetune.py                +10  -11
demo/sequence-labeling/README.md (new file, mode 100644)

+# PaddleHub Sequence Labeling
demo/sequence-labeling/run_sequence_labeling.sh → demo/sequence-labeling/run_sequence_label.sh (renamed)

 export CUDA_VISIBLE_DEVICES=0
-CKPT_DIR="./ckpt_sequence_labeling"
+CKPT_DIR="./ckpt_sequence_label"
 python -u sequence_label.py \
                    --batch_size 16 \
                    --weight_decay 0.01 \
-                   --checkpoint_dir $CKPT_DIR \
                    --num_epoch 3 \
+                   --checkpoint_dir $CKPT_DIR \
                    --max_seq_len 256 \
                    --learning_rate 5e-5
demo/sequence-labeling/sequence_label.py

@@ -14,6 +14,7 @@
 """Finetuning on sequence labeling task."""
 import argparse
+import ast
 import paddle.fluid as fluid
 import paddlehub as hub
...
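The flags passed in run_sequence_label.sh have to be declared by sequence_label.py, and the newly imported ast module is a common companion to argparse for turning shell strings into Python booleans. The following is a minimal, hypothetical sketch of such a parser for the flags the script passes; the actual argument names, defaults, and any extra options in sequence_label.py are not visible in this diff and may differ.

# Hypothetical sketch only: an argparse setup matching the flags that
# run_sequence_label.sh passes. The real sequence_label.py may differ.
import argparse
import ast

parser = argparse.ArgumentParser(description="Finetune a sequence labeling task with PaddleHub.")
parser.add_argument("--batch_size", type=int, default=16, help="Training batch size.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay for the AdamWeightDecay strategy.")
parser.add_argument("--checkpoint_dir", type=str, default="./ckpt_sequence_label", help="Checkpoint directory.")
parser.add_argument("--num_epoch", type=int, default=3, help="Number of finetune epochs.")
parser.add_argument("--max_seq_len", type=int, default=256, help="Maximum sequence length.")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate.")
# ast.literal_eval converts the strings "True"/"False" into Python booleans,
# which is the usual reason a CLI script imports ast next to argparse (assumed here;
# --use_gpu itself does not appear in the shell script above).
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether to train on GPU.")

args = parser.parse_args()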
demo/text-classification/README.md

@@ -133,7 +133,7 @@ hub.finetune_and_eval(task=cls_task, data_reader=reader, feed_list=feed_list, co
 * `eval_interval`: interval between model evaluations; by default the validation set is evaluated every 100 steps
 * `save_ckpt_interval`: model saving interval; configure it according to the task size; by default only the model that performs best on the validation set and the model at the end of training are saved
 * `use_cuda`: whether to train on GPU; defaults to False
-* `checkpoint_dir`: directory where model checkpoints are saved
+* `checkpoint_dir`: directory where model checkpoints are saved; if the user does not specify one, it is generated automatically
 * `num_epoch`: number of finetune epochs
 * `batch_size`: training batch size; when using a GPU, adjust batch_size according to the actual hardware
 * `enable_memory_optim`: whether to enable memory optimization; defaults to True
...
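The bullet list above documents the run-configuration options that control hub.finetune_and_eval. As a rough illustration only, assuming these option names map one-to-one onto hub.RunConfig keyword arguments (the simple_demo.py change below confirms use_cuda, checkpoint_dir, num_epoch, batch_size and strategy; eval_interval is taken on trust from the README), a config built from them might look like this:

import paddlehub as hub

# Sketch under the assumptions stated above; not taken verbatim from this commit.
strategy = hub.AdamWeightDecayStrategy(learning_rate=5e-5, weight_decay=0.01)
config = hub.RunConfig(
    use_cuda=False,       # README: defaults to False; set True to train on GPU
    checkpoint_dir=None,  # README: auto-generated when the user does not specify one
    num_epoch=3,          # number of finetune epochs
    batch_size=32,        # adjust to the available GPU memory
    eval_interval=100,    # README default: evaluate the validation set every 100 steps
    strategy=strategy)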
demo/text-classification/run_predict.sh

 export CUDA_VISIBLE_DEVICES=5
-CKPT_DIR="./ckpt_chnsenticorp/best_model"
+CKPT_DIR="./ckpt_20190414203357/best_model"
 python -u predict.py --checkpoint_dir $CKPT_DIR --max_seq_len 128
demo/text-classification/simple_demo.py

 import paddle.fluid as fluid
 import paddlehub as hub

-# Step1
+# Step1: Select pre-trained model
 module = hub.Module(name="ernie")
 inputs, outputs, program = module.context(trainable=True, max_seq_len=128)

-# Step2
+# Step2: Prepare Dataset and DataReader
 dataset = hub.dataset.ChnSentiCorp()
 reader = hub.reader.ClassifyReader(
     dataset=dataset, vocab_path=module.get_vocab_path(), max_seq_len=128)

-# Step3
+# Step3: Construct transfer learning task
 with fluid.program_guard(program):
     label = fluid.layers.data(name="label", shape=[1], dtype='int64')
     pooled_output = outputs["pooled_output"]
-    cls_task = hub.create_text_classification_task(
+    cls_task = hub.create_text_cls_task(
         feature=pooled_output, label=label, num_classes=dataset.num_labels)

-    # Step4
+    # Step4: Setup config then start finetune
     strategy = hub.AdamWeightDecayStrategy(learning_rate=5e-5, weight_decay=0.01)
     config = hub.RunConfig(
-        use_cuda=True, num_epoch=3, batch_size=32, strategy=strategy)
+        use_cuda=True,
+        checkpoint_dir="./ckpt",
+        num_epoch=3,
+        batch_size=32,
+        strategy=strategy)
     feed_list = [
         inputs["input_ids"].name, inputs["position_ids"].name,
...
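The hunk is truncated after the start of feed_list. Judging from the README hunk header above (hub.finetune_and_eval(task=cls_task, data_reader=reader, feed_list=feed_list, ...)) and the finetune_and_eval(task, data_reader, feed_list, config=None) signature shown in paddlehub/finetune/finetune.py below, the demo presumably finishes by handing the task, reader, feed list and config to that entry point. A sketch of that final step follows; the feed_list entries beyond input_ids and position_ids (segment_ids, input_mask, label) are assumed from the usual ERNIE inputs and are not visible in this diff.

# Sketch of the truncated tail of simple_demo.py; entries marked "assumed"
# do not appear in the hunk above and may differ in the actual file.
feed_list = [
    inputs["input_ids"].name,
    inputs["position_ids"].name,
    inputs["segment_ids"].name,  # assumed ERNIE input
    inputs["input_mask"].name,   # assumed ERNIE input
    label.name,                  # assumed
]
hub.finetune_and_eval(
    task=cls_task, data_reader=reader, feed_list=feed_list, config=config)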
paddlehub/finetune/evaluate.py

@@ -57,6 +57,7 @@ def evaluate_cls_task(task, data_reader, feed_list, phase="test", config=None):
     avg_loss = loss_sum / num_eval_examples
     avg_acc = acc_sum / num_eval_examples
     eval_speed = eval_step / eval_time_used
     logger.info(
         "[%s dataset evaluation result] loss=%.5f acc=%.5f [step/sec: %.2f]" %
         (phase, avg_loss, avg_acc, eval_speed))
...
@@ -64,11 +65,11 @@ def evaluate_cls_task(task, data_reader, feed_list, phase="test", config=None):
     return avg_loss, avg_acc, eval_speed

-def evaluate_seq_labeling_task(task, data_reader, feed_list, phase="test", config=None):
+def evaluate_seq_label_task(task, data_reader, feed_list, phase="test", config=None):
     fetch_list = [
         task.variable("labels").name,
         task.variable("infers").name,
...
paddlehub/finetune/finetune.py

@@ -27,7 +27,7 @@ from visualdl import LogWriter
 from paddlehub.common.logger import logger
 from paddlehub.finetune.strategy import AdamWeightDecayStrategy, DefaultStrategy
 from paddlehub.finetune.checkpoint import load_checkpoint, save_checkpoint
-from paddlehub.finetune.evaluate import evaluate_cls_task, evaluate_seq_labeling_task
+from paddlehub.finetune.evaluate import evaluate_cls_task, evaluate_seq_label_task
 import paddlehub as hub
...
@@ -126,13 +126,13 @@ def _finetune_seq_label_task(task,
                 exe=exe)
             if do_eval and global_step % config.eval_interval == 0:
-                evaluate_seq_labeling_task(
+                evaluate_seq_label_task(
                     task,
                     data_reader,
                     feed_list,
                     phase="test",
                     config=config)
-                evaluate_seq_labeling_task(
+                evaluate_seq_label_task(
                     task,
                     data_reader,
                     feed_list,
...
@@ -148,9 +148,9 @@ def _finetune_seq_label_task(task,
                 exe=exe)
     if do_eval:
-        evaluate_seq_labeling_task(
+        evaluate_seq_label_task(
             task, data_reader, feed_list, phase="dev", config=config)
-        evaluate_seq_labeling_task(
+        evaluate_seq_label_task(
             task, data_reader, feed_list, phase="test", config=config)
     logger.info("PaddleHub finetune finished.")
...
@@ -164,8 +164,8 @@ def _finetune_cls_task(task, data_reader, feed_list, config=None,
     num_epoch = config.num_epoch
     batch_size = config.batch_size
-    log_writter = LogWriter(
-        os.path.join(config.checkpoint_dir, "vdllog"), sync_cycle=10)
+    log_writer = LogWriter(
+        os.path.join(config.checkpoint_dir, "vdllog"), sync_cycle=1)
     place, dev_count = hub.common.get_running_device_info(config)
     with fluid.program_guard(main_program, startup_program):
...
@@ -190,10 +190,10 @@ def _finetune_cls_task(task, data_reader, feed_list, config=None,
     logger.info("PaddleHub finetune start")

     # add visualdl scalar
-    with log_writter.mode("train") as logw:
+    with log_writer.mode("train") as logw:
         train_loss_scalar = logw.scalar(tag="loss[train]")
         train_acc_scalar = logw.scalar(tag="accuracy[train]")
-    with log_writter.mode("evaluate") as logw:
+    with log_writer.mode("evaluate") as logw:
         eval_loss_scalar = logw.scalar(tag="loss[evaluate]")
         eval_acc_scalar = logw.scalar(tag="accuracy[evaluate]")
...
@@ -276,8 +276,7 @@ def finetune_and_eval(task, data_reader, feed_list, config=None):
     if task.task_type == "sequence_labeling":
         _finetune_seq_label_task(
             task, data_reader, feed_list, config, do_eval=True)
-    # if it's image_classification and text classificaiton
-    else:
+    elif task.task_type == "image_classification" or task.task_type == "text_classification":
         _finetune_cls_task(
             task, data_reader, feed_list, config, do_eval=True)
...