PaddlePaddle / PaddleHub
Commit 6985001a
Authored on Apr 11, 2019 by Zeyu Chen

reorganize code structure

Parent: 80c3b3f0
Showing 5 changed files with 105 additions and 111 deletions.
demo/ernie-classification/question_answering.py    +26 -28
demo/ernie-classification/question_matching.py     +26 -28
demo/ernie-classification/sentiment_cls.py         +26 -28
demo/ernie-seq-labeling/run_sequence_labeling.sh   +2 -2
demo/ernie-seq-labeling/sequence_labeling.py       +25 -25
demo/ernie-classification/question_answering.py
...
@@ -40,54 +40,52 @@ args = parser.parse_args()
 # yapf: enable.
 
 if __name__ == '__main__':
-    # Select a finetune strategy
-    strategy = hub.BERTFinetuneStrategy(
-        weight_decay=args.weight_decay,
-        learning_rate=args.learning_rate,
-        warmup_strategy="linear_warmup_decay",
-    )
-
-    # Setup runing config for PaddleHub Finetune API
-    config = hub.RunConfig(
-        eval_interval=100,
-        use_cuda=True,
-        num_epoch=args.num_epoch,
-        batch_size=args.batch_size,
-        checkpoint_dir=args.checkpoint_dir,
-        strategy=strategy)
-
-    # loading Paddlehub ERNIE pretrained model
+    # Step1: load Paddlehub ERNIE pretrained model
     module = hub.Module(name="ernie")
     inputs, outputs, program = module.context(
         trainable=True, max_seq_len=args.max_seq_len)
 
-    # Sentence classification dataset reader
+    # Step2: Download dataset and use ClassifyReader to read dataset
+    dataset = hub.dataset.NLPCC_DBQA()
     reader = hub.reader.ClassifyReader(
-        dataset=hub.dataset.NLPCC_DBQA(),  # download NLPCC_DBQA dataset
+        dataset=dataset,
         vocab_path=module.get_vocab_path(),
         max_seq_len=args.max_seq_len)
 
     num_labels = len(reader.get_labels())
 
-    input_dict, output_dict, program = module.context(
-        trainable=True, max_seq_len=args.max_seq_len)
-
+    # Step3: construct transfer learning network
     with fluid.program_guard(program):
         label = fluid.layers.data(name="label", shape=[1], dtype='int64')
 
         # Use "pooled_output" for classification tasks on an entire sentence.
-        # Use "sequence_outputs" for token-level output.
-        pooled_output = output_dict["pooled_output"]
+        # Use "sequence_output" for token-level output.
+        pooled_output = outputs["pooled_output"]
 
         # Setup feed list for data feeder
         # Must feed all the tensor of ERNIE's module need
         feed_list = [
-            input_dict["input_ids"].name, input_dict["position_ids"].name,
-            input_dict["segment_ids"].name, input_dict["input_mask"].name,
-            label.name
+            inputs["input_ids"].name, inputs["position_ids"].name,
+            inputs["segment_ids"].name, inputs["input_mask"].name, label.name
         ]
 
         # Define a classfication finetune task by PaddleHub's API
         cls_task = hub.create_text_classification_task(
             pooled_output, label, num_classes=num_labels)
 
+        # Step4: Select finetune strategy, setup config and finetune
+        strategy = hub.BERTFinetuneStrategy(
+            weight_decay=args.weight_decay,
+            learning_rate=args.learning_rate,
+            warmup_strategy="linear_warmup_decay",
+        )
+
+        # Setup runing config for PaddleHub Finetune API
+        config = hub.RunConfig(
+            use_cuda=True,
+            num_epoch=args.num_epoch,
+            batch_size=args.batch_size,
+            checkpoint_dir=args.checkpoint_dir,
+            strategy=strategy)
+
         # Finetune and evaluate by PaddleHub's API
         # will finish training, evaluation, testing, save model automatically
         hub.finetune_and_eval(
...
...
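For reference, here is a minimal sketch of the structure this commit moves the ernie-classification demos toward, assembled from the "+" lines above; question_matching.py and sentiment_cls.py below receive the same reorganization, with LCQMC and ChnSentiCorp in place of NLPCC_DBQA. The sketch targets the 2019-era PaddleHub v0.x API used in this commit; the argparse flags are inferred from the args.* attributes referenced in the diff (their defaults are placeholders), and the arguments of hub.finetune_and_eval are elided in the diff, so the call is left as a comment.

import argparse

import paddle.fluid as fluid
import paddlehub as hub

# Flags inferred from the args.* attributes used in the diff; defaults are placeholders.
parser = argparse.ArgumentParser()
parser.add_argument("--num_epoch", type=int, default=3)
parser.add_argument("--batch_size", type=int, default=32)
parser.add_argument("--learning_rate", type=float, default=5e-5)
parser.add_argument("--weight_decay", type=float, default=0.01)
parser.add_argument("--max_seq_len", type=int, default=128)
parser.add_argument("--checkpoint_dir", type=str, default="./ckpt")
args = parser.parse_args()

if __name__ == '__main__':
    # Step1: load the PaddleHub ERNIE pretrained module
    module = hub.Module(name="ernie")
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # Step2: download the dataset and read it with ClassifyReader
    dataset = hub.dataset.NLPCC_DBQA()
    reader = hub.reader.ClassifyReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len)
    num_labels = len(reader.get_labels())

    # Step3: construct the transfer learning network on top of the module's program
    with fluid.program_guard(program):
        label = fluid.layers.data(name="label", shape=[1], dtype='int64')
        pooled_output = outputs["pooled_output"]
        feed_list = [
            inputs["input_ids"].name, inputs["position_ids"].name,
            inputs["segment_ids"].name, inputs["input_mask"].name, label.name
        ]
        cls_task = hub.create_text_classification_task(
            pooled_output, label, num_classes=num_labels)

        # Step4: select a finetune strategy, set up the run config, then finetune
        strategy = hub.BERTFinetuneStrategy(
            weight_decay=args.weight_decay,
            learning_rate=args.learning_rate,
            warmup_strategy="linear_warmup_decay")
        config = hub.RunConfig(
            use_cuda=True,
            num_epoch=args.num_epoch,
            batch_size=args.batch_size,
            checkpoint_dir=args.checkpoint_dir,
            strategy=strategy)
        # hub.finetune_and_eval(...)  # arguments are elided in the diff above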
demo/ernie-classification/question_matching.py
...
@@ -40,54 +40,52 @@ args = parser.parse_args()
 # yapf: enable.
 
 if __name__ == '__main__':
-    # Select a finetune strategy
-    strategy = hub.BERTFinetuneStrategy(
-        weight_decay=args.weight_decay,
-        learning_rate=args.learning_rate,
-        warmup_strategy="linear_warmup_decay",
-    )
-
-    # Setup runing config for PaddleHub Finetune API
-    config = hub.RunConfig(
-        eval_interval=100,
-        use_cuda=True,
-        num_epoch=args.num_epoch,
-        batch_size=args.batch_size,
-        checkpoint_dir=args.checkpoint_dir,
-        strategy=strategy)
-
-    # loading Paddlehub ERNIE pretrained model
+    # Step1: load Paddlehub ERNIE pretrained model
     module = hub.Module(name="ernie")
     inputs, outputs, program = module.context(
         trainable=True, max_seq_len=args.max_seq_len)
 
-    # Sentence classification dataset reader
+    # Step2: Download dataset and use ClassifyReader to read dataset
+    dataset = hub.dataset.LCQMC()
     reader = hub.reader.ClassifyReader(
-        dataset=hub.dataset.LCQMC(),  # download LCQMC dataset
+        dataset=dataset,
        vocab_path=module.get_vocab_path(),
         max_seq_len=args.max_seq_len)
 
     num_labels = len(reader.get_labels())
 
-    input_dict, output_dict, program = module.context(
-        trainable=True, max_seq_len=args.max_seq_len)
-
+    # Step3: construct transfer learning network
    with fluid.program_guard(program):
         label = fluid.layers.data(name="label", shape=[1], dtype='int64')
 
         # Use "pooled_output" for classification tasks on an entire sentence.
-        # Use "sequence_outputs" for token-level output.
-        pooled_output = output_dict["pooled_output"]
+        # Use "sequence_output" for token-level output.
+        pooled_output = outputs["pooled_output"]
 
         # Setup feed list for data feeder
         # Must feed all the tensor of ERNIE's module need
         feed_list = [
-            input_dict["input_ids"].name, input_dict["position_ids"].name,
-            input_dict["segment_ids"].name, input_dict["input_mask"].name,
-            label.name
+            inputs["input_ids"].name, inputs["position_ids"].name,
+            inputs["segment_ids"].name, inputs["input_mask"].name, label.name
         ]
 
         # Define a classfication finetune task by PaddleHub's API
         cls_task = hub.create_text_classification_task(
             pooled_output, label, num_classes=num_labels)
 
+        # Step4: Select finetune strategy, setup config and finetune
+        strategy = hub.BERTFinetuneStrategy(
+            weight_decay=args.weight_decay,
+            learning_rate=args.learning_rate,
+            warmup_strategy="linear_warmup_decay",
+        )
+
+        # Setup runing config for PaddleHub Finetune API
+        config = hub.RunConfig(
+            use_cuda=True,
+            num_epoch=args.num_epoch,
+            batch_size=args.batch_size,
+            checkpoint_dir=args.checkpoint_dir,
+            strategy=strategy)
+
         # Finetune and evaluate by PaddleHub's API
         # will finish training, evaluation, testing, save model automatically
         hub.finetune_and_eval(
...
...
demo/ernie-classification/sentiment_cls.py
...
@@ -40,54 +40,52 @@ args = parser.parse_args()
 # yapf: enable.
 
 if __name__ == '__main__':
-    # Select a finetune strategy
-    strategy = hub.BERTFinetuneStrategy(
-        weight_decay=args.weight_decay,
-        learning_rate=args.learning_rate,
-        warmup_strategy="linear_warmup_decay",
-    )
-
-    # Setup runing config for PaddleHub Finetune API
-    config = hub.RunConfig(
-        eval_interval=100,
-        use_cuda=True,
-        num_epoch=args.num_epoch,
-        batch_size=args.batch_size,
-        checkpoint_dir=args.checkpoint_dir,
-        strategy=strategy)
-
-    # loading Paddlehub ERNIE pretrained model
+    # Step1: load Paddlehub ERNIE pretrained model
     module = hub.Module(name="ernie")
     inputs, outputs, program = module.context(
         trainable=True, max_seq_len=args.max_seq_len)
 
-    # Sentence classification dataset reader
+    # Step2: Download dataset and use ClassifyReader to read dataset
+    dataset = hub.dataset.ChnSentiCorp()
     reader = hub.reader.ClassifyReader(
-        dataset=hub.dataset.ChnSentiCorp(),  # download chnsenticorp dataset
+        dataset=dataset,
         vocab_path=module.get_vocab_path(),
         max_seq_len=args.max_seq_len)
 
     num_labels = len(reader.get_labels())
 
-    input_dict, output_dict, program = module.context(
-        trainable=True, max_seq_len=args.max_seq_len)
-
+    # Step3: construct transfer learning network
     with fluid.program_guard(program):
         label = fluid.layers.data(name="label", shape=[1], dtype='int64')
 
         # Use "pooled_output" for classification tasks on an entire sentence.
-        # Use "sequence_outputs" for token-level output.
-        pooled_output = output_dict["pooled_output"]
+        # Use "sequence_output" for token-level output.
+        pooled_output = outputs["pooled_output"]
 
         # Setup feed list for data feeder
         # Must feed all the tensor of ERNIE's module need
         feed_list = [
-            input_dict["input_ids"].name, input_dict["position_ids"].name,
-            input_dict["segment_ids"].name, input_dict["input_mask"].name,
-            label.name
+            inputs["input_ids"].name, inputs["position_ids"].name,
+            inputs["segment_ids"].name, inputs["input_mask"].name, label.name
         ]
 
         # Define a classfication finetune task by PaddleHub's API
         cls_task = hub.create_text_classification_task(
             pooled_output, label, num_classes=num_labels)
 
+        # Step4: Select finetune strategy, setup config and finetune
+        strategy = hub.BERTFinetuneStrategy(
+            weight_decay=args.weight_decay,
+            learning_rate=args.learning_rate,
+            warmup_strategy="linear_warmup_decay",
+        )
+
+        # Setup runing config for PaddleHub Finetune API
+        config = hub.RunConfig(
+            use_cuda=True,
+            num_epoch=args.num_epoch,
+            batch_size=args.batch_size,
+            checkpoint_dir=args.checkpoint_dir,
+            strategy=strategy)
+
         # Finetune and evaluate by PaddleHub's API
         # will finish training, evaluation, testing, save model automatically
         hub.finetune_and_eval(
...
...
demo/ernie-seq-labeling/run_fintune_with_hub.sh → demo/ernie-seq-labeling/run_sequence_labeling.sh
 export CUDA_VISIBLE_DEVICES=0
 
-CKPT_DIR="./ckpt"
+CKPT_DIR="./ckpt_sequence_labeling"
 
-python -u finetune_with_hub.py \
+python -u sequence_labeling.py \
         --batch_size 16 \
         --weight_decay 0.01 \
         --checkpoint_dir $CKPT_DIR \
...
...
demo/ernie-seq-labeling/finetune_with_hub.py → demo/ernie-seq-labeling/sequence_labeling.py
...
@@ -40,35 +40,21 @@ args = parser.parse_args()
 # yapf: enable.
 
 if __name__ == '__main__':
-    # Select a finetune strategy
-    strategy = hub.BERTFinetuneStrategy(
-        weight_decay=args.weight_decay,
-        learning_rate=args.learning_rate,
-        warmup_strategy="linear_warmup_decay",
-    )
-
-    # Setup runing config for PaddleHub Finetune API
-    config = hub.RunConfig(
-        eval_interval=100,
-        use_cuda=True,
-        num_epoch=args.num_epoch,
-        batch_size=args.batch_size,
-        strategy=strategy)
-
-    # loading Paddlehub ERNIE pretrained model
+    # Step1: load Paddlehub ERNIE pretrained model
     module = hub.Module(name="ernie")
     inputs, outputs, program = module.context(
         trainable=True, max_seq_len=args.max_seq_len)
 
-    # Sequence Label dataset reader
+    # Step2: Download dataset and use SequenceLabelReader to read dataset
+    dataset = hub.dataset.MSRA_NER(),
     reader = hub.reader.SequenceLabelReader(
-        dataset=hub.dataset.MSRA_NER(),
+        dataset=dataset,
         vocab_path=module.get_vocab_path(),
         max_seq_len=args.max_seq_len)
 
     num_labels = len(reader.get_labels())
 
-    input_dict, output_dict, program = module.context(
-        trainable=True, max_seq_len=args.max_seq_len)
-
+    # Step3: construct transfer learning network
     with fluid.program_guard(program):
         label = fluid.layers.data(
             name="label", shape=[args.max_seq_len, 1], dtype='int64')
...
@@ -76,14 +62,15 @@ if __name__ == '__main__':
         # Use "pooled_output" for classification tasks on an entire sentence.
         # Use "sequence_output" for token-level output.
-        sequence_output = output_dict["sequence_output"]
+        sequence_output = outputs["sequence_output"]
 
         # Setup feed list for data feeder
         # Must feed all the tensor of ERNIE's module need
         # Compared to classification task, we need add seq_len tensor to feedlist
         feed_list = [
-            input_dict["input_ids"].name, input_dict["position_ids"].name,
-            input_dict["segment_ids"].name, input_dict["input_mask"].name,
-            label.name, seq_len
+            inputs["input_ids"].name, inputs["position_ids"].name,
+            inputs["segment_ids"].name, inputs["input_mask"].name,
+            label.name, seq_len
         ]
 
         # Define a sequence labeling finetune task by PaddleHub's API
         seq_label_task = hub.create_seq_labeling_task(
...
@@ -92,6 +79,19 @@ if __name__ == '__main__':
             seq_len=seq_len,
             num_classes=num_labels)
 
+        # Select a finetune strategy
+        strategy = hub.BERTFinetuneStrategy(
+            weight_decay=args.weight_decay,
+            learning_rate=args.learning_rate,
+            warmup_strategy="linear_warmup_decay",
+        )
+
+        # Setup runing config for PaddleHub Finetune API
+        config = hub.RunConfig(
+            use_cuda=True,
+            num_epoch=args.num_epoch,
+            batch_size=args.batch_size,
+            strategy=strategy)
+
         # Finetune and evaluate model by PaddleHub's API
         # will finish training, evaluation, testing, save model automatically
         hub.finetune_and_eval(
...
...
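The sequence-labeling demo ends up with the same Step1-Step4 layout; below is a rough sketch of the parts that differ from the classification sketch above. Assumptions: the definition of seq_len is not visible in this diff, so it is stubbed here as a plain int64 data layer; the task-construction call is only partially visible, so it is left as a comment; the stray trailing comma after hub.dataset.MSRA_NER() in the added line is dropped so the sketch stays valid Python; a fixed max_seq_len stands in for args.max_seq_len.

import paddle.fluid as fluid
import paddlehub as hub

max_seq_len = 128  # stand-in for args.max_seq_len

# Step1: identical to the classification demos
module = hub.Module(name="ernie")
inputs, outputs, program = module.context(trainable=True, max_seq_len=max_seq_len)

# Step2: MSRA_NER dataset read with SequenceLabelReader instead of ClassifyReader
dataset = hub.dataset.MSRA_NER()
reader = hub.reader.SequenceLabelReader(
    dataset=dataset,
    vocab_path=module.get_vocab_path(),
    max_seq_len=max_seq_len)
num_labels = len(reader.get_labels())

# Step3: token-level head on "sequence_output" instead of "pooled_output"
with fluid.program_guard(program):
    label = fluid.layers.data(
        name="label", shape=[max_seq_len, 1], dtype='int64')
    # seq_len is not defined in the visible part of the diff; a data layer is assumed here
    seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64')
    sequence_output = outputs["sequence_output"]

    # Compared to the classification task, seq_len is appended to the feed list (as in the diff)
    feed_list = [
        inputs["input_ids"].name, inputs["position_ids"].name,
        inputs["segment_ids"].name, inputs["input_mask"].name,
        label.name, seq_len
    ]

    # The task construction call is only partially visible in the diff; the visible
    # keyword arguments are seq_len=seq_len and num_classes=num_labels:
    # seq_label_task = hub.create_seq_labeling_task(..., seq_len=seq_len, num_classes=num_labels)

    # Step4 mirrors the classification sketch (BERTFinetuneStrategy + RunConfig), except
    # RunConfig is created without checkpoint_dir here, followed by hub.finetune_and_eval(...)
    # whose arguments are elided in the diff.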