PaddlePaddle / PaddleHub
Commit ed3d2276
Authored Apr 11, 2019 by wuzewu

Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleHub into develop

Parents: 2b2a3d2f, 6985001a
Showing 7 changed files with 111 additions and 117 deletions (+111 −117)
demo/ernie-classification/question_answering.py     +26 −28
demo/ernie-classification/question_matching.py      +26 −28
demo/ernie-classification/sentiment_cls.py          +26 −28
demo/ernie-seq-labeling/run_sequence_labeling.sh     +2 −2
demo/ernie-seq-labeling/sequence_labeling.py        +25 −25
paddlehub/module/base_processor.py                   +3 −3
paddlehub/module/module.py                           +3 −3
demo/ernie-classification/question_answering.py
...
@@ -40,54 +40,52 @@ args = parser.parse_args()
 # yapf: enable.

 if __name__ == '__main__':
-    # Select a finetune strategy
-    strategy = hub.BERTFinetuneStrategy(
-        weight_decay=args.weight_decay,
-        learning_rate=args.learning_rate,
-        warmup_strategy="linear_warmup_decay",
-    )
-
-    # Setup runing config for PaddleHub Finetune API
-    config = hub.RunConfig(
-        eval_interval=100,
-        use_cuda=True,
-        num_epoch=args.num_epoch,
-        batch_size=args.batch_size,
-        checkpoint_dir=args.checkpoint_dir,
-        strategy=strategy)
-
-    # loading Paddlehub ERNIE pretrained model
+    # Step1: load Paddlehub ERNIE pretrained model
     module = hub.Module(name="ernie")
+    inputs, outputs, program = module.context(
+        trainable=True, max_seq_len=args.max_seq_len)

-    # Sentence classification dataset reader
+    # Step2: Download dataset and use ClassifyReader to read dataset
+    dataset = hub.dataset.NLPCC_DBQA()
     reader = hub.reader.ClassifyReader(
-        dataset=hub.dataset.NLPCC_DBQA(),  # download NLPCC_DBQA dataset
+        dataset=dataset,
         vocab_path=module.get_vocab_path(),
         max_seq_len=args.max_seq_len)
     num_labels = len(reader.get_labels())

-    input_dict, output_dict, program = module.context(
-        sign_name="tokens", trainable=True, max_seq_len=args.max_seq_len)
-
+    # Step3: construct transfer learning network
     with fluid.program_guard(program):
         label = fluid.layers.data(name="label", shape=[1], dtype='int64')

         # Use "pooled_output" for classification tasks on an entire sentence.
-        # Use "sequence_outputs" for token-level output.
-        pooled_output = output_dict["pooled_output"]
+        # Use "sequence_output" for token-level output.
+        pooled_output = outputs["pooled_output"]

         # Setup feed list for data feeder
         # Must feed all the tensor of ERNIE's module need
         feed_list = [
-            input_dict["input_ids"].name, input_dict["position_ids"].name,
-            input_dict["segment_ids"].name, input_dict["input_mask"].name,
-            label.name
+            inputs["input_ids"].name, inputs["position_ids"].name,
+            inputs["segment_ids"].name, inputs["input_mask"].name, label.name
         ]
         # Define a classfication finetune task by PaddleHub's API
         cls_task = hub.create_text_classification_task(
             pooled_output, label, num_classes=num_labels)

+        # Step4: Select finetune strategy, setup config and finetune
+        strategy = hub.BERTFinetuneStrategy(
+            weight_decay=args.weight_decay,
+            learning_rate=args.learning_rate,
+            warmup_strategy="linear_warmup_decay",
+        )
+
+        # Setup runing config for PaddleHub Finetune API
+        config = hub.RunConfig(
+            use_cuda=True,
+            num_epoch=args.num_epoch,
+            batch_size=args.batch_size,
+            checkpoint_dir=args.checkpoint_dir,
+            strategy=strategy)
+
         # Finetune and evaluate by PaddleHub's API
         # will finish training, evaluation, testing, save model automatically
         hub.finetune_and_eval(
...
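Read as a whole, the added lines above reorganize the demo into the four commented steps. Below is a condensed sketch of the post-change flow: a SimpleNamespace stands in for the demo's argparse flags (illustrative values, not the demo's defaults), and the keyword arguments of the final finetune_and_eval call are an assumption, since the call's argument list is truncated in the diff. The same pattern applies to question_matching.py (hub.dataset.LCQMC) and sentiment_cls.py (hub.dataset.ChnSentiCorp) in the diffs that follow; only the dataset differs.

# Condensed sketch of the post-change flow (new side of the diff above).
from types import SimpleNamespace

import paddle.fluid as fluid
import paddlehub as hub

# Stand-in for the demo's argparse flags ("args = parser.parse_args()" in the
# hunk header); the values below are illustrative, not the demo's defaults.
args = SimpleNamespace(
    max_seq_len=128,
    weight_decay=0.01,
    learning_rate=5e-5,
    num_epoch=3,
    batch_size=32,
    checkpoint_dir="./ckpt")

# Step1: load Paddlehub ERNIE pretrained model
module = hub.Module(name="ernie")
inputs, outputs, program = module.context(
    trainable=True, max_seq_len=args.max_seq_len)

# Step2: download the dataset and read it with ClassifyReader
dataset = hub.dataset.NLPCC_DBQA()
reader = hub.reader.ClassifyReader(
    dataset=dataset,
    vocab_path=module.get_vocab_path(),
    max_seq_len=args.max_seq_len)
num_labels = len(reader.get_labels())

# Step3: construct the transfer learning network on top of the module's program
with fluid.program_guard(program):
    label = fluid.layers.data(name="label", shape=[1], dtype='int64')

    # "pooled_output" is the sentence-level representation used for classification
    pooled_output = outputs["pooled_output"]

    feed_list = [
        inputs["input_ids"].name, inputs["position_ids"].name,
        inputs["segment_ids"].name, inputs["input_mask"].name, label.name
    ]
    cls_task = hub.create_text_classification_task(
        pooled_output, label, num_classes=num_labels)

    # Step4: select a finetune strategy, set up the run config and finetune
    strategy = hub.BERTFinetuneStrategy(
        weight_decay=args.weight_decay,
        learning_rate=args.learning_rate,
        warmup_strategy="linear_warmup_decay")
    config = hub.RunConfig(
        use_cuda=True,
        num_epoch=args.num_epoch,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=strategy)

    # The call's argument list is truncated in the diff; these keywords are an
    # assumption about how the task, reader, feed list and config are wired up.
    hub.finetune_and_eval(
        task=cls_task, data_reader=reader, feed_list=feed_list, config=config)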
demo/ernie-classification/question_matching.py
...
@@ -40,54 +40,52 @@ args = parser.parse_args()
 # yapf: enable.

 if __name__ == '__main__':
-    # Select a finetune strategy
-    strategy = hub.BERTFinetuneStrategy(
-        weight_decay=args.weight_decay,
-        learning_rate=args.learning_rate,
-        warmup_strategy="linear_warmup_decay",
-    )
-
-    # Setup runing config for PaddleHub Finetune API
-    config = hub.RunConfig(
-        eval_interval=100,
-        use_cuda=True,
-        num_epoch=args.num_epoch,
-        batch_size=args.batch_size,
-        checkpoint_dir=args.checkpoint_dir,
-        strategy=strategy)
-
-    # loading Paddlehub ERNIE pretrained model
+    # Step1: load Paddlehub ERNIE pretrained model
     module = hub.Module(name="ernie")
+    inputs, outputs, program = module.context(
+        trainable=True, max_seq_len=args.max_seq_len)

-    # Sentence classification dataset reader
+    # Step2: Download dataset and use ClassifyReader to read dataset
+    dataset = hub.dataset.LCQMC()
     reader = hub.reader.ClassifyReader(
-        dataset=hub.dataset.LCQMC(),  # download LCQMC dataset
+        dataset=dataset,
         vocab_path=module.get_vocab_path(),
         max_seq_len=args.max_seq_len)
     num_labels = len(reader.get_labels())

-    input_dict, output_dict, program = module.context(
-        sign_name="tokens", trainable=True, max_seq_len=args.max_seq_len)
-
+    # Step3: construct transfer learning network
    with fluid.program_guard(program):
         label = fluid.layers.data(name="label", shape=[1], dtype='int64')

         # Use "pooled_output" for classification tasks on an entire sentence.
-        # Use "sequence_outputs" for token-level output.
-        pooled_output = output_dict["pooled_output"]
+        # Use "sequence_output" for token-level output.
+        pooled_output = outputs["pooled_output"]

         # Setup feed list for data feeder
         # Must feed all the tensor of ERNIE's module need
         feed_list = [
-            input_dict["input_ids"].name, input_dict["position_ids"].name,
-            input_dict["segment_ids"].name, input_dict["input_mask"].name,
-            label.name
+            inputs["input_ids"].name, inputs["position_ids"].name,
+            inputs["segment_ids"].name, inputs["input_mask"].name, label.name
         ]
         # Define a classfication finetune task by PaddleHub's API
         cls_task = hub.create_text_classification_task(
             pooled_output, label, num_classes=num_labels)

+        # Step4: Select finetune strategy, setup config and finetune
+        strategy = hub.BERTFinetuneStrategy(
+            weight_decay=args.weight_decay,
+            learning_rate=args.learning_rate,
+            warmup_strategy="linear_warmup_decay",
+        )
+
+        # Setup runing config for PaddleHub Finetune API
+        config = hub.RunConfig(
+            use_cuda=True,
+            num_epoch=args.num_epoch,
+            batch_size=args.batch_size,
+            checkpoint_dir=args.checkpoint_dir,
+            strategy=strategy)
+
         # Finetune and evaluate by PaddleHub's API
         # will finish training, evaluation, testing, save model automatically
         hub.finetune_and_eval(
...
demo/ernie-classification/sentiment_cls.py
...
@@ -40,54 +40,52 @@ args = parser.parse_args()
 # yapf: enable.

 if __name__ == '__main__':
-    # Select a finetune strategy
-    strategy = hub.BERTFinetuneStrategy(
-        weight_decay=args.weight_decay,
-        learning_rate=args.learning_rate,
-        warmup_strategy="linear_warmup_decay",
-    )
-
-    # Setup runing config for PaddleHub Finetune API
-    config = hub.RunConfig(
-        eval_interval=100,
-        use_cuda=True,
-        num_epoch=args.num_epoch,
-        batch_size=args.batch_size,
-        checkpoint_dir=args.checkpoint_dir,
-        strategy=strategy)
-
-    # loading Paddlehub ERNIE pretrained model
+    # Step1: load Paddlehub ERNIE pretrained model
     module = hub.Module(name="ernie")
+    inputs, outputs, program = module.context(
+        trainable=True, max_seq_len=args.max_seq_len)

-    # Sentence classification dataset reader
+    # Step2: Download dataset and use ClassifyReader to read dataset
+    dataset = hub.dataset.ChnSentiCorp()
     reader = hub.reader.ClassifyReader(
-        dataset=hub.dataset.ChnSentiCorp(),  # download chnsenticorp dataset
+        dataset=dataset,
         vocab_path=module.get_vocab_path(),
         max_seq_len=args.max_seq_len)
     num_labels = len(reader.get_labels())

-    input_dict, output_dict, program = module.context(
-        sign_name="tokens", trainable=True, max_seq_len=args.max_seq_len)
-
+    # Step3: construct transfer learning network
     with fluid.program_guard(program):
         label = fluid.layers.data(name="label", shape=[1], dtype='int64')

         # Use "pooled_output" for classification tasks on an entire sentence.
-        # Use "sequence_outputs" for token-level output.
-        pooled_output = output_dict["pooled_output"]
+        # Use "sequence_output" for token-level output.
+        pooled_output = outputs["pooled_output"]

         # Setup feed list for data feeder
         # Must feed all the tensor of ERNIE's module need
         feed_list = [
-            input_dict["input_ids"].name, input_dict["position_ids"].name,
-            input_dict["segment_ids"].name, input_dict["input_mask"].name,
-            label.name
+            inputs["input_ids"].name, inputs["position_ids"].name,
+            inputs["segment_ids"].name, inputs["input_mask"].name, label.name
         ]
         # Define a classfication finetune task by PaddleHub's API
         cls_task = hub.create_text_classification_task(
             pooled_output, label, num_classes=num_labels)

+        # Step4: Select finetune strategy, setup config and finetune
+        strategy = hub.BERTFinetuneStrategy(
+            weight_decay=args.weight_decay,
+            learning_rate=args.learning_rate,
+            warmup_strategy="linear_warmup_decay",
+        )
+
+        # Setup runing config for PaddleHub Finetune API
+        config = hub.RunConfig(
+            use_cuda=True,
+            num_epoch=args.num_epoch,
+            batch_size=args.batch_size,
+            checkpoint_dir=args.checkpoint_dir,
+            strategy=strategy)
+
         # Finetune and evaluate by PaddleHub's API
         # will finish training, evaluation, testing, save model automatically
         hub.finetune_and_eval(
...
demo/ernie-seq-labeling/run_fintune_with_hub.sh → demo/ernie-seq-labeling/run_sequence_labeling.sh
 export CUDA_VISIBLE_DEVICES=0

-CKPT_DIR="./ckpt"
+CKPT_DIR="./ckpt_sequence_labeling"
-python -u finetune_with_hub.py \
+python -u sequence_labeling.py \
                    --batch_size 16 \
                    --weight_decay 0.01 \
                    --checkpoint_dir $CKPT_DIR \
...
demo/ernie-seq-labeling/finetune_with_hub.py → demo/ernie-seq-labeling/sequence_labeling.py
...
@@ -40,35 +40,21 @@ args = parser.parse_args()
 # yapf: enable.

 if __name__ == '__main__':
-    # Select a finetune strategy
-    strategy = hub.BERTFinetuneStrategy(
-        weight_decay=args.weight_decay,
-        learning_rate=args.learning_rate,
-        warmup_strategy="linear_warmup_decay",
-    )
-
-    # Setup runing config for PaddleHub Finetune API
-    config = hub.RunConfig(
-        eval_interval=100,
-        use_cuda=True,
-        num_epoch=args.num_epoch,
-        batch_size=args.batch_size,
-        strategy=strategy)
-
-    # loading Paddlehub ERNIE pretrained model
+    # Step1: load Paddlehub ERNIE pretrained model
     module = hub.Module(name="ernie")
+    inputs, outputs, program = module.context(
+        trainable=True, max_seq_len=args.max_seq_len)

-    # Sequence Label dataset reader
+    # Step2: Download dataset and use SequenceLabelReader to read dataset
+    dataset = hub.dataset.MSRA_NER()
     reader = hub.reader.SequenceLabelReader(
-        dataset=hub.dataset.MSRA_NER(),
+        dataset=dataset,
         vocab_path=module.get_vocab_path(),
         max_seq_len=args.max_seq_len)
     num_labels = len(reader.get_labels())

-    input_dict, output_dict, program = module.context(
-        sign_name="tokens", trainable=True, max_seq_len=args.max_seq_len)
-
+    # Step3: construct transfer learning network
     with fluid.program_guard(program):
         label = fluid.layers.data(
             name="label", shape=[args.max_seq_len, 1], dtype='int64')
...
@@ -76,14 +62,15 @@ if __name__ == '__main__':
         # Use "pooled_output" for classification tasks on an entire sentence.
         # Use "sequence_output" for token-level output.
-        sequence_output = output_dict["sequence_output"]
+        sequence_output = outputs["sequence_output"]

         # Setup feed list for data feeder
         # Must feed all the tensor of ERNIE's module need
         # Compared to classification task, we need add seq_len tensor to feedlist
         feed_list = [
-            input_dict["input_ids"].name, input_dict["position_ids"].name,
-            input_dict["segment_ids"].name, input_dict["input_mask"].name,
-            label.name, seq_len
+            inputs["input_ids"].name, inputs["position_ids"].name,
+            inputs["segment_ids"].name, inputs["input_mask"].name,
+            label.name, seq_len
         ]
         # Define a sequence labeling finetune task by PaddleHub's API
         seq_label_task = hub.create_seq_labeling_task(
...
@@ -92,6 +79,19 @@ if __name__ == '__main__':
             seq_len=seq_len,
             num_classes=num_labels)

+        # Select a finetune strategy
+        strategy = hub.BERTFinetuneStrategy(
+            weight_decay=args.weight_decay,
+            learning_rate=args.learning_rate,
+            warmup_strategy="linear_warmup_decay",
+        )
+
+        # Setup runing config for PaddleHub Finetune API
+        config = hub.RunConfig(
+            use_cuda=True,
+            num_epoch=args.num_epoch,
+            batch_size=args.batch_size,
+            strategy=strategy)
+
         # Finetune and evaluate model by PaddleHub's API
         # will finish training, evaluation, testing, save model automatically
         hub.finetune_and_eval(
...
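The sequence-labeling demo follows the same four steps as the classification demos; the sketch below covers only the parts that differ. The seq_len variable and the leading arguments of create_seq_labeling_task fall in collapsed "..." regions of the diff, so their definitions here are assumptions; the argparse stand-in is illustrative, as in the earlier sketch.

# Sketch of the sequence-labeling specifics (new side of the diff above).
from types import SimpleNamespace

import paddle.fluid as fluid
import paddlehub as hub

args = SimpleNamespace(max_seq_len=128, weight_decay=0.01, learning_rate=5e-5,
                       num_epoch=3, batch_size=16)  # illustrative values

# Step1 is identical to the classification demos
module = hub.Module(name="ernie")
inputs, outputs, program = module.context(
    trainable=True, max_seq_len=args.max_seq_len)

# Step2: token-level reader over the MSRA_NER dataset
dataset = hub.dataset.MSRA_NER()
reader = hub.reader.SequenceLabelReader(
    dataset=dataset,
    vocab_path=module.get_vocab_path(),
    max_seq_len=args.max_seq_len)
num_labels = len(reader.get_labels())

# Step3: labels are per token, so the label tensor carries one id per position
with fluid.program_guard(program):
    label = fluid.layers.data(
        name="label", shape=[args.max_seq_len, 1], dtype='int64')
    # Assumed definition; the real one sits in a collapsed "..." region of the file.
    seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64')

    # Token-level output is used instead of "pooled_output"
    sequence_output = outputs["sequence_output"]

    # Compared to the classification demos, seq_len is appended to the feed list
    feed_list = [
        inputs["input_ids"].name, inputs["position_ids"].name,
        inputs["segment_ids"].name, inputs["input_mask"].name,
        label.name, seq_len
    ]

    # The leading arguments are collapsed in the diff; feature= and labels= are assumptions.
    seq_label_task = hub.create_seq_labeling_task(
        feature=sequence_output,
        labels=label,
        seq_len=seq_len,
        num_classes=num_labels)

    # Step4 mirrors the classification sketch, except that this demo's RunConfig
    # sets no checkpoint_dir; strategy, config and hub.finetune_and_eval follow as above.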
paddlehub/module/base_processor.py
...
@@ -23,12 +23,12 @@ class BaseProcessor(object):
     def preprocess(self, sign_name, data_dict):
         raise NotImplementedError(
-            "BaseProcessor' preprocess should not be call!")
+            "BaseProcessor' preprocess should not be called!")

     def postprocess(self, sign_name, data_out, data_info, **kwargs):
         raise NotImplementedError(
-            "BaseProcessor' postprocess should not be call!")
+            "BaseProcessor' postprocess should not be called!")

     def data_format(self, sign_name):
         raise NotImplementedError(
-            "BaseProcessor' data_format should not be call!")
+            "BaseProcessor' data_format should not be called!")
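The three methods above only define the processor interface, which is why each one raises NotImplementedError until a subclass overrides it. A minimal, hypothetical subclass might look like the sketch below; the class name and behavior are illustrative, only the method signatures come from the diff. Note that the module.py diff that follows checks the processor with issubclass, so the class itself, not an instance, is what gets handed to Module.

# Hypothetical processor subclass; only the method signatures are taken from the diff.
from paddlehub.module.base_processor import BaseProcessor


class EchoProcessor(BaseProcessor):
    def preprocess(self, sign_name, data_dict):
        # Pass the raw inputs through unchanged.
        return data_dict

    def postprocess(self, sign_name, data_out, data_info, **kwargs):
        # Return the raw model outputs as-is.
        return data_out

    def data_format(self, sign_name):
        # The exact schema PaddleHub expects is not shown in this diff,
        # so an empty mapping is used as a placeholder.
        return {}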
paddlehub/module/module.py
...
@@ -119,7 +119,7 @@ class Module(object):
         if processor:
             if not issubclass(processor, BaseProcessor):
                 raise TypeError(
-                    "processor shoule be an instance of paddlehub.BaseProcessor")
+                    "Processor shoule be an instance of paddlehub.BaseProcessor")
         if assets:
             self.assets = utils.to_list(assets)
...
@@ -129,10 +129,10 @@ class Module(object):
             self._generate_module_info(module_info)
             self._init_with_signature(signatures=signatures)
         else:
-            raise ValueError("Error! Module initialized parameter is empty")
+            raise ValueError("Module initialized parameter is empty")

     def _init_with_name(self, name):
-        logger.info("Try installing module %s" % name)
+        logger.info("Installing %s module" % name)
         result, tips, module_dir = default_module_manager.install_module(
             module_name=name)
         if not result:
...