Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
c2c1427c
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
大约 1 年 前同步成功
通知
282
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c2c1427c
编写于
11月 21, 2019
作者:
K
kinghuin
提交者:
wuzewu
11月 22, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
discard use_taskid
上级
1430c9e6
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
30 addition
and
96 deletion
+30
-96
demo/multi-label-classification/multi_label_classifier.py
demo/multi-label-classification/multi_label_classifier.py
+8
-23
demo/multi-label-classification/predict.py
demo/multi-label-classification/predict.py
+10
-25
demo/multi-label-classification/run_classifier.sh
demo/multi-label-classification/run_classifier.sh
+1
-2
demo/multi-label-classification/run_predict.sh
demo/multi-label-classification/run_predict.sh
+1
-1
demo/regression/regression.py
demo/regression/regression.py
+1
-5
demo/regression/run_regssion.sh
demo/regression/run_regssion.sh
+1
-2
demo/sequence-labeling/sequence_label.py
demo/sequence-labeling/sequence_label.py
+0
-4
demo/text-classification/predict.py
demo/text-classification/predict.py
+8
-33
demo/text-classification/run_predict.sh
demo/text-classification/run_predict.sh
+0
-1
未找到文件。
demo/multi-label-classification/multi_label_classifier.py
浏览文件 @
c2c1427c
...
...
@@ -30,36 +30,21 @@ parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup
parser
.
add_argument
(
"--checkpoint_dir"
,
type
=
str
,
default
=
None
,
help
=
"Directory to model checkpoint"
)
parser
.
add_argument
(
"--max_seq_len"
,
type
=
int
,
default
=
128
,
help
=
"Number of words of the longest seqence."
)
parser
.
add_argument
(
"--batch_size"
,
type
=
int
,
default
=
1
,
help
=
"Total examples' number in batch for training."
)
parser
.
add_argument
(
"--use_taskid"
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"Whether to user ernie v2 , if not to use bert."
)
args
=
parser
.
parse_args
()
# yapf: enable.
if
__name__
==
'__main__'
:
# Load Paddlehub BERT pretrained model
if
args
.
use_taskid
:
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
inputs
,
outputs
,
program
=
module
.
context
(
trainable
=
True
,
max_seq_len
=
args
.
max_seq_len
)
inputs
,
outputs
,
program
=
module
.
context
(
trainable
=
True
,
max_seq_len
=
args
.
max_seq_len
)
# Setup feed list for data feeder
feed_list
=
[
inputs
[
"input_ids"
].
name
,
inputs
[
"position_ids"
].
name
,
inputs
[
"segment_ids"
].
name
,
inputs
[
"input_mask"
].
name
]
else
:
module
=
hub
.
Module
(
name
=
"bert_uncased_L-12_H-768_A-12"
)
inputs
,
outputs
,
program
=
module
.
context
(
trainable
=
True
,
max_seq_len
=
args
.
max_seq_len
)
# Setup feed list for data feeder
feed_list
=
[
inputs
[
"input_ids"
].
name
,
inputs
[
"position_ids"
].
name
,
inputs
[
"segment_ids"
].
name
,
inputs
[
"input_mask"
].
name
,
]
# Setup feed list for data feeder
feed_list
=
[
inputs
[
"input_ids"
].
name
,
inputs
[
"position_ids"
].
name
,
inputs
[
"segment_ids"
].
name
,
inputs
[
"input_mask"
].
name
]
# Download dataset and use MultiLabelReader to read dataset
dataset
=
hub
.
dataset
.
Toxic
()
...
...
demo/multi-label-classification/predict.py
浏览文件 @
c2c1427c
...
...
@@ -36,38 +36,23 @@ parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory
parser
.
add_argument
(
"--batch_size"
,
type
=
int
,
default
=
1
,
help
=
"Total examples' number in batch for training."
)
parser
.
add_argument
(
"--max_seq_len"
,
type
=
int
,
default
=
128
,
help
=
"Number of words of the longest seqence."
)
parser
.
add_argument
(
"--use_gpu"
,
type
=
ast
.
literal_eval
,
default
=
True
,
help
=
"Whether use GPU for finetuning, input should be True or False"
)
parser
.
add_argument
(
"--use_taskid"
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"Whether to user ernie v2 , if not to use bert."
)
args
=
parser
.
parse_args
()
# yapf: enable.
if
__name__
==
'__main__'
:
# Load Paddlehub BERT pretrained model
if
args
.
use_taskid
:
module
=
hub
.
Module
(
name
=
"ernie_eng_base.hub_module"
)
module
=
hub
.
Module
(
name
=
"ernie_eng_base.hub_module"
)
inputs
,
outputs
,
program
=
module
.
context
(
trainable
=
True
,
max_seq_len
=
args
.
max_seq_len
)
inputs
,
outputs
,
program
=
module
.
context
(
trainable
=
True
,
max_seq_len
=
args
.
max_seq_len
)
# Setup feed list for data feeder
feed_list
=
[
inputs
[
"input_ids"
].
name
,
inputs
[
"position_ids"
].
name
,
inputs
[
"segment_ids"
].
name
,
inputs
[
"input_mask"
].
name
,
]
else
:
module
=
hub
.
Module
(
name
=
"bert_uncased_L-12_H-768_A-12"
)
inputs
,
outputs
,
program
=
module
.
context
(
trainable
=
True
,
max_seq_len
=
args
.
max_seq_len
)
# Setup feed list for data feeder
feed_list
=
[
inputs
[
"input_ids"
].
name
,
inputs
[
"position_ids"
].
name
,
inputs
[
"segment_ids"
].
name
,
inputs
[
"input_mask"
].
name
,
]
# Setup feed list for data feeder
feed_list
=
[
inputs
[
"input_ids"
].
name
,
inputs
[
"position_ids"
].
name
,
inputs
[
"segment_ids"
].
name
,
inputs
[
"input_mask"
].
name
,
]
# Download dataset and use MultiLabelReader to read dataset
dataset
=
hub
.
dataset
.
Toxic
()
...
...
demo/multi-label-classification/run_classifier.sh
浏览文件 @
c2c1427c
...
...
@@ -16,5 +16,4 @@ python -u multi_label_classifier.py \
--learning_rate
=
5e-5
\
--weight_decay
=
0.01
\
--max_seq_len
=
128
\
--num_epoch
=
3
\
--use_taskid
=
False
--num_epoch
=
3
demo/multi-label-classification/run_predict.sh
浏览文件 @
c2c1427c
...
...
@@ -2,4 +2,4 @@ export FLAGS_eager_delete_tensor_gb=0.0
export
CUDA_VISIBLE_DEVICES
=
0
CKPT_DIR
=
"./ckpt_toxic"
python
-u
predict.py
--checkpoint_dir
$CKPT_DIR
--max_seq_len
128
--use_gpu
True
--use_taskid
False
python
-u
predict.py
--checkpoint_dir
$CKPT_DIR
--max_seq_len
128
--use_gpu
True
demo/regression/regression.py
浏览文件 @
c2c1427c
...
...
@@ -34,7 +34,6 @@ parser.add_argument("--max_seq_len", type=int, default=512, help="Number of word
parser
.
add_argument
(
"--batch_size"
,
type
=
int
,
default
=
32
,
help
=
"Total examples' number in batch for training."
)
parser
.
add_argument
(
"--use_pyreader"
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"Whether use pyreader to feed data."
)
parser
.
add_argument
(
"--use_data_parallel"
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"Whether use data parallel."
)
parser
.
add_argument
(
"--use_taskid"
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"Whether to use taskid ,if yes to use ernie v2."
)
args
=
parser
.
parse_args
()
# yapf: enable.
...
...
@@ -43,10 +42,7 @@ if __name__ == '__main__':
# Download dataset and use ClassifyReader to read dataset
if
args
.
dataset
.
lower
()
==
"sts-b"
:
dataset
=
hub
.
dataset
.
GLUE
(
"STS-B"
)
if
args
.
use_taskid
:
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
else
:
module
=
hub
.
Module
(
name
=
"bert_uncased_L-12_H-768_A-12"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
else
:
raise
ValueError
(
"%s dataset is not defined"
%
args
.
dataset
)
...
...
demo/regression/run_regssion.sh
浏览文件 @
c2c1427c
...
...
@@ -16,5 +16,4 @@ python -u regression.py \
--max_seq_len
=
128
\
--num_epoch
=
3
\
--use_pyreader
=
True
\
--use_data_parallel
=
True
\
--use_taskid
=
False
\
--use_data_parallel
=
True
demo/sequence-labeling/sequence_label.py
浏览文件 @
c2c1427c
...
...
@@ -40,10 +40,6 @@ if __name__ == '__main__':
module
=
hub
.
Module
(
name
=
"ernie_tiny"
)
inputs
,
outputs
,
program
=
module
.
context
(
trainable
=
True
,
max_seq_len
=
args
.
max_seq_len
)
if
module
.
name
.
startswith
(
"ernie_v2"
):
use_taskid
=
True
else
:
use_taskid
=
False
# Download dataset and use SequenceLabelReader to read dataset
dataset
=
hub
.
dataset
.
MSRA_NER
()
...
...
demo/text-classification/predict.py
浏览文件 @
c2c1427c
...
...
@@ -36,7 +36,6 @@ parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whe
parser
.
add_argument
(
"--use_pyreader"
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"Whether use pyreader to feed data."
)
parser
.
add_argument
(
"--dataset"
,
type
=
str
,
default
=
"chnsenticorp"
,
help
=
"The choice of dataset"
)
parser
.
add_argument
(
"--use_data_parallel"
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"Whether use data parallel."
)
parser
.
add_argument
(
"--use_taskid"
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"Whether to use taskid ,if yes to use ernie v2."
)
args
=
parser
.
parse_args
()
# yapf: enable.
...
...
@@ -58,60 +57,36 @@ if __name__ == '__main__':
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
"mrpc"
:
dataset
=
hub
.
dataset
.
GLUE
(
"MRPC"
)
if
args
.
use_taskid
:
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
else
:
module
=
hub
.
Module
(
name
=
"bert_uncased_L-12_H-768_A-12"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
metrics_choices
=
[
"f1"
,
"acc"
]
# The first metric will be choose to eval. Ref: task.py:799
elif
args
.
dataset
.
lower
()
==
"qqp"
:
dataset
=
hub
.
dataset
.
GLUE
(
"QQP"
)
if
args
.
use_taskid
:
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
else
:
module
=
hub
.
Module
(
name
=
"bert_uncased_L-12_H-768_A-12"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
metrics_choices
=
[
"f1"
,
"acc"
]
elif
args
.
dataset
.
lower
()
==
"sst-2"
:
dataset
=
hub
.
dataset
.
GLUE
(
"SST-2"
)
if
args
.
use_taskid
:
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
else
:
module
=
hub
.
Module
(
name
=
"bert_uncased_L-12_H-768_A-12"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
"cola"
:
dataset
=
hub
.
dataset
.
GLUE
(
"CoLA"
)
if
args
.
use_taskid
:
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
else
:
module
=
hub
.
Module
(
name
=
"bert_uncased_L-12_H-768_A-12"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
metrics_choices
=
[
"matthews"
,
"acc"
]
elif
args
.
dataset
.
lower
()
==
"qnli"
:
dataset
=
hub
.
dataset
.
GLUE
(
"QNLI"
)
if
args
.
use_taskid
:
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
else
:
module
=
hub
.
Module
(
name
=
"bert_uncased_L-12_H-768_A-12"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
"rte"
:
dataset
=
hub
.
dataset
.
GLUE
(
"RTE"
)
if
args
.
use_taskid
:
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
else
:
module
=
hub
.
Module
(
name
=
"bert_uncased_L-12_H-768_A-12"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
"mnli"
or
args
.
dataset
.
lower
()
==
"mnli_m"
:
dataset
=
hub
.
dataset
.
GLUE
(
"MNLI_m"
)
if
args
.
use_taskid
:
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
else
:
module
=
hub
.
Module
(
name
=
"bert_uncased_L-12_H-768_A-12"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
"mnli_mm"
:
dataset
=
hub
.
dataset
.
GLUE
(
"MNLI_mm"
)
if
args
.
use_taskid
:
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
else
:
module
=
hub
.
Module
(
name
=
"bert_uncased_L-12_H-768_A-12"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
().
startswith
(
"xnli"
):
dataset
=
hub
.
dataset
.
XNLI
(
language
=
args
.
dataset
.
lower
()[
-
2
:])
...
...
demo/text-classification/run_predict.sh
浏览文件 @
c2c1427c
...
...
@@ -18,4 +18,3 @@ python -u predict.py --checkpoint_dir=$CKPT_DIR \
--use_gpu
=
True
\
--dataset
=
${
DATASET
}
\
--batch_size
=
150
\
--use_taskid
=
False
\
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录