Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
edf5630f
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
大约 1 年 前同步成功
通知
282
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
edf5630f
编写于
11月 26, 2019
作者:
Z
zhangxuefei
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/PaddleHub
into develop
上级
fba1f244
f68214c9
变更
15
隐藏空白更改
内联
并排
Showing
15 changed files
with
48 additions
and
20 deletions
+48
-20
demo/image-classification/run_classifier.sh
demo/image-classification/run_classifier.sh
+2
-0
demo/image-classification/run_predict.sh
demo/image-classification/run_predict.sh
+2
-0
demo/qa_classification/run_classifier.sh
demo/qa_classification/run_classifier.sh
+0
-1
demo/reading-comprehension/reading_comprehension.py
demo/reading-comprehension/reading_comprehension.py
+0
-2
demo/reading-comprehension/run_finetune.sh
demo/reading-comprehension/run_finetune.sh
+1
-0
demo/reading-comprehension/run_predict.sh
demo/reading-comprehension/run_predict.sh
+1
-0
demo/regression/run_predict.sh
demo/regression/run_predict.sh
+1
-1
demo/sequence-labeling/predict.py
demo/sequence-labeling/predict.py
+4
-2
demo/sequence-labeling/run_sequence_label.sh
demo/sequence-labeling/run_sequence_label.sh
+1
-0
demo/sequence-labeling/sequence_label.py
demo/sequence-labeling/sequence_label.py
+0
-3
demo/text-classification/predict.py
demo/text-classification/predict.py
+24
-4
demo/text-classification/run_classifier.sh
demo/text-classification/run_classifier.sh
+1
-0
demo/text-classification/run_predict.sh
demo/text-classification/run_predict.sh
+1
-1
demo/text-classification/text_classifier.py
demo/text-classification/text_classifier.py
+6
-6
paddlehub/reader/cv_reader.py
paddlehub/reader/cv_reader.py
+4
-0
未找到文件。
demo/image-classification/run_classifier.sh
浏览文件 @
edf5630f
export
FLAGS_eager_delete_tensor_gb
=
0.0
export
CUDA_VISIBLE_DEVICES
=
0
python
-u
img_classifier.py
$@
demo/image-classification/run_predict.sh
浏览文件 @
edf5630f
export
FLAGS_eager_delete_tensor_gb
=
0.0
export
CUDA_VISIBLE_DEVICES
=
0
python
-u
predict.py
$@
demo/qa_classification/run_classifier.sh
浏览文件 @
edf5630f
export
FLAGS_eager_delete_tensor_gb
=
0.0
export
CUDA_VISIBLE_DEVICES
=
0
CKPT_DIR
=
"./ckpt_qa"
# Recommended hyperparameters for different tasks
# ChnSentiCorp: batch_size=24, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
...
...
demo/reading-comprehension/reading_comprehension.py
浏览文件 @
edf5630f
...
...
@@ -89,9 +89,7 @@ if __name__ == '__main__':
# Set up the running config for the PaddleHub Finetune API
config
=
hub
.
RunConfig
(
log_interval
=
10
,
eval_interval
=
300
,
save_ckpt_interval
=
10000
,
use_pyreader
=
args
.
use_pyreader
,
use_data_parallel
=
args
.
use_data_parallel
,
use_cuda
=
args
.
use_gpu
,
...
...
demo/reading-comprehension/run_finetune.sh
浏览文件 @
edf5630f
export
FLAGS_eager_delete_tensor_gb
=
0.0
export
CUDA_VISIBLE_DEVICES
=
0
# Recommended hyperparameters for different tasks
# squad: batch_size=8, weight_decay=0, num_epoch=3, max_seq_len=512, lr=5e-5
...
...
demo/reading-comprehension/run_predict.sh
浏览文件 @
edf5630f
export
FLAGS_eager_delete_tensor_gb
=
0.0
export
CUDA_VISIBLE_DEVICES
=
0
CKPT_DIR
=
"./ckpt_cmrc2018"
dataset
=
cmrc2018
...
...
demo/regression/run_predict.sh
浏览文件 @
edf5630f
export
FLAGS_eager_delete_tensor_gb
=
0.0
#
export CUDA_VISIBLE_DEVICES=0
export
CUDA_VISIBLE_DEVICES
=
0
# Users can select chnsenticorp, nlpcc_dbqa, lcqmc and so on for different tasks
DATASET
=
"STS-B"
...
...
demo/sequence-labeling/predict.py
浏览文件 @
edf5630f
...
...
@@ -41,7 +41,7 @@ args = parser.parse_args()
if
__name__
==
'__main__'
:
# loading Paddlehub ERNIE pretrained model
module
=
hub
.
Module
(
name
=
"ernie"
)
module
=
hub
.
Module
(
name
=
"ernie
_tiny
"
)
inputs
,
outputs
,
program
=
module
.
context
(
max_seq_len
=
args
.
max_seq_len
)
# Sentence labeling dataset reader
...
...
@@ -49,7 +49,9 @@ if __name__ == '__main__':
reader
=
hub
.
reader
.
SequenceLabelReader
(
dataset
=
dataset
,
vocab_path
=
module
.
get_vocab_path
(),
max_seq_len
=
args
.
max_seq_len
)
max_seq_len
=
args
.
max_seq_len
,
sp_model_path
=
module
.
get_spm_path
(),
word_dict_path
=
module
.
get_word_dict_path
())
inv_label_map
=
{
val
:
key
for
key
,
val
in
reader
.
label_map
.
items
()}
place
=
fluid
.
CUDAPlace
(
0
)
if
args
.
use_gpu
else
fluid
.
CPUPlace
()
...
...
demo/sequence-labeling/run_sequence_label.sh
浏览文件 @
edf5630f
export
FLAGS_eager_delete_tensor_gb
=
0.0
export
CUDA_VISIBLE_DEVICES
=
0
CKPT_DIR
=
"./ckpt_sequence_label"
python
-u
sequence_label.py
\
...
...
demo/sequence-labeling/sequence_label.py
浏览文件 @
edf5630f
...
...
@@ -71,9 +71,6 @@ if __name__ == '__main__':
# Set up the running config for the PaddleHub Finetune API
config
=
hub
.
RunConfig
(
log_interval
=
10
,
eval_interval
=
300
,
save_ckpt_interval
=
10000
,
use_data_parallel
=
args
.
use_data_parallel
,
use_pyreader
=
args
.
use_pyreader
,
use_cuda
=
args
.
use_gpu
,
...
...
demo/text-classification/predict.py
浏览文件 @
edf5630f
...
...
@@ -45,15 +45,35 @@ if __name__ == '__main__':
# Download dataset and use ClassifyReader to read dataset
if
args
.
dataset
.
lower
()
==
"chnsenticorp"
:
dataset
=
hub
.
dataset
.
ChnSentiCorp
()
module
=
hub
.
Module
(
name
=
"ernie"
)
module
=
hub
.
Module
(
name
=
"ernie_tiny"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
"tnews"
:
dataset
=
hub
.
dataset
.
TNews
()
module
=
hub
.
Module
(
name
=
"roberta_wwm_ext_chinese_L-24_H-1024_A-16"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
"nlpcc_dbqa"
:
dataset
=
hub
.
dataset
.
NLPCC_DBQA
()
module
=
hub
.
Module
(
name
=
"
ernie
"
)
module
=
hub
.
Module
(
name
=
"
roberta_wwm_ext_chinese_L-24_H-1024_A-16
"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
"lcqmc"
:
dataset
=
hub
.
dataset
.
LCQMC
()
module
=
hub
.
Module
(
name
=
"ernie"
)
module
=
hub
.
Module
(
name
=
"roberta_wwm_ext_chinese_L-24_H-1024_A-16"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
'inews'
:
dataset
=
hub
.
dataset
.
INews
()
module
=
hub
.
Module
(
name
=
"roberta_wwm_ext_chinese_L-24_H-1024_A-16"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
'bq'
:
dataset
=
hub
.
dataset
.
BQ
()
module
=
hub
.
Module
(
name
=
"roberta_wwm_ext_chinese_L-24_H-1024_A-16"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
'thucnews'
:
dataset
=
hub
.
dataset
.
THUCNEWS
()
module
=
hub
.
Module
(
name
=
"roberta_wwm_ext_chinese_L-24_H-1024_A-16"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
'iflytek'
:
dataset
=
hub
.
dataset
.
IFLYTEK
()
module
=
hub
.
Module
(
name
=
"roberta_wwm_ext_chinese_L-24_H-1024_A-16"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
"mrpc"
:
dataset
=
hub
.
dataset
.
GLUE
(
"MRPC"
)
...
...
@@ -90,7 +110,7 @@ if __name__ == '__main__':
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
().
startswith
(
"xnli"
):
dataset
=
hub
.
dataset
.
XNLI
(
language
=
args
.
dataset
.
lower
()[
-
2
:])
module
=
hub
.
Module
(
name
=
"
bert_multi_cased_L-12_H-768_A-12
"
)
module
=
hub
.
Module
(
name
=
"
roberta_wwm_ext_chinese_L-24_H-1024_A-16
"
)
metrics_choices
=
[
"acc"
]
else
:
raise
ValueError
(
"%s dataset is not defined"
%
args
.
dataset
)
...
...
demo/text-classification/run_classifier.sh
浏览文件 @
edf5630f
export
FLAGS_eager_delete_tensor_gb
=
0.0
export
CUDA_VISIBLE_DEVICES
=
0
# Users can select chnsenticorp, nlpcc_dbqa, lcqmc and so on for different tasks
DATASET
=
"chnsenticorp"
...
...
demo/text-classification/run_predict.sh
浏览文件 @
edf5630f
...
...
@@ -17,4 +17,4 @@ python -u predict.py --checkpoint_dir=$CKPT_DIR \
--max_seq_len
=
128
\
--use_gpu
=
True
\
--dataset
=
${
DATASET
}
\
--batch_size
=
150
\
--batch_size
=
32
\
demo/text-classification/text_classifier.py
浏览文件 @
edf5630f
...
...
@@ -47,7 +47,7 @@ if __name__ == '__main__':
elif
args
.
dataset
.
lower
()
==
"tnews"
:
dataset
=
hub
.
dataset
.
TNews
()
module
=
hub
.
Module
(
name
=
"roberta_wwm_ext_chinese_L-24_H-1024_A-16"
)
metrics_choices
=
[
"acc"
,
"f1"
]
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
"nlpcc_dbqa"
:
dataset
=
hub
.
dataset
.
NLPCC_DBQA
()
module
=
hub
.
Module
(
name
=
"roberta_wwm_ext_chinese_L-24_H-1024_A-16"
)
...
...
@@ -59,19 +59,19 @@ if __name__ == '__main__':
elif
args
.
dataset
.
lower
()
==
'inews'
:
dataset
=
hub
.
dataset
.
INews
()
module
=
hub
.
Module
(
name
=
"roberta_wwm_ext_chinese_L-24_H-1024_A-16"
)
metrics_choices
=
[
"acc"
,
"f1"
]
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
'bq'
:
dataset
=
hub
.
dataset
.
BQ
()
module
=
hub
.
Module
(
name
=
"roberta_wwm_ext_chinese_L-24_H-1024_A-16"
)
metrics_choices
=
[
"acc"
,
"f1"
]
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
'thucnews'
:
dataset
=
hub
.
dataset
.
THUCNEWS
()
module
=
hub
.
Module
(
name
=
"roberta_wwm_ext_chinese_L-24_H-1024_A-16"
)
metrics_choices
=
[
"acc"
,
"f1"
]
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
'iflytek'
:
dataset
=
hub
.
dataset
.
IFLYTEK
()
module
=
hub
.
Module
(
name
=
"roberta_wwm_ext_chinese_L-24_H-1024_A-16"
)
metrics_choices
=
[
"acc"
,
"f1"
]
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
"mrpc"
:
dataset
=
hub
.
dataset
.
GLUE
(
"MRPC"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
...
...
@@ -97,7 +97,7 @@ if __name__ == '__main__':
dataset
=
hub
.
dataset
.
GLUE
(
"RTE"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
metrics_choices
=
[
"acc"
]
elif
args
.
dataset
.
lower
()
==
"mnli"
or
args
.
dataset
.
lower
()
==
"mnli"
:
elif
args
.
dataset
.
lower
()
==
"mnli"
or
args
.
dataset
.
lower
()
==
"mnli
_m
"
:
dataset
=
hub
.
dataset
.
GLUE
(
"MNLI_m"
)
module
=
hub
.
Module
(
name
=
"ernie_v2_eng_base"
)
metrics_choices
=
[
"acc"
]
...
...
paddlehub/reader/cv_reader.py
浏览文件 @
edf5630f
...
...
@@ -49,6 +49,7 @@ class ImageClassificationReader(object):
self
.
data_augmentation
=
data_augmentation
self
.
images_std
=
images_std
self
.
images_mean
=
images_mean
self
.
num_examples
=
{
'train'
:
-
1
,
'dev'
:
-
1
,
'test'
:
-
1
}
if
self
.
images_mean
is
None
:
try
:
...
...
@@ -80,12 +81,15 @@ class ImageClassificationReader(object):
raise
ValueError
(
"The dataset is none and it's not allowed!"
)
if
phase
==
"train"
:
data
=
self
.
dataset
.
train_data
(
shuffle
)
self
.
num_examples
[
'train'
]
=
len
(
self
.
get_train_examples
())
elif
phase
==
"test"
:
shuffle
=
False
data
=
self
.
dataset
.
test_data
(
shuffle
)
self
.
num_examples
[
'test'
]
=
len
(
self
.
get_test_examples
())
elif
phase
==
"val"
or
phase
==
"dev"
:
shuffle
=
False
data
=
self
.
dataset
.
validate_data
(
shuffle
)
self
.
num_examples
[
'dev'
]
=
len
(
self
.
get_dev_examples
())
elif
phase
==
"predict"
:
data
=
data
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录