Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
90bbb041
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
大约 1 年 前同步成功
通知
282
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
90bbb041
编写于
1月 08, 2020
作者:
K
kinghuin
提交者:
wuzewu
1月 08, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix typo (#316)
* fix typo * enhance predict dataset
上级
39643114
变更
23
隐藏空白更改
内联
并排
Showing
23 changed file
with
93 addition
and
75 deletion
+93
-75
paddlehub/dataset/base_cv_dataset.py
paddlehub/dataset/base_cv_dataset.py
+3
-3
paddlehub/dataset/base_nlp_dataset.py
paddlehub/dataset/base_nlp_dataset.py
+42
-24
paddlehub/dataset/bq.py
paddlehub/dataset/bq.py
+2
-2
paddlehub/dataset/chnsenticorp.py
paddlehub/dataset/chnsenticorp.py
+2
-2
paddlehub/dataset/cmrc2018.py
paddlehub/dataset/cmrc2018.py
+2
-2
paddlehub/dataset/dataset.py
paddlehub/dataset/dataset.py
+9
-9
paddlehub/dataset/dogcat.py
paddlehub/dataset/dogcat.py
+2
-2
paddlehub/dataset/drcd.py
paddlehub/dataset/drcd.py
+2
-2
paddlehub/dataset/flowers.py
paddlehub/dataset/flowers.py
+2
-2
paddlehub/dataset/food101.py
paddlehub/dataset/food101.py
+2
-2
paddlehub/dataset/glue.py
paddlehub/dataset/glue.py
+2
-2
paddlehub/dataset/iflytek.py
paddlehub/dataset/iflytek.py
+2
-2
paddlehub/dataset/indoor67.py
paddlehub/dataset/indoor67.py
+2
-2
paddlehub/dataset/inews.py
paddlehub/dataset/inews.py
+2
-2
paddlehub/dataset/lcqmc.py
paddlehub/dataset/lcqmc.py
+2
-2
paddlehub/dataset/msra_ner.py
paddlehub/dataset/msra_ner.py
+2
-2
paddlehub/dataset/nlpcc_dbqa.py
paddlehub/dataset/nlpcc_dbqa.py
+2
-2
paddlehub/dataset/squad.py
paddlehub/dataset/squad.py
+2
-2
paddlehub/dataset/stanford_dogs.py
paddlehub/dataset/stanford_dogs.py
+2
-2
paddlehub/dataset/thucnews.py
paddlehub/dataset/thucnews.py
+2
-2
paddlehub/dataset/toxic.py
paddlehub/dataset/toxic.py
+2
-2
paddlehub/dataset/xnli.py
paddlehub/dataset/xnli.py
+2
-2
paddlehub/finetune/task/classifier_task.py
paddlehub/finetune/task/classifier_task.py
+1
-1
未找到文件。
paddlehub/dataset/base_cv_dataset.py
浏览文件 @
90bbb041
...
@@ -26,7 +26,7 @@ from paddlehub.common.downloader import default_downloader
...
@@ -26,7 +26,7 @@ from paddlehub.common.downloader import default_downloader
from
paddlehub.common.logger
import
logger
from
paddlehub.common.logger
import
logger
class
BaseCVDatast
(
BaseDataset
):
class
BaseCVDatas
e
t
(
BaseDataset
):
def
__init__
(
self
,
def
__init__
(
self
,
base_path
,
base_path
,
train_list_file
=
None
,
train_list_file
=
None
,
...
@@ -35,7 +35,7 @@ class BaseCVDatast(BaseDataset):
...
@@ -35,7 +35,7 @@ class BaseCVDatast(BaseDataset):
predict_list_file
=
None
,
predict_list_file
=
None
,
label_list_file
=
None
,
label_list_file
=
None
,
label_list
=
None
):
label_list
=
None
):
super
(
BaseCVDatast
,
self
).
__init__
(
super
(
BaseCVDatas
e
t
,
self
).
__init__
(
base_path
=
base_path
,
base_path
=
base_path
,
train_file
=
train_list_file
,
train_file
=
train_list_file
,
dev_file
=
validate_list_file
,
dev_file
=
validate_list_file
,
...
@@ -65,7 +65,7 @@ class BaseCVDatast(BaseDataset):
...
@@ -65,7 +65,7 @@ class BaseCVDatast(BaseDataset):
return
data
return
data
# discarded. please use BaseCVDatast
# discarded. please use BaseCVDatas
e
t
class
ImageClassificationDataset
(
object
):
class
ImageClassificationDataset
(
object
):
def
__init__
(
self
):
def
__init__
(
self
):
logger
.
warning
(
logger
.
warning
(
...
...
paddlehub/dataset/base_nlp_dataset.py
浏览文件 @
90bbb041
...
@@ -21,9 +21,10 @@ import io
...
@@ -21,9 +21,10 @@ import io
import
csv
import
csv
from
paddlehub.dataset
import
InputExample
,
BaseDataset
from
paddlehub.dataset
import
InputExample
,
BaseDataset
from
paddlehub.common.logger
import
logger
class
BaseNLPDatast
(
BaseDataset
):
class
BaseNLPDatas
e
t
(
BaseDataset
):
def
__init__
(
self
,
def
__init__
(
self
,
base_path
,
base_path
,
train_file
=
None
,
train_file
=
None
,
...
@@ -32,11 +33,11 @@ class BaseNLPDatast(BaseDataset):
...
@@ -32,11 +33,11 @@ class BaseNLPDatast(BaseDataset):
predict_file
=
None
,
predict_file
=
None
,
label_file
=
None
,
label_file
=
None
,
label_list
=
None
,
label_list
=
None
,
train_file_with_head
=
False
,
train_file_with_head
er
=
False
,
dev_file_with_head
=
False
,
dev_file_with_head
er
=
False
,
test_file_with_head
=
False
,
test_file_with_head
er
=
False
,
predict_file_with_head
=
False
):
predict_file_with_head
er
=
False
):
super
(
BaseNLPDatast
,
self
).
__init__
(
super
(
BaseNLPDatas
e
t
,
self
).
__init__
(
base_path
=
base_path
,
base_path
=
base_path
,
train_file
=
train_file
,
train_file
=
train_file
,
dev_file
=
dev_file
,
dev_file
=
dev_file
,
...
@@ -44,37 +45,54 @@ class BaseNLPDatast(BaseDataset):
...
@@ -44,37 +45,54 @@ class BaseNLPDatast(BaseDataset):
predict_file
=
predict_file
,
predict_file
=
predict_file
,
label_file
=
label_file
,
label_file
=
label_file
,
label_list
=
label_list
,
label_list
=
label_list
,
train_file_with_head
=
train_file_with_head
,
train_file_with_head
er
=
train_file_with_header
,
dev_file_with_head
=
dev_file_with_head
,
dev_file_with_head
er
=
dev_file_with_header
,
test_file_with_head
=
test_file_with_head
,
test_file_with_head
er
=
test_file_with_header
,
predict_file_with_head
=
predict_file_with_head
)
predict_file_with_head
er
=
predict_file_with_header
)
def
_read_file
(
self
,
input_file
,
phase
=
None
):
def
_read_file
(
self
,
input_file
,
phase
=
None
):
"""Reads a tab separated value file."""
"""Reads a tab separated value file."""
has_warned
=
False
with
io
.
open
(
input_file
,
"r"
,
encoding
=
"UTF-8"
)
as
file
:
with
io
.
open
(
input_file
,
"r"
,
encoding
=
"UTF-8"
)
as
file
:
reader
=
csv
.
reader
(
file
,
delimiter
=
"
\t
"
,
quotechar
=
None
)
reader
=
csv
.
reader
(
file
,
delimiter
=
"
\t
"
,
quotechar
=
None
)
examples
=
[]
examples
=
[]
for
(
i
,
line
)
in
enumerate
(
reader
):
for
(
i
,
line
)
in
enumerate
(
reader
):
if
i
==
0
:
if
i
==
0
:
ncol
=
len
(
line
)
ncol
=
len
(
line
)
if
self
.
if_file_with_head
[
phase
]:
if
self
.
if_file_with_head
er
[
phase
]:
continue
continue
if
ncol
==
1
:
if
phase
!=
"predict"
:
if
phase
!=
"predict"
:
if
ncol
==
1
:
example
=
InputExample
(
guid
=
i
,
text_a
=
line
[
0
])
else
:
raise
Exception
(
raise
Exception
(
"the %s file: %s only has one column but it is not a predict file"
"the %s file: %s only has one column but it is not a predict file"
%
(
phase
,
input_file
))
%
(
phase
,
input_file
))
elif
ncol
==
2
:
elif
ncol
==
2
:
example
=
InputExample
(
example
=
InputExample
(
guid
=
i
,
text_a
=
line
[
0
],
label
=
line
[
1
])
guid
=
i
,
text_a
=
line
[
0
],
label
=
line
[
1
])
elif
ncol
==
3
:
elif
ncol
==
3
:
example
=
InputExample
(
example
=
InputExample
(
guid
=
i
,
text_a
=
line
[
0
],
text_b
=
line
[
1
],
label
=
line
[
2
])
guid
=
i
,
text_a
=
line
[
0
],
text_b
=
line
[
1
],
label
=
line
[
2
])
else
:
raise
Exception
(
"the %s file: %s has too many columns (should <=3)"
%
(
phase
,
input_file
))
else
:
else
:
raise
Exception
(
if
ncol
==
1
:
"the %s file: %s has too many columns (should <=3)"
%
example
=
InputExample
(
guid
=
i
,
text_a
=
line
[
0
])
(
phase
,
input_file
))
elif
ncol
==
2
:
if
not
has_warned
:
logger
.
warning
(
"the predict file: %s has 2 columns, as it is a predict file, the second one will be regarded as text_b"
%
(
input_file
))
has_warned
=
True
example
=
InputExample
(
guid
=
i
,
text_a
=
line
[
0
],
text_b
=
line
[
1
])
else
:
raise
Exception
(
"the predict file: %s has too many columns (should <=2)"
%
(
input_file
))
examples
.
append
(
example
)
examples
.
append
(
example
)
return
examples
return
examples
paddlehub/dataset/bq.py
浏览文件 @
90bbb041
...
@@ -20,10 +20,10 @@ from __future__ import print_function
...
@@ -20,10 +20,10 @@ from __future__ import print_function
import
os
import
os
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
class
BQ
(
BaseNLPDatast
):
class
BQ
(
BaseNLPDatas
e
t
):
def
__init__
(
self
):
def
__init__
(
self
):
dataset_dir
=
os
.
path
.
join
(
DATA_HOME
,
"bq"
)
dataset_dir
=
os
.
path
.
join
(
DATA_HOME
,
"bq"
)
base_path
=
self
.
_download_dataset
(
base_path
=
self
.
_download_dataset
(
...
...
paddlehub/dataset/chnsenticorp.py
浏览文件 @
90bbb041
...
@@ -23,10 +23,10 @@ import csv
...
@@ -23,10 +23,10 @@ import csv
from
paddlehub.dataset
import
InputExample
from
paddlehub.dataset
import
InputExample
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
class
ChnSentiCorp
(
BaseNLPDatast
):
class
ChnSentiCorp
(
BaseNLPDatas
e
t
):
"""
"""
ChnSentiCorp (by Tan Songbo at ICT of Chinese Academy of Sciences, and for
ChnSentiCorp (by Tan Songbo at ICT of Chinese Academy of Sciences, and for
opinion mining)
opinion mining)
...
...
paddlehub/dataset/cmrc2018.py
浏览文件 @
90bbb041
...
@@ -20,7 +20,7 @@ import os
...
@@ -20,7 +20,7 @@ import os
from
paddlehub.reader
import
tokenization
from
paddlehub.reader
import
tokenization
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.logger
import
logger
from
paddlehub.common.logger
import
logger
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/cmrc2018.tar.gz"
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/cmrc2018.tar.gz"
SPIECE_UNDERLINE
=
'▁'
SPIECE_UNDERLINE
=
'▁'
...
@@ -62,7 +62,7 @@ class CMRC2018Example(object):
...
@@ -62,7 +62,7 @@ class CMRC2018Example(object):
return
s
return
s
class
CMRC2018
(
BaseNLPDatast
):
class
CMRC2018
(
BaseNLPDatas
e
t
):
"""A single set of features of data."""
"""A single set of features of data."""
def
__init__
(
self
):
def
__init__
(
self
):
...
...
paddlehub/dataset/dataset.py
浏览文件 @
90bbb041
...
@@ -64,10 +64,10 @@ class BaseDataset(object):
...
@@ -64,10 +64,10 @@ class BaseDataset(object):
predict_file
=
None
,
predict_file
=
None
,
label_file
=
None
,
label_file
=
None
,
label_list
=
None
,
label_list
=
None
,
train_file_with_head
=
False
,
train_file_with_head
er
=
False
,
dev_file_with_head
=
False
,
dev_file_with_head
er
=
False
,
test_file_with_head
=
False
,
test_file_with_head
er
=
False
,
predict_file_with_head
=
False
):
predict_file_with_head
er
=
False
):
if
not
(
train_file
or
dev_file
or
test_file
):
if
not
(
train_file
or
dev_file
or
test_file
):
raise
ValueError
(
"At least one file should be assigned"
)
raise
ValueError
(
"At least one file should be assigned"
)
self
.
base_path
=
base_path
self
.
base_path
=
base_path
...
@@ -83,11 +83,11 @@ class BaseDataset(object):
...
@@ -83,11 +83,11 @@ class BaseDataset(object):
self
.
test_examples
=
[]
self
.
test_examples
=
[]
self
.
predict_examples
=
[]
self
.
predict_examples
=
[]
self
.
if_file_with_head
=
{
self
.
if_file_with_head
er
=
{
"train"
:
train_file_with_head
,
"train"
:
train_file_with_head
er
,
"dev"
:
dev_file_with_head
,
"dev"
:
dev_file_with_head
er
,
"test"
:
test_file_with_head
,
"test"
:
test_file_with_head
er
,
"predict"
:
predict_file_with_head
"predict"
:
predict_file_with_head
er
}
}
if
train_file
:
if
train_file
:
...
...
paddlehub/dataset/dogcat.py
浏览文件 @
90bbb041
...
@@ -20,10 +20,10 @@ from __future__ import print_function
...
@@ -20,10 +20,10 @@ from __future__ import print_function
import
os
import
os
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.dataset.base_cv_dataset
import
BaseCVDatast
from
paddlehub.dataset.base_cv_dataset
import
BaseCVDatas
e
t
class
DogCatDataset
(
BaseCVDatast
):
class
DogCatDataset
(
BaseCVDatas
e
t
):
def
__init__
(
self
):
def
__init__
(
self
):
dataset_path
=
os
.
path
.
join
(
hub
.
common
.
dir
.
DATA_HOME
,
"dog-cat"
)
dataset_path
=
os
.
path
.
join
(
hub
.
common
.
dir
.
DATA_HOME
,
"dog-cat"
)
base_path
=
self
.
_download_dataset
(
base_path
=
self
.
_download_dataset
(
...
...
paddlehub/dataset/drcd.py
浏览文件 @
90bbb041
...
@@ -20,7 +20,7 @@ import os
...
@@ -20,7 +20,7 @@ import os
from
paddlehub.reader
import
tokenization
from
paddlehub.reader
import
tokenization
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.logger
import
logger
from
paddlehub.common.logger
import
logger
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/drcd.tar.gz"
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/drcd.tar.gz"
SPIECE_UNDERLINE
=
'▁'
SPIECE_UNDERLINE
=
'▁'
...
@@ -62,7 +62,7 @@ class DRCDExample(object):
...
@@ -62,7 +62,7 @@ class DRCDExample(object):
return
s
return
s
class
DRCD
(
BaseNLPDatast
):
class
DRCD
(
BaseNLPDatas
e
t
):
"""A single set of features of data."""
"""A single set of features of data."""
def
__init__
(
self
):
def
__init__
(
self
):
...
...
paddlehub/dataset/flowers.py
浏览文件 @
90bbb041
...
@@ -20,10 +20,10 @@ from __future__ import print_function
...
@@ -20,10 +20,10 @@ from __future__ import print_function
import
os
import
os
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.dataset.base_cv_dataset
import
BaseCVDatast
from
paddlehub.dataset.base_cv_dataset
import
BaseCVDatas
e
t
class
FlowersDataset
(
BaseCVDatast
):
class
FlowersDataset
(
BaseCVDatas
e
t
):
def
__init__
(
self
):
def
__init__
(
self
):
dataset_path
=
os
.
path
.
join
(
hub
.
common
.
dir
.
DATA_HOME
,
"flower_photos"
)
dataset_path
=
os
.
path
.
join
(
hub
.
common
.
dir
.
DATA_HOME
,
"flower_photos"
)
base_path
=
self
.
_download_dataset
(
base_path
=
self
.
_download_dataset
(
...
...
paddlehub/dataset/food101.py
浏览文件 @
90bbb041
...
@@ -20,10 +20,10 @@ from __future__ import print_function
...
@@ -20,10 +20,10 @@ from __future__ import print_function
import
os
import
os
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.dataset.base_cv_dataset
import
BaseCVDatast
from
paddlehub.dataset.base_cv_dataset
import
BaseCVDatas
e
t
class
Food101Dataset
(
BaseCVDatast
):
class
Food101Dataset
(
BaseCVDatas
e
t
):
def
__init__
(
self
):
def
__init__
(
self
):
dataset_path
=
os
.
path
.
join
(
hub
.
common
.
dir
.
DATA_HOME
,
"food-101"
,
dataset_path
=
os
.
path
.
join
(
hub
.
common
.
dir
.
DATA_HOME
,
"food-101"
,
"images"
)
"images"
)
...
...
paddlehub/dataset/glue.py
浏览文件 @
90bbb041
...
@@ -24,12 +24,12 @@ import io
...
@@ -24,12 +24,12 @@ import io
from
paddlehub.dataset
import
InputExample
from
paddlehub.dataset
import
InputExample
from
paddlehub.common.logger
import
logger
from
paddlehub.common.logger
import
logger
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/glue_data.tar.gz"
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/glue_data.tar.gz"
class
GLUE
(
BaseNLPDatast
):
class
GLUE
(
BaseNLPDatas
e
t
):
"""
"""
Please refer to
Please refer to
https://gluebenchmark.com
https://gluebenchmark.com
...
...
paddlehub/dataset/iflytek.py
浏览文件 @
90bbb041
...
@@ -22,12 +22,12 @@ import os
...
@@ -22,12 +22,12 @@ import os
from
paddlehub.dataset
import
InputExample
from
paddlehub.dataset
import
InputExample
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/iflytek.tar.gz"
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/iflytek.tar.gz"
class
IFLYTEK
(
BaseNLPDatast
):
class
IFLYTEK
(
BaseNLPDatas
e
t
):
def
__init__
(
self
):
def
__init__
(
self
):
dataset_dir
=
os
.
path
.
join
(
DATA_HOME
,
"iflytek"
)
dataset_dir
=
os
.
path
.
join
(
DATA_HOME
,
"iflytek"
)
base_path
=
self
.
_download_dataset
(
dataset_dir
,
url
=
_DATA_URL
)
base_path
=
self
.
_download_dataset
(
dataset_dir
,
url
=
_DATA_URL
)
...
...
paddlehub/dataset/indoor67.py
浏览文件 @
90bbb041
...
@@ -20,10 +20,10 @@ from __future__ import print_function
...
@@ -20,10 +20,10 @@ from __future__ import print_function
import
os
import
os
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.dataset.base_cv_dataset
import
BaseCVDatast
from
paddlehub.dataset.base_cv_dataset
import
BaseCVDatas
e
t
class
Indoor67Dataset
(
BaseCVDatast
):
class
Indoor67Dataset
(
BaseCVDatas
e
t
):
def
__init__
(
self
):
def
__init__
(
self
):
dataset_path
=
os
.
path
.
join
(
hub
.
common
.
dir
.
DATA_HOME
,
"Indoor67"
)
dataset_path
=
os
.
path
.
join
(
hub
.
common
.
dir
.
DATA_HOME
,
"Indoor67"
)
base_path
=
self
.
_download_dataset
(
base_path
=
self
.
_download_dataset
(
...
...
paddlehub/dataset/inews.py
浏览文件 @
90bbb041
...
@@ -23,12 +23,12 @@ import csv
...
@@ -23,12 +23,12 @@ import csv
from
paddlehub.dataset
import
InputExample
from
paddlehub.dataset
import
InputExample
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/inews.tar.gz"
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/inews.tar.gz"
class
INews
(
BaseNLPDatast
):
class
INews
(
BaseNLPDatas
e
t
):
"""
"""
INews is a sentiment analysis dataset for Internet News
INews is a sentiment analysis dataset for Internet News
"""
"""
...
...
paddlehub/dataset/lcqmc.py
浏览文件 @
90bbb041
...
@@ -23,12 +23,12 @@ import csv
...
@@ -23,12 +23,12 @@ import csv
from
paddlehub.dataset
import
InputExample
from
paddlehub.dataset
import
InputExample
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/lcqmc.tar.gz"
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/lcqmc.tar.gz"
class
LCQMC
(
BaseNLPDatast
):
class
LCQMC
(
BaseNLPDatas
e
t
):
def
__init__
(
self
):
def
__init__
(
self
):
dataset_dir
=
os
.
path
.
join
(
DATA_HOME
,
"lcqmc"
)
dataset_dir
=
os
.
path
.
join
(
DATA_HOME
,
"lcqmc"
)
base_path
=
self
.
_download_dataset
(
dataset_dir
,
url
=
_DATA_URL
)
base_path
=
self
.
_download_dataset
(
dataset_dir
,
url
=
_DATA_URL
)
...
...
paddlehub/dataset/msra_ner.py
浏览文件 @
90bbb041
...
@@ -23,12 +23,12 @@ import csv
...
@@ -23,12 +23,12 @@ import csv
from
paddlehub.dataset
import
InputExample
from
paddlehub.dataset
import
InputExample
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/msra_ner.tar.gz"
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/msra_ner.tar.gz"
class
MSRA_NER
(
BaseNLPDatast
):
class
MSRA_NER
(
BaseNLPDatas
e
t
):
"""
"""
A set of manually annotated Chinese word-segmentation data and
A set of manually annotated Chinese word-segmentation data and
specifications for training and testing a Chinese word-segmentation system
specifications for training and testing a Chinese word-segmentation system
...
...
paddlehub/dataset/nlpcc_dbqa.py
浏览文件 @
90bbb041
...
@@ -23,12 +23,12 @@ import csv
...
@@ -23,12 +23,12 @@ import csv
from
paddlehub.dataset
import
InputExample
from
paddlehub.dataset
import
InputExample
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/nlpcc-dbqa.tar.gz"
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/nlpcc-dbqa.tar.gz"
class
NLPCC_DBQA
(
BaseNLPDatast
):
class
NLPCC_DBQA
(
BaseNLPDatas
e
t
):
"""
"""
Please refer to
Please refer to
http://tcci.ccf.org.cn/conference/2017/dldoc/taskgline05.pdf
http://tcci.ccf.org.cn/conference/2017/dldoc/taskgline05.pdf
...
...
paddlehub/dataset/squad.py
浏览文件 @
90bbb041
...
@@ -20,7 +20,7 @@ import os
...
@@ -20,7 +20,7 @@ import os
from
paddlehub.reader
import
tokenization
from
paddlehub.reader
import
tokenization
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.logger
import
logger
from
paddlehub.common.logger
import
logger
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/squad.tar.gz"
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/squad.tar.gz"
...
@@ -65,7 +65,7 @@ class SquadExample(object):
...
@@ -65,7 +65,7 @@ class SquadExample(object):
return
s
return
s
class
SQUAD
(
BaseNLPDatast
):
class
SQUAD
(
BaseNLPDatas
e
t
):
"""A single set of features of data."""
"""A single set of features of data."""
def
__init__
(
self
,
version_2_with_negative
=
False
):
def
__init__
(
self
,
version_2_with_negative
=
False
):
...
...
paddlehub/dataset/stanford_dogs.py
浏览文件 @
90bbb041
...
@@ -20,10 +20,10 @@ from __future__ import print_function
...
@@ -20,10 +20,10 @@ from __future__ import print_function
import
os
import
os
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.dataset.base_cv_dataset
import
BaseCVDatast
from
paddlehub.dataset.base_cv_dataset
import
BaseCVDatas
e
t
class
StanfordDogsDataset
(
BaseCVDatast
):
class
StanfordDogsDataset
(
BaseCVDatas
e
t
):
def
__init__
(
self
):
def
__init__
(
self
):
dataset_path
=
os
.
path
.
join
(
hub
.
common
.
dir
.
DATA_HOME
,
dataset_path
=
os
.
path
.
join
(
hub
.
common
.
dir
.
DATA_HOME
,
"StanfordDogs-120"
)
"StanfordDogs-120"
)
...
...
paddlehub/dataset/thucnews.py
浏览文件 @
90bbb041
...
@@ -22,12 +22,12 @@ import os
...
@@ -22,12 +22,12 @@ import os
from
paddlehub.dataset
import
InputExample
from
paddlehub.dataset
import
InputExample
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/thucnews.tar.gz"
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/thucnews.tar.gz"
class
THUCNEWS
(
BaseNLPDatast
):
class
THUCNEWS
(
BaseNLPDatas
e
t
):
def
__init__
(
self
):
def
__init__
(
self
):
dataset_dir
=
os
.
path
.
join
(
DATA_HOME
,
"thucnews"
)
dataset_dir
=
os
.
path
.
join
(
DATA_HOME
,
"thucnews"
)
base_path
=
self
.
_download_dataset
(
dataset_dir
,
url
=
_DATA_URL
)
base_path
=
self
.
_download_dataset
(
dataset_dir
,
url
=
_DATA_URL
)
...
...
paddlehub/dataset/toxic.py
浏览文件 @
90bbb041
...
@@ -22,12 +22,12 @@ import pandas as pd
...
@@ -22,12 +22,12 @@ import pandas as pd
from
paddlehub.dataset
import
InputExample
from
paddlehub.dataset
import
InputExample
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/toxic.tar.gz"
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/toxic.tar.gz"
class
Toxic
(
BaseNLPDatast
):
class
Toxic
(
BaseNLPDatas
e
t
):
"""
"""
The kaggle Toxic dataset:
The kaggle Toxic dataset:
https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge
https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge
...
...
paddlehub/dataset/xnli.py
浏览文件 @
90bbb041
...
@@ -25,12 +25,12 @@ import csv
...
@@ -25,12 +25,12 @@ import csv
from
paddlehub.dataset
import
InputExample
from
paddlehub.dataset
import
InputExample
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.common.dir
import
DATA_HOME
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatast
from
paddlehub.dataset.base_nlp_dataset
import
BaseNLPDatas
e
t
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/XNLI-lan.tar.gz"
_DATA_URL
=
"https://bj.bcebos.com/paddlehub-dataset/XNLI-lan.tar.gz"
class
XNLI
(
BaseNLPDatast
):
class
XNLI
(
BaseNLPDatas
e
t
):
"""
"""
Please refer to
Please refer to
https://arxiv.org/pdf/1809.05053.pdf
https://arxiv.org/pdf/1809.05053.pdf
...
...
paddlehub/finetune/task/classifier_task.py
浏览文件 @
90bbb041
...
@@ -142,7 +142,7 @@ class ClassifierTask(BaseTask):
...
@@ -142,7 +142,7 @@ class ClassifierTask(BaseTask):
}
}
except
:
except
:
raise
Exception
(
raise
Exception
(
"ImageClassificationDataset does not support postprocessing, please use BaseCVDatast instead"
"ImageClassificationDataset does not support postprocessing, please use BaseCVDatas
e
t instead"
)
)
results
=
[]
results
=
[]
for
batch_state
in
run_states
:
for
batch_state
in
run_states
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录