Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5305b274
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5305b274
编写于
11月 11, 2020
作者:
K
Kaipeng Deng
提交者:
GitHub
11月 11, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
deprecated APIs under paddle.dataset. test=develop (#28423)
上级
d7cfee9b
变更
13
隐藏空白更改
内联
并排
Showing
13 changed files
with
209 additions
and
17 deletions
+209
-17
python/paddle/__init__.py
python/paddle/__init__.py
+0
-2
python/paddle/dataset/__init__.py
python/paddle/dataset/__init__.py
+2
-15
python/paddle/dataset/cifar.py
python/paddle/dataset/cifar.py
+21
-0
python/paddle/dataset/conll05.py
python/paddle/dataset/conll05.py
+17
-0
python/paddle/dataset/flowers.py
python/paddle/dataset/flowers.py
+13
-0
python/paddle/dataset/imdb.py
python/paddle/dataset/imdb.py
+21
-0
python/paddle/dataset/imikolov.py
python/paddle/dataset/imikolov.py
+13
-0
python/paddle/dataset/mnist.py
python/paddle/dataset/mnist.py
+13
-0
python/paddle/dataset/movielens.py
python/paddle/dataset/movielens.py
+37
-0
python/paddle/dataset/uci_housing.py
python/paddle/dataset/uci_housing.py
+17
-0
python/paddle/dataset/voc2012.py
python/paddle/dataset/voc2012.py
+13
-0
python/paddle/dataset/wmt14.py
python/paddle/dataset/wmt14.py
+21
-0
python/paddle/dataset/wmt16.py
python/paddle/dataset/wmt16.py
+21
-0
未找到文件。
python/paddle/__init__.py
浏览文件 @
5305b274
...
...
@@ -27,8 +27,6 @@ except ImportError:
import paddle from the source directory; please install paddlepaddle*.whl firstly.'''
)
import
paddle.reader
import
paddle.dataset
import
paddle.batch
batch
=
batch
.
batch
from
.fluid
import
monkey_patch_variable
...
...
python/paddle/dataset/__init__.py
浏览文件 @
5305b274
...
...
@@ -29,18 +29,5 @@ import paddle.dataset.flowers
import
paddle.dataset.voc2012
import
paddle.dataset.image
__all__
=
[
'mnist'
,
'imikolov'
,
'imdb'
,
'cifar'
,
'movielens'
,
'conll05'
,
'uci_housing'
,
'wmt14'
,
'wmt16'
,
'mq2007'
,
'flowers'
,
'voc2012'
,
'image'
,
]
# Set __all__ to an empty list so that the APIs under paddle.dataset are not shown
__all__
=
[]
python/paddle/dataset/cifar.py
浏览文件 @
5305b274
...
...
@@ -32,6 +32,7 @@ from __future__ import print_function
import
itertools
import
numpy
import
paddle.dataset.common
import
paddle.utils.deprecated
as
deprecated
import
tarfile
import
six
from
six.moves
import
cPickle
as
pickle
...
...
@@ -75,6 +76,10 @@ def reader_creator(filename, sub_name, cycle=False):
return
reader
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.Cifar100"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
train100
():
"""
CIFAR-100 training set creator.
...
...
@@ -90,6 +95,10 @@ def train100():
'train'
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.Cifar100"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
test100
():
"""
CIFAR-100 test set creator.
...
...
@@ -105,6 +114,10 @@ def test100():
'test'
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.Cifar10"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
train10
(
cycle
=
False
):
"""
CIFAR-10 training set creator.
...
...
@@ -123,6 +136,10 @@ def train10(cycle=False):
cycle
=
cycle
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.Cifar10"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
test10
(
cycle
=
False
):
"""
CIFAR-10 test set creator.
...
...
@@ -141,6 +158,10 @@ def test10(cycle=False):
cycle
=
cycle
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.Cifar10"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
fetch
():
paddle
.
dataset
.
common
.
download
(
CIFAR10_URL
,
'cifar'
,
CIFAR10_MD5
)
paddle
.
dataset
.
common
.
download
(
CIFAR100_URL
,
'cifar'
,
CIFAR100_MD5
)
python/paddle/dataset/conll05.py
浏览文件 @
5305b274
...
...
@@ -27,6 +27,7 @@ import gzip
import
itertools
import
paddle.dataset.common
import
paddle.compat
as
cpt
import
paddle.utils.deprecated
as
deprecated
from
six.moves
import
zip
,
range
__all__
=
[
'test, get_dict'
,
'get_embedding'
]
...
...
@@ -202,6 +203,10 @@ def reader_creator(corpus_reader,
return
reader
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Conll05st"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
get_dict
():
"""
Get the word, verb and label dictionary of Wikipedia corpus.
...
...
@@ -215,6 +220,10 @@ def get_dict():
return
word_dict
,
verb_dict
,
label_dict
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Conll05st"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
get_embedding
():
"""
Get the trained word vector based on Wikipedia corpus.
...
...
@@ -222,6 +231,10 @@ def get_embedding():
return
paddle
.
dataset
.
common
.
download
(
EMB_URL
,
'conll05st'
,
EMB_MD5
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Conll05st"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
test
():
"""
Conll05 test set creator.
...
...
@@ -242,6 +255,10 @@ def test():
return
reader_creator
(
reader
,
word_dict
,
verb_dict
,
label_dict
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Conll05st"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
fetch
():
paddle
.
dataset
.
common
.
download
(
WORDDICT_URL
,
'conll05st'
,
WORDDICT_MD5
)
paddle
.
dataset
.
common
.
download
(
VERBDICT_URL
,
'conll05st'
,
VERBDICT_MD5
)
...
...
python/paddle/dataset/flowers.py
浏览文件 @
5305b274
...
...
@@ -39,6 +39,7 @@ import scipy.io as scio
from
paddle.dataset.image
import
*
from
paddle.reader
import
map_readers
,
xmap_readers
from
paddle
import
compat
as
cpt
import
paddle.utils.deprecated
as
deprecated
import
os
import
numpy
as
np
from
multiprocessing
import
cpu_count
...
...
@@ -143,6 +144,10 @@ def reader_creator(data_file,
return
map_readers
(
mapper
,
reader
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.Flowers"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
train
(
mapper
=
train_mapper
,
buffered_size
=
1024
,
use_xmap
=
True
,
cycle
=
False
):
'''
Create flowers training set reader.
...
...
@@ -172,6 +177,10 @@ def train(mapper=train_mapper, buffered_size=1024, use_xmap=True, cycle=False):
cycle
=
cycle
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.Flowers"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
test
(
mapper
=
test_mapper
,
buffered_size
=
1024
,
use_xmap
=
True
,
cycle
=
False
):
'''
Create flowers test set reader.
...
...
@@ -201,6 +210,10 @@ def test(mapper=test_mapper, buffered_size=1024, use_xmap=True, cycle=False):
cycle
=
cycle
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.Flowers"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
valid
(
mapper
=
test_mapper
,
buffered_size
=
1024
,
use_xmap
=
True
):
'''
Create flowers validation set reader.
...
...
python/paddle/dataset/imdb.py
浏览文件 @
5305b274
...
...
@@ -23,6 +23,7 @@ Besides, this module also provides API for building dictionary.
from
__future__
import
print_function
import
paddle.dataset.common
import
paddle.utils.deprecated
as
deprecated
import
collections
import
tarfile
import
re
...
...
@@ -76,6 +77,10 @@ def build_dict(pattern, cutoff):
return
word_idx
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Imdb"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
reader_creator
(
pos_pattern
,
neg_pattern
,
word_idx
):
UNK
=
word_idx
[
'<unk>'
]
INS
=
[]
...
...
@@ -94,6 +99,10 @@ def reader_creator(pos_pattern, neg_pattern, word_idx):
return
reader
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Imdb"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
train
(
word_idx
):
"""
IMDB training set creator.
...
...
@@ -111,6 +120,10 @@ def train(word_idx):
re
.
compile
(
"aclImdb/train/neg/.*\.txt$"
),
word_idx
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Imdb"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
test
(
word_idx
):
"""
IMDB test set creator.
...
...
@@ -128,6 +141,10 @@ def test(word_idx):
re
.
compile
(
"aclImdb/test/neg/.*\.txt$"
),
word_idx
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Imdb"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
word_dict
():
"""
Build a word dictionary from the corpus.
...
...
@@ -139,5 +156,9 @@ def word_dict():
re
.
compile
(
"aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"
),
150
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Imdb"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
fetch
():
paddle
.
dataset
.
common
.
download
(
URL
,
'imdb'
,
MD5
)
python/paddle/dataset/imikolov.py
浏览文件 @
5305b274
...
...
@@ -22,6 +22,7 @@ into paddle reader creators.
from
__future__
import
print_function
import
paddle.dataset.common
import
paddle.utils.deprecated
as
deprecated
import
collections
import
tarfile
import
six
...
...
@@ -111,6 +112,10 @@ def reader_creator(filename, word_idx, n, data_type):
return
reader
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Imikolov"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
train
(
word_idx
,
n
,
data_type
=
DataType
.
NGRAM
):
"""
imikolov training set creator.
...
...
@@ -131,6 +136,10 @@ def train(word_idx, n, data_type=DataType.NGRAM):
data_type
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Imikolov"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
test
(
word_idx
,
n
,
data_type
=
DataType
.
NGRAM
):
"""
imikolov test set creator.
...
...
@@ -151,5 +160,9 @@ def test(word_idx, n, data_type=DataType.NGRAM):
data_type
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Imikolov"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
fetch
():
paddle
.
dataset
.
common
.
download
(
URL
,
"imikolov"
,
MD5
)
python/paddle/dataset/mnist.py
浏览文件 @
5305b274
...
...
@@ -21,6 +21,7 @@ parse training set and test set into paddle reader creators.
from
__future__
import
print_function
import
paddle.dataset.common
import
paddle.utils.deprecated
as
deprecated
import
gzip
import
numpy
import
struct
...
...
@@ -88,6 +89,10 @@ def reader_creator(image_filename, label_filename, buffer_size):
return
reader
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.MNIST"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
train
():
"""
MNIST training set creator.
...
...
@@ -105,6 +110,10 @@ def train():
TRAIN_LABEL_MD5
),
100
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.MNIST"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
test
():
"""
MNIST test set creator.
...
...
@@ -121,6 +130,10 @@ def test():
100
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.MNIST"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
fetch
():
paddle
.
dataset
.
common
.
download
(
TRAIN_IMAGE_URL
,
'mnist'
,
TRAIN_IMAGE_MD5
)
paddle
.
dataset
.
common
.
download
(
TRAIN_LABEL_URL
,
'mnist'
,
TRAIN_LABEL_MD5
)
...
...
python/paddle/dataset/movielens.py
浏览文件 @
5305b274
...
...
@@ -27,6 +27,7 @@ from __future__ import print_function
import
numpy
as
np
import
zipfile
import
paddle.dataset.common
import
paddle.utils.deprecated
as
deprecated
import
re
import
random
import
functools
...
...
@@ -167,6 +168,10 @@ def __reader__(rand_seed=0, test_ratio=0.1, is_test=False):
yield
usr
.
value
()
+
mov
.
value
()
+
[[
rating
]]
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Movielens"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
__reader_creator__
(
**
kwargs
):
return
lambda
:
__reader__
(
**
kwargs
)
...
...
@@ -175,6 +180,10 @@ train = functools.partial(__reader_creator__, is_test=False)
test
=
functools
.
partial
(
__reader_creator__
,
is_test
=
True
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Movielens"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
get_movie_title_dict
():
"""
Get movie title dictionary.
...
...
@@ -190,6 +199,10 @@ def __max_index_info__(a, b):
return
b
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Movielens"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
max_movie_id
():
"""
Get the maximum value of movie id.
...
...
@@ -198,6 +211,10 @@ def max_movie_id():
return
six
.
moves
.
reduce
(
__max_index_info__
,
list
(
MOVIE_INFO
.
values
())).
index
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Movielens"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
max_user_id
():
"""
Get the maximum value of user id.
...
...
@@ -213,6 +230,10 @@ def __max_job_id_impl__(a, b):
return
b
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Movielens"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
max_job_id
():
"""
Get the maximum value of job id.
...
...
@@ -222,6 +243,10 @@ def max_job_id():
list
(
USER_INFO
.
values
())).
job_id
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Movielens"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
movie_categories
():
"""
Get movie categories dictionary.
...
...
@@ -230,6 +255,10 @@ def movie_categories():
return
CATEGORIES_DICT
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Movielens"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
user_info
():
"""
Get user info dictionary.
...
...
@@ -238,6 +267,10 @@ def user_info():
return
USER_INFO
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Movielens"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
movie_info
():
"""
Get movie info dictionary.
...
...
@@ -255,6 +288,10 @@ def unittest():
print
(
train_count
,
test_count
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.Movielens"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
fetch
():
paddle
.
dataset
.
common
.
download
(
URL
,
"movielens"
,
MD5
)
...
...
python/paddle/dataset/uci_housing.py
浏览文件 @
5305b274
...
...
@@ -27,6 +27,7 @@ import tempfile
import
tarfile
import
os
import
paddle.dataset.common
import
paddle.utils.deprecated
as
deprecated
__all__
=
[
'train'
,
'test'
]
...
...
@@ -83,6 +84,10 @@ def load_data(filename, feature_num=14, ratio=0.8):
UCI_TEST_DATA
=
data
[
offset
:]
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.UCIHousing"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
train
():
"""
UCI_HOUSING training set creator.
...
...
@@ -103,6 +108,10 @@ def train():
return
reader
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.UCIHousing"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
test
():
"""
UCI_HOUSING test set creator.
...
...
@@ -134,6 +143,10 @@ def fluid_model():
return
dirpath
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.UCIHousing"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
predict_reader
():
"""
It returns just one tuple data to do inference.
...
...
@@ -146,5 +159,9 @@ def predict_reader():
return
(
UCI_TEST_DATA
[
0
][:
-
1
],
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.UCIHousing"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
fetch
():
paddle
.
dataset
.
common
.
download
(
URL
,
'uci_housing'
,
MD5
)
python/paddle/dataset/voc2012.py
浏览文件 @
5305b274
...
...
@@ -26,6 +26,7 @@ import io
import
numpy
as
np
from
paddle.dataset.common
import
download
from
paddle.dataset.image
import
*
import
paddle.utils.deprecated
as
deprecated
from
PIL
import
Image
__all__
=
[
'train'
,
'test'
,
'val'
]
...
...
@@ -66,6 +67,10 @@ def reader_creator(filename, sub_name):
return
reader
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.VOC2012"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
train
():
"""
Create a train dataset reader containing 2913 images in HWC order.
...
...
@@ -73,6 +78,10 @@ def train():
return
reader_creator
(
download
(
VOC_URL
,
CACHE_DIR
,
VOC_MD5
),
'trainval'
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.VOC2012"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
test
():
"""
Create a test dataset reader containing 1464 images in HWC order.
...
...
@@ -80,6 +89,10 @@ def test():
return
reader_creator
(
download
(
VOC_URL
,
CACHE_DIR
,
VOC_MD5
),
'train'
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.vision.datasets.VOC2012"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
val
():
"""
Create a val dataset reader containing 1449 images in HWC order.
...
...
python/paddle/dataset/wmt14.py
浏览文件 @
5305b274
...
...
@@ -28,6 +28,7 @@ import gzip
import
paddle.dataset.common
import
paddle.compat
as
cpt
import
paddle.utils.deprecated
as
deprecated
__all__
=
[
'train'
,
...
...
@@ -114,6 +115,10 @@ def reader_creator(tar_file, file_name, dict_size):
return
reader
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.WMT14"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
train
(
dict_size
):
"""
WMT14 training set creator.
...
...
@@ -130,6 +135,10 @@ def train(dict_size):
'train/train'
,
dict_size
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.WMT14"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
test
(
dict_size
):
"""
WMT14 test set creator.
...
...
@@ -146,12 +155,20 @@ def test(dict_size):
'test/test'
,
dict_size
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.WMT14"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
gen
(
dict_size
):
return
reader_creator
(
paddle
.
dataset
.
common
.
download
(
URL_TRAIN
,
'wmt14'
,
MD5_TRAIN
),
'gen/gen'
,
dict_size
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.WMT14"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
get_dict
(
dict_size
,
reverse
=
True
):
# if reverse is False, return dict = {'a':'001', 'b':'002', ...}
# if reverse is True, return dict = {'001':'a', '002':'b', ...}
...
...
@@ -163,6 +180,10 @@ def get_dict(dict_size, reverse=True):
return
src_dict
,
trg_dict
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.WMT14"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
fetch
():
paddle
.
dataset
.
common
.
download
(
URL_TRAIN
,
'wmt14'
,
MD5_TRAIN
)
paddle
.
dataset
.
common
.
download
(
URL_MODEL
,
'wmt14'
,
MD5_MODEL
)
python/paddle/dataset/wmt16.py
浏览文件 @
5305b274
...
...
@@ -38,6 +38,7 @@ from collections import defaultdict
import
paddle
import
paddle.compat
as
cpt
import
paddle.utils.deprecated
as
deprecated
__all__
=
[
"train"
,
...
...
@@ -144,6 +145,10 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
return
reader
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.WMT16"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
train
(
src_dict_size
,
trg_dict_size
,
src_lang
=
"en"
):
"""
WMT16 train set reader.
...
...
@@ -193,6 +198,10 @@ def train(src_dict_size, trg_dict_size, src_lang="en"):
src_lang
=
src_lang
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.WMT16"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
test
(
src_dict_size
,
trg_dict_size
,
src_lang
=
"en"
):
"""
WMT16 test set reader.
...
...
@@ -242,6 +251,10 @@ def test(src_dict_size, trg_dict_size, src_lang="en"):
src_lang
=
src_lang
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.WMT16"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
validation
(
src_dict_size
,
trg_dict_size
,
src_lang
=
"en"
):
"""
WMT16 validation set reader.
...
...
@@ -289,6 +302,10 @@ def validation(src_dict_size, trg_dict_size, src_lang="en"):
src_lang
=
src_lang
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.WMT16"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
get_dict
(
lang
,
dict_size
,
reverse
=
False
):
"""
return the word dictionary for the specified language.
...
...
@@ -319,6 +336,10 @@ def get_dict(lang, dict_size, reverse=False):
return
__load_dict
(
tar_file
,
dict_size
,
lang
,
reverse
)
@
deprecated
(
since
=
"2.0.0"
,
update_to
=
"paddle.text.datasets.WMT16"
,
reason
=
"Please use new dataset API which supports paddle.io.DataLoader"
)
def
fetch
():
"""download the entire dataset.
"""
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录