未验证 提交 5305b274 编写于 作者: K Kaipeng Deng 提交者: GitHub

deprecated APIs under paddle.dataset. test=develop (#28423)

上级 d7cfee9b
......@@ -27,8 +27,6 @@ except ImportError:
import paddle from the source directory; please install paddlepaddle*.whl firstly.'''
)
import paddle.reader
import paddle.dataset
import paddle.batch
batch = batch.batch
from .fluid import monkey_patch_variable
......
......@@ -29,18 +29,5 @@ import paddle.dataset.flowers
import paddle.dataset.voc2012
import paddle.dataset.image
__all__ = [
'mnist',
'imikolov',
'imdb',
'cifar',
'movielens',
'conll05',
'uci_housing',
'wmt14',
'wmt16',
'mq2007',
'flowers',
'voc2012',
'image',
]
# Keep __all__ empty so that the APIs under paddle.dataset are not shown
__all__ = []
......@@ -32,6 +32,7 @@ from __future__ import print_function
import itertools
import numpy
import paddle.dataset.common
import paddle.utils.deprecated as deprecated
import tarfile
import six
from six.moves import cPickle as pickle
......@@ -75,6 +76,10 @@ def reader_creator(filename, sub_name, cycle=False):
return reader
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.Cifar100",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def train100():
"""
CIFAR-100 training set creator.
......@@ -90,6 +95,10 @@ def train100():
'train')
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.Cifar100",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def test100():
"""
CIFAR-100 test set creator.
......@@ -105,6 +114,10 @@ def test100():
'test')
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.Cifar10",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def train10(cycle=False):
"""
CIFAR-10 training set creator.
......@@ -123,6 +136,10 @@ def train10(cycle=False):
cycle=cycle)
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.Cifar10",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def test10(cycle=False):
"""
CIFAR-10 test set creator.
......@@ -141,6 +158,10 @@ def test10(cycle=False):
cycle=cycle)
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.Cifar10",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def fetch():
paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5)
paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5)
......@@ -27,6 +27,7 @@ import gzip
import itertools
import paddle.dataset.common
import paddle.compat as cpt
import paddle.utils.deprecated as deprecated
from six.moves import zip, range
# Every exported name must be its own string. The previous 'test, get_dict'
# entry fused two names into one, which makes `from <module> import *` fail
# because no attribute with that combined name exists.
__all__ = ['test', 'get_dict', 'get_embedding']
......@@ -202,6 +203,10 @@ def reader_creator(corpus_reader,
return reader
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Conll05st",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def get_dict():
"""
Get the word, verb and label dictionary of Wikipedia corpus.
......@@ -215,6 +220,10 @@ def get_dict():
return word_dict, verb_dict, label_dict
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Conll05st",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def get_embedding():
"""
Get the trained word vector based on Wikipedia corpus.
......@@ -222,6 +231,10 @@ def get_embedding():
return paddle.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Conll05st",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def test():
"""
Conll05 test set creator.
......@@ -242,6 +255,10 @@ def test():
return reader_creator(reader, word_dict, verb_dict, label_dict)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Conll05st",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def fetch():
paddle.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)
paddle.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)
......
......@@ -39,6 +39,7 @@ import scipy.io as scio
from paddle.dataset.image import *
from paddle.reader import map_readers, xmap_readers
from paddle import compat as cpt
import paddle.utils.deprecated as deprecated
import os
import numpy as np
from multiprocessing import cpu_count
......@@ -143,6 +144,10 @@ def reader_creator(data_file,
return map_readers(mapper, reader)
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.Flowers",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def train(mapper=train_mapper, buffered_size=1024, use_xmap=True, cycle=False):
'''
Create flowers training set reader.
......@@ -172,6 +177,10 @@ def train(mapper=train_mapper, buffered_size=1024, use_xmap=True, cycle=False):
cycle=cycle)
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.Flowers",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def test(mapper=test_mapper, buffered_size=1024, use_xmap=True, cycle=False):
'''
Create flowers test set reader.
......@@ -201,6 +210,10 @@ def test(mapper=test_mapper, buffered_size=1024, use_xmap=True, cycle=False):
cycle=cycle)
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.Flowers",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def valid(mapper=test_mapper, buffered_size=1024, use_xmap=True):
'''
Create flowers validation set reader.
......
......@@ -23,6 +23,7 @@ Besides, this module also provides API for building dictionary.
from __future__ import print_function
import paddle.dataset.common
import paddle.utils.deprecated as deprecated
import collections
import tarfile
import re
......@@ -76,6 +77,10 @@ def build_dict(pattern, cutoff):
return word_idx
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Imdb",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def reader_creator(pos_pattern, neg_pattern, word_idx):
UNK = word_idx['<unk>']
INS = []
......@@ -94,6 +99,10 @@ def reader_creator(pos_pattern, neg_pattern, word_idx):
return reader
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Imdb",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def train(word_idx):
"""
IMDB training set creator.
......@@ -111,6 +120,10 @@ def train(word_idx):
re.compile("aclImdb/train/neg/.*\.txt$"), word_idx)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Imdb",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def test(word_idx):
"""
IMDB test set creator.
......@@ -128,6 +141,10 @@ def test(word_idx):
re.compile("aclImdb/test/neg/.*\.txt$"), word_idx)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Imdb",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def word_dict():
"""
Build a word dictionary from the corpus.
......@@ -139,5 +156,9 @@ def word_dict():
re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
@deprecated(
    since="2.0.0",
    update_to="paddle.text.datasets.Imdb",
    reason="Please use new dataset API which supports paddle.io.DataLoader",
)
def fetch():
    """Call ``paddle.dataset.common.download`` for the module-level URL/MD5.

    The file is requested under the ``'imdb'`` module name. This entry point
    is marked deprecated since 2.0.0 in favour of
    ``paddle.text.datasets.Imdb``.
    """
    download = paddle.dataset.common.download
    download(URL, 'imdb', MD5)
......@@ -22,6 +22,7 @@ into paddle reader creators.
from __future__ import print_function
import paddle.dataset.common
import paddle.utils.deprecated as deprecated
import collections
import tarfile
import six
......@@ -111,6 +112,10 @@ def reader_creator(filename, word_idx, n, data_type):
return reader
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Imikolov",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def train(word_idx, n, data_type=DataType.NGRAM):
"""
imikolov training set creator.
......@@ -131,6 +136,10 @@ def train(word_idx, n, data_type=DataType.NGRAM):
data_type)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Imikolov",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def test(word_idx, n, data_type=DataType.NGRAM):
"""
imikolov test set creator.
......@@ -151,5 +160,9 @@ def test(word_idx, n, data_type=DataType.NGRAM):
data_type)
@deprecated(
    since="2.0.0",
    update_to="paddle.text.datasets.Imikolov",
    reason="Please use new dataset API which supports paddle.io.DataLoader",
)
def fetch():
    """Call ``paddle.dataset.common.download`` for the module-level URL/MD5.

    The file is requested under the ``"imikolov"`` module name. This entry
    point is marked deprecated since 2.0.0 in favour of
    ``paddle.text.datasets.Imikolov``.
    """
    download = paddle.dataset.common.download
    download(URL, "imikolov", MD5)
......@@ -21,6 +21,7 @@ parse training set and test set into paddle reader creators.
from __future__ import print_function
import paddle.dataset.common
import paddle.utils.deprecated as deprecated
import gzip
import numpy
import struct
......@@ -88,6 +89,10 @@ def reader_creator(image_filename, label_filename, buffer_size):
return reader
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.MNIST",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def train():
"""
MNIST training set creator.
......@@ -105,6 +110,10 @@ def train():
TRAIN_LABEL_MD5), 100)
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.MNIST",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def test():
"""
MNIST test set creator.
......@@ -121,6 +130,10 @@ def test():
100)
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.MNIST",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def fetch():
paddle.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5)
paddle.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
......
......@@ -27,6 +27,7 @@ from __future__ import print_function
import numpy as np
import zipfile
import paddle.dataset.common
import paddle.utils.deprecated as deprecated
import re
import random
import functools
......@@ -167,6 +168,10 @@ def __reader__(rand_seed=0, test_ratio=0.1, is_test=False):
yield usr.value() + mov.value() + [[rating]]
@deprecated(
    since="2.0.0",
    update_to="paddle.text.datasets.Movielens",
    reason="Please use new dataset API which supports paddle.io.DataLoader",
)
def __reader_creator__(**kwargs):
    """Return a zero-argument callable that runs ``__reader__(**kwargs)``.

    The keyword arguments are captured now and only forwarded to
    ``__reader__`` when the returned callable is invoked.
    """

    def reader():
        return __reader__(**kwargs)

    return reader
......@@ -175,6 +180,10 @@ train = functools.partial(__reader_creator__, is_test=False)
test = functools.partial(__reader_creator__, is_test=True)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Movielens",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def get_movie_title_dict():
"""
Get movie title dictionary.
......@@ -190,6 +199,10 @@ def __max_index_info__(a, b):
return b
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Movielens",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def max_movie_id():
"""
Get the maximum value of movie id.
......@@ -198,6 +211,10 @@ def max_movie_id():
return six.moves.reduce(__max_index_info__, list(MOVIE_INFO.values())).index
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Movielens",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def max_user_id():
"""
Get the maximum value of user id.
......@@ -213,6 +230,10 @@ def __max_job_id_impl__(a, b):
return b
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Movielens",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def max_job_id():
"""
Get the maximum value of job id.
......@@ -222,6 +243,10 @@ def max_job_id():
list(USER_INFO.values())).job_id
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Movielens",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def movie_categories():
"""
Get movie categories dictionary.
......@@ -230,6 +255,10 @@ def movie_categories():
return CATEGORIES_DICT
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Movielens",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def user_info():
"""
Get user info dictionary.
......@@ -238,6 +267,10 @@ def user_info():
return USER_INFO
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Movielens",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def movie_info():
"""
Get movie info dictionary.
......@@ -255,6 +288,10 @@ def unittest():
print(train_count, test_count)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.Movielens",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def fetch():
paddle.dataset.common.download(URL, "movielens", MD5)
......
......@@ -27,6 +27,7 @@ import tempfile
import tarfile
import os
import paddle.dataset.common
import paddle.utils.deprecated as deprecated
__all__ = ['train', 'test']
......@@ -83,6 +84,10 @@ def load_data(filename, feature_num=14, ratio=0.8):
UCI_TEST_DATA = data[offset:]
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.UCIHousing",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def train():
"""
UCI_HOUSING training set creator.
......@@ -103,6 +108,10 @@ def train():
return reader
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.UCIHousing",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def test():
"""
UCI_HOUSING test set creator.
......@@ -134,6 +143,10 @@ def fluid_model():
return dirpath
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.UCIHousing",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def predict_reader():
"""
It returns just one tuple data to do inference.
......@@ -146,5 +159,9 @@ def predict_reader():
return (UCI_TEST_DATA[0][:-1], )
@deprecated(
    since="2.0.0",
    update_to="paddle.text.datasets.UCIHousing",
    reason="Please use new dataset API which supports paddle.io.DataLoader",
)
def fetch():
    """Call ``paddle.dataset.common.download`` for the module-level URL/MD5.

    The file is requested under the ``'uci_housing'`` module name. This entry
    point is marked deprecated since 2.0.0 in favour of
    ``paddle.text.datasets.UCIHousing``.
    """
    download = paddle.dataset.common.download
    download(URL, 'uci_housing', MD5)
......@@ -26,6 +26,7 @@ import io
import numpy as np
from paddle.dataset.common import download
from paddle.dataset.image import *
import paddle.utils.deprecated as deprecated
from PIL import Image
__all__ = ['train', 'test', 'val']
......@@ -66,6 +67,10 @@ def reader_creator(filename, sub_name):
return reader
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.VOC2012",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def train():
"""
Create a train dataset reader containing 2913 images in HWC order.
......@@ -73,6 +78,10 @@ def train():
return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'trainval')
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.VOC2012",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def test():
"""
Create a test dataset reader containing 1464 images in HWC order.
......@@ -80,6 +89,10 @@ def test():
return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'train')
@deprecated(
since="2.0.0",
update_to="paddle.vision.datasets.VOC2012",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def val():
"""
Create a val dataset reader containing 1449 images in HWC order.
......
......@@ -28,6 +28,7 @@ import gzip
import paddle.dataset.common
import paddle.compat as cpt
import paddle.utils.deprecated as deprecated
__all__ = [
'train',
......@@ -114,6 +115,10 @@ def reader_creator(tar_file, file_name, dict_size):
return reader
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.WMT14",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def train(dict_size):
"""
WMT14 training set creator.
......@@ -130,6 +135,10 @@ def train(dict_size):
'train/train', dict_size)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.WMT14",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def test(dict_size):
"""
WMT14 test set creator.
......@@ -146,12 +155,20 @@ def test(dict_size):
'test/test', dict_size)
@deprecated(
    since="2.0.0",
    update_to="paddle.text.datasets.WMT14",
    reason="Please use new dataset API which supports paddle.io.DataLoader",
)
def gen(dict_size):
    """Build a reader over the ``'gen/gen'`` entry of the WMT14 train download.

    Args:
        dict_size: forwarded unchanged to ``reader_creator``.

    Returns:
        Whatever ``reader_creator`` returns for the downloaded file.
    """
    downloaded = paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
    return reader_creator(downloaded, 'gen/gen', dict_size)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.WMT14",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def get_dict(dict_size, reverse=True):
# if reverse = False, return dict = {'a':'001', 'b':'002', ...}
# else reverse = true, return dict = {'001':'a', '002':'b', ...}
......@@ -163,6 +180,10 @@ def get_dict(dict_size, reverse=True):
return src_dict, trg_dict
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.WMT14",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def fetch():
paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
paddle.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL)
......@@ -38,6 +38,7 @@ from collections import defaultdict
import paddle
import paddle.compat as cpt
import paddle.utils.deprecated as deprecated
__all__ = [
"train",
......@@ -144,6 +145,10 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
return reader
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.WMT16",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def train(src_dict_size, trg_dict_size, src_lang="en"):
"""
WMT16 train set reader.
......@@ -193,6 +198,10 @@ def train(src_dict_size, trg_dict_size, src_lang="en"):
src_lang=src_lang)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.WMT16",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def test(src_dict_size, trg_dict_size, src_lang="en"):
"""
WMT16 test set reader.
......@@ -242,6 +251,10 @@ def test(src_dict_size, trg_dict_size, src_lang="en"):
src_lang=src_lang)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.WMT16",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def validation(src_dict_size, trg_dict_size, src_lang="en"):
"""
WMT16 validation set reader.
......@@ -289,6 +302,10 @@ def validation(src_dict_size, trg_dict_size, src_lang="en"):
src_lang=src_lang)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.WMT16",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def get_dict(lang, dict_size, reverse=False):
"""
return the word dictionary for the specified language.
......@@ -319,6 +336,10 @@ def get_dict(lang, dict_size, reverse=False):
return __load_dict(tar_file, dict_size, lang, reverse)
@deprecated(
since="2.0.0",
update_to="paddle.text.datasets.WMT16",
reason="Please use new dataset API which supports paddle.io.DataLoader")
def fetch():
"""download the entire dataset.
"""
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册