From 5305b2749a4c5f913b0fa8b5ffe6ba616b621bab Mon Sep 17 00:00:00 2001 From: Kaipeng Deng Date: Wed, 11 Nov 2020 15:04:26 +0800 Subject: [PATCH] deprecated APIs under paddle.dataset. test=develop (#28423) --- python/paddle/__init__.py | 2 -- python/paddle/dataset/__init__.py | 17 ++----------- python/paddle/dataset/cifar.py | 21 ++++++++++++++++ python/paddle/dataset/conll05.py | 17 +++++++++++++ python/paddle/dataset/flowers.py | 13 ++++++++++ python/paddle/dataset/imdb.py | 21 ++++++++++++++++ python/paddle/dataset/imikolov.py | 13 ++++++++++ python/paddle/dataset/mnist.py | 13 ++++++++++ python/paddle/dataset/movielens.py | 37 ++++++++++++++++++++++++++++ python/paddle/dataset/uci_housing.py | 17 +++++++++++++ python/paddle/dataset/voc2012.py | 13 ++++++++++ python/paddle/dataset/wmt14.py | 21 ++++++++++++++++ python/paddle/dataset/wmt16.py | 21 ++++++++++++++++ 13 files changed, 209 insertions(+), 17 deletions(-) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 50c1142c7b..400dbc85d6 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -27,8 +27,6 @@ except ImportError: import paddle from the source directory; please install paddlepaddle*.whl firstly.''' ) -import paddle.reader -import paddle.dataset import paddle.batch batch = batch.batch from .fluid import monkey_patch_variable diff --git a/python/paddle/dataset/__init__.py b/python/paddle/dataset/__init__.py index d1e5975856..c2739d3805 100644 --- a/python/paddle/dataset/__init__.py +++ b/python/paddle/dataset/__init__.py @@ -29,18 +29,5 @@ import paddle.dataset.flowers import paddle.dataset.voc2012 import paddle.dataset.image -__all__ = [ - 'mnist', - 'imikolov', - 'imdb', - 'cifar', - 'movielens', - 'conll05', - 'uci_housing', - 'wmt14', - 'wmt16', - 'mq2007', - 'flowers', - 'voc2012', - 'image', -] +# set __all__ as empty for not showing APIs under paddle.dataset +__all__ = [] diff --git a/python/paddle/dataset/cifar.py b/python/paddle/dataset/cifar.py index 16f06f2400..2ee95c3723 100644 --- a/python/paddle/dataset/cifar.py +++ b/python/paddle/dataset/cifar.py @@ -32,6 +32,7 @@ from __future__ import print_function import itertools import numpy import paddle.dataset.common +import paddle.utils.deprecated as deprecated import tarfile import six from six.moves import cPickle as pickle @@ -75,6 +76,10 @@ def reader_creator(filename, sub_name, cycle=False): return reader +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.Cifar100", + reason="Please use new dataset API which supports paddle.io.DataLoader") def train100(): """ CIFAR-100 training set creator. @@ -90,6 +95,10 @@ def train100(): 'train') +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.Cifar100", + reason="Please use new dataset API which supports paddle.io.DataLoader") def test100(): """ CIFAR-100 test set creator. @@ -105,6 +114,10 @@ def test100(): 'test') +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.Cifar10", + reason="Please use new dataset API which supports paddle.io.DataLoader") def train10(cycle=False): """ CIFAR-10 training set creator. @@ -123,6 +136,10 @@ def train10(cycle=False): cycle=cycle) +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.Cifar10", + reason="Please use new dataset API which supports paddle.io.DataLoader") def test10(cycle=False): """ CIFAR-10 test set creator. @@ -141,6 +158,10 @@ def test10(cycle=False): cycle=cycle) +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.Cifar10", + reason="Please use new dataset API which supports paddle.io.DataLoader") def fetch(): paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5) paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5) diff --git a/python/paddle/dataset/conll05.py b/python/paddle/dataset/conll05.py index 81a8cfc2e6..e7176626ca 100644 --- a/python/paddle/dataset/conll05.py +++ b/python/paddle/dataset/conll05.py @@ -27,6 +27,7 @@ import gzip import itertools import paddle.dataset.common import paddle.compat as cpt +import paddle.utils.deprecated as deprecated from six.moves import zip, range __all__ = ['test, get_dict', 'get_embedding'] @@ -202,6 +203,10 @@ def reader_creator(corpus_reader, return reader +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Conll05st", + reason="Please use new dataset API which supports paddle.io.DataLoader") def get_dict(): """ Get the word, verb and label dictionary of Wikipedia corpus. @@ -215,6 +220,10 @@ def get_dict(): return word_dict, verb_dict, label_dict +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Conll05st", + reason="Please use new dataset API which supports paddle.io.DataLoader") def get_embedding(): """ Get the trained word vector based on Wikipedia corpus. @@ -222,6 +231,10 @@ def get_embedding(): return paddle.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Conll05st", + reason="Please use new dataset API which supports paddle.io.DataLoader") def test(): """ Conll05 test set creator. @@ -242,6 +255,10 @@ def test(): return reader_creator(reader, word_dict, verb_dict, label_dict) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Conll05st", + reason="Please use new dataset API which supports paddle.io.DataLoader") def fetch(): paddle.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) paddle.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) diff --git a/python/paddle/dataset/flowers.py b/python/paddle/dataset/flowers.py index bb60c58211..22e0838b12 100644 --- a/python/paddle/dataset/flowers.py +++ b/python/paddle/dataset/flowers.py @@ -39,6 +39,7 @@ import scipy.io as scio from paddle.dataset.image import * from paddle.reader import map_readers, xmap_readers from paddle import compat as cpt +import paddle.utils.deprecated as deprecated import os import numpy as np from multiprocessing import cpu_count @@ -143,6 +144,10 @@ def reader_creator(data_file, return map_readers(mapper, reader) +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.Flowers", + reason="Please use new dataset API which supports paddle.io.DataLoader") def train(mapper=train_mapper, buffered_size=1024, use_xmap=True, cycle=False): ''' Create flowers training set reader. @@ -172,6 +177,10 @@ def train(mapper=train_mapper, buffered_size=1024, use_xmap=True, cycle=False): cycle=cycle) +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.Flowers", + reason="Please use new dataset API which supports paddle.io.DataLoader") def test(mapper=test_mapper, buffered_size=1024, use_xmap=True, cycle=False): ''' Create flowers test set reader. @@ -201,6 +210,10 @@ def test(mapper=test_mapper, buffered_size=1024, use_xmap=True, cycle=False): cycle=cycle) +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.Flowers", + reason="Please use new dataset API which supports paddle.io.DataLoader") def valid(mapper=test_mapper, buffered_size=1024, use_xmap=True): ''' Create flowers validation set reader. diff --git a/python/paddle/dataset/imdb.py b/python/paddle/dataset/imdb.py index d76a533a55..e5a3b6074c 100644 --- a/python/paddle/dataset/imdb.py +++ b/python/paddle/dataset/imdb.py @@ -23,6 +23,7 @@ Besides, this module also provides API for building dictionary. from __future__ import print_function import paddle.dataset.common +import paddle.utils.deprecated as deprecated import collections import tarfile import re @@ -76,6 +77,10 @@ def build_dict(pattern, cutoff): return word_idx +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Imdb", + reason="Please use new dataset API which supports paddle.io.DataLoader") def reader_creator(pos_pattern, neg_pattern, word_idx): UNK = word_idx[''] INS = [] @@ -94,6 +99,10 @@ def reader_creator(pos_pattern, neg_pattern, word_idx): return reader +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Imdb", + reason="Please use new dataset API which supports paddle.io.DataLoader") def train(word_idx): """ IMDB training set creator. @@ -111,6 +120,10 @@ def train(word_idx): re.compile("aclImdb/train/neg/.*\.txt$"), word_idx) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Imdb", + reason="Please use new dataset API which supports paddle.io.DataLoader") def test(word_idx): """ IMDB test set creator. @@ -128,6 +141,10 @@ def test(word_idx): re.compile("aclImdb/test/neg/.*\.txt$"), word_idx) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Imdb", + reason="Please use new dataset API which supports paddle.io.DataLoader") def word_dict(): """ Build a word dictionary from the corpus. @@ -139,5 +156,9 @@ def word_dict(): re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Imdb", + reason="Please use new dataset API which supports paddle.io.DataLoader") def fetch(): paddle.dataset.common.download(URL, 'imdb', MD5) diff --git a/python/paddle/dataset/imikolov.py b/python/paddle/dataset/imikolov.py index e1967d3db8..cc8e95fc34 100644 --- a/python/paddle/dataset/imikolov.py +++ b/python/paddle/dataset/imikolov.py @@ -22,6 +22,7 @@ into paddle reader creators. from __future__ import print_function import paddle.dataset.common +import paddle.utils.deprecated as deprecated import collections import tarfile import six @@ -111,6 +112,10 @@ def reader_creator(filename, word_idx, n, data_type): return reader +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Imikolov", + reason="Please use new dataset API which supports paddle.io.DataLoader") def train(word_idx, n, data_type=DataType.NGRAM): """ imikolov training set creator. @@ -131,6 +136,10 @@ def train(word_idx, n, data_type=DataType.NGRAM): data_type) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Imikolov", + reason="Please use new dataset API which supports paddle.io.DataLoader") def test(word_idx, n, data_type=DataType.NGRAM): """ imikolov test set creator. @@ -151,5 +160,9 @@ def test(word_idx, n, data_type=DataType.NGRAM): data_type) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Imikolov", + reason="Please use new dataset API which supports paddle.io.DataLoader") def fetch(): paddle.dataset.common.download(URL, "imikolov", MD5) diff --git a/python/paddle/dataset/mnist.py b/python/paddle/dataset/mnist.py index f52ffa049b..14e54d593b 100644 --- a/python/paddle/dataset/mnist.py +++ b/python/paddle/dataset/mnist.py @@ -21,6 +21,7 @@ parse training set and test set into paddle reader creators. from __future__ import print_function import paddle.dataset.common +import paddle.utils.deprecated as deprecated import gzip import numpy import struct @@ -88,6 +89,10 @@ def reader_creator(image_filename, label_filename, buffer_size): return reader +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.MNIST", + reason="Please use new dataset API which supports paddle.io.DataLoader") def train(): """ MNIST training set creator. @@ -105,6 +110,10 @@ def train(): TRAIN_LABEL_MD5), 100) +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.MNIST", + reason="Please use new dataset API which supports paddle.io.DataLoader") def test(): """ MNIST test set creator. @@ -121,6 +130,10 @@ def test(): 100) +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.MNIST", + reason="Please use new dataset API which supports paddle.io.DataLoader") def fetch(): paddle.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5) paddle.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) diff --git a/python/paddle/dataset/movielens.py b/python/paddle/dataset/movielens.py index 22ecfac953..f753f405bb 100644 --- a/python/paddle/dataset/movielens.py +++ b/python/paddle/dataset/movielens.py @@ -27,6 +27,7 @@ from __future__ import print_function import numpy as np import zipfile import paddle.dataset.common +import paddle.utils.deprecated as deprecated import re import random import functools @@ -167,6 +168,10 @@ def __reader__(rand_seed=0, test_ratio=0.1, is_test=False): yield usr.value() + mov.value() + [[rating]] +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Movielens", + reason="Please use new dataset API which supports paddle.io.DataLoader") def __reader_creator__(**kwargs): return lambda: __reader__(**kwargs) @@ -175,6 +180,10 @@ train = functools.partial(__reader_creator__, is_test=False) test = functools.partial(__reader_creator__, is_test=True) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Movielens", + reason="Please use new dataset API which supports paddle.io.DataLoader") def get_movie_title_dict(): """ Get movie title dictionary. @@ -190,6 +199,10 @@ def __max_index_info__(a, b): return b +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Movielens", + reason="Please use new dataset API which supports paddle.io.DataLoader") def max_movie_id(): """ Get the maximum value of movie id. @@ -198,6 +211,10 @@ def max_movie_id(): return six.moves.reduce(__max_index_info__, list(MOVIE_INFO.values())).index +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Movielens", + reason="Please use new dataset API which supports paddle.io.DataLoader") def max_user_id(): """ Get the maximum value of user id. @@ -213,6 +230,10 @@ def __max_job_id_impl__(a, b): return b +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Movielens", + reason="Please use new dataset API which supports paddle.io.DataLoader") def max_job_id(): """ Get the maximum value of job id. @@ -222,6 +243,10 @@ def max_job_id(): list(USER_INFO.values())).job_id +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Movielens", + reason="Please use new dataset API which supports paddle.io.DataLoader") def movie_categories(): """ Get movie categories dictionary. @@ -230,6 +255,10 @@ def movie_categories(): return CATEGORIES_DICT +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Movielens", + reason="Please use new dataset API which supports paddle.io.DataLoader") def user_info(): """ Get user info dictionary. @@ -238,6 +267,10 @@ def user_info(): return USER_INFO +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Movielens", + reason="Please use new dataset API which supports paddle.io.DataLoader") def movie_info(): """ Get movie info dictionary. @@ -255,6 +288,10 @@ def unittest(): print(train_count, test_count) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.Movielens", + reason="Please use new dataset API which supports paddle.io.DataLoader") def fetch(): paddle.dataset.common.download(URL, "movielens", MD5) diff --git a/python/paddle/dataset/uci_housing.py b/python/paddle/dataset/uci_housing.py index f7930d34f9..daed62fbef 100644 --- a/python/paddle/dataset/uci_housing.py +++ b/python/paddle/dataset/uci_housing.py @@ -27,6 +27,7 @@ import tempfile import tarfile import os import paddle.dataset.common +import paddle.utils.deprecated as deprecated __all__ = ['train', 'test'] @@ -83,6 +84,10 @@ def load_data(filename, feature_num=14, ratio=0.8): UCI_TEST_DATA = data[offset:] +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.UCIHousing", + reason="Please use new dataset API which supports paddle.io.DataLoader") def train(): """ UCI_HOUSING training set creator. @@ -103,6 +108,10 @@ def train(): return reader +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.UCIHousing", + reason="Please use new dataset API which supports paddle.io.DataLoader") def test(): """ UCI_HOUSING test set creator. @@ -134,6 +143,10 @@ def fluid_model(): return dirpath +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.UCIHousing", + reason="Please use new dataset API which supports paddle.io.DataLoader") def predict_reader(): """ It returns just one tuple data to do inference. @@ -146,5 +159,9 @@ def predict_reader(): return (UCI_TEST_DATA[0][:-1], ) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.UCIHousing", + reason="Please use new dataset API which supports paddle.io.DataLoader") def fetch(): paddle.dataset.common.download(URL, 'uci_housing', MD5) diff --git a/python/paddle/dataset/voc2012.py b/python/paddle/dataset/voc2012.py index 5068893765..5a0ff76aab 100644 --- a/python/paddle/dataset/voc2012.py +++ b/python/paddle/dataset/voc2012.py @@ -26,6 +26,7 @@ import io import numpy as np from paddle.dataset.common import download from paddle.dataset.image import * +import paddle.utils.deprecated as deprecated from PIL import Image __all__ = ['train', 'test', 'val'] @@ -66,6 +67,10 @@ def reader_creator(filename, sub_name): return reader +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.VOC2012", + reason="Please use new dataset API which supports paddle.io.DataLoader") def train(): """ Create a train dataset reader containing 2913 images in HWC order. @@ -73,6 +78,10 @@ def train(): return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'trainval') +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.VOC2012", + reason="Please use new dataset API which supports paddle.io.DataLoader") def test(): """ Create a test dataset reader containing 1464 images in HWC order. @@ -80,6 +89,10 @@ def test(): return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'train') +@deprecated( + since="2.0.0", + update_to="paddle.vision.datasets.VOC2012", + reason="Please use new dataset API which supports paddle.io.DataLoader") def val(): """ Create a val dataset reader containing 1449 images in HWC order. diff --git a/python/paddle/dataset/wmt14.py b/python/paddle/dataset/wmt14.py index 129e1129fb..3bd5e8d5ba 100644 --- a/python/paddle/dataset/wmt14.py +++ b/python/paddle/dataset/wmt14.py @@ -28,6 +28,7 @@ import gzip import paddle.dataset.common import paddle.compat as cpt +import paddle.utils.deprecated as deprecated __all__ = [ 'train', @@ -114,6 +115,10 @@ def reader_creator(tar_file, file_name, dict_size): return reader +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.WMT14", + reason="Please use new dataset API which supports paddle.io.DataLoader") def train(dict_size): """ WMT14 training set creator. @@ -130,6 +135,10 @@ def train(dict_size): 'train/train', dict_size) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.WMT14", + reason="Please use new dataset API which supports paddle.io.DataLoader") def test(dict_size): """ WMT14 test set creator. @@ -146,12 +155,20 @@ def test(dict_size): 'test/test', dict_size) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.WMT14", + reason="Please use new dataset API which supports paddle.io.DataLoader") def gen(dict_size): return reader_creator( paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'gen/gen', dict_size) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.WMT14", + reason="Please use new dataset API which supports paddle.io.DataLoader") def get_dict(dict_size, reverse=True): # if reverse = False, return dict = {'a':'001', 'b':'002', ...} # else reverse = true, return dict = {'001':'a', '002':'b', ...} @@ -163,6 +180,10 @@ def get_dict(dict_size, reverse=True): return src_dict, trg_dict +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.WMT14", + reason="Please use new dataset API which supports paddle.io.DataLoader") def fetch(): paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) paddle.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py index 251e305104..7f11bc4b1f 100644 --- a/python/paddle/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -38,6 +38,7 @@ from collections import defaultdict import paddle import paddle.compat as cpt +import paddle.utils.deprecated as deprecated __all__ = [ "train", @@ -144,6 +145,10 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang): return reader +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.WMT16", + reason="Please use new dataset API which supports paddle.io.DataLoader") def train(src_dict_size, trg_dict_size, src_lang="en"): """ WMT16 train set reader. @@ -193,6 +198,10 @@ def train(src_dict_size, trg_dict_size, src_lang="en"): src_lang=src_lang) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.WMT16", + reason="Please use new dataset API which supports paddle.io.DataLoader") def test(src_dict_size, trg_dict_size, src_lang="en"): """ WMT16 test set reader. @@ -242,6 +251,10 @@ def test(src_dict_size, trg_dict_size, src_lang="en"): src_lang=src_lang) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.WMT16", + reason="Please use new dataset API which supports paddle.io.DataLoader") def validation(src_dict_size, trg_dict_size, src_lang="en"): """ WMT16 validation set reader. @@ -289,6 +302,10 @@ def validation(src_dict_size, trg_dict_size, src_lang="en"): src_lang=src_lang) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.WMT16", + reason="Please use new dataset API which supports paddle.io.DataLoader") def get_dict(lang, dict_size, reverse=False): """ return the word dictionary for the specified language. @@ -319,6 +336,10 @@ def get_dict(lang, dict_size, reverse=False): return __load_dict(tar_file, dict_size, lang, reverse) +@deprecated( + since="2.0.0", + update_to="paddle.text.datasets.WMT16", + reason="Please use new dataset API which supports paddle.io.DataLoader") def fetch(): """download the entire dataset. """ -- GitLab