From fe0de977beab96f9d0f3bfac83dcef7851174451 Mon Sep 17 00:00:00 2001 From: Guo Sheng Date: Wed, 23 Sep 2020 10:42:45 +0800 Subject: [PATCH] Delete movie_review and sentiment dataset. (#2664) test=develop --- doc/fluid/api/data/dataset.rst | 1 - doc/fluid/api/data/dataset/sentiment.rst | 6 --- doc/fluid/api_cn/data/dataset_cn.rst | 1 - .../api_cn/data/dataset_cn/sentiment_cn.rst | 28 ----------- doc/paddle/api/alias_api_mapping | 1 - .../api/paddle/dataset/sentiment_cn.rst | 28 ----------- .../movie_reviews/MovieReviews_cn.rst | 49 ------------------- .../high_level_api/high_level_api.ipynb | 2 +- .../high_level_api/high_level_api.rst | 2 +- 9 files changed, 2 insertions(+), 116 deletions(-) delete mode 100644 doc/fluid/api/data/dataset/sentiment.rst delete mode 100644 doc/fluid/api_cn/data/dataset_cn/sentiment_cn.rst delete mode 100644 doc/paddle/api/paddle/dataset/sentiment_cn.rst delete mode 100644 doc/paddle/api/paddle/text/datasets/movie_reviews/MovieReviews_cn.rst diff --git a/doc/fluid/api/data/dataset.rst b/doc/fluid/api/data/dataset.rst index 52d7c44ac..420a5fc75 100644 --- a/doc/fluid/api/data/dataset.rst +++ b/doc/fluid/api/data/dataset.rst @@ -10,7 +10,6 @@ dataset dataset/imdb.rst dataset/imikolov.rst dataset/movielens.rst - dataset/sentiment.rst dataset/uci_housing.rst dataset/wmt14.rst dataset/wmt16.rst diff --git a/doc/fluid/api/data/dataset/sentiment.rst b/doc/fluid/api/data/dataset/sentiment.rst deleted file mode 100644 index 6713c9ef9..000000000 --- a/doc/fluid/api/data/dataset/sentiment.rst +++ /dev/null @@ -1,6 +0,0 @@ -sentiment -+++++++++ - -.. automodule:: paddle.dataset.sentiment - :members: - :noindex: diff --git a/doc/fluid/api_cn/data/dataset_cn.rst b/doc/fluid/api_cn/data/dataset_cn.rst index 87017bd66..17a6a32b8 100644 --- a/doc/fluid/api_cn/data/dataset_cn.rst +++ b/doc/fluid/api_cn/data/dataset_cn.rst @@ -14,7 +14,6 @@ dataset dataset_cn/imdb_cn.rst dataset_cn/imikolov_cn.rst dataset_cn/movielens_cn.rst - dataset_cn/sentiment_cn.rst dataset_cn/uci_housing_cn.rst dataset_cn/wmt14_cn.rst dataset_cn/wmt16_cn.rst diff --git a/doc/fluid/api_cn/data/dataset_cn/sentiment_cn.rst b/doc/fluid/api_cn/data/dataset_cn/sentiment_cn.rst deleted file mode 100644 index d5826830f..000000000 --- a/doc/fluid/api_cn/data/dataset_cn/sentiment_cn.rst +++ /dev/null @@ -1,28 +0,0 @@ -.. _cn_api_paddle_dataset_sentiment: - -sentiment -------------------------------- - -脚本获取并预处理由NLTK提供的movie_reviews数据集。 - - -.. py:function:: paddle.dataset.sentiment.get_word_dict() - -按照样本中出现的单词的频率对单词进行排序。 - -返回: words_freq_sorted - -.. py:function:: paddle.dataset.sentiment.train() - -默认的训练集reader creator。 - -.. py:function:: paddle.dataset.sentiment.test() - -默认的测试集reader creator。 - -.. py:function:: paddle.dataset.sentiment.convert(path) - -将数据集转换为recordio格式。 - - - diff --git a/doc/paddle/api/alias_api_mapping b/doc/paddle/api/alias_api_mapping index 3a43a1cdf..2b2b4074e 100644 --- a/doc/paddle/api/alias_api_mapping +++ b/doc/paddle/api/alias_api_mapping @@ -561,7 +561,6 @@ paddle.text.datasets.conll05.Conll05st paddle.text.datasets.Conll05st,paddle.tex paddle.text.datasets.imdb.Imdb paddle.text.datasets.Imdb,paddle.text.Imdb paddle.text.datasets.imikolov.Imikolov paddle.text.datasets.Imikolov,paddle.text.Imikolov paddle.text.datasets.movielens.Movielens paddle.text.datasets.Movielens,paddle.text.Movielens -paddle.text.datasets.movie_reviews.MovieReviews paddle.text.datasets.MovieRevie,paddle.text.MovieRevie paddle.text.datasets.uci_housing.UCIHousing paddle.text.datasets.UCIHousing,paddle.text.UCIHousing paddle.text.datasets.wmt14.WMT14 paddle.text.datasets.WMT14,paddle.text.WMT14 paddle.text.datasets.wmt16.WMT16 paddle.text.datasets.WMT16,paddle.text.WMT16 diff --git a/doc/paddle/api/paddle/dataset/sentiment_cn.rst b/doc/paddle/api/paddle/dataset/sentiment_cn.rst deleted file mode 100644 index d5826830f..000000000 --- a/doc/paddle/api/paddle/dataset/sentiment_cn.rst +++ /dev/null @@ -1,28 +0,0 @@ -.. _cn_api_paddle_dataset_sentiment: - -sentiment -------------------------------- - -脚本获取并预处理由NLTK提供的movie_reviews数据集。 - - -.. py:function:: paddle.dataset.sentiment.get_word_dict() - -按照样本中出现的单词的频率对单词进行排序。 - -返回: words_freq_sorted - -.. py:function:: paddle.dataset.sentiment.train() - -默认的训练集reader creator。 - -.. py:function:: paddle.dataset.sentiment.test() - -默认的测试集reader creator。 - -.. py:function:: paddle.dataset.sentiment.convert(path) - -将数据集转换为recordio格式。 - - - diff --git a/doc/paddle/api/paddle/text/datasets/movie_reviews/MovieReviews_cn.rst b/doc/paddle/api/paddle/text/datasets/movie_reviews/MovieReviews_cn.rst deleted file mode 100644 index b4c99bccf..000000000 --- a/doc/paddle/api/paddle/text/datasets/movie_reviews/MovieReviews_cn.rst +++ /dev/null @@ -1,49 +0,0 @@ -.. _cn_api_text_datasets_MovieReviews: - -MovieReviews -------------------------------- - -.. py:class:: paddle.text.datasets.MovieReviews() - - -该类是对`NLTK movie reviews `_ 测试数据集的实现。 - -参数 -::::::::: - - data_file(str)- 保存压缩数据的路径,如果参数:attr:`download`设置为True, - 可设置为None。默认为None。 - - mode(str)- 'train'或 'test' 模式。默认为'train'。 - - download(bool)- 如果:attr:`data_file`未设置,是否自动下载数据集。默认为True。 - -返回值 -::::::::: -``Dataset``,NLTK movie reviews数据集实例。 - -代码示例 -::::::::: - -.. code-block:: python - - import paddle - from paddle.text.datasets import MovieReviews - - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() - - def forward(self, word, category): - return paddle.sum(word), category - - paddle.disable_static() - - movie_reviews = MovieReviews(mode='train') - - for i in range(10): - word_list, category = movie_reviews[i] - word_list = paddle.to_tensor(word_list) - category = paddle.to_tensor(category) - - model = SimpleNet() - word_list, category = model(word_list, category) - print(word_list.numpy().shape, category.numpy()) - diff --git a/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.ipynb b/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.ipynb index 8bb35cb0a..7fdef0b26 100644 --- a/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.ipynb +++ b/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.ipynb @@ -103,7 +103,7 @@ "output_type": "stream", "text": [ "视觉相关数据集: ['DatasetFolder', 'ImageFolder', 'MNIST', 'Flowers', 'Cifar10', 'Cifar100', 'VOC2012']\n", - "自然语言相关数据集: ['Conll05st', 'Imdb', 'Imikolov', 'Movielens', 'MovieReviews', 'UCIHousing', 'WMT14', 'WMT16']\n" + "自然语言相关数据集: ['Conll05st', 'Imdb', 'Imikolov', 'Movielens', 'UCIHousing', 'WMT14', 'WMT16']\n" ] } ], diff --git a/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.rst b/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.rst index 8cf941567..8f776df2b 100644 --- a/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.rst +++ b/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.rst @@ -84,7 +84,7 @@ paddle即可使用相关高层API,如:paddle.Model、视觉领域paddle.visi .. parsed-literal:: 视觉相关数据集: ['DatasetFolder', 'ImageFolder', 'MNIST', 'Flowers', 'Cifar10', 'Cifar100', 'VOC2012'] - 自然语言相关数据集: ['Conll05st', 'Imdb', 'Imikolov', 'Movielens', 'MovieReviews', 'UCIHousing', 'WMT14', 'WMT16'] + 自然语言相关数据集: ['Conll05st', 'Imdb', 'Imikolov', 'Movielens', 'UCIHousing', 'WMT14', 'WMT16'] 这里我们是加载一个手写数字识别的数据集,用\ ``mode``\ 来标识是训练数据还是测试数据集。数据集接口会自动从远端下载数据集到本机缓存目录\ ``~/.cache/paddle/dataset``\ 。 -- GitLab