diff --git a/doc/fluid/api/data/dataset.rst b/doc/fluid/api/data/dataset.rst index 52d7c44ac948b54c167c58d57a580ccd32c95d48..420a5fc75d8e3bb403fcf9f705dd80efc24044aa 100644 --- a/doc/fluid/api/data/dataset.rst +++ b/doc/fluid/api/data/dataset.rst @@ -10,7 +10,6 @@ dataset dataset/imdb.rst dataset/imikolov.rst dataset/movielens.rst - dataset/sentiment.rst dataset/uci_housing.rst dataset/wmt14.rst dataset/wmt16.rst diff --git a/doc/fluid/api/data/dataset/sentiment.rst b/doc/fluid/api/data/dataset/sentiment.rst deleted file mode 100644 index 6713c9ef983a31d4c495ed87394f383d0080b529..0000000000000000000000000000000000000000 --- a/doc/fluid/api/data/dataset/sentiment.rst +++ /dev/null @@ -1,6 +0,0 @@ -sentiment -+++++++++ - -.. automodule:: paddle.dataset.sentiment - :members: - :noindex: diff --git a/doc/fluid/api_cn/data/dataset_cn.rst b/doc/fluid/api_cn/data/dataset_cn.rst index 87017bd66993881d0cfba8c93bdaf92d31592512..17a6a32b855cdcc4f52a77e4639d6a8c8a87eeb4 100644 --- a/doc/fluid/api_cn/data/dataset_cn.rst +++ b/doc/fluid/api_cn/data/dataset_cn.rst @@ -14,7 +14,6 @@ dataset dataset_cn/imdb_cn.rst dataset_cn/imikolov_cn.rst dataset_cn/movielens_cn.rst - dataset_cn/sentiment_cn.rst dataset_cn/uci_housing_cn.rst dataset_cn/wmt14_cn.rst dataset_cn/wmt16_cn.rst diff --git a/doc/fluid/api_cn/data/dataset_cn/sentiment_cn.rst b/doc/fluid/api_cn/data/dataset_cn/sentiment_cn.rst deleted file mode 100644 index d5826830f07689805114028f6b387731e4770be2..0000000000000000000000000000000000000000 --- a/doc/fluid/api_cn/data/dataset_cn/sentiment_cn.rst +++ /dev/null @@ -1,28 +0,0 @@ -.. _cn_api_paddle_dataset_sentiment: - -sentiment -------------------------------- - -脚本获取并预处理由NLTK提供的movie_reviews数据集。 - - -.. py:function:: paddle.dataset.sentiment.get_word_dict() - -按照样本中出现的单词的频率对单词进行排序。 - -返回: words_freq_sorted - -.. py:function:: paddle.dataset.sentiment.train() - -默认的训练集reader creator。 - -.. py:function:: paddle.dataset.sentiment.test() - -默认的测试集reader creator。 - -.. py:function:: paddle.dataset.sentiment.convert(path) - -将数据集转换为recordio格式。 - - - diff --git a/doc/paddle/api/alias_api_mapping b/doc/paddle/api/alias_api_mapping index 3a43a1cdf51facf13c9a1f8c255d5fd3228bfec4..2b2b4074e8b17c28f97ed137afce32e469ce55fa 100644 --- a/doc/paddle/api/alias_api_mapping +++ b/doc/paddle/api/alias_api_mapping @@ -561,7 +561,6 @@ paddle.text.datasets.conll05.Conll05st paddle.text.datasets.Conll05st,paddle.tex paddle.text.datasets.imdb.Imdb paddle.text.datasets.Imdb,paddle.text.Imdb paddle.text.datasets.imikolov.Imikolov paddle.text.datasets.Imikolov,paddle.text.Imikolov paddle.text.datasets.movielens.Movielens paddle.text.datasets.Movielens,paddle.text.Movielens -paddle.text.datasets.movie_reviews.MovieReviews paddle.text.datasets.MovieRevie,paddle.text.MovieRevie paddle.text.datasets.uci_housing.UCIHousing paddle.text.datasets.UCIHousing,paddle.text.UCIHousing paddle.text.datasets.wmt14.WMT14 paddle.text.datasets.WMT14,paddle.text.WMT14 paddle.text.datasets.wmt16.WMT16 paddle.text.datasets.WMT16,paddle.text.WMT16 diff --git a/doc/paddle/api/paddle/dataset/sentiment_cn.rst b/doc/paddle/api/paddle/dataset/sentiment_cn.rst deleted file mode 100644 index d5826830f07689805114028f6b387731e4770be2..0000000000000000000000000000000000000000 --- a/doc/paddle/api/paddle/dataset/sentiment_cn.rst +++ /dev/null @@ -1,28 +0,0 @@ -.. _cn_api_paddle_dataset_sentiment: - -sentiment -------------------------------- - -脚本获取并预处理由NLTK提供的movie_reviews数据集。 - - -.. py:function:: paddle.dataset.sentiment.get_word_dict() - -按照样本中出现的单词的频率对单词进行排序。 - -返回: words_freq_sorted - -.. py:function:: paddle.dataset.sentiment.train() - -默认的训练集reader creator。 - -.. py:function:: paddle.dataset.sentiment.test() - -默认的测试集reader creator。 - -.. py:function:: paddle.dataset.sentiment.convert(path) - -将数据集转换为recordio格式。 - - - diff --git a/doc/paddle/api/paddle/text/datasets/movie_reviews/MovieReviews_cn.rst b/doc/paddle/api/paddle/text/datasets/movie_reviews/MovieReviews_cn.rst deleted file mode 100644 index b4c99bccf3c72776f9f805445791918d08282eda..0000000000000000000000000000000000000000 --- a/doc/paddle/api/paddle/text/datasets/movie_reviews/MovieReviews_cn.rst +++ /dev/null @@ -1,49 +0,0 @@ -.. _cn_api_text_datasets_MovieReviews: - -MovieReviews -------------------------------- - -.. py:class:: paddle.text.datasets.MovieReviews() - - -该类是对`NLTK movie reviews `_ 测试数据集的实现。 - -参数 -::::::::: - - data_file(str)- 保存压缩数据的路径,如果参数:attr:`download`设置为True, - 可设置为None。默认为None。 - - mode(str)- 'train'或 'test' 模式。默认为'train'。 - - download(bool)- 如果:attr:`data_file`未设置,是否自动下载数据集。默认为True。 - -返回值 -::::::::: -``Dataset``,NLTK movie reviews数据集实例。 - -代码示例 -::::::::: - -.. code-block:: python - - import paddle - from paddle.text.datasets import MovieReviews - - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() - - def forward(self, word, category): - return paddle.sum(word), category - - paddle.disable_static() - - movie_reviews = MovieReviews(mode='train') - - for i in range(10): - word_list, category = movie_reviews[i] - word_list = paddle.to_tensor(word_list) - category = paddle.to_tensor(category) - - model = SimpleNet() - word_list, category = model(word_list, category) - print(word_list.numpy().shape, category.numpy()) - diff --git a/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.ipynb b/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.ipynb index 8bb35cb0a9ddc5075f43707688e324eb0255ffb3..7fdef0b264743b671b7d7a2712852b7658272309 100644 --- a/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.ipynb +++ b/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.ipynb @@ -103,7 +103,7 @@ "output_type": "stream", "text": [ "视觉相关数据集: ['DatasetFolder', 'ImageFolder', 'MNIST', 'Flowers', 'Cifar10', 'Cifar100', 'VOC2012']\n", - "自然语言相关数据集: ['Conll05st', 'Imdb', 'Imikolov', 'Movielens', 'MovieReviews', 'UCIHousing', 'WMT14', 'WMT16']\n" + "自然语言相关数据集: ['Conll05st', 'Imdb', 'Imikolov', 'Movielens', 'UCIHousing', 'WMT14', 'WMT16']\n" ] } ], diff --git a/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.rst b/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.rst index 8cf94156740e5812d20142de88b8c0cd52339271..8f776df2bbf4648272171f8b7a72f2eb7ba20714 100644 --- a/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.rst +++ b/doc/paddle/tutorial/quick_start/high_level_api/high_level_api.rst @@ -84,7 +84,7 @@ paddle即可使用相关高层API,如:paddle.Model、视觉领域paddle.visi .. parsed-literal:: 视觉相关数据集: ['DatasetFolder', 'ImageFolder', 'MNIST', 'Flowers', 'Cifar10', 'Cifar100', 'VOC2012'] - 自然语言相关数据集: ['Conll05st', 'Imdb', 'Imikolov', 'Movielens', 'MovieReviews', 'UCIHousing', 'WMT14', 'WMT16'] + 自然语言相关数据集: ['Conll05st', 'Imdb', 'Imikolov', 'Movielens', 'UCIHousing', 'WMT14', 'WMT16'] 这里我们是加载一个手写数字识别的数据集,用\ ``mode``\ 来标识是训练数据还是测试数据集。数据集接口会自动从远端下载数据集到本机缓存目录\ ``~/.cache/paddle/dataset``\ 。