diff --git a/python/paddle/dataset/__init__.py b/python/paddle/dataset/__init__.py
index 54aa3edc51d3734633ce077a59bd86cec8d09032..d1e5975856515b1fc4f6aba67e8a110e3288cc33 100644
--- a/python/paddle/dataset/__init__.py
+++ b/python/paddle/dataset/__init__.py
@@ -22,7 +22,6 @@ import paddle.dataset.cifar
 import paddle.dataset.movielens
 import paddle.dataset.conll05
 import paddle.dataset.uci_housing
-import paddle.dataset.sentiment
 import paddle.dataset.wmt14
 import paddle.dataset.wmt16
 import paddle.dataset.mq2007
@@ -37,7 +36,6 @@ __all__ = [
     'cifar',
     'movielens',
     'conll05',
-    'sentiment',
     'uci_housing',
     'wmt14',
     'wmt16',
diff --git a/python/paddle/dataset/sentiment.py b/python/paddle/dataset/sentiment.py
deleted file mode 100644
index 721cb5a819282d5ef130de4d4596116326349d71..0000000000000000000000000000000000000000
--- a/python/paddle/dataset/sentiment.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# /usr/bin/env python
-# -*- coding:utf-8 -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-The script fetch and preprocess movie_reviews data set that provided by NLTK
-
-TODO(yuyang18): Complete dataset.
-"""
-
-from __future__ import print_function
-
-import six
-import collections
-from itertools import chain
-
-import os
-import nltk
-from nltk.corpus import movie_reviews
-import zipfile
-from functools import cmp_to_key
-
-import paddle.dataset.common
-
-URL = "https://corpora.bj.bcebos.com/movie_reviews%2Fmovie_reviews.zip"
-MD5 = '155de2b77c6834dd8eea7cbe88e93acb'
-
-__all__ = ['train', 'test', 'get_word_dict']
-NUM_TRAINING_INSTANCES = 1600
-NUM_TOTAL_INSTANCES = 2000
-
-
-def download_data_if_not_yet():
-    """
-    Download the data set, if the data set is not download.
-    """
-    try:
-        # download and extract movie_reviews.zip
-        paddle.dataset.common.download(
-            URL, 'corpora', md5sum=MD5, save_name='movie_reviews.zip')
-        path = os.path.join(paddle.dataset.common.DATA_HOME, 'corpora')
-        filename = os.path.join(path, 'movie_reviews.zip')
-        zip_file = zipfile.ZipFile(filename)
-        zip_file.extractall(path)
-        zip_file.close()
-        # make sure that nltk can find the data
-        if paddle.dataset.common.DATA_HOME not in nltk.data.path:
-            nltk.data.path.append(paddle.dataset.common.DATA_HOME)
-        movie_reviews.categories()
-    except LookupError:
-        print("Downloading movie_reviews data set, please wait.....")
-        nltk.download(
-            'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME)
-        print("Download data set success.....")
-        print("Path is " + nltk.data.find('corpora/movie_reviews').path)
-
-
-def get_word_dict():
-    """
-    Sorted the words by the frequency of words which occur in sample
-    :return:
-        words_freq_sorted
-    """
-    words_freq_sorted = list()
-    word_freq_dict = collections.defaultdict(int)
-    download_data_if_not_yet()
-
-    for category in movie_reviews.categories():
-        for field in movie_reviews.fileids(category):
-            for words in movie_reviews.words(field):
-                word_freq_dict[words] += 1
-    words_sort_list = list(six.iteritems(word_freq_dict))
-    words_sort_list.sort(key=cmp_to_key(lambda a, b: b[1] - a[1]))
-    for index, word in enumerate(words_sort_list):
-        words_freq_sorted.append((word[0], index))
-    return words_freq_sorted
-
-
-def sort_files():
-    """
-    Sorted the sample for cross reading the sample
-    :return:
-        files_list
-    """
-    files_list = list()
-    neg_file_list = movie_reviews.fileids('neg')
-    pos_file_list = movie_reviews.fileids('pos')
-    files_list = list(
-        chain.from_iterable(list(zip(neg_file_list, pos_file_list))))
-    return files_list
-
-
-def load_sentiment_data():
-    """
-    Load the data set
-    :return:
-        data_set
-    """
-    data_set = list()
-    download_data_if_not_yet()
-    words_ids = dict(get_word_dict())
-    for sample_file in sort_files():
-        words_list = list()
-        category = 0 if 'neg' in sample_file else 1
-        for word in movie_reviews.words(sample_file):
-            words_list.append(words_ids[word.lower()])
-        data_set.append((words_list, category))
-    return data_set
-
-
-def reader_creator(data):
-    """
-    Reader creator, generate an iterator for data set
-    :param data:
-        train data set or test data set
-    """
-    for each in data:
-        yield each[0], each[1]
-
-
-def train():
-    """
-    Default training set reader creator
-    """
-    data_set = load_sentiment_data()
-    return reader_creator(data_set[0:NUM_TRAINING_INSTANCES])
-
-
-def test():
-    """
-    Default test set reader creator
-    """
-    data_set = load_sentiment_data()
-    return reader_creator(data_set[NUM_TRAINING_INSTANCES:])
-
-
-def fetch():
-    nltk.download('movie_reviews', download_dir=paddle.dataset.common.DATA_HOME)
diff --git a/python/paddle/dataset/tests/test_sentiment.py b/python/paddle/dataset/tests/test_sentiment.py
deleted file mode 100644
index 3540ea06b075ed9b649af803c5a655a1e737723b..0000000000000000000000000000000000000000
--- a/python/paddle/dataset/tests/test_sentiment.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# /usr/bin/env python
-# -*- coding:utf-8 -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-import unittest
-import nltk
-import paddle.dataset.sentiment as st
-from nltk.corpus import movie_reviews
-
-
-class TestSentimentMethods(unittest.TestCase):
-    def test_get_word_dict(self):
-        word_dict = st.get_word_dict()[0:10]
-        test_word_list = [(',', 0), ('the', 1), ('.', 2), ('a', 3), ('and', 4),
-                          ('of', 5), ('to', 6), ("'", 7), ('is', 8), ('in', 9)]
-        for idx, each in enumerate(word_dict):
-            self.assertEqual(each, test_word_list[idx])
-        self.assertTrue("/root/.cache/paddle/dataset" in nltk.data.path)
-
-    def test_sort_files(self):
-        last_label = ''
-        for sample_file in st.sort_files():
-            current_label = sample_file.split("/")[0]
-            self.assertNotEqual(current_label, last_label)
-            last_label = current_label
-
-    def test_data_set(self):
-        data_set = st.load_sentiment_data()
-        last_label = -1
-
-        for each in st.test():
-            self.assertNotEqual(each[1], last_label)
-            last_label = each[1]
-
-        self.assertEqual(len(data_set), st.NUM_TOTAL_INSTANCES)
-        self.assertEqual(len(list(st.train())), st.NUM_TRAINING_INSTANCES)
-        self.assertEqual(
-            len(list(st.test())),
-            (st.NUM_TOTAL_INSTANCES - st.NUM_TRAINING_INSTANCES))
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_dataset_sentiment.py b/python/paddle/fluid/tests/unittests/test_dataset_sentiment.py
deleted file mode 100644
index b5d5d33fa3fc32a054c23c80d471ce70dd745d08..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/tests/unittests/test_dataset_sentiment.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-TestCases for Dataset,
-including create, config, run, etc.
-"""
-
-from __future__ import print_function
-import numpy as np
-import unittest
-import os
-import paddle
-import zipfile
-import paddle.dataset.common
-
-URL = "https://corpora.bj.bcebos.com/movie_reviews%2Fmovie_reviews.zip"
-MD5 = '155de2b77c6834dd8eea7cbe88e93acb'
-
-
-class TestDatasetSentiment(unittest.TestCase):
-    """  TestCases for Sentiment. """
-
-    def test_get_word_dict(self):
-        """ Testcase for get_word_dict. """
-        words_freq_sorted = paddle.dataset.sentiment.get_word_dict()
-        print(words_freq_sorted)
-        self.assertTrue(len(words_freq_sorted) == 39768)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/tests/test_dataset_movie_reviews.py b/python/paddle/tests/test_dataset_movie_reviews.py
deleted file mode 100644
index e6e6667013f89aca305f82a744c00de2af818736..0000000000000000000000000000000000000000
--- a/python/paddle/tests/test_dataset_movie_reviews.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-import numpy as np
-
-from paddle.text.datasets import *
-
-
-class TestMovieReviewsTrain(unittest.TestCase):
-    def test_main(self):
-        movie_reviews = MovieReviews(mode='train')
-        self.assertTrue(len(movie_reviews) == 1600)
-
-        # traversal whole dataset may cost a
-        # long time, randomly check 1 sample
-        idx = np.random.randint(0, 1600)
-        data = movie_reviews[idx]
-        self.assertTrue(len(data) == 2)
-        self.assertTrue(len(data[0].shape) == 1)
-        self.assertTrue(int(data[1]) in [0, 1])
-
-
-class TestMovieReviewsTest(unittest.TestCase):
-    def test_main(self):
-        movie_reviews = MovieReviews(mode='test')
-        self.assertTrue(len(movie_reviews) == 400)
-
-        # traversal whole dataset may cost a
-        # long time, randomly check 1 sample
-        idx = np.random.randint(0, 400)
-        data = movie_reviews[idx]
-        self.assertTrue(len(data) == 2)
-        self.assertTrue(len(data[0].shape) == 1)
-        self.assertTrue(int(data[1]) in [0, 1])
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/text/datasets/__init__.py b/python/paddle/text/datasets/__init__.py
index b5cea40a4f4924fee7a76bad6030a21fa5a61268..71571d09b5c2bde8ba970624195973d2a1771789 100644
--- a/python/paddle/text/datasets/__init__.py
+++ b/python/paddle/text/datasets/__init__.py
@@ -16,7 +16,6 @@ from . import conll05
 from . import imdb
 from . import imikolov
 from . import movielens
-from . import movie_reviews
 from . import uci_housing
 from . import wmt14
 from . import wmt16
@@ -25,7 +24,6 @@ from .conll05 import *
 from .imdb import *
 from .imikolov import *
 from .movielens import *
-from .movie_reviews import *
 from .uci_housing import *
 from .wmt14 import *
 from .wmt16 import *
@@ -34,7 +32,6 @@ __all__ = conll05.__all__ \
           + imdb.__all__ \
           + imikolov.__all__ \
           + movielens.__all__ \
-          + movie_reviews.__all__ \
           + uci_housing.__all__ \
           + wmt14.__all__ \
           + wmt16.__all__
diff --git a/python/paddle/text/datasets/movie_reviews.py b/python/paddle/text/datasets/movie_reviews.py
deleted file mode 100644
index db5b15654f96712abc842ca0c99654c1b7378808..0000000000000000000000000000000000000000
--- a/python/paddle/text/datasets/movie_reviews.py
+++ /dev/null
@@ -1,173 +0,0 @@
-#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-import os
-import six
-import numpy as np
-import collections
-import nltk
-from nltk.corpus import movie_reviews
-import zipfile
-from functools import cmp_to_key
-from itertools import chain
-
-import paddle
-from paddle.io import Dataset
-
-__all__ = ['MovieReviews']
-
-URL = "https://corpora.bj.bcebos.com/movie_reviews%2Fmovie_reviews.zip"
-MD5 = '155de2b77c6834dd8eea7cbe88e93acb'
-
-NUM_TRAINING_INSTANCES = 1600
-NUM_TOTAL_INSTANCES = 2000
-
-
-class MovieReviews(Dataset):
-    """
-    Implementation of `NLTK movie reviews <http://www.nltk.org/nltk_data/>`_ dataset.
-
-    Args:
-        data_file(str): path to data tar file, can be set None if
-            :attr:`download` is True. Default None
-        mode(str): 'train' 'test' mode. Default 'train'.
-        download(bool): whether auto download cifar dataset if
-            :attr:`data_file` unset. Default True.
-
-    Returns:
-        Dataset: instance of movie reviews dataset
-
-    Examples:
-
-        .. code-block:: python
-
-            import paddle
-            from paddle.text.datasets import MovieReviews
-
-            class SimpleNet(paddle.nn.Layer):
-                def __init__(self):
-                    super(SimpleNet, self).__init__()
-
-                def forward(self, word, category):
-                    return paddle.sum(word), category
-
-            paddle.disable_static()
-
-            movie_reviews = MovieReviews(mode='train')
-
-            for i in range(10):
-                word_list, category = movie_reviews[i]
-                word_list = paddle.to_tensor(word_list)
-                category = paddle.to_tensor(category)
-
-                model = SimpleNet()
-                word_list, category = model(word_list, category)
-                print(word_list.numpy().shape, category.numpy())
-
-    """
-
-    def __init__(self, mode='train'):
-        assert mode.lower() in ['train', 'test'], \
-            "mode should be 'train', 'test', but got {}".format(mode)
-        self.mode = mode.lower()
-
-        self._download_data_if_not_yet()
-
-        # read dataset into memory
-        self._load_sentiment_data()
-
-    def _get_word_dict(self):
-        """
-        Sorted the words by the frequency of words which occur in sample
-        :return:
-            words_freq_sorted
-        """
-        words_freq_sorted = list()
-        word_freq_dict = collections.defaultdict(int)
-
-        for category in movie_reviews.categories():
-            for field in movie_reviews.fileids(category):
-                for words in movie_reviews.words(field):
-                    word_freq_dict[words] += 1
-        words_sort_list = list(six.iteritems(word_freq_dict))
-        words_sort_list.sort(key=cmp_to_key(lambda a, b: b[1] - a[1]))
-        for index, word in enumerate(words_sort_list):
-            words_freq_sorted.append((word[0], index))
-        return words_freq_sorted
-
-    def _sort_files(self):
-        """
-        Sorted the sample for cross reading the sample
-        :return:
-            files_list
-        """
-        files_list = list()
-        neg_file_list = movie_reviews.fileids('neg')
-        pos_file_list = movie_reviews.fileids('pos')
-        files_list = list(
-            chain.from_iterable(list(zip(neg_file_list, pos_file_list))))
-        return files_list
-
-    def _load_sentiment_data(self):
-        """
-        Load the data set
-        :return:
-            data_set
-        """
-        self.data = []
-        words_ids = dict(self._get_word_dict())
-        for sample_file in self._sort_files():
-            words_list = list()
-            category = 0 if 'neg' in sample_file else 1
-            for word in movie_reviews.words(sample_file):
-                words_list.append(words_ids[word.lower()])
-            self.data.append((words_list, category))
-
-    def _download_data_if_not_yet(self):
-        """
-        Download the data set, if the data set is not download.
-        """
-        try:
-            # download and extract movie_reviews.zip
-            paddle.dataset.common.download(
-                URL, 'corpora', md5sum=MD5, save_name='movie_reviews.zip')
-            path = os.path.join(paddle.dataset.common.DATA_HOME, 'corpora')
-            filename = os.path.join(path, 'movie_reviews.zip')
-            zip_file = zipfile.ZipFile(filename)
-            zip_file.extractall(path)
-            zip_file.close()
-            # make sure that nltk can find the data
-            if paddle.dataset.common.DATA_HOME not in nltk.data.path:
-                nltk.data.path.append(paddle.dataset.common.DATA_HOME)
-            movie_reviews.categories()
-        except LookupError:
-            print("Downloading movie_reviews data set, please wait.....")
-            nltk.download(
-                'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME)
-            print("Download data set success.....")
-            print("Path is " + nltk.data.find('corpora/movie_reviews').path)
-
-    def __getitem__(self, idx):
-        if self.mode == 'test':
-            idx += NUM_TRAINING_INSTANCES
-        data = self.data[idx]
-        return np.array(data[0]), np.array(data[1])
-
-    def __len__(self):
-        if self.mode == 'train':
-            return NUM_TRAINING_INSTANCES
-        else:
-            return NUM_TOTAL_INSTANCES - NUM_TRAINING_INSTANCES