remove the dependency on ssl (#21712)

aa287e19 · lilong12 · Tao Luo · 37896e90 · aa287e19 · aa287e19
Showing with 13 addition and 11 deletion

python/paddle/dataset/sentiment.py python/paddle/dataset/sentiment.py +13 -2

python/paddle/fluid/tests/unittests/test_dataset_sentiment.py ...on/paddle/fluid/tests/unittests/test_dataset_sentiment.py +0 -9

未找到文件。
--- a/python/paddle/dataset/sentiment.py
+++ b/python/paddle/dataset/sentiment.py
@@ -26,14 +26,17 @@ import six
 import collections
 from itertools import chain
+import os
 import nltk
 from nltk.corpus import movie_reviews
-import ssl
+import zipfile
-ssl._create_default_https_context = ssl._create_unverified_context
 from functools import cmp_to_key
 import paddle.dataset.common
+URL = "https://corpora.bj.bcebos.com/movie_reviews%2Fmovie_reviews.zip"
+MD5 = '155de2b77c6834dd8eea7cbe88e93acb'
 __all__ = ['train', 'test', 'get_word_dict']
 NUM_TRAINING_INSTANCES = 1600
 NUM_TOTAL_INSTANCES = 2000
@@ -44,6 +47,14 @@ def download_data_if_not_yet():
    Download the data set, if the data set is not download.
    """
    try:
+        # download and extract movie_reviews.zip
+        paddle.dataset.common.download(
+            URL, 'corpora', md5sum=MD5, save_name='movie_reviews.zip')
+        path = os.path.join(paddle.dataset.common.DATA_HOME, 'corpora')
+        filename = os.path.join(path, 'movie_reviews.zip')
+        zip_file = zipfile.ZipFile(filename)
+        zip_file.extractall(path)
+        zip_file.close()
        # make sure that nltk can find the data
        if paddle.dataset.common.DATA_HOME not in nltk.data.path:
            nltk.data.path.append(paddle.dataset.common.DATA_HOME)

--- a/python/paddle/fluid/tests/unittests/test_dataset_sentiment.py
+++ b/python/paddle/fluid/tests/unittests/test_dataset_sentiment.py
@@ -31,15 +31,6 @@ MD5 = '155de2b77c6834dd8eea7cbe88e93acb'
 class TestDatasetSentiment(unittest.TestCase):
    """  TestCases for Sentiment. """
-    def setUp(self):
-        paddle.dataset.common.download(
-            URL, 'corpora', md5sum=MD5, save_name='movie_reviews.zip')
-        path = os.path.join(paddle.dataset.common.DATA_HOME, 'corpora')
-        filename = os.path.join(path, 'movie_reviews.zip')
-        zip_file = zipfile.ZipFile(filename)
-        zip_file.extractall(path)
-        zip_file.close()
    def test_get_word_dict(self):
        """ Testcase for get_word_dict. """
        words_freq_sorted = paddle.dataset.sentiment.get_word_dict()