提交 aa287e19 编写于 作者: L lilong12 提交者: Tao Luo

remove the dependency on ssl (#21712)

上级 37896e90
...@@ -26,14 +26,17 @@ import six ...@@ -26,14 +26,17 @@ import six
import collections import collections
from itertools import chain from itertools import chain
import os
import nltk import nltk
from nltk.corpus import movie_reviews from nltk.corpus import movie_reviews
import ssl import zipfile
ssl._create_default_https_context = ssl._create_unverified_context
from functools import cmp_to_key from functools import cmp_to_key
import paddle.dataset.common import paddle.dataset.common
URL = "https://corpora.bj.bcebos.com/movie_reviews%2Fmovie_reviews.zip"
MD5 = '155de2b77c6834dd8eea7cbe88e93acb'
__all__ = ['train', 'test', 'get_word_dict'] __all__ = ['train', 'test', 'get_word_dict']
NUM_TRAINING_INSTANCES = 1600 NUM_TRAINING_INSTANCES = 1600
NUM_TOTAL_INSTANCES = 2000 NUM_TOTAL_INSTANCES = 2000
...@@ -44,6 +47,14 @@ def download_data_if_not_yet(): ...@@ -44,6 +47,14 @@ def download_data_if_not_yet():
Download the data set if it has not been downloaded yet. Download the data set if it has not been downloaded yet.
""" """
try: try:
# download and extract movie_reviews.zip
paddle.dataset.common.download(
URL, 'corpora', md5sum=MD5, save_name='movie_reviews.zip')
path = os.path.join(paddle.dataset.common.DATA_HOME, 'corpora')
filename = os.path.join(path, 'movie_reviews.zip')
zip_file = zipfile.ZipFile(filename)
zip_file.extractall(path)
zip_file.close()
# make sure that nltk can find the data # make sure that nltk can find the data
if paddle.dataset.common.DATA_HOME not in nltk.data.path: if paddle.dataset.common.DATA_HOME not in nltk.data.path:
nltk.data.path.append(paddle.dataset.common.DATA_HOME) nltk.data.path.append(paddle.dataset.common.DATA_HOME)
......
...@@ -31,15 +31,6 @@ MD5 = '155de2b77c6834dd8eea7cbe88e93acb' ...@@ -31,15 +31,6 @@ MD5 = '155de2b77c6834dd8eea7cbe88e93acb'
class TestDatasetSentiment(unittest.TestCase): class TestDatasetSentiment(unittest.TestCase):
""" TestCases for Sentiment. """ """ TestCases for Sentiment. """
def setUp(self):
paddle.dataset.common.download(
URL, 'corpora', md5sum=MD5, save_name='movie_reviews.zip')
path = os.path.join(paddle.dataset.common.DATA_HOME, 'corpora')
filename = os.path.join(path, 'movie_reviews.zip')
zip_file = zipfile.ZipFile(filename)
zip_file.extractall(path)
zip_file.close()
def test_get_word_dict(self): def test_get_word_dict(self):
""" Testcase for get_word_dict. """ """ Testcase for get_word_dict. """
words_freq_sorted = paddle.dataset.sentiment.get_word_dict() words_freq_sorted = paddle.dataset.sentiment.get_word_dict()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册