未验证 提交 e75ded08 编写于 作者: L lilong12 提交者: GitHub

fix the compatibility problem between PY2 and PY3 (issue #20749) (#20942)

* fix the compatibility problem between PY2 and PY3.

* add ut, test=develop

* add proxy, test=develop

* download dataset before test, test=develop
上级 3662fb71
...@@ -28,6 +28,9 @@ from itertools import chain ...@@ -28,6 +28,9 @@ from itertools import chain
import nltk import nltk
from nltk.corpus import movie_reviews from nltk.corpus import movie_reviews
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
from functools import cmp_to_key
import paddle.dataset.common import paddle.dataset.common
...@@ -68,7 +71,7 @@ def get_word_dict(): ...@@ -68,7 +71,7 @@ def get_word_dict():
for words in movie_reviews.words(field): for words in movie_reviews.words(field):
word_freq_dict[words] += 1 word_freq_dict[words] += 1
words_sort_list = list(six.iteritems(word_freq_dict)) words_sort_list = list(six.iteritems(word_freq_dict))
words_sort_list.sort(cmp=lambda a, b: b[1] - a[1]) words_sort_list.sort(key=cmp_to_key(lambda a, b: b[1] - a[1]))
for index, word in enumerate(words_sort_list): for index, word in enumerate(words_sort_list):
words_freq_sorted.append((word[0], index)) words_freq_sorted.append((word[0], index))
return words_freq_sorted return words_freq_sorted
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test cases for the sentiment dataset (paddle.dataset.sentiment),
currently covering get_word_dict.
"""
from __future__ import print_function
import numpy as np
import unittest
import os
import paddle
import zipfile
import paddle.dataset.common
URL = "https://corpora.bj.bcebos.com/movie_reviews%2Fmovie_reviews.zip"
MD5 = '155de2b77c6834dd8eea7cbe88e93acb'
class TestDatasetSentiment(unittest.TestCase):
    """Test cases for paddle.dataset.sentiment."""

    def setUp(self):
        """Download the movie_reviews archive and unpack it before each test.

        The archive is requested via paddle.dataset.common.download and is
        then read from DATA_HOME/corpora/movie_reviews.zip and extracted into
        that same directory.
        """
        paddle.dataset.common.download(
            URL, 'corpora', md5sum=MD5, save_name='movie_reviews.zip')
        path = os.path.join(paddle.dataset.common.DATA_HOME, 'corpora')
        filename = os.path.join(path, 'movie_reviews.zip')
        # The context manager closes the archive even when extraction raises,
        # so a failed setUp does not leak the open file handle.
        with zipfile.ZipFile(filename) as zip_file:
            zip_file.extractall(path)

    def test_get_word_dict(self):
        """Check that get_word_dict returns the expected number of entries."""
        words_freq_sorted = paddle.dataset.sentiment.get_word_dict()
        print(words_freq_sorted)
        # assertEqual reports both the actual and the expected length on
        # failure, unlike assertTrue on a pre-evaluated comparison.
        self.assertEqual(len(words_freq_sorted), 39768)
# Run the test cases through unittest's runner when executed as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册