提交 e915aa9c 作者: Your Name

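fix bugs: export `convert` in the dataset modules' `__all__`, add the missing `path` parameter to `convert()` for the conll05st and imdb datasets, and reference `paddle.v2.dataset.common` by its full name in the imikolov module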

上级 97270b9f
...@@ -34,7 +34,7 @@ import numpy ...@@ -34,7 +34,7 @@ import numpy
import paddle.v2.dataset.common import paddle.v2.dataset.common
import tarfile import tarfile
__all__ = ['train100', 'test100', 'train10', 'test10'] __all__ = ['train100', 'test100', 'train10', 'test10', 'convert']
URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/' URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz' CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz'
......
...@@ -25,7 +25,7 @@ import gzip ...@@ -25,7 +25,7 @@ import gzip
import itertools import itertools
import paddle.v2.dataset.common import paddle.v2.dataset.common
__all__ = ['test, get_dict', 'get_embedding'] __all__ = ['test, get_dict', 'get_embedding', 'convert']
DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5 = '387719152ae52d60422c016e92a742fc' DATA_MD5 = '387719152ae52d60422c016e92a742fc'
...@@ -229,7 +229,7 @@ def fetch(): ...@@ -229,7 +229,7 @@ def fetch():
paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5) paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5)
def convert(): def convert(path):
""" """
Converts dataset to recordio format Converts dataset to recordio format
""" """
......
...@@ -28,7 +28,7 @@ import re ...@@ -28,7 +28,7 @@ import re
import string import string
import threading import threading
__all__ = ['build_dict', 'train', 'test'] __all__ = ['build_dict', 'train', 'test', 'convert']
URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz' URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
MD5 = '7c2ac02c03563afcf9b574c7e56c153a' MD5 = '7c2ac02c03563afcf9b574c7e56c153a'
...@@ -168,12 +168,10 @@ def fetch(): ...@@ -168,12 +168,10 @@ def fetch():
paddle.v2.dataset.common.download(URL, 'imdb', MD5) paddle.v2.dataset.common.download(URL, 'imdb', MD5)
def convert(): def convert(path):
""" """
Converts dataset to recordio format Converts dataset to recordio format
""" """
word_dict = ds.imdb.word_dict() w = word_dict()
paddle.v2.dataset.common.convert(path, lambda: train(word_dict), 10, paddle.v2.dataset.common.convert(path, lambda: train(w), 10, "imdb_train")
"imdb_train") paddle.v2.dataset.common.convert(path, lambda: test(w), 10, "imdb_test")
paddle.v2.dataset.common.convert(path, lambda: test(word_dict), 10,
"imdb_test")
...@@ -18,11 +18,11 @@ This module will download dataset from ...@@ -18,11 +18,11 @@ This module will download dataset from
http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set
into paddle reader creators. into paddle reader creators.
""" """
import paddle.v2.dataset.common as common import paddle.v2.dataset.common
import collections import collections
import tarfile import tarfile
__all__ = ['train', 'test', 'build_dict'] __all__ = ['train', 'test', 'build_dict', 'convert']
URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'
MD5 = '30177ea32e27c525793142b6bf2c8e2d' MD5 = '30177ea32e27c525793142b6bf2c8e2d'
...@@ -145,7 +145,7 @@ def test(word_idx, n, data_type=DataType.NGRAM): ...@@ -145,7 +145,7 @@ def test(word_idx, n, data_type=DataType.NGRAM):
def fetch(): def fetch():
common.download(URL, "imikolov", MD5) paddle.v2.dataset.common.download(URL, "imikolov", MD5)
def convert(path): def convert(path):
...@@ -154,5 +154,7 @@ def convert(path): ...@@ -154,5 +154,7 @@ def convert(path):
""" """
N = 5 N = 5
word_dict = build_dict() word_dict = build_dict()
common.convert(path, train(word_dict, N), 10, "imikolov_train") paddle.v2.dataset.common.convert(path,
common.convert(path, test(word_dict, N), 10, "imikolov_test") train(word_dict, N), 10, "imikolov_train")
paddle.v2.dataset.common.convert(path,
test(word_dict, N), 10, "imikolov_test")
...@@ -21,7 +21,7 @@ import paddle.v2.dataset.common ...@@ -21,7 +21,7 @@ import paddle.v2.dataset.common
import subprocess import subprocess
import numpy import numpy
import platform import platform
__all__ = ['train', 'test'] __all__ = ['train', 'test', 'convert']
URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/' URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/'
TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz' TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz'
......
...@@ -30,7 +30,8 @@ import functools ...@@ -30,7 +30,8 @@ import functools
__all__ = [ __all__ = [
'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id', 'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id',
'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info' 'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info',
'convert'
] ]
age_table = [1, 18, 25, 35, 45, 50, 56] age_table = [1, 18, 25, 35, 45, 50, 56]
......
...@@ -28,7 +28,7 @@ from nltk.corpus import movie_reviews ...@@ -28,7 +28,7 @@ from nltk.corpus import movie_reviews
import paddle.v2.dataset.common import paddle.v2.dataset.common
__all__ = ['train', 'test', 'get_word_dict'] __all__ = ['train', 'test', 'get_word_dict', 'convert']
NUM_TRAINING_INSTANCES = 1600 NUM_TRAINING_INSTANCES = 1600
NUM_TOTAL_INSTANCES = 2000 NUM_TOTAL_INSTANCES = 2000
......
...@@ -29,7 +29,7 @@ URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing ...@@ -29,7 +29,7 @@ URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing
MD5 = 'd4accdce7a25600298819f8e28e8d593' MD5 = 'd4accdce7a25600298819f8e28e8d593'
feature_names = [ feature_names = [
'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
'PTRATIO', 'B', 'LSTAT' 'PTRATIO', 'B', 'LSTAT', 'convert'
] ]
UCI_TRAIN_DATA = None UCI_TRAIN_DATA = None
......
...@@ -25,7 +25,7 @@ import gzip ...@@ -25,7 +25,7 @@ import gzip
import paddle.v2.dataset.common import paddle.v2.dataset.common
from paddle.v2.parameters import Parameters from paddle.v2.parameters import Parameters
__all__ = ['train', 'test', 'build_dict'] __all__ = ['train', 'test', 'build_dict', 'convert']
URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz' URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册