提交 e915aa9c 编写于 作者: Y Your Name

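fix bugs: export `convert` in the dataset modules' `__all__`, make `convert` take a `path` argument, and refer to `paddle.v2.dataset.common` by its full name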

上级 97270b9f
......@@ -34,7 +34,7 @@ import numpy
import paddle.v2.dataset.common
import tarfile
__all__ = ['train100', 'test100', 'train10', 'test10']
__all__ = ['train100', 'test100', 'train10', 'test10', 'convert']
URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz'
......
......@@ -25,7 +25,7 @@ import gzip
import itertools
import paddle.v2.dataset.common
__all__ = ['test, get_dict', 'get_embedding']
# Names exported by a star-import of this module; each name is its own string.
__all__ = ['test', 'get_dict', 'get_embedding', 'convert']
DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5 = '387719152ae52d60422c016e92a742fc'
......@@ -229,7 +229,7 @@ def fetch():
paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5)
def convert():
def convert(path):
"""
Converts dataset to recordio format
"""
......
......@@ -28,7 +28,7 @@ import re
import string
import threading
__all__ = ['build_dict', 'train', 'test']
__all__ = ['build_dict', 'train', 'test', 'convert']
URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
MD5 = '7c2ac02c03563afcf9b574c7e56c153a'
......@@ -168,12 +168,10 @@ def fetch():
paddle.v2.dataset.common.download(URL, 'imdb', MD5)
def convert():
def convert(path):
"""
Converts dataset to recordio format
"""
word_dict = ds.imdb.word_dict()
paddle.v2.dataset.common.convert(path, lambda: train(word_dict), 10,
"imdb_train")
paddle.v2.dataset.common.convert(path, lambda: test(word_dict), 10,
"imdb_test")
w = word_dict()
paddle.v2.dataset.common.convert(path, lambda: train(w), 10, "imdb_train")
paddle.v2.dataset.common.convert(path, lambda: test(w), 10, "imdb_test")
......@@ -18,11 +18,11 @@ This module will download dataset from
http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set
into paddle reader creators.
"""
import paddle.v2.dataset.common as common
import paddle.v2.dataset.common
import collections
import tarfile
__all__ = ['train', 'test', 'build_dict']
__all__ = ['train', 'test', 'build_dict', 'convert']
URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'
MD5 = '30177ea32e27c525793142b6bf2c8e2d'
......@@ -145,7 +145,7 @@ def test(word_idx, n, data_type=DataType.NGRAM):
def fetch():
common.download(URL, "imikolov", MD5)
paddle.v2.dataset.common.download(URL, "imikolov", MD5)
def convert(path):
......@@ -154,5 +154,7 @@ def convert(path):
"""
N = 5
word_dict = build_dict()
common.convert(path, train(word_dict, N), 10, "imikolov_train")
common.convert(path, test(word_dict, N), 10, "imikolov_test")
paddle.v2.dataset.common.convert(path,
train(word_dict, N), 10, "imikolov_train")
paddle.v2.dataset.common.convert(path,
test(word_dict, N), 10, "imikolov_test")
......@@ -21,7 +21,7 @@ import paddle.v2.dataset.common
import subprocess
import numpy
import platform
__all__ = ['train', 'test']
__all__ = ['train', 'test', 'convert']
URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/'
TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz'
......
......@@ -30,7 +30,8 @@ import functools
__all__ = [
'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id',
'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info'
'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info',
'convert'
]
age_table = [1, 18, 25, 35, 45, 50, 56]
......
......@@ -28,7 +28,7 @@ from nltk.corpus import movie_reviews
import paddle.v2.dataset.common
__all__ = ['train', 'test', 'get_word_dict']
__all__ = ['train', 'test', 'get_word_dict', 'convert']
NUM_TRAINING_INSTANCES = 1600
NUM_TOTAL_INSTANCES = 2000
......
......@@ -29,7 +29,7 @@ URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing
MD5 = 'd4accdce7a25600298819f8e28e8d593'
feature_names = [
'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
'PTRATIO', 'B', 'LSTAT'
'PTRATIO', 'B', 'LSTAT', 'convert'
]
UCI_TRAIN_DATA = None
......
......@@ -25,7 +25,7 @@ import gzip
import paddle.v2.dataset.common
from paddle.v2.parameters import Parameters
__all__ = ['train', 'test', 'build_dict']
__all__ = ['train', 'test', 'build_dict', 'convert']
URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册