未验证 提交 b5247cac 编写于 作者: H Helin Wang 提交者: GitHub

Merge pull request #9414 from helinwang/move_reader_dataset

Make paddle.fluid no longer depends on paddle.v2
......@@ -73,12 +73,13 @@ add_custom_target(paddle_python ALL DEPENDS ${paddle_python_deps})
set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
if (WITH_TESTING)
add_subdirectory(paddle/reader/tests)
add_subdirectory(paddle/dataset/tests)
if(NOT WITH_FLUID_ONLY)
add_subdirectory(paddle/trainer_config_helpers/tests)
if (WITH_SWIG_PY)
# enable v2 API unittest only when paddle swig api is compiled
add_subdirectory(paddle/v2/tests)
add_subdirectory(paddle/v2/reader/tests)
add_subdirectory(paddle/v2/plot/tests)
endif()
endif()
......
......@@ -14,8 +14,14 @@
try:
from version import full_version as __version__
from version import commit as __git_commit__
except ImportError:
import sys
sys.stderr.write('''Warning with import paddle: you should not
import paddle from the source directory; please install paddlepaddle*.whl firstly.'''
)
import reader
import dataset
import batch
batch = batch.batch
......@@ -28,6 +28,7 @@ import wmt16
import mq2007
import flowers
import voc2012
import image
__all__ = [
'mnist',
......@@ -43,4 +44,5 @@ __all__ = [
'mq2007',
'flowers',
'voc2012',
'image',
]
......@@ -31,7 +31,7 @@ images per class.
import cPickle
import itertools
import numpy
import paddle.v2.dataset.common
import paddle.dataset.common
import tarfile
__all__ = ['train100', 'test100', 'train10', 'test10', 'convert']
......@@ -75,7 +75,7 @@ def train100():
:rtype: callable
"""
return reader_creator(
paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
'train')
......@@ -90,7 +90,7 @@ def test100():
:rtype: callable
"""
return reader_creator(
paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
'test')
......@@ -105,7 +105,7 @@ def train10():
:rtype: callable
"""
return reader_creator(
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'data_batch')
......@@ -120,20 +120,20 @@ def test10():
:rtype: callable
"""
return reader_creator(
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'test_batch')
def fetch():
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5)
paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5)
paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5)
paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5)
def convert(path):
"""
Converts dataset to recordio format
"""
paddle.v2.dataset.common.convert(path, train100(), 1000, "cifar_train100")
paddle.v2.dataset.common.convert(path, test100(), 1000, "cifar_test100")
paddle.v2.dataset.common.convert(path, train10(), 1000, "cifar_train10")
paddle.v2.dataset.common.convert(path, test10(), 1000, "cifar_test10")
paddle.dataset.common.convert(path, train100(), 1000, "cifar_train100")
paddle.dataset.common.convert(path, test100(), 1000, "cifar_test100")
paddle.dataset.common.convert(path, train10(), 1000, "cifar_train10")
paddle.dataset.common.convert(path, test10(), 1000, "cifar_test10")
......@@ -19,7 +19,7 @@ import errno
import shutil
import sys
import importlib
import paddle.v2.dataset
import paddle.dataset
import cPickle
import glob
import cPickle as pickle
......@@ -105,24 +105,24 @@ def download(url, module_name, md5sum, save_name=None):
def fetch_all():
for module_name in filter(lambda x: not x.startswith("__"),
dir(paddle.v2.dataset)):
dir(paddle.dataset)):
if "fetch" in dir(
importlib.import_module("paddle.v2.dataset.%s" % module_name)):
importlib.import_module("paddle.dataset.%s" % module_name)):
getattr(
importlib.import_module("paddle.v2.dataset.%s" % module_name),
importlib.import_module("paddle.dataset.%s" % module_name),
"fetch")()
def fetch_all_recordio(path):
for module_name in filter(lambda x: not x.startswith("__"),
dir(paddle.v2.dataset)):
dir(paddle.dataset)):
if "convert" in dir(
importlib.import_module("paddle.v2.dataset.%s" % module_name)) and \
importlib.import_module("paddle.dataset.%s" % module_name)) and \
not module_name == "common":
ds_path = os.path.join(path, module_name)
must_mkdirs(ds_path)
getattr(
importlib.import_module("paddle.v2.dataset.%s" % module_name),
importlib.import_module("paddle.dataset.%s" % module_name),
"convert")(ds_path)
......@@ -130,7 +130,7 @@ def split(reader, line_count, suffix="%05d.pickle", dumper=cPickle.dump):
"""
you can call the function as:
split(paddle.v2.dataset.cifar.train10(), line_count=1000,
split(paddle.dataset.cifar.train10(), line_count=1000,
suffix="imikolov-train-%05d.pickle")
the output files as:
......
......@@ -23,7 +23,7 @@ to initialize SRL model.
import tarfile
import gzip
import itertools
import paddle.v2.dataset.common
import paddle.dataset.common
__all__ = ['test, get_dict', 'get_embedding', 'convert']
......@@ -203,14 +203,11 @@ def get_dict():
Get the word, verb and label dictionary of Wikipedia corpus.
"""
word_dict = load_dict(
paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st',
WORDDICT_MD5))
paddle.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5))
verb_dict = load_dict(
paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st',
VERBDICT_MD5))
paddle.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5))
label_dict = load_label_dict(
paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st',
TRGDICT_MD5))
paddle.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5))
return word_dict, verb_dict, label_dict
......@@ -218,7 +215,7 @@ def get_embedding():
"""
Get the trained word vector based on Wikipedia corpus.
"""
return paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5)
return paddle.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5)
def test():
......@@ -235,23 +232,23 @@ def test():
"""
word_dict, verb_dict, label_dict = get_dict()
reader = corpus_reader(
paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5),
paddle.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5),
words_name='conll05st-release/test.wsj/words/test.wsj.words.gz',
props_name='conll05st-release/test.wsj/props/test.wsj.props.gz')
return reader_creator(reader, word_dict, verb_dict, label_dict)
def fetch():
paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)
paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)
paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)
paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5)
paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5)
paddle.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)
paddle.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)
paddle.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)
paddle.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5)
paddle.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5)
def convert(path):
"""
Converts dataset to recordio format
"""
paddle.v2.dataset.common.convert(path, test(), 1000, "conl105_train")
paddle.v2.dataset.common.convert(path, test(), 1000, "conl105_test")
paddle.dataset.common.convert(path, test(), 1000, "conl105_train")
paddle.dataset.common.convert(path, test(), 1000, "conl105_test")
......@@ -34,8 +34,8 @@ import functools
from common import download
import tarfile
import scipy.io as scio
from paddle.v2.image import *
from paddle.v2.reader import *
from paddle.dataset.image import *
from paddle.reader import *
import os
import numpy as np
from multiprocessing import cpu_count
......
......@@ -20,7 +20,7 @@ of 25,000 highly polar movie reviews for training, and 25,000 for testing.
Besides, this module also provides API for building dictionary.
"""
import paddle.v2.dataset.common
import paddle.dataset.common
import collections
import tarfile
import re
......@@ -37,8 +37,7 @@ def tokenize(pattern):
Read files that match the given pattern. Tokenize and yield each file.
"""
with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb',
MD5)) as tarf:
with tarfile.open(paddle.dataset.common.download(URL, 'imdb', MD5)) as tarf:
# Note that we should use tarfile.next(), which does
# sequential access of member files, other than
# tarfile.extractfile, which does random access and might
......@@ -136,7 +135,7 @@ def word_dict():
def fetch():
paddle.v2.dataset.common.download(URL, 'imdb', MD5)
paddle.dataset.common.download(URL, 'imdb', MD5)
def convert(path):
......@@ -144,5 +143,5 @@ def convert(path):
Converts dataset to recordio format
"""
w = word_dict()
paddle.v2.dataset.common.convert(path, lambda: train(w), 1000, "imdb_train")
paddle.v2.dataset.common.convert(path, lambda: test(w), 1000, "imdb_test")
paddle.dataset.common.convert(path, lambda: train(w), 1000, "imdb_train")
paddle.dataset.common.convert(path, lambda: test(w), 1000, "imdb_test")
......@@ -18,7 +18,7 @@ This module will download dataset from
http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set
into paddle reader creators.
"""
import paddle.v2.dataset.common
import paddle.dataset.common
import collections
import tarfile
......@@ -54,9 +54,9 @@ def build_dict(min_word_freq=50):
train_filename = './simple-examples/data/ptb.train.txt'
test_filename = './simple-examples/data/ptb.valid.txt'
with tarfile.open(
paddle.v2.dataset.common.download(
paddle.v2.dataset.imikolov.URL, 'imikolov',
paddle.v2.dataset.imikolov.MD5)) as tf:
paddle.dataset.common.download(paddle.dataset.imikolov.URL,
'imikolov',
paddle.dataset.imikolov.MD5)) as tf:
trainf = tf.extractfile(train_filename)
testf = tf.extractfile(test_filename)
word_freq = word_count(testf, word_count(trainf))
......@@ -77,9 +77,9 @@ def build_dict(min_word_freq=50):
def reader_creator(filename, word_idx, n, data_type):
def reader():
with tarfile.open(
paddle.v2.dataset.common.download(
paddle.v2.dataset.imikolov.URL, 'imikolov',
paddle.v2.dataset.imikolov.MD5)) as tf:
paddle.dataset.common.download(
paddle.dataset.imikolov.URL, 'imikolov',
paddle.dataset.imikolov.MD5)) as tf:
f = tf.extractfile(filename)
UNK = word_idx['<unk>']
......@@ -145,7 +145,7 @@ def test(word_idx, n, data_type=DataType.NGRAM):
def fetch():
paddle.v2.dataset.common.download(URL, "imikolov", MD5)
paddle.dataset.common.download(URL, "imikolov", MD5)
def convert(path):
......@@ -154,8 +154,7 @@ def convert(path):
"""
N = 5
word_dict = build_dict()
paddle.v2.dataset.common.convert(path,
train(word_dict, N), 1000,
"imikolov_train")
paddle.v2.dataset.common.convert(path,
test(word_dict, N), 1000, "imikolov_test")
paddle.dataset.common.convert(path,
train(word_dict, N), 1000, "imikolov_train")
paddle.dataset.common.convert(path,
test(word_dict, N), 1000, "imikolov_test")
......@@ -17,7 +17,7 @@ MNIST dataset.
This module will download dataset from http://yann.lecun.com/exdb/mnist/ and
parse training set and test set into paddle reader creators.
"""
import paddle.v2.dataset.common
import paddle.dataset.common
import subprocess
import numpy
import platform
......@@ -85,10 +85,10 @@ def train():
:rtype: callable
"""
return reader_creator(
paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist',
TRAIN_IMAGE_MD5),
paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist',
TRAIN_LABEL_MD5), 100)
paddle.dataset.common.download(TRAIN_IMAGE_URL, 'mnist',
TRAIN_IMAGE_MD5),
paddle.dataset.common.download(TRAIN_LABEL_URL, 'mnist',
TRAIN_LABEL_MD5), 100)
def test():
......@@ -102,22 +102,21 @@ def test():
:rtype: callable
"""
return reader_creator(
paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist',
TEST_IMAGE_MD5),
paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist',
TEST_LABEL_MD5), 100)
paddle.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5),
paddle.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5),
100)
def fetch():
paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5)
paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5)
paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
paddle.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5)
paddle.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
paddle.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5)
paddle.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
def convert(path):
"""
Converts dataset to recordio format
"""
paddle.v2.dataset.common.convert(path, train(), 1000, "minist_train")
paddle.v2.dataset.common.convert(path, test(), 1000, "minist_test")
paddle.dataset.common.convert(path, train(), 1000, "minist_train")
paddle.dataset.common.convert(path, test(), 1000, "minist_test")
......@@ -23,7 +23,7 @@ set and test set into paddle reader creators.
"""
import zipfile
import paddle.v2.dataset.common
import paddle.dataset.common
import re
import random
import functools
......@@ -100,7 +100,7 @@ USER_INFO = None
def __initialize_meta_info__():
fn = paddle.v2.dataset.common.download(URL, "movielens", MD5)
fn = paddle.dataset.common.download(URL, "movielens", MD5)
global MOVIE_INFO
if MOVIE_INFO is None:
pattern = re.compile(r'^(.*)\((\d+)\)$')
......@@ -247,15 +247,15 @@ def unittest():
def fetch():
paddle.v2.dataset.common.download(URL, "movielens", MD5)
paddle.dataset.common.download(URL, "movielens", MD5)
def convert(path):
"""
Converts dataset to recordio format
"""
paddle.v2.dataset.common.convert(path, train(), 1000, "movielens_train")
paddle.v2.dataset.common.convert(path, test(), 1000, "movielens_test")
paddle.dataset.common.convert(path, train(), 1000, "movielens_train")
paddle.dataset.common.convert(path, test(), 1000, "movielens_test")
if __name__ == '__main__':
......
......@@ -26,7 +26,7 @@ from itertools import chain
import nltk
from nltk.corpus import movie_reviews
import paddle.v2.dataset.common
import paddle.dataset.common
__all__ = ['train', 'test', 'get_word_dict', 'convert']
NUM_TRAINING_INSTANCES = 1600
......@@ -39,13 +39,13 @@ def download_data_if_not_yet():
"""
try:
# make sure that nltk can find the data
if paddle.v2.dataset.common.DATA_HOME not in nltk.data.path:
nltk.data.path.append(paddle.v2.dataset.common.DATA_HOME)
if paddle.dataset.common.DATA_HOME not in nltk.data.path:
nltk.data.path.append(paddle.dataset.common.DATA_HOME)
movie_reviews.categories()
except LookupError:
print "Downloading movie_reviews data set, please wait....."
nltk.download(
'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME)
'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME)
print "Download data set success....."
print "Path is " + nltk.data.find('corpora/movie_reviews').path
......@@ -129,13 +129,12 @@ def test():
def fetch():
nltk.download(
'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME)
nltk.download('movie_reviews', download_dir=paddle.dataset.common.DATA_HOME)
def convert(path):
"""
Converts dataset to recordio format
"""
paddle.v2.dataset.common.convert(path, train, 1000, "sentiment_train")
paddle.v2.dataset.common.convert(path, test, 1000, "sentiment_test")
paddle.dataset.common.convert(path, train, 1000, "sentiment_train")
paddle.dataset.common.convert(path, test, 1000, "sentiment_test")
py_test(test_image SRCS test_image.py)
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.cifar
import paddle.dataset.cifar
import unittest
......@@ -29,25 +29,25 @@ class TestCIFAR(unittest.TestCase):
def test_test10(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.cifar.test10())
paddle.dataset.cifar.test10())
self.assertEqual(instances, 10000)
self.assertEqual(max_label_value, 9)
def test_train10(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.cifar.train10())
paddle.dataset.cifar.train10())
self.assertEqual(instances, 50000)
self.assertEqual(max_label_value, 9)
def test_test100(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.cifar.test100())
paddle.dataset.cifar.test100())
self.assertEqual(instances, 10000)
self.assertEqual(max_label_value, 99)
def test_train100(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.cifar.train100())
paddle.dataset.cifar.train100())
self.assertEqual(instances, 50000)
self.assertEqual(max_label_value, 99)
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.common
import paddle.dataset.common
import unittest
import tempfile
import glob
......@@ -24,14 +24,14 @@ class TestCommon(unittest.TestCase):
with open(temp_path, 'w') as f:
f.write("Hello\n")
self.assertEqual('09f7e02f1290be211da707a266f153b3',
paddle.v2.dataset.common.md5file(temp_path))
paddle.dataset.common.md5file(temp_path))
def test_download(self):
yi_avatar = 'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460'
self.assertEqual(
paddle.v2.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460',
paddle.v2.dataset.common.download(
yi_avatar, 'test', 'f75287202d6622414c706c36c16f8e0d'))
paddle.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460',
paddle.dataset.common.download(yi_avatar, 'test',
'f75287202d6622414c706c36c16f8e0d'))
def test_split(self):
def test_reader():
......@@ -42,7 +42,7 @@ class TestCommon(unittest.TestCase):
return reader
_, temp_path = tempfile.mkstemp()
paddle.v2.dataset.common.split(
paddle.dataset.common.split(
test_reader(), 4, suffix=temp_path + '/test-%05d.pickle')
files = glob.glob(temp_path + '/test-%05d.pickle')
self.assertEqual(len(files), 3)
......@@ -52,7 +52,7 @@ class TestCommon(unittest.TestCase):
for x in xrange(5):
with open(temp_path + '/%05d.test' % x) as f:
f.write('%d\n' % x)
reader = paddle.v2.dataset.common.cluster_files_reader(
reader = paddle.dataset.common.cluster_files_reader(
temp_path + '/*.test', 5, 0)
for idx, e in enumerate(reader()):
self.assertEqual(e, str("0"))
......@@ -69,9 +69,9 @@ class TestCommon(unittest.TestCase):
return reader
path = tempfile.mkdtemp()
paddle.v2.dataset.common.convert(path,
test_reader(), num_shards,
'random_images')
paddle.dataset.common.convert(path,
test_reader(), num_shards,
'random_images')
files = glob.glob(path + '/random_images-*')
self.assertEqual(len(files), num_shards)
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.flowers
import paddle.dataset.flowers
import unittest
......@@ -30,19 +30,19 @@ class TestFlowers(unittest.TestCase):
def test_train(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.flowers.train())
paddle.dataset.flowers.train())
self.assertEqual(instances, 6149)
self.assertEqual(max_label_value, 102)
def test_test(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.flowers.test())
paddle.dataset.flowers.test())
self.assertEqual(instances, 1020)
self.assertEqual(max_label_value, 102)
def test_valid(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.flowers.valid())
paddle.dataset.flowers.valid())
self.assertEqual(instances, 1020)
self.assertEqual(max_label_value, 102)
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.imdb
import paddle.dataset.imdb
import unittest
import re
......@@ -30,15 +30,13 @@ class TestIMDB(unittest.TestCase):
def test_build_dict(self):
if self.word_idx == None:
self.word_idx = paddle.v2.dataset.imdb.build_dict(TRAIN_PATTERN,
150)
self.word_idx = paddle.dataset.imdb.build_dict(TRAIN_PATTERN, 150)
self.assertEqual(len(self.word_idx), 7036)
def check_dataset(self, dataset, expected_size):
if self.word_idx == None:
self.word_idx = paddle.v2.dataset.imdb.build_dict(TRAIN_PATTERN,
150)
self.word_idx = paddle.dataset.imdb.build_dict(TRAIN_PATTERN, 150)
sum = 0
for l in dataset(self.word_idx):
......@@ -47,10 +45,10 @@ class TestIMDB(unittest.TestCase):
self.assertEqual(sum, expected_size)
def test_train(self):
self.check_dataset(paddle.v2.dataset.imdb.train, 25000)
self.check_dataset(paddle.dataset.imdb.train, 25000)
def test_test(self):
self.check_dataset(paddle.v2.dataset.imdb.test, 25000)
self.check_dataset(paddle.dataset.imdb.test, 25000)
if __name__ == '__main__':
......
......@@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.imikolov
import paddle.dataset.imikolov
import unittest
WORD_DICT = paddle.v2.dataset.imikolov.build_dict()
WORD_DICT = paddle.dataset.imikolov.build_dict()
class TestMikolov(unittest.TestCase):
......@@ -25,7 +25,7 @@ class TestMikolov(unittest.TestCase):
def test_train(self):
n = 5
self.check_reader(paddle.v2.dataset.imikolov.train(WORD_DICT, n), n)
self.check_reader(paddle.dataset.imikolov.train(WORD_DICT, n), n)
first_line = 'aer banknote berlitz calloway centrust cluett fromstein '\
'gitano guterman hydro-quebec ipo kia memotec mlx nahb punts '\
......@@ -34,16 +34,16 @@ class TestMikolov(unittest.TestCase):
WORD_DICT.get(ch, WORD_DICT['<unk>'])
for ch in first_line.split(' ')
]
for l in paddle.v2.dataset.imikolov.train(
for l in paddle.dataset.imikolov.train(
WORD_DICT, n=-1,
data_type=paddle.v2.dataset.imikolov.DataType.SEQ)():
data_type=paddle.dataset.imikolov.DataType.SEQ)():
read_line = l[0][1:]
break
self.assertEqual(first_line, read_line)
def test_test(self):
n = 5
self.check_reader(paddle.v2.dataset.imikolov.test(WORD_DICT, n), n)
self.check_reader(paddle.dataset.imikolov.test(WORD_DICT, n), n)
first_line = 'consumers may want to move their telephones a little '\
'closer to the tv set'
......@@ -51,9 +51,9 @@ class TestMikolov(unittest.TestCase):
WORD_DICT.get(ch, WORD_DICT['<unk>'])
for ch in first_line.split(' ')
]
for l in paddle.v2.dataset.imikolov.test(
for l in paddle.dataset.imikolov.test(
WORD_DICT, n=-1,
data_type=paddle.v2.dataset.imikolov.DataType.SEQ)():
data_type=paddle.dataset.imikolov.DataType.SEQ)():
read_line = l[0][1:]
break
self.assertEqual(first_line, read_line)
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.mnist
import paddle.dataset.mnist
import unittest
......@@ -29,13 +29,13 @@ class TestMNIST(unittest.TestCase):
def test_train(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.mnist.train())
paddle.dataset.mnist.train())
self.assertEqual(instances, 60000)
self.assertEqual(max_label_value, 9)
def test_test(self):
instances, max_label_value = self.check_reader(
paddle.v2.dataset.mnist.test())
paddle.dataset.mnist.test())
self.assertEqual(instances, 10000)
self.assertEqual(max_label_value, 9)
......
......@@ -12,19 +12,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.mq2007
import paddle.dataset.mq2007
import unittest
class TestMQ2007(unittest.TestCase):
def test_pairwise(self):
for label, query_left, query_right in paddle.v2.dataset.mq2007.test(
for label, query_left, query_right in paddle.dataset.mq2007.test(
format="pairwise"):
self.assertEqual(query_left.shape(), (46, ))
self.assertEqual(query_right.shape(), (46, ))
def test_listwise(self):
for label_array, query_array in paddle.v2.dataset.mq2007.test(
for label_array, query_array in paddle.dataset.mq2007.test(
format="listwise"):
self.assertEqual(len(label_array), len(query_array))
......
......@@ -15,7 +15,7 @@
import unittest
import numpy as np
import paddle.v2.image as image
import paddle.dataset.image as image
class Image(unittest.TestCase):
......
......@@ -17,7 +17,7 @@
import unittest
import nltk
import paddle.v2.dataset.sentiment as st
import paddle.dataset.sentiment as st
from nltk.corpus import movie_reviews
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.voc2012
import paddle.dataset.voc2012
import unittest
......@@ -26,15 +26,15 @@ class TestVOC(unittest.TestCase):
return sum
def test_train(self):
count = self.check_reader(paddle.v2.dataset.voc_seg.train())
count = self.check_reader(paddle.dataset.voc_seg.train())
self.assertEqual(count, 2913)
def test_test(self):
count = self.check_reader(paddle.v2.dataset.voc_seg.test())
count = self.check_reader(paddle.dataset.voc_seg.test())
self.assertEqual(count, 1464)
def test_val(self):
count = self.check_reader(paddle.v2.dataset.voc_seg.val())
count = self.check_reader(paddle.dataset.voc_seg.val())
self.assertEqual(count, 1449)
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.wmt16
import paddle.dataset.wmt16
import unittest
......@@ -34,28 +34,28 @@ class TestWMT16(unittest.TestCase):
def test_train(self):
for idx, sample in enumerate(
paddle.v2.dataset.wmt16.train(
paddle.dataset.wmt16.train(
src_dict_size=100000, trg_dict_size=100000)()):
if idx >= 10: break
self.checkout_one_sample(sample)
def test_test(self):
for idx, sample in enumerate(
paddle.v2.dataset.wmt16.test(
paddle.dataset.wmt16.test(
src_dict_size=1000, trg_dict_size=1000)()):
if idx >= 10: break
self.checkout_one_sample(sample)
def test_val(self):
for idx, sample in enumerate(
paddle.v2.dataset.wmt16.validation(
paddle.dataset.wmt16.validation(
src_dict_size=1000, trg_dict_size=1000)()):
if idx >= 10: break
self.checkout_one_sample(sample)
def test_get_dict(self):
dict_size = 1000
word_dict = paddle.v2.dataset.wmt16.get_dict("en", dict_size, True)
word_dict = paddle.dataset.wmt16.get_dict("en", dict_size, True)
self.assertEqual(len(word_dict), dict_size)
self.assertEqual(word_dict[0], "<s>")
self.assertEqual(word_dict[1], "<e>")
......
......@@ -21,8 +21,7 @@ parse training set and test set into paddle reader creators.
import numpy as np
import os
import paddle.v2.dataset.common
from paddle.v2.parameters import Parameters
import paddle.dataset.common
__all__ = ['train', 'test']
......@@ -85,7 +84,7 @@ def train():
:rtype: callable
"""
global UCI_TRAIN_DATA
load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5))
load_data(paddle.dataset.common.download(URL, 'uci_housing', MD5))
def reader():
for d in UCI_TRAIN_DATA:
......@@ -105,7 +104,7 @@ def test():
:rtype: callable
"""
global UCI_TEST_DATA
load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5))
load_data(paddle.dataset.common.download(URL, 'uci_housing', MD5))
def reader():
for d in UCI_TEST_DATA:
......@@ -114,21 +113,13 @@ def test():
return reader
def model():
tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'fit_a_line.tar',
MD5_MODEL)
with open(tar_file, 'r') as f:
parameters = Parameters.from_tar(f)
return parameters
def fetch():
paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)
paddle.dataset.common.download(URL, 'uci_housing', MD5)
def convert(path):
"""
Converts dataset to recordio format
"""
paddle.v2.dataset.common.convert(path, train(), 1000, "uci_housing_train")
paddle.v2.dataset.common.convert(path, test(), 1000, "uci_houseing_test")
paddle.dataset.common.convert(path, train(), 1000, "uci_housing_train")
paddle.dataset.common.convert(path, test(), 1000, "uci_houseing_test")
......@@ -22,8 +22,8 @@ with segmentation has been increased from 7,062 to 9,993.
import tarfile
import io
import numpy as np
from paddle.v2.dataset.common import download
from paddle.v2.image import *
from paddle.dataset.common import download
from paddle.dataset.image import *
from PIL import Image
__all__ = ['train', 'test', 'val']
......
......@@ -22,8 +22,7 @@ parse training set and test set into paddle reader creators.
import tarfile
import gzip
import paddle.v2.dataset.common
from paddle.v2.parameters import Parameters
import paddle.dataset.common
__all__ = [
'train',
......@@ -123,7 +122,7 @@ def train(dict_size):
:rtype: callable
"""
return reader_creator(
paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
'train/train', dict_size)
......@@ -139,27 +138,20 @@ def test(dict_size):
:rtype: callable
"""
return reader_creator(
paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
'test/test', dict_size)
def gen(dict_size):
return reader_creator(
paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
'gen/gen', dict_size)
def model():
tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL)
with gzip.open(tar_file, 'r') as f:
parameters = Parameters.from_tar(f)
return parameters
def get_dict(dict_size, reverse=True):
# if reverse = False, return dict = {'a':'001', 'b':'002', ...}
# else reverse = true, return dict = {'001':'a', '002':'b', ...}
tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
tar_file = paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
src_dict, trg_dict = __read_to_dict(tar_file, dict_size)
if reverse:
src_dict = {v: k for k, v in src_dict.items()}
......@@ -168,8 +160,8 @@ def get_dict(dict_size, reverse=True):
def fetch():
paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL)
paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
paddle.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL)
def convert(path):
......@@ -177,6 +169,5 @@ def convert(path):
Converts dataset to recordio format
"""
dict_size = 30000
paddle.v2.dataset.common.convert(path,
train(dict_size), 1000, "wmt14_train")
paddle.v2.dataset.common.convert(path, test(dict_size), 1000, "wmt14_test")
paddle.dataset.common.convert(path, train(dict_size), 1000, "wmt14_train")
paddle.dataset.common.convert(path, test(dict_size), 1000, "wmt14_test")
......@@ -33,7 +33,7 @@ import tarfile
import gzip
from collections import defaultdict
import paddle.v2.dataset.common
import paddle.dataset.common
__all__ = [
"train",
......@@ -76,7 +76,7 @@ def __build_dict(tar_file, dict_size, save_path, lang):
def __load_dict(tar_file, dict_size, lang, reverse=False):
dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME,
dict_path = os.path.join(paddle.dataset.common.DATA_HOME,
"wmt16/%s_%d.dict" % (lang, dict_size))
if not os.path.exists(dict_path) or (
len(open(dict_path, "r").readlines()) != dict_size):
......@@ -178,8 +178,8 @@ def train(src_dict_size, trg_dict_size, src_lang="en"):
src_lang)
return reader_creator(
tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
"wmt16.tar.gz"),
tar_file=paddle.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
"wmt16.tar.gz"),
file_name="wmt16/train",
src_dict_size=src_dict_size,
trg_dict_size=trg_dict_size,
......@@ -227,8 +227,8 @@ def test(src_dict_size, trg_dict_size, src_lang="en"):
src_lang)
return reader_creator(
tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
"wmt16.tar.gz"),
tar_file=paddle.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
"wmt16.tar.gz"),
file_name="wmt16/test",
src_dict_size=src_dict_size,
trg_dict_size=trg_dict_size,
......@@ -274,8 +274,8 @@ def validation(src_dict_size, trg_dict_size, src_lang="en"):
src_lang)
return reader_creator(
tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
"wmt16.tar.gz"),
tar_file=paddle.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
"wmt16.tar.gz"),
file_name="wmt16/val",
src_dict_size=src_dict_size,
trg_dict_size=trg_dict_size,
......@@ -303,12 +303,12 @@ def get_dict(lang, dict_size, reverse=False):
if lang == "en": dict_size = min(dict_size, TOTAL_EN_WORDS)
else: dict_size = min(dict_size, TOTAL_DE_WORDS)
dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME,
dict_path = os.path.join(paddle.dataset.common.DATA_HOME,
"wmt16/%s_%d.dict" % (lang, dict_size))
assert os.path.exists(dict_path), "Word dictionary does not exist. "
"Please invoke paddle.dataset.wmt16.train/test/validation first "
"to build the dictionary."
tar_file = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16.tar.gz")
tar_file = os.path.join(paddle.dataset.common.DATA_HOME, "wmt16.tar.gz")
return __load_dict(tar_file, dict_size, lang, reverse)
......@@ -323,7 +323,7 @@ def convert(path, src_dict_size, trg_dict_size, src_lang):
"""Converts dataset to recordio format.
"""
paddle.v2.dataset.common.convert(
paddle.dataset.common.convert(
path,
train(
src_dict_size=src_dict_size,
......@@ -331,7 +331,7 @@ def convert(path, src_dict_size, trg_dict_size, src_lang):
src_lang=src_lang),
1000,
"wmt16_train")
paddle.v2.dataset.common.convert(
paddle.dataset.common.convert(
path,
test(
src_dict_size=src_dict_size,
......@@ -339,7 +339,7 @@ def convert(path, src_dict_size, trg_dict_size, src_lang):
src_lang=src_lang),
1000,
"wmt16_test")
paddle.v2.dataset.common.convert(
paddle.dataset.common.convert(
path,
validation(
src_dict_size=src_dict_size,
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import numpy as np
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.framework as framework
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
import contextlib
import numpy
......
......@@ -14,7 +14,7 @@
from __future__ import print_function
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
import contextlib
import math
......
......@@ -15,8 +15,8 @@
import math
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle
import paddle.dataset.conll05 as conll05
import paddle.fluid as fluid
from paddle.fluid.initializer import init_on_cpu
import contextlib
......
......@@ -14,7 +14,7 @@
import contextlib
import numpy as np
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
import paddle.fluid.framework as framework
import paddle.fluid.layers as pd
......
......@@ -14,7 +14,7 @@
from __future__ import print_function
import argparse
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
import sys
import numpy
import unittest
......
......@@ -16,7 +16,7 @@ import math
import sys
import os
import numpy as np
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
import paddle.fluid.framework as framework
import paddle.fluid.layers as layers
......
......@@ -15,7 +15,7 @@ from __future__ import print_function
import unittest
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
import contextlib
import math
import numpy as np
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
import unittest
import os
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import numpy as np
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
import math
import sys
......
......@@ -16,7 +16,7 @@ from __future__ import print_function
import sys
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
import math
import sys
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import numpy as np
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.framework as framework
......
......@@ -19,7 +19,7 @@ import os
import matplotlib
import numpy
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
matplotlib.use('Agg')
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
import numpy as np
import sys
......
......@@ -14,7 +14,7 @@
from __future__ import print_function
import numpy as np
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
BATCH_SIZE = 128
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import numpy as np
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
BATCH_SIZE = 128
......
......@@ -12,12 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.fluid.layers as layers
from paddle.fluid.framework import Program, program_guard, default_main_program, default_startup_program
from paddle.fluid.executor import Executor
from paddle.fluid.optimizer import MomentumOptimizer
import paddle.fluid.core as core
import paddle.v2 as paddle
import unittest
import numpy as np
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle
import unittest
import numpy
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import unittest
import paddle.v2 as paddle
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid.backward import append_backward
......
......@@ -15,8 +15,8 @@
import unittest
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle.v2.dataset.mnist as mnist
import paddle
import paddle.dataset.mnist as mnist
class TestMultipleReader(unittest.TestCase):
......
......@@ -15,8 +15,8 @@
import unittest
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle.v2.dataset.mnist as mnist
import paddle
import paddle.dataset.mnist as mnist
from shutil import copyfile
......
......@@ -16,9 +16,9 @@ import numpy
import unittest
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle.v2.dataset.mnist as mnist
import paddle.v2.dataset.wmt16 as wmt16
import paddle
import paddle.dataset.mnist as mnist
import paddle.dataset.wmt16 as wmt16
def simple_fc_net():
......
......@@ -15,8 +15,8 @@
import unittest
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle.v2.dataset.mnist as mnist
import paddle
import paddle.dataset.mnist as mnist
class TestRecordIO(unittest.TestCase):
......
......@@ -16,7 +16,7 @@ Creator package contains some simple reader creator, which could
be used in user program.
"""
__all__ = ['np_array', 'text_file', 'recordio', 'cloud_reader']
__all__ = ['np_array', 'text_file', 'recordio']
def np_array(x):
......@@ -66,7 +66,7 @@ def recordio(paths, buf_size=100):
"""
import recordio as rec
import paddle.v2.reader.decorator as dec
import paddle.reader.decorator as dec
import cPickle as pickle
def reader():
......@@ -83,48 +83,3 @@ def recordio(paths, buf_size=100):
f.close()
return dec.buffered(reader, buf_size)
pass_num = 0
def cloud_reader(paths, etcd_endpoints, timeout_sec=5, buf_size=64):
"""
Create a data reader that yield a record one by one from
the paths:
:paths: path of recordio files, can be a string or a string list.
:etcd_endpoints: the endpoints for etcd cluster
:returns: data reader of recordio files.
.. code-block:: python
from paddle.v2.reader.creator import cloud_reader
etcd_endpoints = "http://127.0.0.1:2379"
trainer.train.(
reader=cloud_reader(["/work/dataset/uci_housing/uci_housing*"], etcd_endpoints),
)
"""
import os
import cPickle as pickle
import paddle.v2.master as master
c = master.client(etcd_endpoints, timeout_sec, buf_size)
if isinstance(paths, basestring):
path = [paths]
else:
path = paths
c.set_dataset(path)
def reader():
global pass_num
c.paddle_start_get_records(pass_num)
pass_num += 1
while True:
r, e = c.next_record()
if not r:
if e != -2:
print "get record error: ", e
break
yield pickle.loads(r)
return reader
......@@ -28,14 +28,14 @@
import os
import unittest
import numpy as np
import paddle.v2.reader.creator
import paddle.reader.creator
class TestNumpyArray(unittest.TestCase):
def test_numpy_array(self):
l = [[1, 2, 3], [4, 5, 6]]
x = np.array(l, np.int32)
reader = paddle.v2.reader.creator.np_array(x)
reader = paddle.reader.creator.np_array(x)
for idx, e in enumerate(reader()):
self.assertItemsEqual(e, l[idx])
......@@ -43,14 +43,14 @@ class TestNumpyArray(unittest.TestCase):
class TestTextFile(unittest.TestCase):
def test_text_file(self):
path = os.path.join(os.path.dirname(__file__), "test_data_creator.txt")
reader = paddle.v2.reader.creator.text_file(path)
reader = paddle.reader.creator.text_file(path)
for idx, e in enumerate(reader()):
self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1))
class TestRecordIO(unittest.TestCase):
def do_test(self, path):
reader = paddle.v2.reader.creator.recordio(path)
reader = paddle.reader.creator.recordio(path)
idx = 0
for e in reader():
if idx == 0:
......
......@@ -15,7 +15,7 @@
import time
import unittest
import paddle.v2.reader
import paddle.reader
def reader_creator_10(dur):
......@@ -39,7 +39,7 @@ class TestMap(unittest.TestCase):
yield "h"
yield "i"
r = paddle.v2.reader.map_readers(tokenize, read)
r = paddle.reader.map_readers(tokenize, read)
for i, e in enumerate(r()):
self.assertEqual(e, i)
......@@ -47,7 +47,7 @@ class TestMap(unittest.TestCase):
class TestBuffered(unittest.TestCase):
def test_read(self):
for size in range(20):
b = paddle.v2.reader.buffered(reader_creator_10(0), size)
b = paddle.reader.buffered(reader_creator_10(0), size)
c = 0
for i in b():
self.assertEqual(i, c)
......@@ -56,7 +56,7 @@ class TestBuffered(unittest.TestCase):
def test_buffering(self):
# read have 30ms delay.
b = paddle.v2.reader.buffered(reader_creator_10(0.03), 10)
b = paddle.reader.buffered(reader_creator_10(0.03), 10)
last_time = time.time()
for idx, i in enumerate(b()):
elapsed_time = time.time() - last_time
......@@ -70,17 +70,17 @@ class TestBuffered(unittest.TestCase):
class TestCompose(unittest.TestCase):
def test_compse(self):
reader = paddle.v2.reader.compose(
reader = paddle.reader.compose(
reader_creator_10(0), reader_creator_10(0))
for idx, e in enumerate(reader()):
self.assertEqual(e, (idx, idx))
def test_compose_not_aligned(self):
total = 0
reader = paddle.v2.reader.compose(
paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)),
reader = paddle.reader.compose(
paddle.reader.chain(reader_creator_10(0), reader_creator_10(0)),
reader_creator_10(0))
with self.assertRaises(paddle.v2.reader.ComposeNotAligned):
with self.assertRaises(paddle.reader.ComposeNotAligned):
for e in reader():
total += 1
# expecting 10, not 20
......@@ -88,8 +88,8 @@ class TestCompose(unittest.TestCase):
def test_compose_not_aligned_no_check(self):
total = 0
reader = paddle.v2.reader.compose(
paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)),
reader = paddle.reader.compose(
paddle.reader.chain(reader_creator_10(0), reader_creator_10(0)),
reader_creator_10(0),
check_alignment=False)
for e in reader():
......@@ -100,7 +100,7 @@ class TestCompose(unittest.TestCase):
class TestChain(unittest.TestCase):
def test_chain(self):
c = paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0))
c = paddle.reader.chain(reader_creator_10(0), reader_creator_10(0))
idx = 0
for e in c():
self.assertEqual(e, idx % 10)
......@@ -113,7 +113,7 @@ class TestShuffle(unittest.TestCase):
case = [(0, True), (1, True), (10, False), (100, False)]
a = reader_creator_10(0)
for size, checkEq in case:
s = paddle.v2.reader.shuffle(a, size)
s = paddle.reader.shuffle(a, size)
total = 0
for idx, e in enumerate(s()):
if checkEq:
......@@ -133,9 +133,9 @@ class TestXmap(unittest.TestCase):
for order in orders:
for tNum in thread_nums:
for size in buffered_size:
reader = paddle.v2.reader.xmap_readers(mapper,
reader_creator_10(0),
tNum, size, order)
reader = paddle.reader.xmap_readers(mapper,
reader_creator_10(0),
tNum, size, order)
for n in xrange(3):
result = []
for i in reader():
......@@ -150,7 +150,7 @@ class TestPipeReader(unittest.TestCase):
def test_pipe_reader(self):
def example_reader(myfiles):
for f in myfiles:
pr = paddle.v2.reader.PipeReader("cat %s" % f, bufsize=128)
pr = paddle.reader.PipeReader("cat %s" % f, bufsize=128)
for l in pr.get_line():
yield l
......
......@@ -22,17 +22,13 @@ import data_type
import topology
import networks
import evaluator
from . import dataset
from . import reader
from . import plot
import attr
import op
import pooling
import inference
import networks
import minibatch
import plot
import image
import paddle.trainer.config_parser as cp
__all__ = [
......@@ -48,14 +44,11 @@ __all__ = [
'data_type',
'attr',
'pooling',
'dataset',
'reader',
'topology',
'networks',
'infer',
'plot',
'evaluator',
'image',
'master',
]
......@@ -153,4 +146,3 @@ def init(**kwargs):
infer = inference.infer
batch = minibatch.batch
......@@ -15,7 +15,7 @@
import numpy
import collections
import topology
import minibatch
import paddle
import cPickle
__all__ = ['infer', 'Inference']
......@@ -80,7 +80,7 @@ class Inference(object):
for each_sample in input:
yield each_sample
reader = minibatch.batch(__reader_impl__, batch_size=batch_size)
reader = paddle.batch(__reader_impl__, batch_size=batch_size)
self.__gradient_machine__.start()
for data_batch in reader():
......
......@@ -20,7 +20,7 @@ The primary usage shows below.
.. code-block:: python
import paddle.v2 as paddle
import paddle
img = paddle.layer.data(name='img', type=paddle.data_type.dense_vector(784))
hidden = paddle.layer.fc(input=img, size=200)
......
py_test(test_op SRCS test_op.py)
py_test(test_image SRCS test_image.py)
py_test(test_layer SRCS test_layer.py)
py_test(test_topology SRCS test_topology.py)
py_test(test_rnn_layer SRCS test_rnn_layer.py)
......
......@@ -27,6 +27,7 @@
# limitations under the License.
import unittest
import math
import paddle.dataset as dataset
import paddle.v2 as paddle
......@@ -40,7 +41,7 @@ def wordemb(inlayer):
def train():
word_dict = paddle.dataset.imikolov.build_dict()
word_dict = dataset.imikolov.build_dict()
dict_size = len(word_dict)
# Every layer takes integer value of range [0, dict_size)
firstword = paddle.layer.data(
......
......@@ -63,6 +63,8 @@ write_version_py(filename='@PADDLE_SOURCE_DIR@/python/paddle/version.py')
packages=['paddle',
'paddle.utils',
'paddle.dataset',
'paddle.reader',
'paddle.fluid',
'paddle.fluid.proto',
'paddle.fluid.proto.profiler',
......@@ -73,8 +75,6 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
'paddle.trainer',
'paddle.trainer_config_helpers',
'paddle.v2',
'paddle.v2.dataset',
'paddle.v2.reader',
'paddle.v2.master',
'paddle.v2.plot',
'py_paddle']
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册