diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b0242b20b8d0fd81e624447d56e47865e1bf6438..f5ae553c8571e21b351d0f5507afdf1539843a51 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -73,12 +73,13 @@ add_custom_target(paddle_python ALL DEPENDS ${paddle_python_deps}) set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) if (WITH_TESTING) + add_subdirectory(paddle/reader/tests) + add_subdirectory(paddle/dataset/tests) if(NOT WITH_FLUID_ONLY) add_subdirectory(paddle/trainer_config_helpers/tests) if (WITH_SWIG_PY) # enable v2 API unittest only when paddle swig api is compiled add_subdirectory(paddle/v2/tests) - add_subdirectory(paddle/v2/reader/tests) add_subdirectory(paddle/v2/plot/tests) endif() endif() diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 1030c94e16376c326cb8b32926b8c47625cd38f0..d1cf04161ae4444ebc7da7fbc20e37dafe6c0fb1 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -14,8 +14,14 @@ try: from version import full_version as __version__ from version import commit as __git_commit__ + except ImportError: import sys sys.stderr.write('''Warning with import paddle: you should not import paddle from the source directory; please install paddlepaddle*.whl firstly.''' ) + +import reader +import dataset +import batch +batch = batch.batch diff --git a/python/paddle/v2/minibatch.py b/python/paddle/batch.py similarity index 100% rename from python/paddle/v2/minibatch.py rename to python/paddle/batch.py diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/dataset/__init__.py similarity index 97% rename from python/paddle/v2/dataset/__init__.py rename to python/paddle/dataset/__init__.py index c1acbecd9c313b02d6d33d2d04fd33fc1a8b026e..1fdfd49f1c970d89bfde9d12a24076d38c54ba66 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/dataset/__init__.py @@ -28,6 +28,7 @@ import wmt16 import mq2007 import flowers import voc2012 +import image __all__ = [ 'mnist', @@ -43,4 +44,5 @@ __all__ = [ 'mq2007', 'flowers', 'voc2012', + 'image', ] diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/dataset/cifar.py similarity index 80% rename from python/paddle/v2/dataset/cifar.py rename to python/paddle/dataset/cifar.py index 0a2a1ced11ee5cb2fb407b229ce810d553c2fa46..07f4dcbdab2fecf84a0a7042a48a8c8a9e5f880d 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/dataset/cifar.py @@ -31,7 +31,7 @@ images per class. 
import cPickle import itertools import numpy -import paddle.v2.dataset.common +import paddle.dataset.common import tarfile __all__ = ['train100', 'test100', 'train10', 'test10', 'convert'] @@ -75,7 +75,7 @@ def train100(): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), + paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train') @@ -90,7 +90,7 @@ def test100(): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), + paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test') @@ -105,7 +105,7 @@ def train10(): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), + paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch') @@ -120,20 +120,20 @@ def test10(): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), + paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch') def fetch(): - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5) - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5) + paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5) + paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5) def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train100(), 1000, "cifar_train100") - paddle.v2.dataset.common.convert(path, test100(), 1000, "cifar_test100") - paddle.v2.dataset.common.convert(path, train10(), 1000, "cifar_train10") - paddle.v2.dataset.common.convert(path, test10(), 1000, "cifar_test10") + paddle.dataset.common.convert(path, train100(), 1000, "cifar_train100") + paddle.dataset.common.convert(path, test100(), 1000, "cifar_test100") + paddle.dataset.common.convert(path, train10(), 1000, "cifar_train10") + paddle.dataset.common.convert(path, test10(), 1000, "cifar_test10") diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/dataset/common.py similarity index 93% rename from python/paddle/v2/dataset/common.py rename to python/paddle/dataset/common.py index c6ff09a1d1e3ca56877e986c3ed3ae9ecd0a7316..68660601c161d2332b17b448fae089506238ba78 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/dataset/common.py @@ -19,7 +19,7 @@ import errno import shutil import sys import importlib -import paddle.v2.dataset +import paddle.dataset import cPickle import glob import cPickle as pickle @@ -105,24 +105,24 @@ def download(url, module_name, md5sum, save_name=None): def fetch_all(): for module_name in filter(lambda x: not x.startswith("__"), - dir(paddle.v2.dataset)): + dir(paddle.dataset)): if "fetch" in dir( - importlib.import_module("paddle.v2.dataset.%s" % module_name)): + importlib.import_module("paddle.dataset.%s" % module_name)): getattr( - importlib.import_module("paddle.v2.dataset.%s" % module_name), + importlib.import_module("paddle.dataset.%s" % module_name), "fetch")() def fetch_all_recordio(path): for module_name in filter(lambda x: not x.startswith("__"), - dir(paddle.v2.dataset)): + dir(paddle.dataset)): if "convert" in dir( - importlib.import_module("paddle.v2.dataset.%s" % module_name)) and \ + importlib.import_module("paddle.dataset.%s" % module_name)) and \ not module_name == "common": ds_path = os.path.join(path, module_name) must_mkdirs(ds_path) getattr( - 
importlib.import_module("paddle.v2.dataset.%s" % module_name), + importlib.import_module("paddle.dataset.%s" % module_name), "convert")(ds_path) @@ -130,7 +130,7 @@ def split(reader, line_count, suffix="%05d.pickle", dumper=cPickle.dump): """ you can call the function as: - split(paddle.v2.dataset.cifar.train10(), line_count=1000, + split(paddle.dataset.cifar.train10(), line_count=1000, suffix="imikolov-train-%05d.pickle") the output files as: diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/dataset/conll05.py similarity index 88% rename from python/paddle/v2/dataset/conll05.py rename to python/paddle/dataset/conll05.py index 0d544efac9cd20157f87b5cd3b68f97ab5ed2dbc..4e94ce89892f8e6822c15fdc510805e75dfca988 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/dataset/conll05.py @@ -23,7 +23,7 @@ to initialize SRL model. import tarfile import gzip import itertools -import paddle.v2.dataset.common +import paddle.dataset.common __all__ = ['test, get_dict', 'get_embedding', 'convert'] @@ -203,14 +203,11 @@ def get_dict(): Get the word, verb and label dictionary of Wikipedia corpus. """ word_dict = load_dict( - paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', - WORDDICT_MD5)) + paddle.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)) verb_dict = load_dict( - paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', - VERBDICT_MD5)) + paddle.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)) label_dict = load_label_dict( - paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', - TRGDICT_MD5)) + paddle.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)) return word_dict, verb_dict, label_dict @@ -218,7 +215,7 @@ def get_embedding(): """ Get the trained word vector based on Wikipedia corpus. 
""" - return paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) + return paddle.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) def test(): @@ -235,23 +232,23 @@ def test(): """ word_dict, verb_dict, label_dict = get_dict() reader = corpus_reader( - paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5), + paddle.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5), words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') return reader_creator(reader, word_dict, verb_dict, label_dict) def fetch(): - paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) - paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) - paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) - paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) - paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5) + paddle.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) + paddle.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) + paddle.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) + paddle.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) + paddle.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5) def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, test(), 1000, "conl105_train") - paddle.v2.dataset.common.convert(path, test(), 1000, "conl105_test") + paddle.dataset.common.convert(path, test(), 1000, "conl105_train") + paddle.dataset.common.convert(path, test(), 1000, "conl105_test") diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/dataset/flowers.py similarity index 99% rename from python/paddle/v2/dataset/flowers.py rename to python/paddle/dataset/flowers.py index 7bdddeaabec733ef26b3f766c6437f5c53d65044..f082e33be3357fbe405ab1a1ef5e0e601108a363 100644 --- a/python/paddle/v2/dataset/flowers.py +++ b/python/paddle/dataset/flowers.py @@ -34,8 +34,8 @@ import functools from common import download import tarfile import scipy.io as scio -from paddle.v2.image import * -from paddle.v2.reader import * +from paddle.dataset.image import * +from paddle.reader import * import os import numpy as np from multiprocessing import cpu_count diff --git a/python/paddle/v2/image.py b/python/paddle/dataset/image.py similarity index 100% rename from python/paddle/v2/image.py rename to python/paddle/dataset/image.py diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/dataset/imdb.py similarity index 91% rename from python/paddle/v2/dataset/imdb.py rename to python/paddle/dataset/imdb.py index 37c4296f9bcea7e16daa46f778934331513c30c4..5ff05b1e9b7f4c42909370a21beb140ecdcd6868 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/dataset/imdb.py @@ -20,7 +20,7 @@ of 25,000 highly polar movie reviews for training, and 25,000 for testing. Besides, this module also provides API for building dictionary. """ -import paddle.v2.dataset.common +import paddle.dataset.common import collections import tarfile import re @@ -37,8 +37,7 @@ def tokenize(pattern): Read files that match the given pattern. Tokenize and yield each file. 
""" - with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb', - MD5)) as tarf: + with tarfile.open(paddle.dataset.common.download(URL, 'imdb', MD5)) as tarf: # Note that we should use tarfile.next(), which does # sequential access of member files, other than # tarfile.extractfile, which does random access and might @@ -136,7 +135,7 @@ def word_dict(): def fetch(): - paddle.v2.dataset.common.download(URL, 'imdb', MD5) + paddle.dataset.common.download(URL, 'imdb', MD5) def convert(path): @@ -144,5 +143,5 @@ def convert(path): Converts dataset to recordio format """ w = word_dict() - paddle.v2.dataset.common.convert(path, lambda: train(w), 1000, "imdb_train") - paddle.v2.dataset.common.convert(path, lambda: test(w), 1000, "imdb_test") + paddle.dataset.common.convert(path, lambda: train(w), 1000, "imdb_train") + paddle.dataset.common.convert(path, lambda: test(w), 1000, "imdb_test") diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/dataset/imikolov.py similarity index 86% rename from python/paddle/v2/dataset/imikolov.py rename to python/paddle/dataset/imikolov.py index 617c722c4165cdfed9e650fc968d623ef6ed4391..c6c0a0f54373dd068b2c493f6fbc9c8578593aef 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/dataset/imikolov.py @@ -18,7 +18,7 @@ This module will download dataset from http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set into paddle reader creators. """ -import paddle.v2.dataset.common +import paddle.dataset.common import collections import tarfile @@ -54,9 +54,9 @@ def build_dict(min_word_freq=50): train_filename = './simple-examples/data/ptb.train.txt' test_filename = './simple-examples/data/ptb.valid.txt' with tarfile.open( - paddle.v2.dataset.common.download( - paddle.v2.dataset.imikolov.URL, 'imikolov', - paddle.v2.dataset.imikolov.MD5)) as tf: + paddle.dataset.common.download(paddle.dataset.imikolov.URL, + 'imikolov', + paddle.dataset.imikolov.MD5)) as tf: trainf = tf.extractfile(train_filename) testf = tf.extractfile(test_filename) word_freq = word_count(testf, word_count(trainf)) @@ -77,9 +77,9 @@ def build_dict(min_word_freq=50): def reader_creator(filename, word_idx, n, data_type): def reader(): with tarfile.open( - paddle.v2.dataset.common.download( - paddle.v2.dataset.imikolov.URL, 'imikolov', - paddle.v2.dataset.imikolov.MD5)) as tf: + paddle.dataset.common.download( + paddle.dataset.imikolov.URL, 'imikolov', + paddle.dataset.imikolov.MD5)) as tf: f = tf.extractfile(filename) UNK = word_idx[''] @@ -145,7 +145,7 @@ def test(word_idx, n, data_type=DataType.NGRAM): def fetch(): - paddle.v2.dataset.common.download(URL, "imikolov", MD5) + paddle.dataset.common.download(URL, "imikolov", MD5) def convert(path): @@ -154,8 +154,7 @@ def convert(path): """ N = 5 word_dict = build_dict() - paddle.v2.dataset.common.convert(path, - train(word_dict, N), 1000, - "imikolov_train") - paddle.v2.dataset.common.convert(path, - test(word_dict, N), 1000, "imikolov_test") + paddle.dataset.common.convert(path, + train(word_dict, N), 1000, "imikolov_train") + paddle.dataset.common.convert(path, + test(word_dict, N), 1000, "imikolov_test") diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/dataset/mnist.py similarity index 76% rename from python/paddle/v2/dataset/mnist.py rename to python/paddle/dataset/mnist.py index 9f675bed895223e054cd3bb6e504fe1607f19858..6a1b8b5fac223c0d134cae69a61a0c2c00bc1feb 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/dataset/mnist.py @@ -17,7 +17,7 @@ MNIST dataset. 
 This module will download dataset from http://yann.lecun.com/exdb/mnist/ and
 parse training set and test set into paddle reader creators.
 """
-import paddle.v2.dataset.common
+import paddle.dataset.common
 import subprocess
 import numpy
 import platform
@@ -85,10 +85,10 @@ def train():
     :rtype: callable
     """
     return reader_creator(
-        paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist',
-                                          TRAIN_IMAGE_MD5),
-        paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist',
-                                          TRAIN_LABEL_MD5), 100)
+        paddle.dataset.common.download(TRAIN_IMAGE_URL, 'mnist',
+                                       TRAIN_IMAGE_MD5),
+        paddle.dataset.common.download(TRAIN_LABEL_URL, 'mnist',
+                                       TRAIN_LABEL_MD5), 100)
 
 
 def test():
@@ -102,22 +102,21 @@ def test():
     :rtype: callable
     """
     return reader_creator(
-        paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist',
-                                          TEST_IMAGE_MD5),
-        paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist',
-                                          TEST_LABEL_MD5), 100)
+        paddle.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5),
+        paddle.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5),
+        100)
 
 
 def fetch():
-    paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5)
-    paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
-    paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5)
-    paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
+    paddle.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5)
+    paddle.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5)
+    paddle.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5)
+    paddle.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5)
 
 
 def convert(path):
     """
     Converts dataset to recordio format
     """
-    paddle.v2.dataset.common.convert(path, train(), 1000, "minist_train")
-    paddle.v2.dataset.common.convert(path, test(), 1000, "minist_test")
+    paddle.dataset.common.convert(path, train(), 1000, "mnist_train")
+    paddle.dataset.common.convert(path, test(), 1000, "mnist_test")
diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/dataset/movielens.py
similarity index 95%
rename from python/paddle/v2/dataset/movielens.py
rename to python/paddle/dataset/movielens.py
index 5b61a9420af1bb81e1d826f8a7b69f34c306d382..ab11716202a8298c182e23b661eb1d2ac74bf4da 100644
--- a/python/paddle/v2/dataset/movielens.py
+++ b/python/paddle/dataset/movielens.py
@@ -23,7 +23,7 @@ set and test set into paddle reader creators.
""" import zipfile -import paddle.v2.dataset.common +import paddle.dataset.common import re import random import functools @@ -100,7 +100,7 @@ USER_INFO = None def __initialize_meta_info__(): - fn = paddle.v2.dataset.common.download(URL, "movielens", MD5) + fn = paddle.dataset.common.download(URL, "movielens", MD5) global MOVIE_INFO if MOVIE_INFO is None: pattern = re.compile(r'^(.*)\((\d+)\)$') @@ -247,15 +247,15 @@ def unittest(): def fetch(): - paddle.v2.dataset.common.download(URL, "movielens", MD5) + paddle.dataset.common.download(URL, "movielens", MD5) def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train(), 1000, "movielens_train") - paddle.v2.dataset.common.convert(path, test(), 1000, "movielens_test") + paddle.dataset.common.convert(path, train(), 1000, "movielens_train") + paddle.dataset.common.convert(path, test(), 1000, "movielens_test") if __name__ == '__main__': diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/dataset/mq2007.py similarity index 100% rename from python/paddle/v2/dataset/mq2007.py rename to python/paddle/dataset/mq2007.py diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/dataset/sentiment.py similarity index 87% rename from python/paddle/v2/dataset/sentiment.py rename to python/paddle/dataset/sentiment.py index b0b9757c1a75d215cf8945b5cedbb1239fd43af7..f5461164fe6b816356e42fc7b7dcf388eccfadfb 100644 --- a/python/paddle/v2/dataset/sentiment.py +++ b/python/paddle/dataset/sentiment.py @@ -26,7 +26,7 @@ from itertools import chain import nltk from nltk.corpus import movie_reviews -import paddle.v2.dataset.common +import paddle.dataset.common __all__ = ['train', 'test', 'get_word_dict', 'convert'] NUM_TRAINING_INSTANCES = 1600 @@ -39,13 +39,13 @@ def download_data_if_not_yet(): """ try: # make sure that nltk can find the data - if paddle.v2.dataset.common.DATA_HOME not in nltk.data.path: - nltk.data.path.append(paddle.v2.dataset.common.DATA_HOME) + if paddle.dataset.common.DATA_HOME not in nltk.data.path: + nltk.data.path.append(paddle.dataset.common.DATA_HOME) movie_reviews.categories() except LookupError: print "Downloading movie_reviews data set, please wait....." nltk.download( - 'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME) + 'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME) print "Download data set success....." 
print "Path is " + nltk.data.find('corpora/movie_reviews').path @@ -129,13 +129,12 @@ def test(): def fetch(): - nltk.download( - 'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME) + nltk.download('movie_reviews', download_dir=paddle.dataset.common.DATA_HOME) def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train, 1000, "sentiment_train") - paddle.v2.dataset.common.convert(path, test, 1000, "sentiment_test") + paddle.dataset.common.convert(path, train, 1000, "sentiment_train") + paddle.dataset.common.convert(path, test, 1000, "sentiment_test") diff --git a/python/paddle/dataset/tests/CMakeLists.txt b/python/paddle/dataset/tests/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..485c38a13b573664d8033c237272a10ebb7c9701 --- /dev/null +++ b/python/paddle/dataset/tests/CMakeLists.txt @@ -0,0 +1 @@ +py_test(test_image SRCS test_image.py) diff --git a/python/paddle/v2/tests/cat.jpg b/python/paddle/dataset/tests/cat.jpg similarity index 100% rename from python/paddle/v2/tests/cat.jpg rename to python/paddle/dataset/tests/cat.jpg diff --git a/python/paddle/v2/dataset/tests/cifar_test.py b/python/paddle/dataset/tests/cifar_test.py similarity index 88% rename from python/paddle/v2/dataset/tests/cifar_test.py rename to python/paddle/dataset/tests/cifar_test.py index e0e18229da7818be5752ee592e094a00da286ad9..839125b09dd5c6432e3572374a7345a77a43f7cf 100644 --- a/python/paddle/v2/dataset/tests/cifar_test.py +++ b/python/paddle/dataset/tests/cifar_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.v2.dataset.cifar +import paddle.dataset.cifar import unittest @@ -29,25 +29,25 @@ class TestCIFAR(unittest.TestCase): def test_test10(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.test10()) + paddle.dataset.cifar.test10()) self.assertEqual(instances, 10000) self.assertEqual(max_label_value, 9) def test_train10(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.train10()) + paddle.dataset.cifar.train10()) self.assertEqual(instances, 50000) self.assertEqual(max_label_value, 9) def test_test100(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.test100()) + paddle.dataset.cifar.test100()) self.assertEqual(instances, 10000) self.assertEqual(max_label_value, 99) def test_train100(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.train100()) + paddle.dataset.cifar.train100()) self.assertEqual(instances, 50000) self.assertEqual(max_label_value, 99) diff --git a/python/paddle/v2/dataset/tests/common_test.py b/python/paddle/dataset/tests/common_test.py similarity index 81% rename from python/paddle/v2/dataset/tests/common_test.py rename to python/paddle/dataset/tests/common_test.py index cfa194eba38ea70311c4deeac2635dc0a0103576..e7cc02aa83061599ffefa18de6cb02ac0fc9e9b7 100644 --- a/python/paddle/v2/dataset/tests/common_test.py +++ b/python/paddle/dataset/tests/common_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.v2.dataset.common +import paddle.dataset.common import unittest import tempfile import glob @@ -24,14 +24,14 @@ class TestCommon(unittest.TestCase): with open(temp_path, 'w') as f: f.write("Hello\n") self.assertEqual('09f7e02f1290be211da707a266f153b3', - paddle.v2.dataset.common.md5file(temp_path)) + paddle.dataset.common.md5file(temp_path)) def test_download(self): yi_avatar = 'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460' self.assertEqual( - paddle.v2.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460', - paddle.v2.dataset.common.download( - yi_avatar, 'test', 'f75287202d6622414c706c36c16f8e0d')) + paddle.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460', + paddle.dataset.common.download(yi_avatar, 'test', + 'f75287202d6622414c706c36c16f8e0d')) def test_split(self): def test_reader(): @@ -42,7 +42,7 @@ class TestCommon(unittest.TestCase): return reader _, temp_path = tempfile.mkstemp() - paddle.v2.dataset.common.split( + paddle.dataset.common.split( test_reader(), 4, suffix=temp_path + '/test-%05d.pickle') files = glob.glob(temp_path + '/test-%05d.pickle') self.assertEqual(len(files), 3) @@ -52,7 +52,7 @@ class TestCommon(unittest.TestCase): for x in xrange(5): with open(temp_path + '/%05d.test' % x) as f: f.write('%d\n' % x) - reader = paddle.v2.dataset.common.cluster_files_reader( + reader = paddle.dataset.common.cluster_files_reader( temp_path + '/*.test', 5, 0) for idx, e in enumerate(reader()): self.assertEqual(e, str("0")) @@ -69,9 +69,9 @@ class TestCommon(unittest.TestCase): return reader path = tempfile.mkdtemp() - paddle.v2.dataset.common.convert(path, - test_reader(), num_shards, - 'random_images') + paddle.dataset.common.convert(path, + test_reader(), num_shards, + 'random_images') files = glob.glob(path + '/random_images-*') self.assertEqual(len(files), num_shards) diff --git a/python/paddle/v2/dataset/tests/flowers_test.py b/python/paddle/dataset/tests/flowers_test.py similarity index 89% rename from python/paddle/v2/dataset/tests/flowers_test.py rename to python/paddle/dataset/tests/flowers_test.py index a8ae9a07acc22eb9d3c0cc5ebb07f8f11ed21233..06260fd796ce0271b7cec2f42a8a5a255a02dc24 100644 --- a/python/paddle/v2/dataset/tests/flowers_test.py +++ b/python/paddle/dataset/tests/flowers_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.v2.dataset.flowers +import paddle.dataset.flowers import unittest @@ -30,19 +30,19 @@ class TestFlowers(unittest.TestCase): def test_train(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.flowers.train()) + paddle.dataset.flowers.train()) self.assertEqual(instances, 6149) self.assertEqual(max_label_value, 102) def test_test(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.flowers.test()) + paddle.dataset.flowers.test()) self.assertEqual(instances, 1020) self.assertEqual(max_label_value, 102) def test_valid(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.flowers.valid()) + paddle.dataset.flowers.valid()) self.assertEqual(instances, 1020) self.assertEqual(max_label_value, 102) diff --git a/python/paddle/v2/dataset/tests/imdb_test.py b/python/paddle/dataset/tests/imdb_test.py similarity index 77% rename from python/paddle/v2/dataset/tests/imdb_test.py rename to python/paddle/dataset/tests/imdb_test.py index c4d82f26895d77d05c6e936bd636b1239e1a0cd8..539da049449cd273db0a9e260851ed40e1be0f04 100644 --- a/python/paddle/v2/dataset/tests/imdb_test.py +++ b/python/paddle/dataset/tests/imdb_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.v2.dataset.imdb +import paddle.dataset.imdb import unittest import re @@ -30,15 +30,13 @@ class TestIMDB(unittest.TestCase): def test_build_dict(self): if self.word_idx == None: - self.word_idx = paddle.v2.dataset.imdb.build_dict(TRAIN_PATTERN, - 150) + self.word_idx = paddle.dataset.imdb.build_dict(TRAIN_PATTERN, 150) self.assertEqual(len(self.word_idx), 7036) def check_dataset(self, dataset, expected_size): if self.word_idx == None: - self.word_idx = paddle.v2.dataset.imdb.build_dict(TRAIN_PATTERN, - 150) + self.word_idx = paddle.dataset.imdb.build_dict(TRAIN_PATTERN, 150) sum = 0 for l in dataset(self.word_idx): @@ -47,10 +45,10 @@ class TestIMDB(unittest.TestCase): self.assertEqual(sum, expected_size) def test_train(self): - self.check_dataset(paddle.v2.dataset.imdb.train, 25000) + self.check_dataset(paddle.dataset.imdb.train, 25000) def test_test(self): - self.check_dataset(paddle.v2.dataset.imdb.test, 25000) + self.check_dataset(paddle.dataset.imdb.test, 25000) if __name__ == '__main__': diff --git a/python/paddle/v2/dataset/tests/imikolov_test.py b/python/paddle/dataset/tests/imikolov_test.py similarity index 79% rename from python/paddle/v2/dataset/tests/imikolov_test.py rename to python/paddle/dataset/tests/imikolov_test.py index 714a75d6f1ff31697eec2d893d350a726d6390fe..233fd9fc8cea4cd0b5cd052580030fc8c993693c 100644 --- a/python/paddle/v2/dataset/tests/imikolov_test.py +++ b/python/paddle/dataset/tests/imikolov_test.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.v2.dataset.imikolov
+import paddle.dataset.imikolov
 import unittest
 
-WORD_DICT = paddle.v2.dataset.imikolov.build_dict()
+WORD_DICT = paddle.dataset.imikolov.build_dict()
 
 
 class TestMikolov(unittest.TestCase):
@@ -25,7 +25,7 @@ class TestMikolov(unittest.TestCase):
 
     def test_train(self):
         n = 5
-        self.check_reader(paddle.v2.dataset.imikolov.train(WORD_DICT, n), n)
+        self.check_reader(paddle.dataset.imikolov.train(WORD_DICT, n), n)
 
         first_line = 'aer banknote berlitz calloway centrust cluett fromstein '\
             'gitano guterman hydro-quebec ipo kia memotec mlx nahb punts '\
@@ -34,16 +34,16 @@
             WORD_DICT.get(ch, WORD_DICT['<unk>'])
             for ch in first_line.split(' ')
         ]
-        for l in paddle.v2.dataset.imikolov.train(
+        for l in paddle.dataset.imikolov.train(
                 WORD_DICT, n=-1,
-                data_type=paddle.v2.dataset.imikolov.DataType.SEQ)():
+                data_type=paddle.dataset.imikolov.DataType.SEQ)():
             read_line = l[0][1:]
             break
         self.assertEqual(first_line, read_line)
 
     def test_test(self):
         n = 5
-        self.check_reader(paddle.v2.dataset.imikolov.test(WORD_DICT, n), n)
+        self.check_reader(paddle.dataset.imikolov.test(WORD_DICT, n), n)
 
         first_line = 'consumers may want to move their telephones a little '\
             'closer to the tv set'
@@ -51,9 +51,9 @@
             WORD_DICT.get(ch, WORD_DICT['<unk>'])
             for ch in first_line.split(' ')
         ]
-        for l in paddle.v2.dataset.imikolov.test(
+        for l in paddle.dataset.imikolov.test(
                 WORD_DICT, n=-1,
-                data_type=paddle.v2.dataset.imikolov.DataType.SEQ)():
+                data_type=paddle.dataset.imikolov.DataType.SEQ)():
             read_line = l[0][1:]
             break
         self.assertEqual(first_line, read_line)
diff --git a/python/paddle/v2/dataset/tests/mnist_test.py b/python/paddle/dataset/tests/mnist_test.py
similarity index 91%
rename from python/paddle/v2/dataset/tests/mnist_test.py
rename to python/paddle/dataset/tests/mnist_test.py
index 1d344cac3e7483a351033570fbec75a4d19f4a55..8ada19d3f2ee13e194d08e19a4b86b558c69a0a7 100644
--- a/python/paddle/v2/dataset/tests/mnist_test.py
+++ b/python/paddle/dataset/tests/mnist_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import paddle.v2.dataset.mnist
+import paddle.dataset.mnist
 import unittest
 
 
@@ -29,13 +29,13 @@ class TestMNIST(unittest.TestCase):
 
     def test_train(self):
         instances, max_label_value = self.check_reader(
-            paddle.v2.dataset.mnist.train())
+            paddle.dataset.mnist.train())
         self.assertEqual(instances, 60000)
         self.assertEqual(max_label_value, 9)
 
     def test_test(self):
         instances, max_label_value = self.check_reader(
-            paddle.v2.dataset.mnist.test())
+            paddle.dataset.mnist.test())
         self.assertEqual(instances, 10000)
         self.assertEqual(max_label_value, 9)
 
diff --git a/python/paddle/v2/dataset/tests/mq2007_test.py b/python/paddle/dataset/tests/mq2007_test.py
similarity index 85%
rename from python/paddle/v2/dataset/tests/mq2007_test.py
rename to python/paddle/dataset/tests/mq2007_test.py
index 59847b6c18eadb12123cae824e8bce1051a69d4c..fba388724a8e84591df7150b41f8ea39a850fc31 100644
--- a/python/paddle/v2/dataset/tests/mq2007_test.py
+++ b/python/paddle/dataset/tests/mq2007_test.py
@@ -12,19 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import paddle.v2.dataset.mq2007 +import paddle.dataset.mq2007 import unittest class TestMQ2007(unittest.TestCase): def test_pairwise(self): - for label, query_left, query_right in paddle.v2.dataset.mq2007.test( + for label, query_left, query_right in paddle.dataset.mq2007.test( format="pairwise"): self.assertEqual(query_left.shape(), (46, )) self.assertEqual(query_right.shape(), (46, )) def test_listwise(self): - for label_array, query_array in paddle.v2.dataset.mq2007.test( + for label_array, query_array in paddle.dataset.mq2007.test( format="listwise"): self.assertEqual(len(label_array), len(query_array)) diff --git a/python/paddle/v2/tests/test_image.py b/python/paddle/dataset/tests/test_image.py similarity index 97% rename from python/paddle/v2/tests/test_image.py rename to python/paddle/dataset/tests/test_image.py index c78bbdc40a25878b21ba7e678afedf9d8f0a87cf..8bd56607ae1998935a3b3aaa0e3279515c2a540c 100644 --- a/python/paddle/v2/tests/test_image.py +++ b/python/paddle/dataset/tests/test_image.py @@ -15,7 +15,7 @@ import unittest import numpy as np -import paddle.v2.image as image +import paddle.dataset.image as image class Image(unittest.TestCase): diff --git a/python/paddle/v2/dataset/tests/test_sentiment.py b/python/paddle/dataset/tests/test_sentiment.py similarity index 97% rename from python/paddle/v2/dataset/tests/test_sentiment.py rename to python/paddle/dataset/tests/test_sentiment.py index 407405290734609059c1767600748d530e8a13a6..543f4b7378b583ea3857bf785cf330c43e535c2a 100644 --- a/python/paddle/v2/dataset/tests/test_sentiment.py +++ b/python/paddle/dataset/tests/test_sentiment.py @@ -17,7 +17,7 @@ import unittest import nltk -import paddle.v2.dataset.sentiment as st +import paddle.dataset.sentiment as st from nltk.corpus import movie_reviews diff --git a/python/paddle/v2/dataset/tests/voc2012_test.py b/python/paddle/dataset/tests/voc2012_test.py similarity index 82% rename from python/paddle/v2/dataset/tests/voc2012_test.py rename to python/paddle/dataset/tests/voc2012_test.py index 31e72ebf5eac0508d12783f9ceaa6eef0fa6d353..0d285461a8ae8a9cc69fbec0dcf5efc106b594f0 100644 --- a/python/paddle/v2/dataset/tests/voc2012_test.py +++ b/python/paddle/dataset/tests/voc2012_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.v2.dataset.voc2012
+import paddle.dataset.voc2012
 import unittest
 
 
@@ -26,15 +26,15 @@ class TestVOC(unittest.TestCase):
         return sum
 
     def test_train(self):
-        count = self.check_reader(paddle.v2.dataset.voc_seg.train())
+        count = self.check_reader(paddle.dataset.voc2012.train())
         self.assertEqual(count, 2913)
 
     def test_test(self):
-        count = self.check_reader(paddle.v2.dataset.voc_seg.test())
+        count = self.check_reader(paddle.dataset.voc2012.test())
         self.assertEqual(count, 1464)
 
     def test_val(self):
-        count = self.check_reader(paddle.v2.dataset.voc_seg.val())
+        count = self.check_reader(paddle.dataset.voc2012.val())
         self.assertEqual(count, 1449)
 
diff --git a/python/paddle/v2/dataset/tests/wmt16_test.py b/python/paddle/dataset/tests/wmt16_test.py
similarity index 89%
rename from python/paddle/v2/dataset/tests/wmt16_test.py
rename to python/paddle/dataset/tests/wmt16_test.py
index cef6c3216e7de8d9785a063976e63f88d90b24df..8b949d8bf5212d51016a33da322095bde2038200 100644
--- a/python/paddle/v2/dataset/tests/wmt16_test.py
+++ b/python/paddle/dataset/tests/wmt16_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import paddle.v2.dataset.wmt16
+import paddle.dataset.wmt16
 import unittest
 
 
@@ -34,28 +34,28 @@ class TestWMT16(unittest.TestCase):
 
     def test_train(self):
         for idx, sample in enumerate(
-                paddle.v2.dataset.wmt16.train(
+                paddle.dataset.wmt16.train(
                     src_dict_size=100000, trg_dict_size=100000)()):
             if idx >= 10: break
             self.checkout_one_sample(sample)
 
     def test_test(self):
         for idx, sample in enumerate(
-                paddle.v2.dataset.wmt16.test(
+                paddle.dataset.wmt16.test(
                     src_dict_size=1000, trg_dict_size=1000)()):
             if idx >= 10: break
             self.checkout_one_sample(sample)
 
     def test_val(self):
         for idx, sample in enumerate(
-                paddle.v2.dataset.wmt16.validation(
+                paddle.dataset.wmt16.validation(
                     src_dict_size=1000, trg_dict_size=1000)()):
             if idx >= 10: break
             self.checkout_one_sample(sample)
 
     def test_get_dict(self):
         dict_size = 1000
-        word_dict = paddle.v2.dataset.wmt16.get_dict("en", dict_size, True)
+        word_dict = paddle.dataset.wmt16.get_dict("en", dict_size, True)
         self.assertEqual(len(word_dict), dict_size)
         self.assertEqual(word_dict[0], "<s>")
         self.assertEqual(word_dict[1], "<e>")
diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/dataset/uci_housing.py
similarity index 82%
rename from python/paddle/v2/dataset/uci_housing.py
rename to python/paddle/dataset/uci_housing.py
index f10bf7e42a1ead09b3eba0d61e55701215e4360f..6a56e9d5563c76ab6f524ccea9191693dc227010 100644
--- a/python/paddle/v2/dataset/uci_housing.py
+++ b/python/paddle/dataset/uci_housing.py
@@ -21,8 +21,7 @@ parse training set and test set into paddle reader creators.
 import numpy as np
 import os
-import paddle.v2.dataset.common
-from paddle.v2.parameters import Parameters
+import paddle.dataset.common
 
 __all__ = ['train', 'test']
 
@@ -85,7 +84,7 @@ def train():
     :rtype: callable
     """
     global UCI_TRAIN_DATA
-    load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5))
+    load_data(paddle.dataset.common.download(URL, 'uci_housing', MD5))
 
     def reader():
         for d in UCI_TRAIN_DATA:
@@ -105,7 +104,7 @@ def test():
     :rtype: callable
     """
     global UCI_TEST_DATA
-    load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5))
+    load_data(paddle.dataset.common.download(URL, 'uci_housing', MD5))
 
     def reader():
         for d in UCI_TEST_DATA:
@@ -114,21 +113,13 @@ def test():
     return reader
 
 
-def model():
-    tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'fit_a_line.tar',
-                                                 MD5_MODEL)
-    with open(tar_file, 'r') as f:
-        parameters = Parameters.from_tar(f)
-    return parameters
-
-
 def fetch():
-    paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)
+    paddle.dataset.common.download(URL, 'uci_housing', MD5)
 
 
 def convert(path):
     """
     Converts dataset to recordio format
     """
-    paddle.v2.dataset.common.convert(path, train(), 1000, "uci_housing_train")
-    paddle.v2.dataset.common.convert(path, test(), 1000, "uci_houseing_test")
+    paddle.dataset.common.convert(path, train(), 1000, "uci_housing_train")
+    paddle.dataset.common.convert(path, test(), 1000, "uci_housing_test")
diff --git a/python/paddle/v2/dataset/voc2012.py b/python/paddle/dataset/voc2012.py
similarity index 97%
rename from python/paddle/v2/dataset/voc2012.py
rename to python/paddle/dataset/voc2012.py
index 617e212d67fbe37f9d9663e9c83c62045411fa77..9c945574dbcc15f5cee370206ed7e70ba8ab5014 100644
--- a/python/paddle/v2/dataset/voc2012.py
+++ b/python/paddle/dataset/voc2012.py
@@ -22,8 +22,8 @@ with segmentation has been increased from 7,062 to 9,993.
 import tarfile
 import io
 import numpy as np
-from paddle.v2.dataset.common import download
-from paddle.v2.image import *
+from paddle.dataset.common import download
+from paddle.dataset.image import *
 from PIL import Image
 
 __all__ = ['train', 'test', 'val']
diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/dataset/wmt14.py
similarity index 84%
rename from python/paddle/v2/dataset/wmt14.py
rename to python/paddle/dataset/wmt14.py
index 5104e29051e4480f3a7eb18421f1b519841b009b..f0908c737874fa7335cca5b5f0cba83190c9f90f 100644
--- a/python/paddle/v2/dataset/wmt14.py
+++ b/python/paddle/dataset/wmt14.py
@@ -22,8 +22,7 @@ parse training set and test set into paddle reader creators.
import tarfile import gzip -import paddle.v2.dataset.common -from paddle.v2.parameters import Parameters +import paddle.dataset.common __all__ = [ 'train', @@ -123,7 +122,7 @@ def train(dict_size): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size) @@ -139,27 +138,20 @@ def test(dict_size): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size) def gen(dict_size): return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'gen/gen', dict_size) -def model(): - tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) - with gzip.open(tar_file, 'r') as f: - parameters = Parameters.from_tar(f) - return parameters - - def get_dict(dict_size, reverse=True): # if reverse = False, return dict = {'a':'001', 'b':'002', ...} # else reverse = true, return dict = {'001':'a', '002':'b', ...} - tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) + tar_file = paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) src_dict, trg_dict = __read_to_dict(tar_file, dict_size) if reverse: src_dict = {v: k for k, v in src_dict.items()} @@ -168,8 +160,8 @@ def get_dict(dict_size, reverse=True): def fetch(): - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) - paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) + paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) + paddle.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) def convert(path): @@ -177,6 +169,5 @@ def convert(path): Converts dataset to recordio format """ dict_size = 30000 - paddle.v2.dataset.common.convert(path, - train(dict_size), 1000, "wmt14_train") - paddle.v2.dataset.common.convert(path, test(dict_size), 1000, "wmt14_test") + paddle.dataset.common.convert(path, train(dict_size), 1000, "wmt14_train") + paddle.dataset.common.convert(path, test(dict_size), 1000, "wmt14_test") diff --git a/python/paddle/v2/dataset/wmt16.py b/python/paddle/dataset/wmt16.py similarity index 94% rename from python/paddle/v2/dataset/wmt16.py rename to python/paddle/dataset/wmt16.py index c8818f715beadd9499ae588f2c19a57fbf26f372..ad23338a96df6856c7e15cb5e3bb713021a55bf0 100644 --- a/python/paddle/v2/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -33,7 +33,7 @@ import tarfile import gzip from collections import defaultdict -import paddle.v2.dataset.common +import paddle.dataset.common __all__ = [ "train", @@ -76,7 +76,7 @@ def __build_dict(tar_file, dict_size, save_path, lang): def __load_dict(tar_file, dict_size, lang, reverse=False): - dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME, + dict_path = os.path.join(paddle.dataset.common.DATA_HOME, "wmt16/%s_%d.dict" % (lang, dict_size)) if not os.path.exists(dict_path) or ( len(open(dict_path, "r").readlines()) != dict_size): @@ -178,8 +178,8 @@ def train(src_dict_size, trg_dict_size, src_lang="en"): src_lang) return reader_creator( - tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz"), + tar_file=paddle.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, + "wmt16.tar.gz"), file_name="wmt16/train", src_dict_size=src_dict_size, trg_dict_size=trg_dict_size, @@ -227,8 +227,8 @@ def 
test(src_dict_size, trg_dict_size, src_lang="en"): src_lang) return reader_creator( - tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz"), + tar_file=paddle.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, + "wmt16.tar.gz"), file_name="wmt16/test", src_dict_size=src_dict_size, trg_dict_size=trg_dict_size, @@ -274,8 +274,8 @@ def validation(src_dict_size, trg_dict_size, src_lang="en"): src_lang) return reader_creator( - tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz"), + tar_file=paddle.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, + "wmt16.tar.gz"), file_name="wmt16/val", src_dict_size=src_dict_size, trg_dict_size=trg_dict_size, @@ -303,12 +303,12 @@ def get_dict(lang, dict_size, reverse=False): if lang == "en": dict_size = min(dict_size, TOTAL_EN_WORDS) else: dict_size = min(dict_size, TOTAL_DE_WORDS) - dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME, + dict_path = os.path.join(paddle.dataset.common.DATA_HOME, "wmt16/%s_%d.dict" % (lang, dict_size)) assert os.path.exists(dict_path), "Word dictionary does not exist. " "Please invoke paddle.dataset.wmt16.train/test/validation first " "to build the dictionary." - tar_file = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16.tar.gz") + tar_file = os.path.join(paddle.dataset.common.DATA_HOME, "wmt16.tar.gz") return __load_dict(tar_file, dict_size, lang, reverse) @@ -323,7 +323,7 @@ def convert(path, src_dict_size, trg_dict_size, src_lang): """Converts dataset to recordio format. """ - paddle.v2.dataset.common.convert( + paddle.dataset.common.convert( path, train( src_dict_size=src_dict_size, @@ -331,7 +331,7 @@ def convert(path, src_dict_size, trg_dict_size, src_lang): src_lang=src_lang), 1000, "wmt16_train") - paddle.v2.dataset.common.convert( + paddle.dataset.common.convert( path, test( src_dict_size=src_dict_size, @@ -339,7 +339,7 @@ def convert(path, src_dict_size, trg_dict_size, src_lang): src_lang=src_lang), 1000, "wmt16_test") - paddle.v2.dataset.common.convert( + paddle.dataset.common.convert( path, validation( src_dict_size=src_dict_size, diff --git a/python/paddle/fluid/tests/book/notest_rnn_encoder_decoer.py b/python/paddle/fluid/tests/book/notest_rnn_encoder_decoer.py index 983f8f4dbeac83566839de25ec9765eb248be768..ce640dece8a5067bd10f410a2bb58874b7cc0908 100644 --- a/python/paddle/fluid/tests/book/notest_rnn_encoder_decoer.py +++ b/python/paddle/fluid/tests/book/notest_rnn_encoder_decoer.py @@ -13,7 +13,7 @@ # limitations under the License. import numpy as np -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.framework as framework diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index 93ef66851b0efd65361122853dadeefe11992ed5..6dfc2997ae0328a41fe22d13dfa8fc51d4d021a6 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import contextlib import numpy diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py index b01c1875d64d7fc14e0141672f7e8eab2b6a0394..e8bb082be196b6342b1719235f1264bbe3d776ac 100644 --- a/python/paddle/fluid/tests/book/test_image_classification.py +++ b/python/paddle/fluid/tests/book/test_image_classification.py @@ -14,7 +14,7 @@ from __future__ import print_function -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import contextlib import math diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index f488527e0bc69059bc44422aa28188441f3d5b54..c0a6df831acbfe2654a5941cf95c91343992ef13 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -15,8 +15,8 @@ import math import numpy as np -import paddle.v2 as paddle -import paddle.v2.dataset.conll05 as conll05 +import paddle +import paddle.dataset.conll05 as conll05 import paddle.fluid as fluid from paddle.fluid.initializer import init_on_cpu import contextlib diff --git a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py index 3a1a0859ecfd4ac5337e2112f8b22e32d8474f22..830d78df8b9e56b45f7e928562ef4b89e88f696d 100644 --- a/python/paddle/fluid/tests/book/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/test_machine_translation.py @@ -14,7 +14,7 @@ import contextlib import numpy as np -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework import paddle.fluid.layers as pd diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index e85b97a7f430b6d752baa179f27a7d15bc4d9a81..e4997b4069f60ff4382b4254bc026ae8ae29b345 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -14,7 +14,7 @@ from __future__ import print_function import argparse import paddle.fluid as fluid -import paddle.v2 as paddle +import paddle import sys import numpy import unittest diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 2ce66d32c993672793b0db213267d1f80b5c49dd..2172c275b8082689a6ff5f2c3c27a2ff4e92275a 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -16,7 +16,7 @@ import math import sys import os import numpy as np -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework import paddle.fluid.layers as layers diff --git a/python/paddle/fluid/tests/book/test_understand_sentiment.py b/python/paddle/fluid/tests/book/test_understand_sentiment.py index d2f3f7404697feb0768f873070b97aeb3ba0cd64..dedd153778d7ad9caeb5fa7090a980bc7f177dea 100644 --- a/python/paddle/fluid/tests/book/test_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/test_understand_sentiment.py @@ -15,7 +15,7 @@ from __future__ import print_function import unittest import paddle.fluid as fluid -import paddle.v2 as paddle +import paddle import contextlib import math import numpy as np diff --git a/python/paddle/fluid/tests/book/test_word2vec.py 
b/python/paddle/fluid/tests/book/test_word2vec.py index 26b97c3e254f54b83515436660e44d4908c98fbe..8929779de9448d036e1528b64330b37463ab3988 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import unittest import os diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py index ad79e96b958b36a06c8a3cc990dbe3608e32c9ac..8818cf96fa8f08036f9e23aae786f67b5614b2b9 100644 --- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py +++ b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py @@ -13,7 +13,7 @@ # limitations under the License. import numpy as np -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import math import sys diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py index 204669d7e6176e9e8250e8aebc2d10441fa24b67..dfebb9a06ea4f290f128c486dcaccaeccdcef8c4 100644 --- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py +++ b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py @@ -16,7 +16,7 @@ from __future__ import print_function import sys -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import math import sys diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py index a24834a6f0b19d1265f6c8d7089d31583af82d1f..a1ca6d981fafb401985d03e9f2d63d1cb41b21b5 100644 --- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py +++ b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py @@ -13,7 +13,7 @@ # limitations under the License. import numpy as np -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.framework as framework diff --git a/python/paddle/fluid/tests/demo/fc_gan.py b/python/paddle/fluid/tests/demo/fc_gan.py index 7452ea2a34aa0c75d8e0990639b29705033af98b..8ea1b2b15cc0c0eb5bca67a9c5a6ac6c6774e7e2 100644 --- a/python/paddle/fluid/tests/demo/fc_gan.py +++ b/python/paddle/fluid/tests/demo/fc_gan.py @@ -19,7 +19,7 @@ import os import matplotlib import numpy -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid matplotlib.use('Agg') diff --git a/python/paddle/fluid/tests/test_cpp_reader.py b/python/paddle/fluid/tests/test_cpp_reader.py index 4b0d039b7e05a55980946a8949e32802e9e57c20..e54c73b2956dd99ee57804318130c261e133d21a 100644 --- a/python/paddle/fluid/tests/test_cpp_reader.py +++ b/python/paddle/fluid/tests/test_cpp_reader.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import numpy as np import sys diff --git a/python/paddle/fluid/tests/test_error_clip.py b/python/paddle/fluid/tests/test_error_clip.py index b2fd5ae29c724da52df0a5d3cb56d2ec9e5530f3..89f4c64975802dc1827ec17ed3626b91e36d6971 100644 --- a/python/paddle/fluid/tests/test_error_clip.py +++ b/python/paddle/fluid/tests/test_error_clip.py @@ -14,7 +14,7 @@ from __future__ import print_function import numpy as np -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid BATCH_SIZE = 128 diff --git a/python/paddle/fluid/tests/test_gradient_clip.py b/python/paddle/fluid/tests/test_gradient_clip.py index 68b682f68b1fd147b821cfdb1e0866cf8aa04bff..d530601f13be6810a8a99b13c92faf584df568f9 100644 --- a/python/paddle/fluid/tests/test_gradient_clip.py +++ b/python/paddle/fluid/tests/test_gradient_clip.py @@ -13,7 +13,7 @@ # limitations under the License. import numpy as np -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid BATCH_SIZE = 128 diff --git a/python/paddle/fluid/tests/test_mnist_if_else_op.py b/python/paddle/fluid/tests/test_mnist_if_else_op.py index 94395f6cfb4648967558ed265e798e3505c20fc1..d34f52db5ffc889f17513d034ad2c99f696b0cdf 100644 --- a/python/paddle/fluid/tests/test_mnist_if_else_op.py +++ b/python/paddle/fluid/tests/test_mnist_if_else_op.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import paddle import paddle.fluid.layers as layers from paddle.fluid.framework import Program, program_guard, default_main_program, default_startup_program from paddle.fluid.executor import Executor from paddle.fluid.optimizer import MomentumOptimizer import paddle.fluid.core as core -import paddle.v2 as paddle import unittest import numpy as np diff --git a/python/paddle/fluid/tests/unittests/test_dyn_rnn.py b/python/paddle/fluid/tests/unittests/test_dyn_rnn.py index df7ab0d29bdfc9410cd7dd4a8f2a7cd440ef4aba..0faed94deb4808783027d776e0f4c61da0db457a 100644 --- a/python/paddle/fluid/tests/unittests/test_dyn_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_dyn_rnn.py @@ -13,7 +13,7 @@ # limitations under the License. import paddle.fluid as fluid -import paddle.v2 as paddle +import paddle import unittest import numpy diff --git a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py index b03a70f1b9e61162d37541ffeba8510fc11c605a..d3f63ee2c414a71309be8f0af6d3e5912078ecdb 100644 --- a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py +++ b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py @@ -13,7 +13,7 @@ # limitations under the License. 
import unittest -import paddle.v2 as paddle +import paddle import paddle.fluid.core as core import paddle.fluid as fluid from paddle.fluid.backward import append_backward diff --git a/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py b/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py index 8add353303e3626bbce68199a100306d4858766a..0b7a29075939a548320185947b5afa7261029d49 100644 --- a/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py +++ b/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py @@ -15,8 +15,8 @@ import unittest import paddle.fluid as fluid -import paddle.v2 as paddle -import paddle.v2.dataset.mnist as mnist +import paddle +import paddle.dataset.mnist as mnist class TestMultipleReader(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_multiple_reader.py b/python/paddle/fluid/tests/unittests/test_multiple_reader.py index 69f8acf81efaba8fc0f3df4cfe3a42dc4e477df2..a60a5d6c4af2b6b3652d0fe2089018b9403eee25 100644 --- a/python/paddle/fluid/tests/unittests/test_multiple_reader.py +++ b/python/paddle/fluid/tests/unittests/test_multiple_reader.py @@ -15,8 +15,8 @@ import unittest import paddle.fluid as fluid -import paddle.v2 as paddle -import paddle.v2.dataset.mnist as mnist +import paddle +import paddle.dataset.mnist as mnist from shutil import copyfile diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index bbfd03c638dac64de24c0b363f8342d8485f1223..95d0f9da47e97e94ff97eb3647ac5244d5409ca3 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -16,9 +16,9 @@ import numpy import unittest import paddle.fluid as fluid -import paddle.v2 as paddle -import paddle.v2.dataset.mnist as mnist -import paddle.v2.dataset.wmt16 as wmt16 +import paddle +import paddle.dataset.mnist as mnist +import paddle.dataset.wmt16 as wmt16 def simple_fc_net(): diff --git a/python/paddle/fluid/tests/unittests/test_recordio_reader.py b/python/paddle/fluid/tests/unittests/test_recordio_reader.py index 24a0074d9b9621d902d12eb8cb29d9b65be22ed3..640264d82f0dc7fa71bf882d5549e30b87b8d7c5 100644 --- a/python/paddle/fluid/tests/unittests/test_recordio_reader.py +++ b/python/paddle/fluid/tests/unittests/test_recordio_reader.py @@ -15,8 +15,8 @@ import unittest import paddle.fluid as fluid -import paddle.v2 as paddle -import paddle.v2.dataset.mnist as mnist +import paddle +import paddle.dataset.mnist as mnist class TestRecordIO(unittest.TestCase): diff --git a/python/paddle/v2/reader/__init__.py b/python/paddle/reader/__init__.py similarity index 100% rename from python/paddle/v2/reader/__init__.py rename to python/paddle/reader/__init__.py diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/reader/creator.py similarity index 62% rename from python/paddle/v2/reader/creator.py rename to python/paddle/reader/creator.py index fda5246d74f598200b439774a25e80ec3e504077..4c905d959fad4e8c1a8826ce8dc60c5fa834514d 100644 --- a/python/paddle/v2/reader/creator.py +++ b/python/paddle/reader/creator.py @@ -16,7 +16,7 @@ Creator package contains some simple reader creator, which could be used in user program. 
""" -__all__ = ['np_array', 'text_file', 'recordio', 'cloud_reader'] +__all__ = ['np_array', 'text_file', 'recordio'] def np_array(x): @@ -66,7 +66,7 @@ def recordio(paths, buf_size=100): """ import recordio as rec - import paddle.v2.reader.decorator as dec + import paddle.reader.decorator as dec import cPickle as pickle def reader(): @@ -83,48 +83,3 @@ def recordio(paths, buf_size=100): f.close() return dec.buffered(reader, buf_size) - - -pass_num = 0 - - -def cloud_reader(paths, etcd_endpoints, timeout_sec=5, buf_size=64): - """ - Create a data reader that yield a record one by one from - the paths: - :paths: path of recordio files, can be a string or a string list. - :etcd_endpoints: the endpoints for etcd cluster - :returns: data reader of recordio files. - - .. code-block:: python - from paddle.v2.reader.creator import cloud_reader - etcd_endpoints = "http://127.0.0.1:2379" - trainer.train.( - reader=cloud_reader(["/work/dataset/uci_housing/uci_housing*"], etcd_endpoints), - ) - """ - import os - import cPickle as pickle - import paddle.v2.master as master - c = master.client(etcd_endpoints, timeout_sec, buf_size) - - if isinstance(paths, basestring): - path = [paths] - else: - path = paths - c.set_dataset(path) - - def reader(): - global pass_num - c.paddle_start_get_records(pass_num) - pass_num += 1 - - while True: - r, e = c.next_record() - if not r: - if e != -2: - print "get record error: ", e - break - yield pickle.loads(r) - - return reader diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/reader/decorator.py similarity index 100% rename from python/paddle/v2/reader/decorator.py rename to python/paddle/reader/decorator.py diff --git a/python/paddle/v2/reader/tests/CMakeLists.txt b/python/paddle/reader/tests/CMakeLists.txt similarity index 100% rename from python/paddle/v2/reader/tests/CMakeLists.txt rename to python/paddle/reader/tests/CMakeLists.txt diff --git a/python/paddle/v2/reader/tests/__init__.py b/python/paddle/reader/tests/__init__.py similarity index 100% rename from python/paddle/v2/reader/tests/__init__.py rename to python/paddle/reader/tests/__init__.py diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/reader/tests/creator_test.py similarity index 92% rename from python/paddle/v2/reader/tests/creator_test.py rename to python/paddle/reader/tests/creator_test.py index 7fe374e663607607cd0839eb6ca9c70c4d15eef8..c4238c12a74759d52eb09f31ce1126cc93dd3489 100644 --- a/python/paddle/v2/reader/tests/creator_test.py +++ b/python/paddle/reader/tests/creator_test.py @@ -28,14 +28,14 @@ import os import unittest import numpy as np -import paddle.v2.reader.creator +import paddle.reader.creator class TestNumpyArray(unittest.TestCase): def test_numpy_array(self): l = [[1, 2, 3], [4, 5, 6]] x = np.array(l, np.int32) - reader = paddle.v2.reader.creator.np_array(x) + reader = paddle.reader.creator.np_array(x) for idx, e in enumerate(reader()): self.assertItemsEqual(e, l[idx]) @@ -43,14 +43,14 @@ class TestNumpyArray(unittest.TestCase): class TestTextFile(unittest.TestCase): def test_text_file(self): path = os.path.join(os.path.dirname(__file__), "test_data_creator.txt") - reader = paddle.v2.reader.creator.text_file(path) + reader = paddle.reader.creator.text_file(path) for idx, e in enumerate(reader()): self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1)) class TestRecordIO(unittest.TestCase): def do_test(self, path): - reader = paddle.v2.reader.creator.recordio(path) + reader = paddle.reader.creator.recordio(path) idx = 0 for e in 
diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/reader/tests/creator_test.py
similarity index 92%
rename from python/paddle/v2/reader/tests/creator_test.py
rename to python/paddle/reader/tests/creator_test.py
index 7fe374e663607607cd0839eb6ca9c70c4d15eef8..c4238c12a74759d52eb09f31ce1126cc93dd3489 100644
--- a/python/paddle/v2/reader/tests/creator_test.py
+++ b/python/paddle/reader/tests/creator_test.py
@@ -28,14 +28,14 @@
 import os
 import unittest
 
 import numpy as np
-import paddle.v2.reader.creator
+import paddle.reader.creator
 
 
 class TestNumpyArray(unittest.TestCase):
     def test_numpy_array(self):
         l = [[1, 2, 3], [4, 5, 6]]
         x = np.array(l, np.int32)
-        reader = paddle.v2.reader.creator.np_array(x)
+        reader = paddle.reader.creator.np_array(x)
         for idx, e in enumerate(reader()):
             self.assertItemsEqual(e, l[idx])
 
@@ -43,14 +43,14 @@ class TestNumpyArray(unittest.TestCase):
 
 class TestTextFile(unittest.TestCase):
     def test_text_file(self):
         path = os.path.join(os.path.dirname(__file__), "test_data_creator.txt")
-        reader = paddle.v2.reader.creator.text_file(path)
+        reader = paddle.reader.creator.text_file(path)
         for idx, e in enumerate(reader()):
             self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1))
 
 
 class TestRecordIO(unittest.TestCase):
     def do_test(self, path):
-        reader = paddle.v2.reader.creator.recordio(path)
+        reader = paddle.reader.creator.recordio(path)
         idx = 0
         for e in reader():
             if idx == 0:
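The decorator test below exercises the same rename for `paddle.reader`'s composition helpers. As a reference for the new spellings (toy readers assumed):

..  code-block:: python

    import paddle.reader

    def make_reader(n):
        def reader():
            for i in range(n):
                yield i
        return reader

    # chain() concatenates readers; shuffle() buffers records and reorders them.
    chained = paddle.reader.chain(make_reader(10), make_reader(10))
    shuffled = paddle.reader.shuffle(chained, 100)
    print sum(1 for _ in shuffled())  # 20 records, order shuffled per buffer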
diff --git a/python/paddle/v2/reader/tests/decorator_test.py b/python/paddle/reader/tests/decorator_test.py
similarity index 81%
rename from python/paddle/v2/reader/tests/decorator_test.py
rename to python/paddle/reader/tests/decorator_test.py
index 6b680e39f3fb299a14e7d8162470996d1d16b83d..bee24d3b6579db5e99ec66931df201fdf9e1af07 100644
--- a/python/paddle/v2/reader/tests/decorator_test.py
+++ b/python/paddle/reader/tests/decorator_test.py
@@ -15,7 +15,7 @@
 import time
 import unittest
 
-import paddle.v2.reader
+import paddle.reader
 
 
 def reader_creator_10(dur):
@@ -39,7 +39,7 @@ class TestMap(unittest.TestCase):
             yield "h"
             yield "i"
 
-        r = paddle.v2.reader.map_readers(tokenize, read)
+        r = paddle.reader.map_readers(tokenize, read)
         for i, e in enumerate(r()):
             self.assertEqual(e, i)
 
@@ -47,7 +47,7 @@ class TestBuffered(unittest.TestCase):
     def test_read(self):
         for size in range(20):
-            b = paddle.v2.reader.buffered(reader_creator_10(0), size)
+            b = paddle.reader.buffered(reader_creator_10(0), size)
             c = 0
             for i in b():
                 self.assertEqual(i, c)
@@ -56,7 +56,7 @@ class TestBuffered(unittest.TestCase):
     def test_buffering(self):
         # read have 30ms delay.
-        b = paddle.v2.reader.buffered(reader_creator_10(0.03), 10)
+        b = paddle.reader.buffered(reader_creator_10(0.03), 10)
         last_time = time.time()
         for idx, i in enumerate(b()):
             elapsed_time = time.time() - last_time
@@ -70,17 +70,17 @@ class TestBuffered(unittest.TestCase):
 
 class TestCompose(unittest.TestCase):
     def test_compse(self):
-        reader = paddle.v2.reader.compose(
+        reader = paddle.reader.compose(
             reader_creator_10(0), reader_creator_10(0))
         for idx, e in enumerate(reader()):
             self.assertEqual(e, (idx, idx))
 
     def test_compose_not_aligned(self):
         total = 0
-        reader = paddle.v2.reader.compose(
-            paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)),
+        reader = paddle.reader.compose(
+            paddle.reader.chain(reader_creator_10(0), reader_creator_10(0)),
             reader_creator_10(0))
-        with self.assertRaises(paddle.v2.reader.ComposeNotAligned):
+        with self.assertRaises(paddle.reader.ComposeNotAligned):
             for e in reader():
                 total += 1
         # expecting 10, not 20
@@ -88,8 +88,8 @@ class TestCompose(unittest.TestCase):
     def test_compose_not_aligned_no_check(self):
         total = 0
-        reader = paddle.v2.reader.compose(
-            paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)),
+        reader = paddle.reader.compose(
+            paddle.reader.chain(reader_creator_10(0), reader_creator_10(0)),
             reader_creator_10(0),
             check_alignment=False)
         for e in reader():
@@ -100,7 +100,7 @@ class TestCompose(unittest.TestCase):
 
 class TestChain(unittest.TestCase):
     def test_chain(self):
-        c = paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0))
+        c = paddle.reader.chain(reader_creator_10(0), reader_creator_10(0))
         idx = 0
         for e in c():
             self.assertEqual(e, idx % 10)
@@ -113,7 +113,7 @@ class TestShuffle(unittest.TestCase):
         case = [(0, True), (1, True), (10, False), (100, False)]
         a = reader_creator_10(0)
         for size, checkEq in case:
-            s = paddle.v2.reader.shuffle(a, size)
+            s = paddle.reader.shuffle(a, size)
             total = 0
             for idx, e in enumerate(s()):
                 if checkEq:
@@ -133,9 +133,9 @@ class TestXmap(unittest.TestCase):
         for order in orders:
             for tNum in thread_nums:
                 for size in buffered_size:
-                    reader = paddle.v2.reader.xmap_readers(mapper,
-                                                           reader_creator_10(0),
-                                                           tNum, size, order)
+                    reader = paddle.reader.xmap_readers(mapper,
+                                                        reader_creator_10(0),
+                                                        tNum, size, order)
                     for n in xrange(3):
                         result = []
                         for i in reader():
@@ -150,7 +150,7 @@ class TestPipeReader(unittest.TestCase):
     def test_pipe_reader(self):
         def example_reader(myfiles):
             for f in myfiles:
-                pr = paddle.v2.reader.PipeReader("cat %s" % f, bufsize=128)
+                pr = paddle.reader.PipeReader("cat %s" % f, bufsize=128)
                 for l in pr.get_line():
                     yield l
diff --git a/python/paddle/v2/reader/tests/test_data_creator.txt b/python/paddle/reader/tests/test_data_creator.txt
similarity index 100%
rename from python/paddle/v2/reader/tests/test_data_creator.txt
rename to python/paddle/reader/tests/test_data_creator.txt
diff --git a/python/paddle/v2/reader/tests/test_reader_recordio.dat b/python/paddle/reader/tests/test_reader_recordio.dat
similarity index 100%
rename from python/paddle/v2/reader/tests/test_reader_recordio.dat
rename to python/paddle/reader/tests/test_reader_recordio.dat
diff --git a/python/paddle/v2/reader/tests/test_recordio_creator.dat b/python/paddle/reader/tests/test_recordio_creator.dat
similarity index 100%
rename from python/paddle/v2/reader/tests/test_recordio_creator.dat
rename to python/paddle/reader/tests/test_recordio_creator.dat
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
index df710c33d0c0ca16d358dac1eb42327e9cd4c7ae..02b0d077eefa431baed05c421a367ebe3581626c 100644
--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -22,17 +22,13 @@
 import data_type
 import topology
 import networks
 import evaluator
-from . import dataset
-from . import reader
 from . import plot
 import attr
 import op
 import pooling
 import inference
 import networks
-import minibatch
 import plot
-import image
 import paddle.trainer.config_parser as cp
 
 __all__ = [
@@ -48,14 +44,11 @@ __all__ = [
     'data_type',
     'attr',
     'pooling',
-    'dataset',
-    'reader',
     'topology',
     'networks',
     'infer',
     'plot',
     'evaluator',
-    'image',
     'master',
 ]
 
@@ -153,4 +146,3 @@ def init(**kwargs):
 
 
 infer = inference.infer
-batch = minibatch.batch
diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py
index 52f5b947fdec55eea45b9d34eddd576c981fa97c..14b64742fd09bf6c197c5d1aa2354271293df239 100644
--- a/python/paddle/v2/inference.py
+++ b/python/paddle/v2/inference.py
@@ -15,7 +15,7 @@
 import numpy
 import collections
 import topology
-import minibatch
+import paddle
 import cPickle
 
 __all__ = ['infer', 'Inference']
@@ -80,7 +80,7 @@ class Inference(object):
             for each_sample in input:
                 yield each_sample
 
-        reader = minibatch.batch(__reader_impl__, batch_size=batch_size)
+        reader = paddle.batch(__reader_impl__, batch_size=batch_size)
 
         self.__gradient_machine__.start()
         for data_batch in reader():
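The `inference.py` change above is the one place the moved `batch` is consumed inside v2 itself: `minibatch.batch` is now reached as `paddle.batch`. Its grouping behavior, as a sketch with a toy reader:

..  code-block:: python

    import paddle

    def sample_reader():
        for i in range(4):
            yield [i]  # one sample per yield

    batched = paddle.batch(sample_reader, batch_size=2)
    for data_batch in batched():
        print data_batch  # [[0], [1]] then [[2], [3]]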
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index 6a2bb8d337b7667aa2b1e3ef0815bb80f6e38d6a..a188a03eb3698c972de92c9807f1bdb71a249330 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -20,7 +20,7 @@ The primary usage shows below.
 
 ..  code-block:: python
 
-    import paddle.v2 as paddle
+    import paddle
 
     img = paddle.layer.data(name='img', type=paddle.data_type.dense_vector(784))
     hidden = paddle.layer.fc(input=img, size=200)
diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt
index b4333ed530ce464095ec38d72706949cc464fbe4..46e4feb8e1ce1d12f214f5c49b1b589a46110603 100644
--- a/python/paddle/v2/tests/CMakeLists.txt
+++ b/python/paddle/v2/tests/CMakeLists.txt
@@ -1,5 +1,4 @@
 py_test(test_op SRCS test_op.py)
-py_test(test_image SRCS test_image.py)
 py_test(test_layer SRCS test_layer.py)
 py_test(test_topology SRCS test_topology.py)
 py_test(test_rnn_layer SRCS test_rnn_layer.py)
diff --git a/python/paddle/v2/tests/test_paramconf_order.py b/python/paddle/v2/tests/test_paramconf_order.py
index 264442be182ea69c95b39b3bdb4c389d52eff66e..8320217da2795da756cf12a80f39279182789eef 100644
--- a/python/paddle/v2/tests/test_paramconf_order.py
+++ b/python/paddle/v2/tests/test_paramconf_order.py
@@ -27,6 +27,7 @@
 # limitations under the License.
 import unittest
 import math
+import paddle.dataset as dataset
 import paddle.v2 as paddle
 
 
@@ -40,7 +41,7 @@ def wordemb(inlayer):
 
 
 def train():
-    word_dict = paddle.dataset.imikolov.build_dict()
+    word_dict = dataset.imikolov.build_dict()
     dict_size = len(word_dict)
     # Every layer takes integer value of range [0, dict_size)
     firstword = paddle.layer.data(
diff --git a/python/setup.py.in b/python/setup.py.in
index 831d173d424b8c663f728af748ad1942bb20a418..d73a3a6a1c41b87efb9600ac59983bd16547ec6a 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -63,6 +63,8 @@ write_version_py(filename='@PADDLE_SOURCE_DIR@/python/paddle/version.py')
 
 packages=['paddle',
           'paddle.utils',
+          'paddle.dataset',
+          'paddle.reader',
           'paddle.fluid',
           'paddle.fluid.proto',
           'paddle.fluid.proto.profiler',
@@ -73,8 +75,6 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
           'paddle.trainer',
           'paddle.trainer_config_helpers',
           'paddle.v2',
-          'paddle.v2.dataset',
-          'paddle.v2.reader',
           'paddle.v2.master',
           'paddle.v2.plot',
           'py_paddle']
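With `setup.py.in` listing `paddle.dataset` and `paddle.reader` unconditionally, outside the `'${WITH_FLUID_ONLY}'== 'OFF'` branch, the relocated packages ship in every build flavor, fluid-only included. A hypothetical post-install check of the new layout:

..  code-block:: python

    import paddle

    print paddle.dataset.__name__  # 'paddle.dataset'
    print paddle.reader.__name__   # 'paddle.reader'
    print callable(paddle.batch)   # True: batch is re-exported at top level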