From bcf7c36b0b3d62caeea351d9905ac901cb7a1f26 Mon Sep 17 00:00:00 2001
From: Helin Wang
Date: Thu, 29 Mar 2018 15:21:47 -0700
Subject: [PATCH] Make paddle.fluid no longer depend on paddle.v2

In this way we can build and test with the WITH_FLUID_ONLY flag set to ON.

- move paddle.v2.dataset and paddle.v2.reader to paddle.dataset and paddle.reader
- remove unused code (which depended on v2) from paddle.dataset and paddle.reader
---
 python/CMakeLists.txt | 3 +-
 python/paddle/__init__.py | 6 +++
 python/paddle/{v2/minibatch.py => batch.py} | 0
 python/paddle/{v2 => }/dataset/__init__.py | 2 +
 python/paddle/{v2 => }/dataset/cifar.py | 22 ++++----
 python/paddle/{v2 => }/dataset/common.py | 16 +++---
 python/paddle/{v2 => }/dataset/conll05.py | 29 +++++------
 python/paddle/{v2 => }/dataset/flowers.py | 4 +-
 python/paddle/{v2 => dataset}/image.py | 0
 python/paddle/{v2 => }/dataset/imdb.py | 11 ++--
 python/paddle/{v2 => }/dataset/imikolov.py | 25 +++++----
 python/paddle/{v2 => }/dataset/mnist.py | 29 +++++------
 python/paddle/{v2 => }/dataset/movielens.py | 10 ++--
 python/paddle/{v2 => }/dataset/mq2007.py | 0
 python/paddle/{v2 => }/dataset/sentiment.py | 15 +++---
 python/paddle/dataset/tests/CMakeLists.txt | 1 +
 python/paddle/{v2 => dataset}/tests/cat.jpg | Bin
 .../{v2 => }/dataset/tests/cifar_test.py | 10 ++--
 .../{v2 => }/dataset/tests/common_test.py | 20 +++----
 .../{v2 => }/dataset/tests/flowers_test.py | 8 +--
 .../{v2 => }/dataset/tests/imdb_test.py | 12 ++---
 .../{v2 => }/dataset/tests/imikolov_test.py | 16 +++---
 .../{v2 => }/dataset/tests/mnist_test.py | 6 +--
 .../{v2 => }/dataset/tests/mq2007_test.py | 6 +--
 .../{v2 => dataset}/tests/test_image.py | 2 +-
 .../{v2 => }/dataset/tests/test_sentiment.py | 2 +-
 .../{v2 => }/dataset/tests/voc2012_test.py | 8 +--
 .../{v2 => }/dataset/tests/wmt16_test.py | 10 ++--
 python/paddle/{v2 => }/dataset/uci_housing.py | 21 +++----
 python/paddle/{v2 => }/dataset/voc2012.py | 4 +-
 python/paddle/{v2 => }/dataset/wmt14.py | 27 ++++------
 python/paddle/{v2 => }/dataset/wmt16.py | 26 +++++-----
 .../tests/book/notest_rnn_encoder_decoer.py | 2 +-
 .../fluid/tests/book/test_fit_a_line.py | 2 +-
 .../tests/book/test_image_classification.py | 2 +-
 .../tests/book/test_label_semantic_roles.py | 4 +-
 .../tests/book/test_machine_translation.py | 2 +-
 .../fluid/tests/book/test_recognize_digits.py | 2 +-
 .../tests/book/test_recommender_system.py | 2 +-
 .../tests/book/test_understand_sentiment.py | 2 +-
 .../paddle/fluid/tests/book/test_word2vec.py | 2 +-
 .../test_memopt_fit_a_line.py | 2 +-
 .../test_memopt_image_classification_train.py | 2 +-
 .../test_memopt_machine_translation.py | 2 +-
 python/paddle/fluid/tests/demo/fc_gan.py | 2 +-
 python/paddle/fluid/tests/test_cpp_reader.py | 2 +-
 python/paddle/fluid/tests/test_error_clip.py | 2 +-
 .../paddle/fluid/tests/test_gradient_clip.py | 2 +-
 .../fluid/tests/test_mnist_if_else_op.py | 2 +-
 .../fluid/tests/unittests/test_dyn_rnn.py | 2 +-
 .../unittests/test_dynrnn_static_input.py | 2 +-
 .../tests/unittests/test_multi_pass_reader.py | 4 +-
 .../tests/unittests/test_multiple_reader.py | 4 +-
 .../tests/unittests/test_parallel_executor.py | 6 +--
 .../tests/unittests/test_recordio_reader.py | 4 +-
 python/paddle/{v2 => }/reader/__init__.py | 0
 python/paddle/{v2 => }/reader/creator.py | 49 +-----------------
 python/paddle/{v2 => }/reader/decorator.py | 0
 .../{v2 => }/reader/tests/CMakeLists.txt | 0
 .../paddle/{v2 => }/reader/tests/__init__.py | 0
 .../{v2 => }/reader/tests/creator_test.py | 8 +--
 .../{v2 => }/reader/tests/decorator_test.py | 32 ++++++------
.../reader/tests/test_data_creator.txt | 0 .../reader/tests/test_reader_recordio.dat | Bin .../reader/tests/test_recordio_creator.dat | Bin python/paddle/v2/__init__.py | 8 --- python/paddle/v2/inference.py | 4 +- python/paddle/v2/layer.py | 2 +- python/paddle/v2/tests/CMakeLists.txt | 1 - .../paddle/v2/tests/test_paramconf_order.py | 3 +- python/setup.py.in | 4 +- 71 files changed, 225 insertions(+), 295 deletions(-) rename python/paddle/{v2/minibatch.py => batch.py} (100%) rename python/paddle/{v2 => }/dataset/__init__.py (97%) rename python/paddle/{v2 => }/dataset/cifar.py (80%) rename python/paddle/{v2 => }/dataset/common.py (93%) rename python/paddle/{v2 => }/dataset/conll05.py (88%) rename python/paddle/{v2 => }/dataset/flowers.py (99%) rename python/paddle/{v2 => dataset}/image.py (100%) rename python/paddle/{v2 => }/dataset/imdb.py (91%) rename python/paddle/{v2 => }/dataset/imikolov.py (86%) rename python/paddle/{v2 => }/dataset/mnist.py (76%) rename python/paddle/{v2 => }/dataset/movielens.py (95%) rename python/paddle/{v2 => }/dataset/mq2007.py (100%) rename python/paddle/{v2 => }/dataset/sentiment.py (87%) create mode 100644 python/paddle/dataset/tests/CMakeLists.txt rename python/paddle/{v2 => dataset}/tests/cat.jpg (100%) rename python/paddle/{v2 => }/dataset/tests/cifar_test.py (88%) rename python/paddle/{v2 => }/dataset/tests/common_test.py (81%) rename python/paddle/{v2 => }/dataset/tests/flowers_test.py (89%) rename python/paddle/{v2 => }/dataset/tests/imdb_test.py (77%) rename python/paddle/{v2 => }/dataset/tests/imikolov_test.py (79%) rename python/paddle/{v2 => }/dataset/tests/mnist_test.py (91%) rename python/paddle/{v2 => }/dataset/tests/mq2007_test.py (85%) rename python/paddle/{v2 => dataset}/tests/test_image.py (97%) rename python/paddle/{v2 => }/dataset/tests/test_sentiment.py (97%) rename python/paddle/{v2 => }/dataset/tests/voc2012_test.py (82%) rename python/paddle/{v2 => }/dataset/tests/wmt16_test.py (89%) rename python/paddle/{v2 => }/dataset/uci_housing.py (82%) rename python/paddle/{v2 => }/dataset/voc2012.py (97%) rename python/paddle/{v2 => }/dataset/wmt14.py (84%) rename python/paddle/{v2 => }/dataset/wmt16.py (94%) rename python/paddle/{v2 => }/reader/__init__.py (100%) rename python/paddle/{v2 => }/reader/creator.py (62%) rename python/paddle/{v2 => }/reader/decorator.py (100%) rename python/paddle/{v2 => }/reader/tests/CMakeLists.txt (100%) rename python/paddle/{v2 => }/reader/tests/__init__.py (100%) rename python/paddle/{v2 => }/reader/tests/creator_test.py (92%) rename python/paddle/{v2 => }/reader/tests/decorator_test.py (81%) rename python/paddle/{v2 => }/reader/tests/test_data_creator.txt (100%) rename python/paddle/{v2 => }/reader/tests/test_reader_recordio.dat (100%) rename python/paddle/{v2 => }/reader/tests/test_recordio_creator.dat (100%) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b0242b20b..f5ae553c8 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -73,12 +73,13 @@ add_custom_target(paddle_python ALL DEPENDS ${paddle_python_deps}) set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) if (WITH_TESTING) + add_subdirectory(paddle/reader/tests) + add_subdirectory(paddle/dataset/tests) if(NOT WITH_FLUID_ONLY) add_subdirectory(paddle/trainer_config_helpers/tests) if (WITH_SWIG_PY) # enable v2 API unittest only when paddle swig api is compiled add_subdirectory(paddle/v2/tests) - add_subdirectory(paddle/v2/reader/tests) add_subdirectory(paddle/v2/plot/tests) endif() endif() diff --git 
a/python/paddle/__init__.py b/python/paddle/__init__.py index 1030c94e1..d1cf04161 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -14,8 +14,14 @@ try: from version import full_version as __version__ from version import commit as __git_commit__ + except ImportError: import sys sys.stderr.write('''Warning with import paddle: you should not import paddle from the source directory; please install paddlepaddle*.whl firstly.''' ) + +import reader +import dataset +import batch +batch = batch.batch diff --git a/python/paddle/v2/minibatch.py b/python/paddle/batch.py similarity index 100% rename from python/paddle/v2/minibatch.py rename to python/paddle/batch.py diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/dataset/__init__.py similarity index 97% rename from python/paddle/v2/dataset/__init__.py rename to python/paddle/dataset/__init__.py index c1acbecd9..1fdfd49f1 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/dataset/__init__.py @@ -28,6 +28,7 @@ import wmt16 import mq2007 import flowers import voc2012 +import image __all__ = [ 'mnist', @@ -43,4 +44,5 @@ __all__ = [ 'mq2007', 'flowers', 'voc2012', + 'image', ] diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/dataset/cifar.py similarity index 80% rename from python/paddle/v2/dataset/cifar.py rename to python/paddle/dataset/cifar.py index 0a2a1ced1..07f4dcbda 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/dataset/cifar.py @@ -31,7 +31,7 @@ images per class. import cPickle import itertools import numpy -import paddle.v2.dataset.common +import paddle.dataset.common import tarfile __all__ = ['train100', 'test100', 'train10', 'test10', 'convert'] @@ -75,7 +75,7 @@ def train100(): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), + paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train') @@ -90,7 +90,7 @@ def test100(): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), + paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test') @@ -105,7 +105,7 @@ def train10(): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), + paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch') @@ -120,20 +120,20 @@ def test10(): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), + paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch') def fetch(): - paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5) - paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5) + paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5) + paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5) def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train100(), 1000, "cifar_train100") - paddle.v2.dataset.common.convert(path, test100(), 1000, "cifar_test100") - paddle.v2.dataset.common.convert(path, train10(), 1000, "cifar_train10") - paddle.v2.dataset.common.convert(path, test10(), 1000, "cifar_test10") + paddle.dataset.common.convert(path, train100(), 1000, "cifar_train100") + paddle.dataset.common.convert(path, test100(), 1000, "cifar_test100") + paddle.dataset.common.convert(path, train10(), 1000, "cifar_train10") + 
paddle.dataset.common.convert(path, test10(), 1000, "cifar_test10") diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/dataset/common.py similarity index 93% rename from python/paddle/v2/dataset/common.py rename to python/paddle/dataset/common.py index c6ff09a1d..68660601c 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/dataset/common.py @@ -19,7 +19,7 @@ import errno import shutil import sys import importlib -import paddle.v2.dataset +import paddle.dataset import cPickle import glob import cPickle as pickle @@ -105,24 +105,24 @@ def download(url, module_name, md5sum, save_name=None): def fetch_all(): for module_name in filter(lambda x: not x.startswith("__"), - dir(paddle.v2.dataset)): + dir(paddle.dataset)): if "fetch" in dir( - importlib.import_module("paddle.v2.dataset.%s" % module_name)): + importlib.import_module("paddle.dataset.%s" % module_name)): getattr( - importlib.import_module("paddle.v2.dataset.%s" % module_name), + importlib.import_module("paddle.dataset.%s" % module_name), "fetch")() def fetch_all_recordio(path): for module_name in filter(lambda x: not x.startswith("__"), - dir(paddle.v2.dataset)): + dir(paddle.dataset)): if "convert" in dir( - importlib.import_module("paddle.v2.dataset.%s" % module_name)) and \ + importlib.import_module("paddle.dataset.%s" % module_name)) and \ not module_name == "common": ds_path = os.path.join(path, module_name) must_mkdirs(ds_path) getattr( - importlib.import_module("paddle.v2.dataset.%s" % module_name), + importlib.import_module("paddle.dataset.%s" % module_name), "convert")(ds_path) @@ -130,7 +130,7 @@ def split(reader, line_count, suffix="%05d.pickle", dumper=cPickle.dump): """ you can call the function as: - split(paddle.v2.dataset.cifar.train10(), line_count=1000, + split(paddle.dataset.cifar.train10(), line_count=1000, suffix="imikolov-train-%05d.pickle") the output files as: diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/dataset/conll05.py similarity index 88% rename from python/paddle/v2/dataset/conll05.py rename to python/paddle/dataset/conll05.py index 0d544efac..4e94ce898 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/dataset/conll05.py @@ -23,7 +23,7 @@ to initialize SRL model. import tarfile import gzip import itertools -import paddle.v2.dataset.common +import paddle.dataset.common __all__ = ['test, get_dict', 'get_embedding', 'convert'] @@ -203,14 +203,11 @@ def get_dict(): Get the word, verb and label dictionary of Wikipedia corpus. """ word_dict = load_dict( - paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', - WORDDICT_MD5)) + paddle.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)) verb_dict = load_dict( - paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', - VERBDICT_MD5)) + paddle.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)) label_dict = load_label_dict( - paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', - TRGDICT_MD5)) + paddle.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)) return word_dict, verb_dict, label_dict @@ -218,7 +215,7 @@ def get_embedding(): """ Get the trained word vector based on Wikipedia corpus. 
""" - return paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) + return paddle.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) def test(): @@ -235,23 +232,23 @@ def test(): """ word_dict, verb_dict, label_dict = get_dict() reader = corpus_reader( - paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5), + paddle.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5), words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') return reader_creator(reader, word_dict, verb_dict, label_dict) def fetch(): - paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) - paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) - paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) - paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) - paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5) + paddle.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) + paddle.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) + paddle.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) + paddle.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) + paddle.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5) def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, test(), 1000, "conl105_train") - paddle.v2.dataset.common.convert(path, test(), 1000, "conl105_test") + paddle.dataset.common.convert(path, test(), 1000, "conl105_train") + paddle.dataset.common.convert(path, test(), 1000, "conl105_test") diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/dataset/flowers.py similarity index 99% rename from python/paddle/v2/dataset/flowers.py rename to python/paddle/dataset/flowers.py index 7bdddeaab..f082e33be 100644 --- a/python/paddle/v2/dataset/flowers.py +++ b/python/paddle/dataset/flowers.py @@ -34,8 +34,8 @@ import functools from common import download import tarfile import scipy.io as scio -from paddle.v2.image import * -from paddle.v2.reader import * +from paddle.dataset.image import * +from paddle.reader import * import os import numpy as np from multiprocessing import cpu_count diff --git a/python/paddle/v2/image.py b/python/paddle/dataset/image.py similarity index 100% rename from python/paddle/v2/image.py rename to python/paddle/dataset/image.py diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/dataset/imdb.py similarity index 91% rename from python/paddle/v2/dataset/imdb.py rename to python/paddle/dataset/imdb.py index 37c4296f9..5ff05b1e9 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/dataset/imdb.py @@ -20,7 +20,7 @@ of 25,000 highly polar movie reviews for training, and 25,000 for testing. Besides, this module also provides API for building dictionary. """ -import paddle.v2.dataset.common +import paddle.dataset.common import collections import tarfile import re @@ -37,8 +37,7 @@ def tokenize(pattern): Read files that match the given pattern. Tokenize and yield each file. 
""" - with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb', - MD5)) as tarf: + with tarfile.open(paddle.dataset.common.download(URL, 'imdb', MD5)) as tarf: # Note that we should use tarfile.next(), which does # sequential access of member files, other than # tarfile.extractfile, which does random access and might @@ -136,7 +135,7 @@ def word_dict(): def fetch(): - paddle.v2.dataset.common.download(URL, 'imdb', MD5) + paddle.dataset.common.download(URL, 'imdb', MD5) def convert(path): @@ -144,5 +143,5 @@ def convert(path): Converts dataset to recordio format """ w = word_dict() - paddle.v2.dataset.common.convert(path, lambda: train(w), 1000, "imdb_train") - paddle.v2.dataset.common.convert(path, lambda: test(w), 1000, "imdb_test") + paddle.dataset.common.convert(path, lambda: train(w), 1000, "imdb_train") + paddle.dataset.common.convert(path, lambda: test(w), 1000, "imdb_test") diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/dataset/imikolov.py similarity index 86% rename from python/paddle/v2/dataset/imikolov.py rename to python/paddle/dataset/imikolov.py index 617c722c4..c6c0a0f54 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/dataset/imikolov.py @@ -18,7 +18,7 @@ This module will download dataset from http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set into paddle reader creators. """ -import paddle.v2.dataset.common +import paddle.dataset.common import collections import tarfile @@ -54,9 +54,9 @@ def build_dict(min_word_freq=50): train_filename = './simple-examples/data/ptb.train.txt' test_filename = './simple-examples/data/ptb.valid.txt' with tarfile.open( - paddle.v2.dataset.common.download( - paddle.v2.dataset.imikolov.URL, 'imikolov', - paddle.v2.dataset.imikolov.MD5)) as tf: + paddle.dataset.common.download(paddle.dataset.imikolov.URL, + 'imikolov', + paddle.dataset.imikolov.MD5)) as tf: trainf = tf.extractfile(train_filename) testf = tf.extractfile(test_filename) word_freq = word_count(testf, word_count(trainf)) @@ -77,9 +77,9 @@ def build_dict(min_word_freq=50): def reader_creator(filename, word_idx, n, data_type): def reader(): with tarfile.open( - paddle.v2.dataset.common.download( - paddle.v2.dataset.imikolov.URL, 'imikolov', - paddle.v2.dataset.imikolov.MD5)) as tf: + paddle.dataset.common.download( + paddle.dataset.imikolov.URL, 'imikolov', + paddle.dataset.imikolov.MD5)) as tf: f = tf.extractfile(filename) UNK = word_idx[''] @@ -145,7 +145,7 @@ def test(word_idx, n, data_type=DataType.NGRAM): def fetch(): - paddle.v2.dataset.common.download(URL, "imikolov", MD5) + paddle.dataset.common.download(URL, "imikolov", MD5) def convert(path): @@ -154,8 +154,7 @@ def convert(path): """ N = 5 word_dict = build_dict() - paddle.v2.dataset.common.convert(path, - train(word_dict, N), 1000, - "imikolov_train") - paddle.v2.dataset.common.convert(path, - test(word_dict, N), 1000, "imikolov_test") + paddle.dataset.common.convert(path, + train(word_dict, N), 1000, "imikolov_train") + paddle.dataset.common.convert(path, + test(word_dict, N), 1000, "imikolov_test") diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/dataset/mnist.py similarity index 76% rename from python/paddle/v2/dataset/mnist.py rename to python/paddle/dataset/mnist.py index 9f675bed8..6a1b8b5fa 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/dataset/mnist.py @@ -17,7 +17,7 @@ MNIST dataset. 
This module will download dataset from http://yann.lecun.com/exdb/mnist/ and parse training set and test set into paddle reader creators. """ -import paddle.v2.dataset.common +import paddle.dataset.common import subprocess import numpy import platform @@ -85,10 +85,10 @@ def train(): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', - TRAIN_IMAGE_MD5), - paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', - TRAIN_LABEL_MD5), 100) + paddle.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', + TRAIN_IMAGE_MD5), + paddle.dataset.common.download(TRAIN_LABEL_URL, 'mnist', + TRAIN_LABEL_MD5), 100) def test(): @@ -102,22 +102,21 @@ def test(): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', - TEST_IMAGE_MD5), - paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', - TEST_LABEL_MD5), 100) + paddle.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5), + paddle.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5), + 100) def fetch(): - paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5) - paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) - paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) - paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) + paddle.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5) + paddle.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) + paddle.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) + paddle.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train(), 1000, "minist_train") - paddle.v2.dataset.common.convert(path, test(), 1000, "minist_test") + paddle.dataset.common.convert(path, train(), 1000, "minist_train") + paddle.dataset.common.convert(path, test(), 1000, "minist_test") diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/dataset/movielens.py similarity index 95% rename from python/paddle/v2/dataset/movielens.py rename to python/paddle/dataset/movielens.py index 5b61a9420..ab1171620 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/dataset/movielens.py @@ -23,7 +23,7 @@ set and test set into paddle reader creators. 
""" import zipfile -import paddle.v2.dataset.common +import paddle.dataset.common import re import random import functools @@ -100,7 +100,7 @@ USER_INFO = None def __initialize_meta_info__(): - fn = paddle.v2.dataset.common.download(URL, "movielens", MD5) + fn = paddle.dataset.common.download(URL, "movielens", MD5) global MOVIE_INFO if MOVIE_INFO is None: pattern = re.compile(r'^(.*)\((\d+)\)$') @@ -247,15 +247,15 @@ def unittest(): def fetch(): - paddle.v2.dataset.common.download(URL, "movielens", MD5) + paddle.dataset.common.download(URL, "movielens", MD5) def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train(), 1000, "movielens_train") - paddle.v2.dataset.common.convert(path, test(), 1000, "movielens_test") + paddle.dataset.common.convert(path, train(), 1000, "movielens_train") + paddle.dataset.common.convert(path, test(), 1000, "movielens_test") if __name__ == '__main__': diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/dataset/mq2007.py similarity index 100% rename from python/paddle/v2/dataset/mq2007.py rename to python/paddle/dataset/mq2007.py diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/dataset/sentiment.py similarity index 87% rename from python/paddle/v2/dataset/sentiment.py rename to python/paddle/dataset/sentiment.py index b0b9757c1..f5461164f 100644 --- a/python/paddle/v2/dataset/sentiment.py +++ b/python/paddle/dataset/sentiment.py @@ -26,7 +26,7 @@ from itertools import chain import nltk from nltk.corpus import movie_reviews -import paddle.v2.dataset.common +import paddle.dataset.common __all__ = ['train', 'test', 'get_word_dict', 'convert'] NUM_TRAINING_INSTANCES = 1600 @@ -39,13 +39,13 @@ def download_data_if_not_yet(): """ try: # make sure that nltk can find the data - if paddle.v2.dataset.common.DATA_HOME not in nltk.data.path: - nltk.data.path.append(paddle.v2.dataset.common.DATA_HOME) + if paddle.dataset.common.DATA_HOME not in nltk.data.path: + nltk.data.path.append(paddle.dataset.common.DATA_HOME) movie_reviews.categories() except LookupError: print "Downloading movie_reviews data set, please wait....." nltk.download( - 'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME) + 'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME) print "Download data set success....." 
print "Path is " + nltk.data.find('corpora/movie_reviews').path @@ -129,13 +129,12 @@ def test(): def fetch(): - nltk.download( - 'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME) + nltk.download('movie_reviews', download_dir=paddle.dataset.common.DATA_HOME) def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train, 1000, "sentiment_train") - paddle.v2.dataset.common.convert(path, test, 1000, "sentiment_test") + paddle.dataset.common.convert(path, train, 1000, "sentiment_train") + paddle.dataset.common.convert(path, test, 1000, "sentiment_test") diff --git a/python/paddle/dataset/tests/CMakeLists.txt b/python/paddle/dataset/tests/CMakeLists.txt new file mode 100644 index 000000000..485c38a13 --- /dev/null +++ b/python/paddle/dataset/tests/CMakeLists.txt @@ -0,0 +1 @@ +py_test(test_image SRCS test_image.py) diff --git a/python/paddle/v2/tests/cat.jpg b/python/paddle/dataset/tests/cat.jpg similarity index 100% rename from python/paddle/v2/tests/cat.jpg rename to python/paddle/dataset/tests/cat.jpg diff --git a/python/paddle/v2/dataset/tests/cifar_test.py b/python/paddle/dataset/tests/cifar_test.py similarity index 88% rename from python/paddle/v2/dataset/tests/cifar_test.py rename to python/paddle/dataset/tests/cifar_test.py index e0e18229d..839125b09 100644 --- a/python/paddle/v2/dataset/tests/cifar_test.py +++ b/python/paddle/dataset/tests/cifar_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.v2.dataset.cifar +import paddle.dataset.cifar import unittest @@ -29,25 +29,25 @@ class TestCIFAR(unittest.TestCase): def test_test10(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.test10()) + paddle.dataset.cifar.test10()) self.assertEqual(instances, 10000) self.assertEqual(max_label_value, 9) def test_train10(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.train10()) + paddle.dataset.cifar.train10()) self.assertEqual(instances, 50000) self.assertEqual(max_label_value, 9) def test_test100(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.test100()) + paddle.dataset.cifar.test100()) self.assertEqual(instances, 10000) self.assertEqual(max_label_value, 99) def test_train100(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.cifar.train100()) + paddle.dataset.cifar.train100()) self.assertEqual(instances, 50000) self.assertEqual(max_label_value, 99) diff --git a/python/paddle/v2/dataset/tests/common_test.py b/python/paddle/dataset/tests/common_test.py similarity index 81% rename from python/paddle/v2/dataset/tests/common_test.py rename to python/paddle/dataset/tests/common_test.py index cfa194eba..e7cc02aa8 100644 --- a/python/paddle/v2/dataset/tests/common_test.py +++ b/python/paddle/dataset/tests/common_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.v2.dataset.common +import paddle.dataset.common import unittest import tempfile import glob @@ -24,14 +24,14 @@ class TestCommon(unittest.TestCase): with open(temp_path, 'w') as f: f.write("Hello\n") self.assertEqual('09f7e02f1290be211da707a266f153b3', - paddle.v2.dataset.common.md5file(temp_path)) + paddle.dataset.common.md5file(temp_path)) def test_download(self): yi_avatar = 'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460' self.assertEqual( - paddle.v2.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460', - paddle.v2.dataset.common.download( - yi_avatar, 'test', 'f75287202d6622414c706c36c16f8e0d')) + paddle.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460', + paddle.dataset.common.download(yi_avatar, 'test', + 'f75287202d6622414c706c36c16f8e0d')) def test_split(self): def test_reader(): @@ -42,7 +42,7 @@ class TestCommon(unittest.TestCase): return reader _, temp_path = tempfile.mkstemp() - paddle.v2.dataset.common.split( + paddle.dataset.common.split( test_reader(), 4, suffix=temp_path + '/test-%05d.pickle') files = glob.glob(temp_path + '/test-%05d.pickle') self.assertEqual(len(files), 3) @@ -52,7 +52,7 @@ class TestCommon(unittest.TestCase): for x in xrange(5): with open(temp_path + '/%05d.test' % x) as f: f.write('%d\n' % x) - reader = paddle.v2.dataset.common.cluster_files_reader( + reader = paddle.dataset.common.cluster_files_reader( temp_path + '/*.test', 5, 0) for idx, e in enumerate(reader()): self.assertEqual(e, str("0")) @@ -69,9 +69,9 @@ class TestCommon(unittest.TestCase): return reader path = tempfile.mkdtemp() - paddle.v2.dataset.common.convert(path, - test_reader(), num_shards, - 'random_images') + paddle.dataset.common.convert(path, + test_reader(), num_shards, + 'random_images') files = glob.glob(path + '/random_images-*') self.assertEqual(len(files), num_shards) diff --git a/python/paddle/v2/dataset/tests/flowers_test.py b/python/paddle/dataset/tests/flowers_test.py similarity index 89% rename from python/paddle/v2/dataset/tests/flowers_test.py rename to python/paddle/dataset/tests/flowers_test.py index a8ae9a07a..06260fd79 100644 --- a/python/paddle/v2/dataset/tests/flowers_test.py +++ b/python/paddle/dataset/tests/flowers_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.v2.dataset.flowers +import paddle.dataset.flowers import unittest @@ -30,19 +30,19 @@ class TestFlowers(unittest.TestCase): def test_train(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.flowers.train()) + paddle.dataset.flowers.train()) self.assertEqual(instances, 6149) self.assertEqual(max_label_value, 102) def test_test(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.flowers.test()) + paddle.dataset.flowers.test()) self.assertEqual(instances, 1020) self.assertEqual(max_label_value, 102) def test_valid(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.flowers.valid()) + paddle.dataset.flowers.valid()) self.assertEqual(instances, 1020) self.assertEqual(max_label_value, 102) diff --git a/python/paddle/v2/dataset/tests/imdb_test.py b/python/paddle/dataset/tests/imdb_test.py similarity index 77% rename from python/paddle/v2/dataset/tests/imdb_test.py rename to python/paddle/dataset/tests/imdb_test.py index c4d82f268..539da0494 100644 --- a/python/paddle/v2/dataset/tests/imdb_test.py +++ b/python/paddle/dataset/tests/imdb_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.v2.dataset.imdb +import paddle.dataset.imdb import unittest import re @@ -30,15 +30,13 @@ class TestIMDB(unittest.TestCase): def test_build_dict(self): if self.word_idx == None: - self.word_idx = paddle.v2.dataset.imdb.build_dict(TRAIN_PATTERN, - 150) + self.word_idx = paddle.dataset.imdb.build_dict(TRAIN_PATTERN, 150) self.assertEqual(len(self.word_idx), 7036) def check_dataset(self, dataset, expected_size): if self.word_idx == None: - self.word_idx = paddle.v2.dataset.imdb.build_dict(TRAIN_PATTERN, - 150) + self.word_idx = paddle.dataset.imdb.build_dict(TRAIN_PATTERN, 150) sum = 0 for l in dataset(self.word_idx): @@ -47,10 +45,10 @@ class TestIMDB(unittest.TestCase): self.assertEqual(sum, expected_size) def test_train(self): - self.check_dataset(paddle.v2.dataset.imdb.train, 25000) + self.check_dataset(paddle.dataset.imdb.train, 25000) def test_test(self): - self.check_dataset(paddle.v2.dataset.imdb.test, 25000) + self.check_dataset(paddle.dataset.imdb.test, 25000) if __name__ == '__main__': diff --git a/python/paddle/v2/dataset/tests/imikolov_test.py b/python/paddle/dataset/tests/imikolov_test.py similarity index 79% rename from python/paddle/v2/dataset/tests/imikolov_test.py rename to python/paddle/dataset/tests/imikolov_test.py index 714a75d6f..233fd9fc8 100644 --- a/python/paddle/v2/dataset/tests/imikolov_test.py +++ b/python/paddle/dataset/tests/imikolov_test.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.v2.dataset.imikolov +import paddle.dataset.imikolov import unittest -WORD_DICT = paddle.v2.dataset.imikolov.build_dict() +WORD_DICT = paddle.dataset.imikolov.build_dict() class TestMikolov(unittest.TestCase): @@ -25,7 +25,7 @@ class TestMikolov(unittest.TestCase): def test_train(self): n = 5 - self.check_reader(paddle.v2.dataset.imikolov.train(WORD_DICT, n), n) + self.check_reader(paddle.dataset.imikolov.train(WORD_DICT, n), n) first_line = 'aer banknote berlitz calloway centrust cluett fromstein '\ 'gitano guterman hydro-quebec ipo kia memotec mlx nahb punts '\ @@ -34,16 +34,16 @@ class TestMikolov(unittest.TestCase): WORD_DICT.get(ch, WORD_DICT['']) for ch in first_line.split(' ') ] - for l in paddle.v2.dataset.imikolov.train( + for l in paddle.dataset.imikolov.train( WORD_DICT, n=-1, - data_type=paddle.v2.dataset.imikolov.DataType.SEQ)(): + data_type=paddle.dataset.imikolov.DataType.SEQ)(): read_line = l[0][1:] break self.assertEqual(first_line, read_line) def test_test(self): n = 5 - self.check_reader(paddle.v2.dataset.imikolov.test(WORD_DICT, n), n) + self.check_reader(paddle.dataset.imikolov.test(WORD_DICT, n), n) first_line = 'consumers may want to move their telephones a little '\ 'closer to the tv set' @@ -51,9 +51,9 @@ class TestMikolov(unittest.TestCase): WORD_DICT.get(ch, WORD_DICT['']) for ch in first_line.split(' ') ] - for l in paddle.v2.dataset.imikolov.test( + for l in paddle.dataset.imikolov.test( WORD_DICT, n=-1, - data_type=paddle.v2.dataset.imikolov.DataType.SEQ)(): + data_type=paddle.dataset.imikolov.DataType.SEQ)(): read_line = l[0][1:] break self.assertEqual(first_line, read_line) diff --git a/python/paddle/v2/dataset/tests/mnist_test.py b/python/paddle/dataset/tests/mnist_test.py similarity index 91% rename from python/paddle/v2/dataset/tests/mnist_test.py rename to python/paddle/dataset/tests/mnist_test.py index 1d344cac3..8ada19d3f 100644 --- a/python/paddle/v2/dataset/tests/mnist_test.py +++ b/python/paddle/dataset/tests/mnist_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.v2.dataset.mnist +import paddle.dataset.mnist import unittest @@ -29,13 +29,13 @@ class TestMNIST(unittest.TestCase): def test_train(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.mnist.train()) + paddle.dataset.mnist.train()) self.assertEqual(instances, 60000) self.assertEqual(max_label_value, 9) def test_test(self): instances, max_label_value = self.check_reader( - paddle.v2.dataset.mnist.test()) + paddle.dataset.mnist.test()) self.assertEqual(instances, 10000) self.assertEqual(max_label_value, 9) diff --git a/python/paddle/v2/dataset/tests/mq2007_test.py b/python/paddle/dataset/tests/mq2007_test.py similarity index 85% rename from python/paddle/v2/dataset/tests/mq2007_test.py rename to python/paddle/dataset/tests/mq2007_test.py index 59847b6c1..fba388724 100644 --- a/python/paddle/v2/dataset/tests/mq2007_test.py +++ b/python/paddle/dataset/tests/mq2007_test.py @@ -12,19 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.v2.dataset.mq2007 +import paddle.dataset.mq2007 import unittest class TestMQ2007(unittest.TestCase): def test_pairwise(self): - for label, query_left, query_right in paddle.v2.dataset.mq2007.test( + for label, query_left, query_right in paddle.dataset.mq2007.test( format="pairwise"): self.assertEqual(query_left.shape(), (46, )) self.assertEqual(query_right.shape(), (46, )) def test_listwise(self): - for label_array, query_array in paddle.v2.dataset.mq2007.test( + for label_array, query_array in paddle.dataset.mq2007.test( format="listwise"): self.assertEqual(len(label_array), len(query_array)) diff --git a/python/paddle/v2/tests/test_image.py b/python/paddle/dataset/tests/test_image.py similarity index 97% rename from python/paddle/v2/tests/test_image.py rename to python/paddle/dataset/tests/test_image.py index c78bbdc40..8bd56607a 100644 --- a/python/paddle/v2/tests/test_image.py +++ b/python/paddle/dataset/tests/test_image.py @@ -15,7 +15,7 @@ import unittest import numpy as np -import paddle.v2.image as image +import paddle.dataset.image as image class Image(unittest.TestCase): diff --git a/python/paddle/v2/dataset/tests/test_sentiment.py b/python/paddle/dataset/tests/test_sentiment.py similarity index 97% rename from python/paddle/v2/dataset/tests/test_sentiment.py rename to python/paddle/dataset/tests/test_sentiment.py index 407405290..543f4b737 100644 --- a/python/paddle/v2/dataset/tests/test_sentiment.py +++ b/python/paddle/dataset/tests/test_sentiment.py @@ -17,7 +17,7 @@ import unittest import nltk -import paddle.v2.dataset.sentiment as st +import paddle.dataset.sentiment as st from nltk.corpus import movie_reviews diff --git a/python/paddle/v2/dataset/tests/voc2012_test.py b/python/paddle/dataset/tests/voc2012_test.py similarity index 82% rename from python/paddle/v2/dataset/tests/voc2012_test.py rename to python/paddle/dataset/tests/voc2012_test.py index 31e72ebf5..0d285461a 100644 --- a/python/paddle/v2/dataset/tests/voc2012_test.py +++ b/python/paddle/dataset/tests/voc2012_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.v2.dataset.voc2012 +import paddle.dataset.voc2012 import unittest @@ -26,15 +26,15 @@ class TestVOC(unittest.TestCase): return sum def test_train(self): - count = self.check_reader(paddle.v2.dataset.voc_seg.train()) + count = self.check_reader(paddle.dataset.voc_seg.train()) self.assertEqual(count, 2913) def test_test(self): - count = self.check_reader(paddle.v2.dataset.voc_seg.test()) + count = self.check_reader(paddle.dataset.voc_seg.test()) self.assertEqual(count, 1464) def test_val(self): - count = self.check_reader(paddle.v2.dataset.voc_seg.val()) + count = self.check_reader(paddle.dataset.voc_seg.val()) self.assertEqual(count, 1449) diff --git a/python/paddle/v2/dataset/tests/wmt16_test.py b/python/paddle/dataset/tests/wmt16_test.py similarity index 89% rename from python/paddle/v2/dataset/tests/wmt16_test.py rename to python/paddle/dataset/tests/wmt16_test.py index cef6c3216..8b949d8bf 100644 --- a/python/paddle/v2/dataset/tests/wmt16_test.py +++ b/python/paddle/dataset/tests/wmt16_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.v2.dataset.wmt16 +import paddle.dataset.wmt16 import unittest @@ -34,28 +34,28 @@ class TestWMT16(unittest.TestCase): def test_train(self): for idx, sample in enumerate( - paddle.v2.dataset.wmt16.train( + paddle.dataset.wmt16.train( src_dict_size=100000, trg_dict_size=100000)()): if idx >= 10: break self.checkout_one_sample(sample) def test_test(self): for idx, sample in enumerate( - paddle.v2.dataset.wmt16.test( + paddle.dataset.wmt16.test( src_dict_size=1000, trg_dict_size=1000)()): if idx >= 10: break self.checkout_one_sample(sample) def test_val(self): for idx, sample in enumerate( - paddle.v2.dataset.wmt16.validation( + paddle.dataset.wmt16.validation( src_dict_size=1000, trg_dict_size=1000)()): if idx >= 10: break self.checkout_one_sample(sample) def test_get_dict(self): dict_size = 1000 - word_dict = paddle.v2.dataset.wmt16.get_dict("en", dict_size, True) + word_dict = paddle.dataset.wmt16.get_dict("en", dict_size, True) self.assertEqual(len(word_dict), dict_size) self.assertEqual(word_dict[0], "") self.assertEqual(word_dict[1], "") diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/dataset/uci_housing.py similarity index 82% rename from python/paddle/v2/dataset/uci_housing.py rename to python/paddle/dataset/uci_housing.py index f10bf7e42..6a56e9d55 100644 --- a/python/paddle/v2/dataset/uci_housing.py +++ b/python/paddle/dataset/uci_housing.py @@ -21,8 +21,7 @@ parse training set and test set into paddle reader creators. import numpy as np import os -import paddle.v2.dataset.common -from paddle.v2.parameters import Parameters +import paddle.dataset.common __all__ = ['train', 'test'] @@ -85,7 +84,7 @@ def train(): :rtype: callable """ global UCI_TRAIN_DATA - load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)) + load_data(paddle.dataset.common.download(URL, 'uci_housing', MD5)) def reader(): for d in UCI_TRAIN_DATA: @@ -105,7 +104,7 @@ def test(): :rtype: callable """ global UCI_TEST_DATA - load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)) + load_data(paddle.dataset.common.download(URL, 'uci_housing', MD5)) def reader(): for d in UCI_TEST_DATA: @@ -114,21 +113,13 @@ def test(): return reader -def model(): - tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'fit_a_line.tar', - MD5_MODEL) - with open(tar_file, 'r') as f: - parameters = Parameters.from_tar(f) - return parameters - - def fetch(): - paddle.v2.dataset.common.download(URL, 'uci_housing', MD5) + paddle.dataset.common.download(URL, 'uci_housing', MD5) def convert(path): """ Converts dataset to recordio format """ - paddle.v2.dataset.common.convert(path, train(), 1000, "uci_housing_train") - paddle.v2.dataset.common.convert(path, test(), 1000, "uci_houseing_test") + paddle.dataset.common.convert(path, train(), 1000, "uci_housing_train") + paddle.dataset.common.convert(path, test(), 1000, "uci_houseing_test") diff --git a/python/paddle/v2/dataset/voc2012.py b/python/paddle/dataset/voc2012.py similarity index 97% rename from python/paddle/v2/dataset/voc2012.py rename to python/paddle/dataset/voc2012.py index 617e212d6..9c945574d 100644 --- a/python/paddle/v2/dataset/voc2012.py +++ b/python/paddle/dataset/voc2012.py @@ -22,8 +22,8 @@ with segmentation has been increased from 7,062 to 9,993. 
import tarfile import io import numpy as np -from paddle.v2.dataset.common import download -from paddle.v2.image import * +from paddle.dataset.common import download +from paddle.dataset.image import * from PIL import Image __all__ = ['train', 'test', 'val'] diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/dataset/wmt14.py similarity index 84% rename from python/paddle/v2/dataset/wmt14.py rename to python/paddle/dataset/wmt14.py index 5104e2905..f0908c737 100644 --- a/python/paddle/v2/dataset/wmt14.py +++ b/python/paddle/dataset/wmt14.py @@ -22,8 +22,7 @@ parse training set and test set into paddle reader creators. import tarfile import gzip -import paddle.v2.dataset.common -from paddle.v2.parameters import Parameters +import paddle.dataset.common __all__ = [ 'train', @@ -123,7 +122,7 @@ def train(dict_size): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size) @@ -139,27 +138,20 @@ def test(dict_size): :rtype: callable """ return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size) def gen(dict_size): return reader_creator( - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'gen/gen', dict_size) -def model(): - tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) - with gzip.open(tar_file, 'r') as f: - parameters = Parameters.from_tar(f) - return parameters - - def get_dict(dict_size, reverse=True): # if reverse = False, return dict = {'a':'001', 'b':'002', ...} # else reverse = true, return dict = {'001':'a', '002':'b', ...} - tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) + tar_file = paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) src_dict, trg_dict = __read_to_dict(tar_file, dict_size) if reverse: src_dict = {v: k for k, v in src_dict.items()} @@ -168,8 +160,8 @@ def get_dict(dict_size, reverse=True): def fetch(): - paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) - paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) + paddle.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) + paddle.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) def convert(path): @@ -177,6 +169,5 @@ def convert(path): Converts dataset to recordio format """ dict_size = 30000 - paddle.v2.dataset.common.convert(path, - train(dict_size), 1000, "wmt14_train") - paddle.v2.dataset.common.convert(path, test(dict_size), 1000, "wmt14_test") + paddle.dataset.common.convert(path, train(dict_size), 1000, "wmt14_train") + paddle.dataset.common.convert(path, test(dict_size), 1000, "wmt14_test") diff --git a/python/paddle/v2/dataset/wmt16.py b/python/paddle/dataset/wmt16.py similarity index 94% rename from python/paddle/v2/dataset/wmt16.py rename to python/paddle/dataset/wmt16.py index c8818f715..ad23338a9 100644 --- a/python/paddle/v2/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -33,7 +33,7 @@ import tarfile import gzip from collections import defaultdict -import paddle.v2.dataset.common +import paddle.dataset.common __all__ = [ "train", @@ -76,7 +76,7 @@ def __build_dict(tar_file, dict_size, save_path, lang): def __load_dict(tar_file, dict_size, lang, reverse=False): - dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME, + dict_path = 
os.path.join(paddle.dataset.common.DATA_HOME, "wmt16/%s_%d.dict" % (lang, dict_size)) if not os.path.exists(dict_path) or ( len(open(dict_path, "r").readlines()) != dict_size): @@ -178,8 +178,8 @@ def train(src_dict_size, trg_dict_size, src_lang="en"): src_lang) return reader_creator( - tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz"), + tar_file=paddle.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, + "wmt16.tar.gz"), file_name="wmt16/train", src_dict_size=src_dict_size, trg_dict_size=trg_dict_size, @@ -227,8 +227,8 @@ def test(src_dict_size, trg_dict_size, src_lang="en"): src_lang) return reader_creator( - tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz"), + tar_file=paddle.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, + "wmt16.tar.gz"), file_name="wmt16/test", src_dict_size=src_dict_size, trg_dict_size=trg_dict_size, @@ -274,8 +274,8 @@ def validation(src_dict_size, trg_dict_size, src_lang="en"): src_lang) return reader_creator( - tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, - "wmt16.tar.gz"), + tar_file=paddle.dataset.common.download(DATA_URL, "wmt16", DATA_MD5, + "wmt16.tar.gz"), file_name="wmt16/val", src_dict_size=src_dict_size, trg_dict_size=trg_dict_size, @@ -303,12 +303,12 @@ def get_dict(lang, dict_size, reverse=False): if lang == "en": dict_size = min(dict_size, TOTAL_EN_WORDS) else: dict_size = min(dict_size, TOTAL_DE_WORDS) - dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME, + dict_path = os.path.join(paddle.dataset.common.DATA_HOME, "wmt16/%s_%d.dict" % (lang, dict_size)) assert os.path.exists(dict_path), "Word dictionary does not exist. " "Please invoke paddle.dataset.wmt16.train/test/validation first " "to build the dictionary." - tar_file = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16.tar.gz") + tar_file = os.path.join(paddle.dataset.common.DATA_HOME, "wmt16.tar.gz") return __load_dict(tar_file, dict_size, lang, reverse) @@ -323,7 +323,7 @@ def convert(path, src_dict_size, trg_dict_size, src_lang): """Converts dataset to recordio format. """ - paddle.v2.dataset.common.convert( + paddle.dataset.common.convert( path, train( src_dict_size=src_dict_size, @@ -331,7 +331,7 @@ def convert(path, src_dict_size, trg_dict_size, src_lang): src_lang=src_lang), 1000, "wmt16_train") - paddle.v2.dataset.common.convert( + paddle.dataset.common.convert( path, test( src_dict_size=src_dict_size, @@ -339,7 +339,7 @@ def convert(path, src_dict_size, trg_dict_size, src_lang): src_lang=src_lang), 1000, "wmt16_test") - paddle.v2.dataset.common.convert( + paddle.dataset.common.convert( path, validation( src_dict_size=src_dict_size, diff --git a/python/paddle/fluid/tests/book/notest_rnn_encoder_decoer.py b/python/paddle/fluid/tests/book/notest_rnn_encoder_decoer.py index 983f8f4db..ce640dece 100644 --- a/python/paddle/fluid/tests/book/notest_rnn_encoder_decoer.py +++ b/python/paddle/fluid/tests/book/notest_rnn_encoder_decoer.py @@ -13,7 +13,7 @@ # limitations under the License. 
import numpy as np -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import paddle.fluid.core as core import paddle.fluid.framework as framework diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index 93ef66851..6dfc2997a 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import contextlib import numpy diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py index b01c1875d..e8bb082be 100644 --- a/python/paddle/fluid/tests/book/test_image_classification.py +++ b/python/paddle/fluid/tests/book/test_image_classification.py @@ -14,7 +14,7 @@ from __future__ import print_function -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import contextlib import math diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index f488527e0..c0a6df831 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -15,8 +15,8 @@ import math import numpy as np -import paddle.v2 as paddle -import paddle.v2.dataset.conll05 as conll05 +import paddle +import paddle.dataset.conll05 as conll05 import paddle.fluid as fluid from paddle.fluid.initializer import init_on_cpu import contextlib diff --git a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py index 3a1a0859e..830d78df8 100644 --- a/python/paddle/fluid/tests/book/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/test_machine_translation.py @@ -14,7 +14,7 @@ import contextlib import numpy as np -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework import paddle.fluid.layers as pd diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index e85b97a7f..e4997b406 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -14,7 +14,7 @@ from __future__ import print_function import argparse import paddle.fluid as fluid -import paddle.v2 as paddle +import paddle import sys import numpy import unittest diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 2ce66d32c..2172c275b 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -16,7 +16,7 @@ import math import sys import os import numpy as np -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework import paddle.fluid.layers as layers diff --git a/python/paddle/fluid/tests/book/test_understand_sentiment.py b/python/paddle/fluid/tests/book/test_understand_sentiment.py index d2f3f7404..dedd15377 100644 --- a/python/paddle/fluid/tests/book/test_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/test_understand_sentiment.py @@ -15,7 +15,7 @@ from __future__ import print_function import unittest import 
paddle.fluid as fluid
-import paddle.v2 as paddle
+import paddle
 import contextlib
 import math
 import numpy as np
diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py
index 26b97c3e2..8929779de 100644
--- a/python/paddle/fluid/tests/book/test_word2vec.py
+++ b/python/paddle/fluid/tests/book/test_word2vec.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import paddle.v2 as paddle
+import paddle
 import paddle.fluid as fluid
 import unittest
 import os
diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py
index ad79e96b9..8818cf96f 100644
--- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py
+++ b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import numpy as np
-import paddle.v2 as paddle
+import paddle
 import paddle.fluid as fluid
 import math
 import sys
diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py
index 204669d7e..dfebb9a06 100644
--- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py
+++ b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py
@@ -16,7 +16,7 @@ from __future__ import print_function
 import sys
-import paddle.v2 as paddle
+import paddle
 import paddle.fluid as fluid
 import math
 import sys
diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py
index a24834a6f..a1ca6d981 100644
--- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py
+++ b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import numpy as np
-import paddle.v2 as paddle
+import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 import paddle.fluid.framework as framework
diff --git a/python/paddle/fluid/tests/demo/fc_gan.py b/python/paddle/fluid/tests/demo/fc_gan.py
index 7452ea2a3..8ea1b2b15 100644
--- a/python/paddle/fluid/tests/demo/fc_gan.py
+++ b/python/paddle/fluid/tests/demo/fc_gan.py
@@ -19,7 +19,7 @@ import os
 import matplotlib
 import numpy
-import paddle.v2 as paddle
+import paddle
 import paddle.fluid as fluid
 matplotlib.use('Agg')
diff --git a/python/paddle/fluid/tests/test_cpp_reader.py b/python/paddle/fluid/tests/test_cpp_reader.py
index 4b0d039b7..e54c73b29 100644
--- a/python/paddle/fluid/tests/test_cpp_reader.py
+++ b/python/paddle/fluid/tests/test_cpp_reader.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import paddle.v2 as paddle
+import paddle
 import paddle.fluid as fluid
 import numpy as np
 import sys
diff --git a/python/paddle/fluid/tests/test_error_clip.py b/python/paddle/fluid/tests/test_error_clip.py
index b2fd5ae29..89f4c6497 100644
--- a/python/paddle/fluid/tests/test_error_clip.py
+++ b/python/paddle/fluid/tests/test_error_clip.py
@@ -14,7 +14,7 @@ from __future__ import print_function
 import numpy as np
-import paddle.v2 as paddle
+import paddle
 import paddle.fluid as fluid
 BATCH_SIZE = 128
diff --git a/python/paddle/fluid/tests/test_gradient_clip.py b/python/paddle/fluid/tests/test_gradient_clip.py
index 68b682f68..d530601f1 100644
--- a/python/paddle/fluid/tests/test_gradient_clip.py
+++ b/python/paddle/fluid/tests/test_gradient_clip.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import numpy as np
-import paddle.v2 as paddle
+import paddle
 import paddle.fluid as fluid
 BATCH_SIZE = 128
diff --git a/python/paddle/fluid/tests/test_mnist_if_else_op.py b/python/paddle/fluid/tests/test_mnist_if_else_op.py
index 94395f6cf..d34f52db5 100644
--- a/python/paddle/fluid/tests/test_mnist_if_else_op.py
+++ b/python/paddle/fluid/tests/test_mnist_if_else_op.py
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import paddle
 import paddle.fluid.layers as layers
 from paddle.fluid.framework import Program, program_guard, default_main_program, default_startup_program
 from paddle.fluid.executor import Executor
 from paddle.fluid.optimizer import MomentumOptimizer
 import paddle.fluid.core as core
-import paddle.v2 as paddle
 import unittest
 import numpy as np
diff --git a/python/paddle/fluid/tests/unittests/test_dyn_rnn.py b/python/paddle/fluid/tests/unittests/test_dyn_rnn.py
index df7ab0d29..0faed94de 100644
--- a/python/paddle/fluid/tests/unittests/test_dyn_rnn.py
+++ b/python/paddle/fluid/tests/unittests/test_dyn_rnn.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import paddle.fluid as fluid
-import paddle.v2 as paddle
+import paddle
 import unittest
 import numpy
diff --git a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py
index b03a70f1b..d3f63ee2c 100644
--- a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py
+++ b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import unittest
-import paddle.v2 as paddle
+import paddle
 import paddle.fluid.core as core
 import paddle.fluid as fluid
 from paddle.fluid.backward import append_backward
diff --git a/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py b/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py
index 8add35330..0b7a29075 100644
--- a/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py
+++ b/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py
@@ -15,8 +15,8 @@
 import unittest
 import paddle.fluid as fluid
-import paddle.v2 as paddle
-import paddle.v2.dataset.mnist as mnist
+import paddle
+import paddle.dataset.mnist as mnist
 class TestMultipleReader(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/unittests/test_multiple_reader.py b/python/paddle/fluid/tests/unittests/test_multiple_reader.py
index 69f8acf81..a60a5d6c4 100644
--- a/python/paddle/fluid/tests/unittests/test_multiple_reader.py
+++ b/python/paddle/fluid/tests/unittests/test_multiple_reader.py
@@ -15,8 +15,8 @@
 import unittest
 import paddle.fluid as fluid
-import paddle.v2 as paddle
-import paddle.v2.dataset.mnist as mnist
+import paddle
+import paddle.dataset.mnist as mnist
 from shutil import copyfile
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py
index bbfd03c63..95d0f9da4 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py
@@ -16,9 +16,9 @@
 import numpy
 import unittest
 import paddle.fluid as fluid
-import paddle.v2 as paddle
-import paddle.v2.dataset.mnist as mnist
-import paddle.v2.dataset.wmt16 as wmt16
+import paddle
+import paddle.dataset.mnist as mnist
+import paddle.dataset.wmt16 as wmt16
 def simple_fc_net():
diff --git a/python/paddle/fluid/tests/unittests/test_recordio_reader.py b/python/paddle/fluid/tests/unittests/test_recordio_reader.py
index 24a0074d9..640264d82 100644
--- a/python/paddle/fluid/tests/unittests/test_recordio_reader.py
+++ b/python/paddle/fluid/tests/unittests/test_recordio_reader.py
@@ -15,8 +15,8 @@
 import unittest
 import paddle.fluid as fluid
-import paddle.v2 as paddle
-import paddle.v2.dataset.mnist as mnist
+import paddle
+import paddle.dataset.mnist as mnist
 class TestRecordIO(unittest.TestCase):
diff --git a/python/paddle/v2/reader/__init__.py b/python/paddle/reader/__init__.py
similarity index 100%
rename from python/paddle/v2/reader/__init__.py
rename to python/paddle/reader/__init__.py
diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/reader/creator.py
similarity index 62%
rename from python/paddle/v2/reader/creator.py
rename to python/paddle/reader/creator.py
index fda5246d7..4c905d959 100644
--- a/python/paddle/v2/reader/creator.py
+++ b/python/paddle/reader/creator.py
@@ -16,7 +16,7 @@ Creator package contains some simple reader creator, which could
 be used in user program.
 """
-__all__ = ['np_array', 'text_file', 'recordio', 'cloud_reader']
+__all__ = ['np_array', 'text_file', 'recordio']
 def np_array(x):
@@ -66,7 +66,7 @@ def recordio(paths, buf_size=100):
     """
     import recordio as rec
-    import paddle.v2.reader.decorator as dec
+    import paddle.reader.decorator as dec
     import cPickle as pickle
     def reader():
@@ -83,48 +83,3 @@ def recordio(paths, buf_size=100):
             f.close()
     return dec.buffered(reader, buf_size)
-
-
-pass_num = 0
-
-
-def cloud_reader(paths, etcd_endpoints, timeout_sec=5, buf_size=64):
-    """
-    Create a data reader that yield a record one by one from
-    the paths:
-    :paths: path of recordio files, can be a string or a string list.
-    :etcd_endpoints: the endpoints for etcd cluster
-    :returns: data reader of recordio files.
-
-    .. code-block:: python
-        from paddle.v2.reader.creator import cloud_reader
-        etcd_endpoints = "http://127.0.0.1:2379"
-        trainer.train.(
-            reader=cloud_reader(["/work/dataset/uci_housing/uci_housing*"], etcd_endpoints),
-        )
-    """
-    import os
-    import cPickle as pickle
-    import paddle.v2.master as master
-    c = master.client(etcd_endpoints, timeout_sec, buf_size)
-
-    if isinstance(paths, basestring):
-        path = [paths]
-    else:
-        path = paths
-    c.set_dataset(path)
-
-    def reader():
-        global pass_num
-        c.paddle_start_get_records(pass_num)
-        pass_num += 1
-
-        while True:
-            r, e = c.next_record()
-            if not r:
-                if e != -2:
-                    print "get record error: ", e
-                break
-            yield pickle.loads(r)
-
-    return reader
diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/reader/decorator.py
similarity index 100%
rename from python/paddle/v2/reader/decorator.py
rename to python/paddle/reader/decorator.py
diff --git a/python/paddle/v2/reader/tests/CMakeLists.txt b/python/paddle/reader/tests/CMakeLists.txt
similarity index 100%
rename from python/paddle/v2/reader/tests/CMakeLists.txt
rename to python/paddle/reader/tests/CMakeLists.txt
diff --git a/python/paddle/v2/reader/tests/__init__.py b/python/paddle/reader/tests/__init__.py
similarity index 100%
rename from python/paddle/v2/reader/tests/__init__.py
rename to python/paddle/reader/tests/__init__.py
diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/reader/tests/creator_test.py
similarity index 92%
rename from python/paddle/v2/reader/tests/creator_test.py
rename to python/paddle/reader/tests/creator_test.py
index 7fe374e66..c4238c12a 100644
--- a/python/paddle/v2/reader/tests/creator_test.py
+++ b/python/paddle/reader/tests/creator_test.py
@@ -28,14 +28,14 @@ import os
 import unittest
 import numpy as np
-import paddle.v2.reader.creator
+import paddle.reader.creator
 class TestNumpyArray(unittest.TestCase):
     def test_numpy_array(self):
         l = [[1, 2, 3], [4, 5, 6]]
         x = np.array(l, np.int32)
-        reader = paddle.v2.reader.creator.np_array(x)
+        reader = paddle.reader.creator.np_array(x)
         for idx, e in enumerate(reader()):
             self.assertItemsEqual(e, l[idx])
@@ -43,14 +43,14 @@ class TestNumpyArray(unittest.TestCase):
 class TestTextFile(unittest.TestCase):
     def test_text_file(self):
         path = os.path.join(os.path.dirname(__file__), "test_data_creator.txt")
-        reader = paddle.v2.reader.creator.text_file(path)
+        reader = paddle.reader.creator.text_file(path)
         for idx, e in enumerate(reader()):
             self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1))
 class TestRecordIO(unittest.TestCase):
     def do_test(self, path):
-        reader = paddle.v2.reader.creator.recordio(path)
+        reader = paddle.reader.creator.recordio(path)
         idx = 0
         for e in reader():
             if idx == 0:
diff --git a/python/paddle/v2/reader/tests/decorator_test.py b/python/paddle/reader/tests/decorator_test.py
similarity index 81%
rename from python/paddle/v2/reader/tests/decorator_test.py
rename to python/paddle/reader/tests/decorator_test.py
index 6b680e39f..bee24d3b6 100644
--- a/python/paddle/v2/reader/tests/decorator_test.py
+++ b/python/paddle/reader/tests/decorator_test.py
@@ -15,7 +15,7 @@
 import time
 import unittest
-import paddle.v2.reader
+import paddle.reader
 def reader_creator_10(dur):
@@ -39,7 +39,7 @@ class TestMap(unittest.TestCase):
             yield "h"
             yield "i"
-        r = paddle.v2.reader.map_readers(tokenize, read)
+        r = paddle.reader.map_readers(tokenize, read)
         for i, e in enumerate(r()):
             self.assertEqual(e, i)
@@ -47,7 +47,7 @@ class TestMap(unittest.TestCase):
 class TestBuffered(unittest.TestCase):
     def test_read(self):
         for size in range(20):
-            b = paddle.v2.reader.buffered(reader_creator_10(0), size)
+            b = paddle.reader.buffered(reader_creator_10(0), size)
             c = 0
             for i in b():
                 self.assertEqual(i, c)
@@ -56,7 +56,7 @@ class TestBuffered(unittest.TestCase):
     def test_buffering(self):
         # read have 30ms delay.
-        b = paddle.v2.reader.buffered(reader_creator_10(0.03), 10)
+        b = paddle.reader.buffered(reader_creator_10(0.03), 10)
         last_time = time.time()
         for idx, i in enumerate(b()):
             elapsed_time = time.time() - last_time
@@ -70,17 +70,17 @@ class TestBuffered(unittest.TestCase):
 class TestCompose(unittest.TestCase):
     def test_compse(self):
-        reader = paddle.v2.reader.compose(
+        reader = paddle.reader.compose(
             reader_creator_10(0), reader_creator_10(0))
         for idx, e in enumerate(reader()):
             self.assertEqual(e, (idx, idx))
     def test_compose_not_aligned(self):
         total = 0
-        reader = paddle.v2.reader.compose(
-            paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)),
+        reader = paddle.reader.compose(
+            paddle.reader.chain(reader_creator_10(0), reader_creator_10(0)),
             reader_creator_10(0))
-        with self.assertRaises(paddle.v2.reader.ComposeNotAligned):
+        with self.assertRaises(paddle.reader.ComposeNotAligned):
             for e in reader():
                 total += 1
         # expecting 10, not 20
@@ -88,8 +88,8 @@ class TestCompose(unittest.TestCase):
     def test_compose_not_aligned_no_check(self):
         total = 0
-        reader = paddle.v2.reader.compose(
-            paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)),
+        reader = paddle.reader.compose(
+            paddle.reader.chain(reader_creator_10(0), reader_creator_10(0)),
             reader_creator_10(0),
             check_alignment=False)
         for e in reader():
@@ -100,7 +100,7 @@ class TestCompose(unittest.TestCase):
 class TestChain(unittest.TestCase):
     def test_chain(self):
-        c = paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0))
+        c = paddle.reader.chain(reader_creator_10(0), reader_creator_10(0))
         idx = 0
         for e in c():
             self.assertEqual(e, idx % 10)
@@ -113,7 +113,7 @@ class TestShuffle(unittest.TestCase):
         case = [(0, True), (1, True), (10, False), (100, False)]
         a = reader_creator_10(0)
         for size, checkEq in case:
-            s = paddle.v2.reader.shuffle(a, size)
+            s = paddle.reader.shuffle(a, size)
             total = 0
             for idx, e in enumerate(s()):
                 if checkEq:
@@ -133,9 +133,9 @@ class TestXmap(unittest.TestCase):
         for order in orders:
             for tNum in thread_nums:
                 for size in buffered_size:
-                    reader = paddle.v2.reader.xmap_readers(mapper,
-                                                           reader_creator_10(0),
-                                                           tNum, size, order)
+                    reader = paddle.reader.xmap_readers(mapper,
+                                                        reader_creator_10(0),
+                                                        tNum, size, order)
                     for n in xrange(3):
                         result = []
                         for i in reader():
@@ -150,7 +150,7 @@ class TestPipeReader(unittest.TestCase):
     def test_pipe_reader(self):
         def example_reader(myfiles):
             for f in myfiles:
-                pr = paddle.v2.reader.PipeReader("cat %s" % f, bufsize=128)
+                pr = paddle.reader.PipeReader("cat %s" % f, bufsize=128)
                 for l in pr.get_line():
                     yield l
diff --git a/python/paddle/v2/reader/tests/test_data_creator.txt b/python/paddle/reader/tests/test_data_creator.txt
similarity index 100%
rename from python/paddle/v2/reader/tests/test_data_creator.txt
rename to python/paddle/reader/tests/test_data_creator.txt
diff --git a/python/paddle/v2/reader/tests/test_reader_recordio.dat b/python/paddle/reader/tests/test_reader_recordio.dat
similarity index 100%
rename from python/paddle/v2/reader/tests/test_reader_recordio.dat
rename to python/paddle/reader/tests/test_reader_recordio.dat
diff --git a/python/paddle/v2/reader/tests/test_recordio_creator.dat b/python/paddle/reader/tests/test_recordio_creator.dat
similarity index 100%
rename from python/paddle/v2/reader/tests/test_recordio_creator.dat
rename to python/paddle/reader/tests/test_recordio_creator.dat
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
index df710c33d..02b0d077e 100644
--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -22,17 +22,13 @@ import data_type
 import topology
 import networks
 import evaluator
-from . import dataset
-from . import reader
 from . import plot
 import attr
 import op
 import pooling
 import inference
 import networks
-import minibatch
 import plot
-import image
 import paddle.trainer.config_parser as cp
 __all__ = [
@@ -48,14 +44,11 @@ __all__ = [
     'data_type',
     'attr',
     'pooling',
-    'dataset',
-    'reader',
     'topology',
     'networks',
     'infer',
     'plot',
     'evaluator',
-    'image',
     'master',
 ]
@@ -153,4 +146,3 @@ def init(**kwargs):
 infer = inference.infer
-batch = minibatch.batch
diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py
index 52f5b947f..14b64742f 100644
--- a/python/paddle/v2/inference.py
+++ b/python/paddle/v2/inference.py
@@ -15,7 +15,7 @@ import numpy
 import collections
 import topology
-import minibatch
+import paddle
 import cPickle
 __all__ = ['infer', 'Inference']
@@ -80,7 +80,7 @@ class Inference(object):
             for each_sample in input:
                 yield each_sample
-        reader = minibatch.batch(__reader_impl__, batch_size=batch_size)
+        reader = paddle.batch(__reader_impl__, batch_size=batch_size)
         self.__gradient_machine__.start()
         for data_batch in reader():
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index 6a2bb8d33..a188a03eb 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -20,7 +20,7 @@ The primary usage shows below.
 .. code-block:: python
-    import paddle.v2 as paddle
+    import paddle
     img = paddle.layer.data(name='img', type=paddle.data_type.dense_vector(784))
     hidden = paddle.layer.fc(input=img, size=200)
diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt
index b4333ed53..46e4feb8e 100644
--- a/python/paddle/v2/tests/CMakeLists.txt
+++ b/python/paddle/v2/tests/CMakeLists.txt
@@ -1,5 +1,4 @@
 py_test(test_op SRCS test_op.py)
-py_test(test_image SRCS test_image.py)
 py_test(test_layer SRCS test_layer.py)
 py_test(test_topology SRCS test_topology.py)
 py_test(test_rnn_layer SRCS test_rnn_layer.py)
diff --git a/python/paddle/v2/tests/test_paramconf_order.py b/python/paddle/v2/tests/test_paramconf_order.py
index 264442be1..8320217da 100644
--- a/python/paddle/v2/tests/test_paramconf_order.py
+++ b/python/paddle/v2/tests/test_paramconf_order.py
@@ -27,6 +27,7 @@
 # limitations under the License.
 import unittest
 import math
+import paddle.dataset as dataset
 import paddle.v2 as paddle
@@ -40,7 +41,7 @@ def wordemb(inlayer):
 def train():
-    word_dict = paddle.dataset.imikolov.build_dict()
+    word_dict = dataset.imikolov.build_dict()
     dict_size = len(word_dict)
     # Every layer takes integer value of range [0, dict_size)
     firstword = paddle.layer.data(
diff --git a/python/setup.py.in b/python/setup.py.in
index 831d173d4..d73a3a6a1 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -63,6 +63,8 @@ write_version_py(filename='@PADDLE_SOURCE_DIR@/python/paddle/version.py')
 packages=['paddle',
           'paddle.utils',
+          'paddle.dataset',
+          'paddle.reader',
           'paddle.fluid',
           'paddle.fluid.proto',
           'paddle.fluid.proto.profiler',
@@ -73,8 +75,6 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
                'paddle.trainer',
                'paddle.trainer_config_helpers',
                'paddle.v2',
-               'paddle.v2.dataset',
-               'paddle.v2.reader',
                'paddle.v2.master',
                'paddle.v2.plot',
                'py_paddle']
--
GitLab
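A minimal usage sketch of the relocated packages (illustrative only, not part of the patch): the names paddle.batch, paddle.reader.shuffle and paddle.dataset.mnist come from the modules moved above; the batch and buffer sizes are arbitrary.

    import paddle
    import paddle.reader
    import paddle.dataset.mnist as mnist

    # paddle.dataset, paddle.reader and paddle.batch are now top-level,
    # so fluid programs no longer need to import paddle.v2.
    train_reader = paddle.batch(
        paddle.reader.shuffle(mnist.train(), buf_size=500), batch_size=128)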