diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 71af50a9a4ed835635362c24d6c1ae5e92de050b..48e0a1993d07f801e65dfa54a991995c593fe475 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -4,7 +4,7 @@ set(OUTPUT_DIR file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py) file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py) file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py) -file(GLOB V2_PY_FILES . ./paddle/v2/*.py) +file(GLOB_RECURSE V2_PY_FILES ./paddle/v2/ *.py) set(PY_FILES paddle/__init__.py ${TRAINER_PY_FILES} @@ -24,7 +24,7 @@ add_custom_target(paddle_python ALL DEPENDS ${OUTPUT_DIR}/.timestamp) add_subdirectory(paddle/trainer_config_helpers/tests) -add_subdirectory(paddle/reader/tests) +add_subdirectory(paddle/v2/reader/tests) add_subdirectory(paddle/v2/tests) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/ diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 49d1983a2a422b7e105c66dd92419426f0853212..1122bcb5e45727cf78031259817a84f9e36a3163 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -19,12 +19,13 @@ import trainer import event import data_type import data_feeder +from . import dataset import attr import py_paddle.swig_paddle as api __all__ = [ 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', - 'event', 'data_type', 'attr', 'data_feeder' + 'event', 'data_type', 'attr', 'data_feeder', 'dataset' ] diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9647e9850332b795f6480d26e4f9c736129782ae 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -0,0 +1,3 @@ +import mnist + +__all__ = ['mnist'] diff --git a/python/paddle/reader/__init__.py b/python/paddle/v2/reader/__init__.py similarity index 100% rename from python/paddle/reader/__init__.py rename to python/paddle/v2/reader/__init__.py diff --git a/python/paddle/reader/creator.py b/python/paddle/v2/reader/creator.py similarity index 100% rename from python/paddle/reader/creator.py rename to python/paddle/v2/reader/creator.py diff --git a/python/paddle/reader/decorator.py b/python/paddle/v2/reader/decorator.py similarity index 100% rename from python/paddle/reader/decorator.py rename to python/paddle/v2/reader/decorator.py diff --git a/python/paddle/reader/tests/CMakeLists.txt b/python/paddle/v2/reader/tests/CMakeLists.txt similarity index 100% rename from python/paddle/reader/tests/CMakeLists.txt rename to python/paddle/v2/reader/tests/CMakeLists.txt diff --git a/python/paddle/v2/reader/tests/__init__.py b/python/paddle/v2/reader/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/python/paddle/reader/tests/creator_test.py b/python/paddle/v2/reader/tests/creator_test.py similarity index 89% rename from python/paddle/reader/tests/creator_test.py rename to python/paddle/v2/reader/tests/creator_test.py index eda8ab6715b2be0c9cb6163adf60d8fbdf2d7e8c..9f8d7133b8694aae5541eff9576eaba8a31e77dc 100644 --- a/python/paddle/reader/tests/creator_test.py +++ b/python/paddle/v2/reader/tests/creator_test.py @@ -11,17 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest -import paddle.reader.creator + import numpy as np -import os + +import paddle.v2.reader.creator class TestNumpyArray(unittest.TestCase): def test_numpy_array(self): l = [[1, 2, 3], [4, 5, 6]] x = np.array(l, np.int32) - reader = paddle.reader.creator.np_array(x) + reader = paddle.v2.reader.creator.np_array(x) for idx, e in enumerate(reader()): self.assertItemsEqual(e, l[idx]) @@ -29,7 +31,7 @@ class TestNumpyArray(unittest.TestCase): class TestTextFile(unittest.TestCase): def test_text_file(self): path = os.path.join(os.path.dirname(__file__), "test_data_creator.txt") - reader = paddle.reader.creator.text_file(path) + reader = paddle.v2.reader.creator.text_file(path) for idx, e in enumerate(reader()): self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1)) diff --git a/python/paddle/reader/tests/decorator_test.py b/python/paddle/v2/reader/tests/decorator_test.py similarity index 81% rename from python/paddle/reader/tests/decorator_test.py rename to python/paddle/v2/reader/tests/decorator_test.py index 0396a61786539bf57be4cab9ebbd108ade9e7c83..734154b9790a4dc118d11992343648364c907305 100644 --- a/python/paddle/reader/tests/decorator_test.py +++ b/python/paddle/v2/reader/tests/decorator_test.py @@ -11,9 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import paddle.reader import time +import unittest + +import paddle.v2.reader def reader_creator_10(dur): @@ -37,7 +38,7 @@ class TestMap(unittest.TestCase): yield "h" yield "i" - r = paddle.reader.map_readers(tokenize, read) + r = paddle.v2.reader.map_readers(tokenize, read) for i, e in enumerate(r()): self.assertEqual(e, i) @@ -45,7 +46,7 @@ class TestMap(unittest.TestCase): class TestBuffered(unittest.TestCase): def test_read(self): for size in range(20): - b = paddle.reader.buffered(reader_creator_10(0), size) + b = paddle.v2.reader.buffered(reader_creator_10(0), size) c = 0 for i in b(): self.assertEqual(i, c) @@ -54,7 +55,7 @@ class TestBuffered(unittest.TestCase): def test_buffering(self): # read have 30ms delay. - b = paddle.reader.buffered(reader_creator_10(0.03), 10) + b = paddle.v2.reader.buffered(reader_creator_10(0.03), 10) last_time = time.time() for idx, i in enumerate(b()): elapsed_time = time.time() - last_time @@ -68,17 +69,17 @@ class TestBuffered(unittest.TestCase): class TestCompose(unittest.TestCase): def test_compse(self): - reader = paddle.reader.compose( + reader = paddle.v2.reader.compose( reader_creator_10(0), reader_creator_10(0)) for idx, e in enumerate(reader()): self.assertEqual(e, (idx, idx)) def test_compose_not_aligned(self): total = 0 - reader = paddle.reader.compose( - paddle.reader.chain(reader_creator_10(0), reader_creator_10(0)), + reader = paddle.v2.reader.compose( + paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)), reader_creator_10(0)) - with self.assertRaises(paddle.reader.ComposeNotAligned): + with self.assertRaises(paddle.v2.reader.ComposeNotAligned): for e in reader(): total += 1 # expecting 10, not 20 @@ -86,8 +87,8 @@ class TestCompose(unittest.TestCase): def test_compose_not_aligned_no_check(self): total = 0 - reader = paddle.reader.compose( - paddle.reader.chain(reader_creator_10(0), reader_creator_10(0)), + reader = paddle.v2.reader.compose( + paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)), reader_creator_10(0), check_alignment=False) for e in reader(): @@ -98,7 +99,7 @@ class TestCompose(unittest.TestCase): class TestChain(unittest.TestCase): def test_chain(self): - c = paddle.reader.chain(reader_creator_10(0), reader_creator_10(0)) + c = paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)) idx = 0 for e in c(): self.assertEqual(e, idx % 10) @@ -111,7 +112,7 @@ class TestShuffle(unittest.TestCase): case = [(0, True), (1, True), (10, False), (100, False)] a = reader_creator_10(0) for size, checkEq in case: - s = paddle.reader.shuffle(a, size) + s = paddle.v2.reader.shuffle(a, size) total = 0 for idx, e in enumerate(s()): if checkEq: diff --git a/python/paddle/reader/tests/test_data_creator.txt b/python/paddle/v2/reader/tests/test_data_creator.txt similarity index 100% rename from python/paddle/reader/tests/test_data_creator.txt rename to python/paddle/v2/reader/tests/test_data_creator.txt diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index f65c631f7bfdd4dd0a3649a5911149055ad4e10e..89415787eb2e36f29cfed8bb5144558d82337fb0 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -62,7 +62,7 @@ class SGD(ITrainer): self.__optimizer__ = update_equation def train(self, - train_data_reader, + train_reader_creator, topology, parameters, num_passes=1, @@ -74,7 +74,7 @@ class SGD(ITrainer): """ Training method. Will train num_passes of input data. - :param train_data_reader: + :param train_reader_creator: :param topology: Network Topology, use one or more Layers to represent it. :param parameters: The parameter pools. :param num_passes: The total train passes. @@ -109,7 +109,7 @@ class SGD(ITrainer): for pass_id in xrange(num_passes): updater.startPass() for batch_id, data_batch in enumerate( - __data_reader_to_batch__(train_data_reader, batch_size, + __data_reader_to_batch__(train_reader_creator, batch_size, topology)): pass_type = updater.startBatch(len(data_batch)) gm.forwardBackward(feeder(data_batch), out_args, pass_type) diff --git a/python/setup.py.in b/python/setup.py.in index 1e1324eea825ab1945a38cb43eceec29a4ebb1a1..68ca35265cf13265ad0b171b0f70e20b83006ff9 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -5,7 +5,9 @@ packages=['paddle', 'paddle.trainer', 'paddle.trainer_config_helpers', 'paddle.utils', - 'paddle.v2'] + 'paddle.v2', + 'paddle.v2.dataset', + 'paddle.v2.reader'] setup(name='paddle', version='${PADDLE_VERSION}',