diff --git a/.gitignore b/.gitignore index 369fa1cb919c82caec326d1429c8a2eba3b928d6..10a4262aa7e129c48d79fbe7d978720b28f4bcea 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -python/paddle/fluid/tests/unittests/reader_reset_test.recordio paddle/operators/check_t.save paddle/operators/check_tensor.ls paddle/operators/tensor.save diff --git a/paddle/fluid/operators/reader/reader_op_registry.cc b/paddle/fluid/operators/reader/reader_op_registry.cc index 64a1f6b68702f33ec72d901cf6621b674b331030..6a9506b5cd91b893540e07302d7305e11774ca74 100644 --- a/paddle/fluid/operators/reader/reader_op_registry.cc +++ b/paddle/fluid/operators/reader/reader_op_registry.cc @@ -38,21 +38,6 @@ std::unordered_map& FileReaderRegistry() { return regs; } -std::unique_ptr CreateReaderByFileName( - const std::string& file_name) { - size_t separator_pos = file_name.find_last_of(kFileFormatSeparator); - PADDLE_ENFORCE_NE(separator_pos, std::string::npos, - "File name illegal! A legal file name should be like: " - "[file_name].[file_format] (e.g., 'data_file.recordio')."); - std::string filetype = file_name.substr(separator_pos + 1); - - auto itor = FileReaderRegistry().find(filetype); - PADDLE_ENFORCE(itor != FileReaderRegistry().end(), - "No file reader registered for '%s' format.", filetype); - framework::ReaderBase* reader = (itor->second)(file_name); - return std::unique_ptr(reader); -} - void FileReaderMakerBase::Make() { AddOutput("Out", "(ReaderHolder): The created random reader.").AsDuplicable(); AddAttr>("shape_concat", "The concat of all data's shapes."); diff --git a/paddle/fluid/operators/reader/reader_op_registry.h b/paddle/fluid/operators/reader/reader_op_registry.h index 795a5806050efe6469732004125e4a80b08e5304..de0c34ad32e226cacc998767bf824e4a7c8a28ef 100644 --- a/paddle/fluid/operators/reader/reader_op_registry.h +++ b/paddle/fluid/operators/reader/reader_op_registry.h @@ -40,9 +40,6 @@ int RegisterFileReader(const std::string& filetype) { return 0; } -std::unique_ptr CreateReaderByFileName( - const std::string& file_name); - extern std::vector RestoreShapes( const std::vector& shape_concat, const std::vector& ranks); diff --git a/python/paddle/dataset/tests/common_test.py b/python/paddle/dataset/tests/common_test.py deleted file mode 100644 index 0ce7d83f374f8c09f68527473418de8ce84c36b1..0000000000000000000000000000000000000000 --- a/python/paddle/dataset/tests/common_test.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import paddle.dataset.common -import unittest -import tempfile -import glob -from six.moves import range - - -class TestCommon(unittest.TestCase): - def test_md5file(self): - _, temp_path = tempfile.mkstemp() - with open(temp_path, 'w') as f: - f.write("Hello\n") - self.assertEqual('09f7e02f1290be211da707a266f153b3', - paddle.dataset.common.md5file(temp_path)) - - def test_download(self): - yi_avatar = 'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460' - self.assertEqual( - paddle.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460', - paddle.dataset.common.download(yi_avatar, 'test', - 'f75287202d6622414c706c36c16f8e0d')) - - def test_split(self): - def test_reader(): - def reader(): - for x in range(10): - yield x - - return reader - - _, temp_path = tempfile.mkstemp() - paddle.dataset.common.split( - test_reader(), 4, suffix=temp_path + '/test-%05d.pickle') - files = glob.glob(temp_path + '/test-%05d.pickle') - self.assertEqual(len(files), 3) - - def test_cluster_file_reader(self): - _, temp_path = tempfile.mkstemp() - for x in range(5): - with open(temp_path + '/%05d.test' % x) as f: - f.write('%d\n' % x) - reader = paddle.dataset.common.cluster_files_reader( - temp_path + '/*.test', 5, 0) - for idx, e in enumerate(reader()): - self.assertEqual(e, str("0")) - - def test_convert(self): - record_num = 10 - num_shards = 4 - - def test_reader(): - def reader(): - for x in range(record_num): - yield x - - return reader - - path = tempfile.mkdtemp() - paddle.dataset.common.convert(path, - test_reader(), num_shards, - 'random_images') - - files = glob.glob(path + '/random_images-*') - self.assertEqual(len(files), num_shards) - - recs = [] - for i in range(0, num_shards): - n = "%s/random_images-%05d-of-%05d" % (path, i, num_shards - 1) - r = recordio.reader(n) - while True: - d = r.read() - if d is None: - break - recs.append(d) - - recs.sort() - self.assertEqual(total, record_num) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py index d3641b646f32ea9d581603e2bc5e9c56dd21909b..a4705e8b833a7b44ad97981aabd5cd679dcbe293 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py @@ -130,13 +130,8 @@ unsupported_fp16_list = { 'send_barrier', 'recv', 'fetch_barrier', - 'create_recordio_file_reader', - 'create_random_data_generator', 'create_py_reader', - 'create_shuffle_reader', - 'create_batch_reader', 'create_double_buffer_reader', - 'create_multi_pass_reader', 'read', 'load', diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index cfb2c42fcd8071463b71c514987d16b17c31d385..2d17a97b0ef399a852a035f0aba621a19be594c2 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -754,98 +754,6 @@ def create_py_reader_by_data(capacity, feed_list=feed_list) -def open_files(filenames, - shapes, - lod_levels, - dtypes, - thread_num=None, - buffer_size=None, - pass_num=1, - is_test=None): - """ - Open files - - This layer takes a list of files to read from and returns a Reader Variable. - Via the Reader Variable, we can get data from given files. All files must - have name suffixs to indicate their formats, e.g., '*.recordio'. - - Args: - filenames(list): The list of file names. - shapes(list): List of tuples which declaring data shapes. - lod_levels(list): List of ints which declaring data lod_level. - dtypes(list): List of strs which declaring data type. - thread_num(None): The number of thread to read files. - Default: min(len(filenames), cpu_number). - buffer_size(None): The buffer size of reader. Default: 3 * thread_num - pass_num(int): Number of passes to run. - is_test(bool|None): Whether `open_files` used for testing or not. If it - is used for testing, the order of data generated is same as the file - order. Otherwise, it is not guaranteed the order of data is same - between every epoch. [Default: False]. - - Returns: - Variable: A Reader Variable via which we can get file data. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - reader = fluid.layers.io.open_files(filenames=['./data1.recordio', - './data2.recordio'], - shapes=[(3,224,224), (1,)], - lod_levels=[0, 0], - dtypes=['float32', 'int64']) - - # Via the reader, we can use 'read_file' layer to get data: - image, label = fluid.layers.io.read_file(reader) - """ - if thread_num is None: - thread_num = min(len(filenames), multiprocessing.cpu_count()) - else: - thread_num = int(thread_num) - - if buffer_size is None: - buffer_size = 3 * thread_num - else: - buffer_size = int(buffer_size) - - if isinstance(filenames, six.string_types): - filenames = [filenames] - dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes] - shape_concat = [] - ranks = [] - - for shape in shapes: - shape_concat.extend(shape) - ranks.append(len(shape)) - - multi_file_reader_name = unique_name('multi_file_reader') - startup_blk = default_startup_program().current_block() - startup_reader = startup_blk.create_var(name=multi_file_reader_name) - attrs = { - 'shape_concat': shape_concat, - 'lod_levels': lod_levels, - 'ranks': ranks, - 'file_names': filenames, - 'thread_num': thread_num, - 'buffer_size': buffer_size - } - if is_test is not None: - attrs['is_test'] = is_test - startup_blk.append_op( - type='open_files', outputs={'Out': [startup_reader]}, attrs=attrs) - - startup_reader.desc.set_dtypes(dtypes) - startup_reader.persistable = True - main_prog_reader = _copy_reader_var_(default_main_program().current_block(), - startup_reader) - if pass_num > 1: - main_prog_reader = multi_pass( - reader=main_prog_reader, pass_num=pass_num) - - return monkey_patch_reader_methods(main_prog_reader) - - def __create_shared_decorated_reader__(op_type, reader, attrs): var_name = unique_name(op_type) startup_blk = default_startup_program().current_block() diff --git a/python/paddle/fluid/tests/demo/file_reader/.gitignore b/python/paddle/fluid/tests/demo/file_reader/.gitignore deleted file mode 100644 index 780d05b94667d3ea726e37bf9cf1b5b2baeff354..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/demo/file_reader/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.recordio diff --git a/python/paddle/fluid/tests/demo/file_reader/train.py b/python/paddle/fluid/tests/demo/file_reader/train.py deleted file mode 100644 index 5f5d2848da42e18f2a142faae0c89352344d8cee..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/demo/file_reader/train.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import paddle.fluid as fluid -import numpy -import sys - -TRAIN_FILES = ['train.recordio'] -TEST_FILES = ['test.recordio'] - -DICT_DIM = 5147 - -# embedding dim -emb_dim = 128 - -# hidden dim -hid_dim = 128 - -# class num -class_dim = 2 - -# epoch num -epoch_num = 10 - - -def build_program(is_train): - file_obj_handle = fluid.layers.io.open_files( - filenames=TRAIN_FILES if is_train else TEST_FILES, - shapes=[[-1, 1], [-1, 1]], - lod_levels=[1, 0], - dtypes=['int64', 'int64']) - - file_obj = fluid.layers.io.double_buffer(file_obj_handle) - - with fluid.unique_name.guard(): - - data, label = fluid.layers.read_file(file_obj) - - emb = fluid.layers.embedding(input=data, size=[DICT_DIM, emb_dim]) - - conv_3 = fluid.nets.sequence_conv_pool( - input=emb, - num_filters=hid_dim, - filter_size=3, - act="tanh", - pool_type="sqrt") - - conv_4 = fluid.nets.sequence_conv_pool( - input=emb, - num_filters=hid_dim, - filter_size=4, - act="tanh", - pool_type="sqrt") - - prediction = fluid.layers.fc(input=[conv_3, conv_4], - size=class_dim, - act="softmax") - - # cross entropy loss - cost = fluid.layers.cross_entropy(input=prediction, label=label) - - # mean loss - avg_cost = fluid.layers.mean(x=cost) - acc = fluid.layers.accuracy(input=prediction, label=label) - - if is_train: - # SGD optimizer - sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=0.001) - sgd_optimizer.minimize(avg_cost) - - return {'loss': avg_cost, 'log': [avg_cost, acc], 'file': file_obj_handle} - - -def main(): - train = fluid.Program() - startup = fluid.Program() - test = fluid.Program() - - with fluid.program_guard(train, startup): - train_args = build_program(is_train=True) - - with fluid.program_guard(test, startup): - test_args = build_program(is_train=False) - - use_cuda = fluid.core.is_compiled_with_cuda() - # startup - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = fluid.Executor(place=place) - exe.run(startup) - - train_exe = fluid.ParallelExecutor( - use_cuda=use_cuda, - loss_name=train_args['loss'].name, - main_program=train) - test_exe = fluid.ParallelExecutor( - use_cuda=use_cuda, main_program=test, share_vars_from=train_exe) - - fetch_var_list = [var.name for var in train_args['log']] - for epoch_id in range(epoch_num): - # train - try: - batch_id = 0 - while True: - loss, acc = map(numpy.array, - train_exe.run(fetch_list=fetch_var_list)) - print 'Train epoch', epoch_id, 'batch', batch_id, 'loss:', loss, 'acc:', acc - batch_id += 1 - except fluid.core.EOFException: - print 'End of epoch', epoch_id - train_args['file'].reset() - - # test - loss = [] - acc = [] - try: - while True: - loss_np, acc_np = map(numpy.array, - test_exe.run(fetch_list=fetch_var_list)) - loss.append(loss_np[0]) - acc.append(acc_np[0]) - except: - test_args['file'].reset() - print 'Test loss:', numpy.mean(loss), 'acc:', numpy.mean(acc) - - -if __name__ == '__main__': - main() diff --git a/python/paddle/fluid/tests/unittests/.gitignore b/python/paddle/fluid/tests/unittests/.gitignore deleted file mode 100644 index b1e8fda03aa42f5f7528eafb46c16d55b868bae5..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -mnist.recordio -mnist_0.recordio -mnist_1.recordio -mnist_2.recordio -flowers.recordio -wmt16.recordio -data_balance_test.recordio -data_balance_with_lod_test.recordio diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py index 44568ff66b61affdd5be809e23ba09597645d470..1b507042541c100942dd61065bc78d92a2c399e4 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py @@ -18,8 +18,6 @@ import paddle.fluid as fluid fluid.core._set_eager_deletion_mode(0.0, 1.0, True) -os.environ['RECORDIO_FILENAME'] = './eager_deletion_transformer.wmt16.recordio' - from test_parallel_executor_transformer import TestTransformer if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer_auto_growth.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer_auto_growth.py index 001149c07b6846ce1c2c920e8770cbe0be378823..e7afa27b7b9fed679a0f3fa8f308b5f0518bc036 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer_auto_growth.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer_auto_growth.py @@ -12,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -os.environ['RECORDIO_FILENAME'] = './auto_growth_pe_transformer.wmt16.recordio' - import unittest from test_parallel_executor_transformer import * diff --git a/python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py b/python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py index ef06e7d9fcf7597c721b19a1e13647471c83e7a6..1661f753a8464baa0c9497e9dbd0e348b5431750 100644 --- a/python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py @@ -12,12 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import unittest import paddle.fluid as fluid -os.environ['RECORDIO_FILENAME'] = './p_gc_transformer.wmt16.recordio' - fluid.core._set_eager_deletion_mode(0.0, 0.55, True) from test_parallel_executor_transformer import TestTransformer