提交 5dce1da6 编写于 作者: Z Zeng Jinle 提交者: Tao Luo

remove reset recordio usage (#19519)

上级 85914f7a
python/paddle/fluid/tests/unittests/reader_reset_test.recordio
paddle/operators/check_t.save
paddle/operators/check_tensor.ls
paddle/operators/tensor.save
......
......@@ -38,21 +38,6 @@ std::unordered_map<std::string, FileReaderCreator>& FileReaderRegistry() {
return regs;
}
// Creates a reader for `file_name`.
//
// The file format is taken to be the text after the last occurrence of any
// character in kFileFormatSeparator; the creator registered for that format
// in FileReaderRegistry() is invoked with the full file name and the
// resulting raw pointer is handed to a unique_ptr.
//
// Enforce failures are raised when the name carries no separator or when no
// creator is registered for the extracted format.
std::unique_ptr<framework::ReaderBase> CreateReaderByFileName(
    const std::string& file_name) {
  size_t suffix_start = file_name.find_last_of(kFileFormatSeparator);
  PADDLE_ENFORCE_NE(suffix_start, std::string::npos,
                    "File name illegal! A legal file name should be like: "
                    "[file_name].[file_format] (e.g., 'data_file.recordio').");

  std::string format = file_name.substr(suffix_start + 1);

  // Look the registry up once and reuse the reference for both the search
  // and the end() comparison.
  auto& registry = FileReaderRegistry();
  auto creator_entry = registry.find(format);
  PADDLE_ENFORCE(creator_entry != registry.end(),
                 "No file reader registered for '%s' format.", format);

  return std::unique_ptr<framework::ReaderBase>(
      (creator_entry->second)(file_name));
}
void FileReaderMakerBase::Make() {
AddOutput("Out", "(ReaderHolder): The created random reader.").AsDuplicable();
AddAttr<std::vector<int>>("shape_concat", "The concat of all data's shapes.");
......
......@@ -40,9 +40,6 @@ int RegisterFileReader(const std::string& filetype) {
return 0;
}
// Creates a reader for the given file; ownership of the reader is returned
// to the caller through the unique_ptr.
std::unique_ptr<framework::ReaderBase> CreateReaderByFileName(
const std::string& file_name);
// NOTE(review): presumably splits the flat `shape_concat` list back into one
// DDim per entry of `ranks` -- the definition is not visible here, confirm.
extern std::vector<framework::DDim> RestoreShapes(
const std::vector<int>& shape_concat, const std::vector<int>& ranks);
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle.dataset.common
import unittest
import tempfile
import glob
from six.moves import range
class TestCommon(unittest.TestCase):
    """Unit tests for the helpers in ``paddle.dataset.common``."""

    def test_md5file(self):
        """md5file must return the hex MD5 digest of the file content."""
        # NamedTemporaryFile owns and closes its descriptor; the raw fd
        # returned by mkstemp() was previously leaked.
        with tempfile.NamedTemporaryFile('w', delete=False) as f:
            f.write("Hello\n")
            temp_path = f.name
        self.assertEqual('09f7e02f1290be211da707a266f153b3',
                         paddle.dataset.common.md5file(temp_path))

    def test_download(self):
        """download must return the local path under DATA_HOME."""
        yi_avatar = 'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460'
        self.assertEqual(
            paddle.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460',
            paddle.dataset.common.download(yi_avatar, 'test',
                                           'f75287202d6622414c706c36c16f8e0d'))

    def test_split(self):
        """Splitting 10 records with a line count of 4 is expected to give 3 files."""

        def test_reader():
            def reader():
                for x in range(10):
                    yield x

            return reader

        # The suffix is used as '<dir>/test-NNNNN.pickle', so a directory is
        # required; mkstemp() creates a regular file and cannot be a parent.
        temp_dir = tempfile.mkdtemp()
        paddle.dataset.common.split(
            test_reader(), 4, suffix=temp_dir + '/test-%05d.pickle')
        # Use a wildcard: the literal '%05d' placeholder never matches the
        # formatted file names.
        files = glob.glob(temp_dir + '/test-*.pickle')
        self.assertEqual(len(files), 3)

    def test_cluster_file_reader(self):
        """Trainer 0 of 5 should only see the content of the first file."""
        # Files are created inside this path, so it has to be a directory.
        temp_dir = tempfile.mkdtemp()
        for x in range(5):
            # Open for writing; the default read mode fails on a missing file
            # and does not allow f.write().
            with open(temp_dir + '/%05d.test' % x, 'w') as f:
                f.write('%d\n' % x)
        reader = paddle.dataset.common.cluster_files_reader(
            temp_dir + '/*.test', 5, 0)
        for idx, e in enumerate(reader()):
            self.assertEqual(e, str("0"))

    def test_convert(self):
        """convert must write num_shards files holding record_num records."""
        record_num = 10
        num_shards = 4

        def test_reader():
            def reader():
                for x in range(record_num):
                    yield x

            return reader

        path = tempfile.mkdtemp()
        paddle.dataset.common.convert(path,
                                      test_reader(), num_shards,
                                      'random_images')
        files = glob.glob(path + '/random_images-*')
        self.assertEqual(len(files), num_shards)

        recs = []
        for i in range(0, num_shards):
            n = "%s/random_images-%05d-of-%05d" % (path, i, num_shards - 1)
            # NOTE(review): `recordio` is not imported anywhere in the visible
            # part of this file; the module must be imported at file level
            # for this loop to run.
            r = recordio.reader(n)
            while True:
                d = r.read()
                if d is None:
                    break
                recs.append(d)

        recs.sort()
        # Compare the number of records actually read back; the previously
        # referenced name `total` was never defined.
        self.assertEqual(len(recs), record_num)
if __name__ == '__main__':
unittest.main()
......@@ -130,13 +130,8 @@ unsupported_fp16_list = {
'send_barrier',
'recv',
'fetch_barrier',
'create_recordio_file_reader',
'create_random_data_generator',
'create_py_reader',
'create_shuffle_reader',
'create_batch_reader',
'create_double_buffer_reader',
'create_multi_pass_reader',
'read',
'load',
......
......@@ -754,98 +754,6 @@ def create_py_reader_by_data(capacity,
feed_list=feed_list)
def open_files(filenames,
               shapes,
               lod_levels,
               dtypes,
               thread_num=None,
               buffer_size=None,
               pass_num=1,
               is_test=None):
    """
    Open files

    This layer takes a list of files to read from and returns a Reader Variable.
    Via the Reader Variable, we can get data from given files. All files must
    have name suffixes to indicate their formats, e.g., '*.recordio'.

    Args:
       filenames(list|str): The list of file names. A single file name given
            as a string is treated as a one-element list.
       shapes(list): List of tuples which declaring data shapes.
       lod_levels(list): List of ints which declaring data lod_level.
       dtypes(list): List of strs which declaring data type.
       thread_num(None): The number of thread to read files.
            Default: min(len(filenames), cpu_number).
       buffer_size(None): The buffer size of reader. Default: 3 * thread_num
       pass_num(int): Number of passes to run.
       is_test(bool|None): Whether `open_files` used for testing or not. If it
            is used for testing, the order of data generated is same as the file
            order. Otherwise, it is not guaranteed the order of data is same
            between every epoch. [Default: False].

    Returns:
       Variable: A Reader Variable via which we can get file data.

    Examples:
       .. code-block:: python

         import paddle.fluid as fluid
         reader = fluid.layers.io.open_files(filenames=['./data1.recordio',
                                                     './data2.recordio'],
                                             shapes=[(3,224,224), (1,)],
                                             lod_levels=[0, 0],
                                             dtypes=['float32', 'int64'])

         # Via the reader, we can use 'read_file' layer to get data:
         image, label = fluid.layers.io.read_file(reader)
    """
    # Wrap a single file name before anything inspects `filenames`; otherwise
    # the default thread number below would be derived from the number of
    # characters in the string instead of the number of files.
    if isinstance(filenames, six.string_types):
        filenames = [filenames]

    if thread_num is None:
        thread_num = min(len(filenames), multiprocessing.cpu_count())
    else:
        thread_num = int(thread_num)

    if buffer_size is None:
        buffer_size = 3 * thread_num
    else:
        buffer_size = int(buffer_size)

    dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]

    # Flatten all shapes into one list and remember each shape's rank so the
    # individual shapes can be recovered from the flat attribute.
    shape_concat = []
    ranks = []
    for shape in shapes:
        shape_concat.extend(shape)
        ranks.append(len(shape))

    multi_file_reader_name = unique_name('multi_file_reader')
    startup_blk = default_startup_program().current_block()
    startup_reader = startup_blk.create_var(name=multi_file_reader_name)
    attrs = {
        'shape_concat': shape_concat,
        'lod_levels': lod_levels,
        'ranks': ranks,
        'file_names': filenames,
        'thread_num': thread_num,
        'buffer_size': buffer_size
    }
    # Only pass `is_test` when the caller set it explicitly.
    if is_test is not None:
        attrs['is_test'] = is_test
    startup_blk.append_op(
        type='open_files', outputs={'Out': [startup_reader]}, attrs=attrs)

    startup_reader.desc.set_dtypes(dtypes)
    startup_reader.persistable = True
    main_prog_reader = _copy_reader_var_(default_main_program().current_block(),
                                         startup_reader)
    if pass_num > 1:
        main_prog_reader = multi_pass(
            reader=main_prog_reader, pass_num=pass_num)

    return monkey_patch_reader_methods(main_prog_reader)
def __create_shared_decorated_reader__(op_type, reader, attrs):
var_name = unique_name(op_type)
startup_blk = default_startup_program().current_block()
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle.fluid as fluid
import numpy
import sys
# recordio files opened by build_program for the training program
TRAIN_FILES = ['train.recordio']
# recordio files opened by build_program for the test program
TEST_FILES = ['test.recordio']
# first dimension of the embedding table (presumably the vocabulary size)
DICT_DIM = 5147
# embedding dim
emb_dim = 128
# hidden dim
hid_dim = 128
# class num
class_dim = 2
# epoch num
epoch_num = 10
def build_program(is_train):
    """Build the text classification network in the current program.

    Args:
        is_train(bool): When True the training files are read and an Adagrad
            optimizer minimizing the mean loss is appended; otherwise the
            test files are read and no optimizer is added.

    Returns:
        dict: 'loss' is the mean cross entropy variable, 'log' is the list
        [mean loss, accuracy] and 'file' is the reader returned by
        open_files (the un-buffered handle).
    """
    source_files = TRAIN_FILES if is_train else TEST_FILES
    file_obj_handle = fluid.layers.io.open_files(
        filenames=source_files,
        shapes=[[-1, 1], [-1, 1]],
        lod_levels=[1, 0],
        dtypes=['int64', 'int64'])
    buffered_reader = fluid.layers.io.double_buffer(file_obj_handle)

    with fluid.unique_name.guard():
        words, label = fluid.layers.read_file(buffered_reader)
        embedded = fluid.layers.embedding(
            input=words, size=[DICT_DIM, emb_dim])

        # Two convolution + pooling branches over the same embedding, with
        # filter sizes 3 and 4 (created in that order).
        conv_branches = [
            fluid.nets.sequence_conv_pool(
                input=embedded,
                num_filters=hid_dim,
                filter_size=width,
                act="tanh",
                pool_type="sqrt") for width in (3, 4)
        ]

        prediction = fluid.layers.fc(input=conv_branches,
                                     size=class_dim,
                                     act="softmax")

        # cross entropy per sample, then its mean over the batch
        per_sample_cost = fluid.layers.cross_entropy(
            input=prediction, label=label)
        avg_cost = fluid.layers.mean(x=per_sample_cost)
        acc = fluid.layers.accuracy(input=prediction, label=label)

        if is_train:
            # Adagrad optimizer on the mean loss
            optimizer = fluid.optimizer.Adagrad(learning_rate=0.001)
            optimizer.minimize(avg_cost)

    return {'loss': avg_cost, 'log': [avg_cost, acc], 'file': file_obj_handle}
def main():
    """Train for `epoch_num` epochs, evaluating on the test files after each.

    Train and test programs share one startup program. Each epoch runs the
    training program until the reader raises EOFException, resets the
    training reader, then does the same with the test program and prints the
    mean test loss and accuracy.
    """
    train = fluid.Program()
    startup = fluid.Program()
    test = fluid.Program()

    with fluid.program_guard(train, startup):
        train_args = build_program(is_train=True)

    with fluid.program_guard(test, startup):
        test_args = build_program(is_train=False)

    use_cuda = fluid.core.is_compiled_with_cuda()
    # startup
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place=place)
    exe.run(startup)

    train_exe = fluid.ParallelExecutor(
        use_cuda=use_cuda,
        loss_name=train_args['loss'].name,
        main_program=train)
    test_exe = fluid.ParallelExecutor(
        use_cuda=use_cuda, main_program=test, share_vars_from=train_exe)

    fetch_var_list = [var.name for var in train_args['log']]
    for epoch_id in range(epoch_num):
        # train
        try:
            batch_id = 0
            while True:
                loss, acc = map(numpy.array,
                                train_exe.run(fetch_list=fetch_var_list))
                # print() calls are required: this file imports
                # print_function from __future__, which makes the print
                # statement a syntax error.
                print('Train epoch', epoch_id, 'batch', batch_id, 'loss:',
                      loss, 'acc:', acc)
                batch_id += 1
        except fluid.core.EOFException:
            print('End of epoch', epoch_id)
            train_args['file'].reset()

        # test
        loss = []
        acc = []
        try:
            while True:
                loss_np, acc_np = map(numpy.array,
                                      test_exe.run(fetch_list=fetch_var_list))
                loss.append(loss_np[0])
                acc.append(acc_np[0])
        except fluid.core.EOFException:
            # Only the end-of-data signal finishes the test pass; any other
            # error propagates instead of being reported as a test result.
            test_args['file'].reset()
            print('Test loss:', numpy.mean(loss), 'acc:', numpy.mean(acc))
if __name__ == '__main__':
main()
mnist.recordio
mnist_0.recordio
mnist_1.recordio
mnist_2.recordio
flowers.recordio
wmt16.recordio
data_balance_test.recordio
data_balance_with_lod_test.recordio
......@@ -18,8 +18,6 @@ import paddle.fluid as fluid
fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
os.environ['RECORDIO_FILENAME'] = './eager_deletion_transformer.wmt16.recordio'
from test_parallel_executor_transformer import TestTransformer
if __name__ == '__main__':
......
......@@ -12,9 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
os.environ['RECORDIO_FILENAME'] = './auto_growth_pe_transformer.wmt16.recordio'
import unittest
from test_parallel_executor_transformer import *
......
......@@ -12,12 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import paddle.fluid as fluid
os.environ['RECORDIO_FILENAME'] = './p_gc_transformer.wmt16.recordio'
fluid.core._set_eager_deletion_mode(0.0, 0.55, True)
from test_parallel_executor_transformer import TestTransformer
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册