Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
5dce1da6
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5dce1da6
编写于
8月 31, 2019
作者:
Z
Zeng Jinle
提交者:
Tao Luo
8月 31, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove reset recordio usage (#19519)
上级
85914f7a
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
0 addition
and
370 deletion
+0
-370
.gitignore
.gitignore
+0
-1
paddle/fluid/operators/reader/reader_op_registry.cc
paddle/fluid/operators/reader/reader_op_registry.cc
+0
-15
paddle/fluid/operators/reader/reader_op_registry.h
paddle/fluid/operators/reader/reader_op_registry.h
+0
-3
python/paddle/dataset/tests/common_test.py
python/paddle/dataset/tests/common_test.py
+0
-97
python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
+0
-5
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+0
-92
python/paddle/fluid/tests/demo/file_reader/.gitignore
python/paddle/fluid/tests/demo/file_reader/.gitignore
+0
-1
python/paddle/fluid/tests/demo/file_reader/train.py
python/paddle/fluid/tests/demo/file_reader/train.py
+0
-140
python/paddle/fluid/tests/unittests/.gitignore
python/paddle/fluid/tests/unittests/.gitignore
+0
-8
python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py
.../fluid/tests/unittests/test_eager_deletion_transformer.py
+0
-2
python/paddle/fluid/tests/unittests/test_parallel_executor_transformer_auto_growth.py
...ittests/test_parallel_executor_transformer_auto_growth.py
+0
-3
python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py
...ests/unittests/test_partial_eager_deletion_transformer.py
+0
-3
未找到文件。
.gitignore
浏览文件 @
5dce1da6
python/paddle/fluid/tests/unittests/reader_reset_test.recordio
paddle/operators/check_t.save
paddle/operators/check_tensor.ls
paddle/operators/tensor.save
...
...
paddle/fluid/operators/reader/reader_op_registry.cc
浏览文件 @
5dce1da6
...
...
@@ -38,21 +38,6 @@ std::unordered_map<std::string, FileReaderCreator>& FileReaderRegistry() {
return
regs
;
}
std
::
unique_ptr
<
framework
::
ReaderBase
>
CreateReaderByFileName
(
const
std
::
string
&
file_name
)
{
size_t
separator_pos
=
file_name
.
find_last_of
(
kFileFormatSeparator
);
PADDLE_ENFORCE_NE
(
separator_pos
,
std
::
string
::
npos
,
"File name illegal! A legal file name should be like: "
"[file_name].[file_format] (e.g., 'data_file.recordio')."
);
std
::
string
filetype
=
file_name
.
substr
(
separator_pos
+
1
);
auto
itor
=
FileReaderRegistry
().
find
(
filetype
);
PADDLE_ENFORCE
(
itor
!=
FileReaderRegistry
().
end
(),
"No file reader registered for '%s' format."
,
filetype
);
framework
::
ReaderBase
*
reader
=
(
itor
->
second
)(
file_name
);
return
std
::
unique_ptr
<
framework
::
ReaderBase
>
(
reader
);
}
void
FileReaderMakerBase
::
Make
()
{
AddOutput
(
"Out"
,
"(ReaderHolder): The created random reader."
).
AsDuplicable
();
AddAttr
<
std
::
vector
<
int
>>
(
"shape_concat"
,
"The concat of all data's shapes."
);
...
...
paddle/fluid/operators/reader/reader_op_registry.h
浏览文件 @
5dce1da6
...
...
@@ -40,9 +40,6 @@ int RegisterFileReader(const std::string& filetype) {
return
0
;
}
// Declaration of the factory that builds a reader from a file name. The
// result is returned as a std::unique_ptr<framework::ReaderBase>.
std
::
unique_ptr
<
framework
::
ReaderBase
>
CreateReaderByFileName
(
const
std
::
string
&
file_name
);
// Declaration of RestoreShapes: takes two integer vectors, `shape_concat` and
// `ranks`, and returns a vector of framework::DDim.
// NOTE(review): presumably splits `shape_concat` into one DDim per entry of
// `ranks` (each entry giving that shape's number of dimensions); the
// definition is not visible here -- confirm.
extern
std
::
vector
<
framework
::
DDim
>
RestoreShapes
(
const
std
::
vector
<
int
>&
shape_concat
,
const
std
::
vector
<
int
>&
ranks
);
...
...
python/paddle/dataset/tests/common_test.py
已删除
100644 → 0
浏览文件 @
85914f7a
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
paddle.dataset.common
import
unittest
import
tempfile
import
glob
from
six.moves
import
range
class TestCommon(unittest.TestCase):
    """Unit tests for helpers in ``paddle.dataset.common``."""

    def test_md5file(self):
        """Compare ``md5file`` against a fixed digest for a small text file."""
        _, temp_path = tempfile.mkstemp()
        with open(temp_path, 'w') as f:
            f.write("Hello\n")
        self.assertEqual('09f7e02f1290be211da707a266f153b3',
                         paddle.dataset.common.md5file(temp_path))

    def test_download(self):
        """Check the local path returned by ``download`` for a known URL."""
        yi_avatar = 'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460'
        self.assertEqual(
            paddle.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460',
            paddle.dataset.common.download(
                yi_avatar, 'test', 'f75287202d6622414c706c36c16f8e0d'))

    def test_split(self):
        """Split a 10-item reader with a chunk size of 4 and expect 3 files."""

        def test_reader():
            def reader():
                for x in range(10):
                    yield x

            return reader

        # The output files live *under* temp_path, so it has to be a directory
        # (mkstemp would create a regular file instead).
        temp_path = tempfile.mkdtemp()
        paddle.dataset.common.split(
            test_reader(), 4, suffix=temp_path + '/test-%05d.pickle')
        # Match the numbered outputs with a wildcard; globbing the literal
        # '%05d' pattern could match at most one file.
        files = glob.glob(temp_path + '/test-*.pickle')
        self.assertEqual(len(files), 3)

    def test_cluster_file_reader(self):
        """Write five one-line files and read back the share of trainer 0."""
        # The files are created under temp_path, so it has to be a directory.
        temp_path = tempfile.mkdtemp()
        for x in range(5):
            # Open for writing; the default read mode cannot create the file
            # or accept write().
            with open(temp_path + '/%05d.test' % x, 'w') as f:
                f.write('%d\n' % x)
        # NOTE(review): no loader is passed, so cluster_files_reader's default
        # loader is used -- confirm it can parse these plain-text files.
        reader = paddle.dataset.common.cluster_files_reader(
            temp_path + '/*.test', 5, 0)
        for idx, e in enumerate(reader()):
            self.assertEqual(e, str("0"))

    def test_convert(self):
        """Convert a 10-record reader and count the records read back."""
        record_num = 10
        num_shards = 4

        def test_reader():
            def reader():
                for x in range(record_num):
                    yield x

            return reader

        path = tempfile.mkdtemp()
        paddle.dataset.common.convert(path,
                                      test_reader(), num_shards,
                                      'random_images')
        files = glob.glob(path + '/random_images-*')
        self.assertEqual(len(files), num_shards)

        recs = []
        for i in range(0, num_shards):
            # NOTE(review): confirm this shard naming scheme against what
            # paddle.dataset.common.convert actually writes.
            n = "%s/random_images-%05d-of-%05d" % (path, i, num_shards - 1)
            # NOTE(review): `recordio` is not imported anywhere in this file,
            # so this line raises NameError as written. It is a third-party
            # package; add the import at file level if the test is kept.
            r = recordio.reader(n)
            while True:
                d = r.read()
                if d is None:
                    break
                recs.append(d)

        recs.sort()
        # The original compared an undefined name (`total`); the quantity
        # being checked is the number of records read back.
        self.assertEqual(len(recs), record_num)
# Run the unittest runner when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
浏览文件 @
5dce1da6
...
...
@@ -130,13 +130,8 @@ unsupported_fp16_list = {
'send_barrier',
'recv',
'fetch_barrier',
'create_recordio_file_reader',
'create_random_data_generator',
'create_py_reader',
'create_shuffle_reader',
'create_batch_reader',
'create_double_buffer_reader',
'create_multi_pass_reader',
'read',
'load',
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
5dce1da6
...
...
@@ -754,98 +754,6 @@ def create_py_reader_by_data(capacity,
feed_list
=
feed_list
)
def open_files(filenames,
               shapes,
               lod_levels,
               dtypes,
               thread_num=None,
               buffer_size=None,
               pass_num=1,
               is_test=None):
    """
    Open files

    This layer takes a list of files to read from and returns a Reader Variable.
    Via the Reader Variable, we can get data from given files. All files must
    have name suffixes to indicate their formats, e.g., '*.recordio'.

    Args:
       filenames(list|str): The list of file names. A single file name given
            as a string is treated as a one-element list.
       shapes(list): List of tuples which declaring data shapes.
       lod_levels(list): List of ints which declaring data lod_level.
       dtypes(list): List of strs which declaring data type.
       thread_num(int|None): The number of thread to read files.
            Default: min(len(filenames), cpu_number).
       buffer_size(int|None): The buffer size of reader.
            Default: 3 * thread_num
       pass_num(int): Number of passes to run. When greater than 1 the reader
            is wrapped with `multi_pass`.
       is_test(bool|None): Whether `open_files` used for testing or not. If it
            is used for testing, the order of data generated is same as the file
            order. Otherwise, it is not guaranteed the order of data is same
            between every epoch. Default: None, in which case the `is_test`
            attribute is not set on the operator.

    Returns:
       Variable: A Reader Variable via which we can get file data.

    Examples:
       .. code-block:: python

         import paddle.fluid as fluid
         reader = fluid.layers.io.open_files(filenames=['./data1.recordio',
                                                     './data2.recordio'],
                                             shapes=[(3,224,224), (1,)],
                                             lod_levels=[0, 0],
                                             dtypes=['float32', 'int64'])

         # Via the reader, we can use 'read_file' layer to get data:
         image, label = fluid.layers.io.read_file(reader)
    """
    # Wrap a single file name into a list *before* sizing the thread pool;
    # otherwise len(filenames) would count the characters of the string.
    if isinstance(filenames, six.string_types):
        filenames = [filenames]

    if thread_num is None:
        thread_num = min(len(filenames), multiprocessing.cpu_count())
    else:
        thread_num = int(thread_num)

    if buffer_size is None:
        buffer_size = 3 * thread_num
    else:
        buffer_size = int(buffer_size)

    dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]

    # Flatten all shapes into one list and remember each shape's length so the
    # individual shapes can be recovered from the two attributes.
    shape_concat = []
    ranks = []
    for shape in shapes:
        shape_concat.extend(shape)
        ranks.append(len(shape))

    multi_file_reader_name = unique_name('multi_file_reader')
    startup_blk = default_startup_program().current_block()
    startup_reader = startup_blk.create_var(name=multi_file_reader_name)
    attrs = {
        'shape_concat': shape_concat,
        'lod_levels': lod_levels,
        'ranks': ranks,
        'file_names': filenames,
        'thread_num': thread_num,
        'buffer_size': buffer_size
    }
    # Only forward is_test when the caller specified it.
    if is_test is not None:
        attrs['is_test'] = is_test
    startup_blk.append_op(
        type='open_files', outputs={'Out': [startup_reader]}, attrs=attrs)

    startup_reader.desc.set_dtypes(dtypes)
    startup_reader.persistable = True
    # The reader is created in the startup program; copy its variable into the
    # main program so it can be used there.
    main_prog_reader = _copy_reader_var_(default_main_program().current_block(),
                                         startup_reader)
    if pass_num > 1:
        main_prog_reader = multi_pass(
            reader=main_prog_reader, pass_num=pass_num)

    return monkey_patch_reader_methods(main_prog_reader)
def
__create_shared_decorated_reader__
(
op_type
,
reader
,
attrs
):
var_name
=
unique_name
(
op_type
)
startup_blk
=
default_startup_program
().
current_block
()
...
...
python/paddle/fluid/tests/demo/file_reader/.gitignore
已删除
100644 → 0
浏览文件 @
85914f7a
*.recordio
python/paddle/fluid/tests/demo/file_reader/train.py
已删除
100644 → 0
浏览文件 @
85914f7a
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
paddle.fluid
as
fluid
import
numpy
import
sys
# Input files passed to open_files for the training and the test program.
TRAIN_FILES = ['train.recordio']
TEST_FILES = ['test.recordio']

# First dimension of the embedding table size.
# NOTE(review): presumably the vocabulary size -- confirm.
DICT_DIM = 5147

# embedding dim
emb_dim = 128

# hidden dim
hid_dim = 128

# class num
class_dim = 2

# epoch num
epoch_num = 10
def build_program(is_train):
    """Build the text-classification network in the current program.

    Args:
        is_train(bool): When True the reader uses TRAIN_FILES and an Adagrad
            optimizer is attached to the mean loss; otherwise the reader uses
            TEST_FILES and no optimizer is added.

    Returns:
        dict: 'loss' is the mean loss variable, 'log' is the list
        [mean loss, accuracy], and 'file' is the reader returned by
        open_files (before double buffering).
    """
    if is_train:
        input_files = TRAIN_FILES
    else:
        input_files = TEST_FILES

    reader_handle = fluid.layers.io.open_files(
        filenames=input_files,
        shapes=[[-1, 1], [-1, 1]],
        lod_levels=[1, 0],
        dtypes=['int64', 'int64'])
    buffered_reader = fluid.layers.io.double_buffer(reader_handle)

    with fluid.unique_name.guard():
        words, target = fluid.layers.read_file(buffered_reader)

        embedded = fluid.layers.embedding(
            input=words, size=[DICT_DIM, emb_dim])

        # Two sequence conv-pool branches over the same embedding, with
        # filter sizes 3 and 4 (created in that order).
        conv_branches = [
            fluid.nets.sequence_conv_pool(
                input=embedded,
                num_filters=hid_dim,
                filter_size=width,
                act="tanh",
                pool_type="sqrt") for width in (3, 4)
        ]

        probs = fluid.layers.fc(input=conv_branches,
                                size=class_dim,
                                act="softmax")

        # Per-sample cross entropy, then its mean as the scalar loss.
        per_sample_loss = fluid.layers.cross_entropy(input=probs, label=target)
        mean_loss = fluid.layers.mean(x=per_sample_loss)
        accuracy = fluid.layers.accuracy(input=probs, label=target)

        if is_train:
            # Adagrad optimizer on the mean loss.
            fluid.optimizer.Adagrad(learning_rate=0.001).minimize(mean_loss)

    return {
        'loss': mean_loss,
        'log': [mean_loss, accuracy],
        'file': reader_handle
    }
def main():
    """Train and evaluate the model for `epoch_num` epochs.

    Builds a train and a test program that share one startup program, runs the
    startup program once, then alternates a full pass over the training reader
    with a full pass over the test reader. Each pass ends when the executor
    raises fluid.core.EOFException, after which that reader is reset.
    """
    train = fluid.Program()
    startup = fluid.Program()
    test = fluid.Program()

    with fluid.program_guard(train, startup):
        train_args = build_program(is_train=True)

    with fluid.program_guard(test, startup):
        test_args = build_program(is_train=False)

    use_cuda = fluid.core.is_compiled_with_cuda()
    # startup
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place=place)
    exe.run(startup)

    train_exe = fluid.ParallelExecutor(
        use_cuda=use_cuda,
        loss_name=train_args['loss'].name,
        main_program=train)
    test_exe = fluid.ParallelExecutor(
        use_cuda=use_cuda, main_program=test, share_vars_from=train_exe)

    fetch_var_list = [var.name for var in train_args['log']]

    for epoch_id in range(epoch_num):
        # train
        try:
            batch_id = 0
            while True:
                loss, acc = map(numpy.array,
                                train_exe.run(fetch_list=fetch_var_list))
                # print() calls: this file imports print_function, so the
                # Python 2 print statement is a syntax error here.
                print('Train epoch', epoch_id, 'batch', batch_id, 'loss:',
                      loss, 'acc:', acc)
                batch_id += 1
        except fluid.core.EOFException:
            print('End of epoch', epoch_id)
            train_args['file'].reset()

        # test
        loss = []
        acc = []
        try:
            while True:
                loss_np, acc_np = map(numpy.array,
                                      test_exe.run(fetch_list=fetch_var_list))
                loss.append(loss_np[0])
                acc.append(acc_np[0])
        except fluid.core.EOFException:
            # Catch only end-of-data, as the train loop does; a bare except
            # would also hide real failures and KeyboardInterrupt.
            test_args['file'].reset()
            print('Test loss:', numpy.mean(loss), 'acc:', numpy.mean(acc))
# Run the demo when this file is executed as a script.
if __name__ == '__main__':
    main()
python/paddle/fluid/tests/unittests/.gitignore
已删除
100644 → 0
浏览文件 @
85914f7a
mnist.recordio
mnist_0.recordio
mnist_1.recordio
mnist_2.recordio
flowers.recordio
wmt16.recordio
data_balance_test.recordio
data_balance_with_lod_test.recordio
python/paddle/fluid/tests/unittests/test_eager_deletion_transformer.py
浏览文件 @
5dce1da6
...
...
@@ -18,8 +18,6 @@ import paddle.fluid as fluid
fluid
.
core
.
_set_eager_deletion_mode
(
0.0
,
1.0
,
True
)
os
.
environ
[
'RECORDIO_FILENAME'
]
=
'./eager_deletion_transformer.wmt16.recordio'
from
test_parallel_executor_transformer
import
TestTransformer
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_transformer_auto_growth.py
浏览文件 @
5dce1da6
...
...
@@ -12,9 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
os
.
environ
[
'RECORDIO_FILENAME'
]
=
'./auto_growth_pe_transformer.wmt16.recordio'
import
unittest
from
test_parallel_executor_transformer
import
*
...
...
python/paddle/fluid/tests/unittests/test_partial_eager_deletion_transformer.py
浏览文件 @
5dce1da6
...
...
@@ -12,12 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
unittest
import
paddle.fluid
as
fluid
os
.
environ
[
'RECORDIO_FILENAME'
]
=
'./p_gc_transformer.wmt16.recordio'
fluid
.
core
.
_set_eager_deletion_mode
(
0.0
,
0.55
,
True
)
from
test_parallel_executor_transformer
import
TestTransformer
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录