Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
1478a5fc
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1478a5fc
编写于
7月 14, 2018
作者:
Y
yuyang18
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Make open_files use buffer
上级
dc34effd
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
30 additions
and
19 deletions
+30
-19
paddle/fluid/operators/reader/CMakeLists.txt
paddle/fluid/operators/reader/CMakeLists.txt
+1
-1
paddle/fluid/operators/reader/open_files_op.cc
paddle/fluid/operators/reader/open_files_op.cc
+12
-4
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+16
-13
python/paddle/fluid/tests/unittests/test_data_balance.py
python/paddle/fluid/tests/unittests/test_data_balance.py
+1
-1
未找到文件。
paddle/fluid/operators/reader/CMakeLists.txt
浏览文件 @
1478a5fc
...
...
@@ -16,7 +16,7 @@ function(reader_library TARGET_NAME)
endfunction
()
cc_library
(
buffered_reader SRCS buffered_reader.cc DEPS reader simple_threadpool
)
reader_library
(
open_files_op SRCS open_files_op.cc
)
reader_library
(
open_files_op SRCS open_files_op.cc
DEPS buffered_reader
)
reader_library
(
create_random_data_generator_op SRCS create_random_data_generator_op.cc
)
reader_library
(
create_shuffle_reader_op SRCS create_shuffle_reader_op.cc
)
reader_library
(
create_batch_reader_op SRCS create_batch_reader_op.cc
)
...
...
paddle/fluid/operators/reader/open_files_op.cc
浏览文件 @
1478a5fc
...
...
@@ -18,6 +18,7 @@
#include "ThreadPool.h"
#include "paddle/fluid/framework/blocking_queue.h"
#include "paddle/fluid/operators/reader/blocking_queue.h"
#include "paddle/fluid/operators/reader/buffered_reader.h"
#include "paddle/fluid/operators/reader/reader_op_registry.h"
namespace
paddle
{
...
...
@@ -232,12 +233,17 @@ class OpenFilesOp : public framework::OperatorBase {
container
.
reset
(
new
OrderedReaderContainer
());
}
else
{
container
.
reset
(
new
PreemptiveReaderContainer
(
std
::
min
(
file_names
.
size
(),
static_cast
<
size_t
>
(
std
::
thread
::
hardware_concurrency
()))));
static_cast
<
size_t
>
(
Attr
<
int
>
(
"thread_num"
))));
}
out
->
Reset
(
std
::
make_shared
<
MultiFileReader
>
(
file_names
,
std
::
move
(
container
)));
auto
reader
=
std
::
make_shared
<
MultiFileReader
>
(
file_names
,
std
::
move
(
container
));
auto
buffer_size
=
Attr
<
int
>
(
"buffer_size"
);
if
(
buffer_size
>
1
)
{
reader
=
framework
::
MakeDecoratedReader
<
BufferedReader
>
(
reader
,
platform
::
CPUPlace
(),
buffer_size
);
}
out
->
Reset
(
reader
);
}
};
...
...
@@ -253,6 +259,8 @@ class OpenFilesOpMaker : public FileReaderMakerBase {
An OpenFilesOp creates a MultiFileReader, which is able to
read data multi-threaded from multiple files.
)DOC"
);
AddAttr
<
int
>
(
"thread_num"
,
"Number of thread to read files."
);
AddAttr
<
int
>
(
"buffer_size"
,
"The reading buffer of these files."
);
}
};
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
1478a5fc
...
...
@@ -21,6 +21,7 @@ from ..layer_helper import LayerHelper
from
..executor
import
global_scope
from
layer_function_generator
import
generate_layer_fn
,
templatedoc
import
sys
import
multiprocessing
__all__
=
[
'data'
,
'BlockGuardServ'
,
'ListenAndServ'
,
'Send'
,
'Recv'
,
...
...
@@ -549,10 +550,9 @@ def open_files(filenames,
shapes(list): List of tuples which declaring data shapes.
lod_levels(list): List of ints which declaring data lod_level.
dtypes(list): List of strs which declaring data type.
thread_num(None): Deprecated argument. It will be set by open_files
automatically.
buffer_size(None): Deprecated argument. It will be set by open_files
automatically.
thread_num(None): The number of thread to read files.
Default: min(len(filenames), cpu_number).
buffer_size(None): The buffer size of reader. Default: 3 * thread_num
pass_num(int): Number of passes to run.
is_test(bool|None): Whether `open_files` used for testing or not. If it
is used for testing, the order of data generated is same as the file
...
...
@@ -574,14 +574,15 @@ def open_files(filenames,
# Via the reader, we can use 'read_file' layer to get data:
image, label = fluid.layers.io.read_file(reader)
"""
if
thread_num
is
not
None
:
print
>>
sys
.
stderr
,
"thread_num parameter of open_files is "
\
"deprecated. It will be ignored and set "
\
"automatically by open_files "
if
buffer_size
is
not
None
:
print
>>
sys
.
stderr
,
"buffer_size parameter of open_files is "
\
"deprecated. It will be ignored and set "
\
"automatically by open_files "
if
thread_num
is
None
:
thread_num
=
min
(
len
(
filenames
),
multiprocessing
.
cpu_count
())
else
:
thread_num
=
int
(
thread_num
)
if
buffer_size
is
None
:
buffer_size
=
3
*
thread_num
else
:
buffer_size
=
int
(
buffer_size
)
if
isinstance
(
filenames
,
basestring
):
filenames
=
[
filenames
]
...
...
@@ -600,7 +601,9 @@ def open_files(filenames,
'shape_concat'
:
shape_concat
,
'lod_levels'
:
lod_levels
,
'ranks'
:
ranks
,
'file_names'
:
filenames
'file_names'
:
filenames
,
'thread_num'
:
thread_num
,
'buffer_size'
:
buffer_size
}
if
is_test
is
not
None
:
attrs
[
'is_test'
]
=
is_test
...
...
python/paddle/fluid/tests/unittests/test_data_balance.py
浏览文件 @
1478a5fc
...
...
@@ -155,7 +155,7 @@ class TestDataBalance(unittest.TestCase):
main_program
=
main_prog
,
build_strategy
=
build_strategy
)
if
(
parallel_exe
.
device_count
>
self
.
batch_size
)
:
if
parallel_exe
.
device_count
>
self
.
batch_size
:
print
(
"WARNING: Unittest TestDataBalance skipped.
\
For the result is not correct when device count
\
is larger than batch size."
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录