Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
4b950951
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4b950951
编写于
6月 29, 2018
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add unittests and fix a few bugs
上级
ba538012
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
206 addition
and
8 deletion
+206
-8
paddle/fluid/framework/details/data_balance_op_handle.cc
paddle/fluid/framework/details/data_balance_op_handle.cc
+0
-1
paddle/fluid/framework/details/fetch_op_handle.cc
paddle/fluid/framework/details/fetch_op_handle.cc
+1
-1
paddle/fluid/framework/details/multi_devices_graph_builder.cc
...le/fluid/framework/details/multi_devices_graph_builder.cc
+4
-2
paddle/fluid/framework/lod_tensor.cc
paddle/fluid/framework/lod_tensor.cc
+1
-0
paddle/fluid/operators/read_op.cc
paddle/fluid/operators/read_op.cc
+12
-4
python/paddle/fluid/tests/unittests/test_data_balance.py
python/paddle/fluid/tests/unittests/test_data_balance.py
+188
-0
未找到文件。
paddle/fluid/framework/details/data_balance_op_handle.cc
浏览文件 @
4b950951
...
...
@@ -107,7 +107,6 @@ void DataBalanceOpHandle::RunImpl() {
auto
*
tensor_var
=
local_scope
->
FindVar
(
in_var_handles
[
i
]
->
name_
);
PADDLE_ENFORCE
(
tensor_var
->
IsType
<
LoDTensor
>
());
auto
*
tensor
=
tensor_var
->
GetMutable
<
LoDTensor
>
();
PADDLE_ENFORCE
(
places_
[
place_idx
]
==
tensor
->
place
());
lod_tensors
[
data_idx
].
push_back
(
tensor
);
int
ins_size
=
tensor
->
lod
().
empty
()
?
tensor
->
dims
()[
0
]
:
tensor
->
NumElements
();
...
...
paddle/fluid/framework/details/fetch_op_handle.cc
浏览文件 @
4b950951
...
...
@@ -67,8 +67,8 @@ void FetchOpHandle::RunImpl() {
#endif
}
else
{
tensors_
[
i
].
ShareDataWith
(
t
);
tensors_
[
i
].
set_lod
(
t
.
lod
());
}
tensors_
[
i
].
set_lod
(
t
.
lod
());
}
this
->
WaitAndMergeCPUTensors
();
...
...
paddle/fluid/framework/details/multi_devices_graph_builder.cc
浏览文件 @
4b950951
...
...
@@ -216,11 +216,13 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
}
else
{
// This op runs on all devices, and its output may have parameter's
// gradients.
CreateComputationalOps
(
&
result
,
*
op
,
places_
.
size
());
if
(
op
->
Type
()
==
"read"
)
{
op
->
SetAttr
(
"throw_eof_exp"
,
false
);
CreateComputationalOps
(
&
result
,
*
op
,
places_
.
size
());
const
auto
&
data_var_names
=
op
->
Output
(
"Out"
);
InsertDataBalanceOp
(
&
result
,
data_var_names
);
}
else
{
CreateComputationalOps
(
&
result
,
*
op
,
places_
.
size
());
}
if
(
!
is_forwarding
&&
places_
.
size
()
>
1
)
{
...
...
paddle/fluid/framework/lod_tensor.cc
浏览文件 @
4b950951
...
...
@@ -393,6 +393,7 @@ void LoDTensor::MergeLoDTensor(
new_dim
[
0
]
+=
t
->
dims
()[
0
];
auto
&
lod
=
t
->
lod
();
PADDLE_ENFORCE_EQ
(
new_lod
.
size
(),
lod
.
size
());
for
(
size_t
j
=
0
;
j
<
lod
.
size
();
++
j
)
{
auto
&
sub_lod
=
new_lod
[
j
];
auto
&
offset
=
sub_lod
.
back
();
...
...
paddle/fluid/operators/read_op.cc
浏览文件 @
4b950951
...
...
@@ -67,10 +67,14 @@ class ReadOp : public framework::OperatorBase {
std
::
vector
<
framework
::
LoDTensor
>
ins
;
reader
->
ReadNext
(
&
ins
);
if
(
ins
.
empty
())
{
ins
.
resize
(
out_arg_names
.
size
());
for
(
auto
&
tensor
:
ins
)
{
// data type is not important for subsequent DataBalanceOpHandle
tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
0
}),
dev_place
);
if
(
Attr
<
bool
>
(
"throw_eof_exp"
))
{
PADDLE_THROW
(
"There is no next data."
);
}
else
{
ins
.
resize
(
out_arg_names
.
size
());
for
(
auto
&
tensor
:
ins
)
{
// data type is not important for subsequent DataBalanceOpHandle
tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
0
}),
dev_place
);
}
}
}
PADDLE_ENFORCE_EQ
(
ins
.
size
(),
out_arg_names
.
size
());
...
...
@@ -88,6 +92,10 @@ class ReadOpMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
AddInput
(
"Reader"
,
"(ReaderHolder) The executed reader."
);
AddOutput
(
"Out"
,
"(LoDTensor) The output data."
).
AsDuplicable
();
AddAttr
<
bool
>
(
"throw_eof_exp"
,
"If set true, an exception will be thrown when the Reader "
"yields empty (which means there is no next data)."
)
.
SetDefault
(
true
);
AddComment
(
R"DOC(
Read Operator
...
...
python/paddle/fluid/tests/unittests/test_data_balance.py
0 → 100644
浏览文件 @
4b950951
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
paddle.fluid
as
fluid
import
paddle.v2
as
paddle
import
paddle.v2.dataset.mnist
as
mnist
import
numpy
as
np
class TestDataBalance(unittest.TestCase):
    """Reads recordio files through ParallelExecutor and checks the fetched data.

    Two data files are generated in ``setUp``: one with dense tensors and one
    whose first slot carries a level-1 LoD. Each test body then reads its file
    back with ``open_files`` / ``read_file`` under a ``ParallelExecutor`` and
    verifies that

    * every fetched instance is internally consistent (image/sequence values
      match the accompanying label), and
    * every generated instance shows up exactly once before the reader
      reports "There is no next data.".
    """

    # Message used when the device count exceeds the batch size.
    _SKIP_MESSAGE = ("Unittest TestDataBalance skipped: the result is not "
                     "correct when device count is larger than batch size.")

    def prepare_data(self):
        """Writes ``total_ins_num`` dense (image, label) instances to
        ``self.data_file_name`` and stores the value returned by the
        converter in ``self.num_batches``.

        Instance ``n`` is a 3x4 image filled with ``n`` plus the label ``n``.
        """

        def fake_data_generator():
            for n in range(self.total_ins_num):
                yield np.ones((3, 4)) * n, n

        # Build the feed variables in throw-away programs so the default
        # programs are not polluted.
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            reader = paddle.batch(
                fake_data_generator, batch_size=self.batch_size)
            feeder = fluid.DataFeeder(
                feed_list=[
                    fluid.layers.data(
                        name='image', shape=[3, 4], dtype='float32'),
                    fluid.layers.data(
                        name='label', shape=[1], dtype='int64'),
                ],
                place=fluid.CPUPlace())
            self.num_batches = \
                fluid.recordio_writer.convert_reader_to_recordio_file(
                    self.data_file_name, reader, feeder)

    def prepare_lod_data(self):
        """Writes ``total_ins_num`` variable-length instances to
        ``self.lod_data_file_name``.

        Instance ``n`` (1-based) is an ``(n, 3)`` array filled with ``n`` plus
        a ``(1, 1)`` label holding ``n``. Instances are grouped into batches
        of at most ``batch_size``; the first tensor of each batch carries a
        level-1 LoD with the row offsets of its instances.
        """

        def fake_data_generator():
            for n in range(1, self.total_ins_num + 1):
                d1 = (np.ones((n, 3)) * n).astype('float32')
                d2 = (np.array(n).reshape((1, 1))).astype('int32')
                yield d1, d2

        with fluid.program_guard(fluid.Program(), fluid.Program()):
            with fluid.recordio_writer.create_recordio_writer(
                    filename=self.lod_data_file_name) as writer:
                eof = False
                generator = fake_data_generator()
                while not eof:
                    # Start every batch from empty arrays and grow them by
                    # concatenation along the row axis.
                    data_batch = [
                        np.array([]).reshape((0, 3)),
                        np.array([]).reshape((0, 1)),
                    ]
                    lod = [0]
                    for _ in range(self.batch_size):
                        try:
                            ins = next(generator)
                        except StopIteration:
                            eof = True
                            break
                        for i, d in enumerate(ins):
                            data_batch[i] = np.concatenate(
                                (data_batch[i], d), axis=0)
                        lod.append(lod[-1] + ins[0].shape[0])
                    # The last pass may have collected nothing; do not write
                    # an empty batch in that case.
                    if data_batch[0].shape[0] > 0:
                        for i, batch_array in enumerate(data_batch):
                            t = fluid.LoDTensor()
                            t.set(batch_array, fluid.CPUPlace())
                            if i == 0:
                                # Only the sequence slot has a LoD.
                                t.set_lod([lod])
                            writer.append_tensor(t)
                        writer.complete_append_tensor()

    def setUp(self):
        """Sets the test configuration and generates both data files."""
        self.use_cuda = fluid.core.is_compiled_with_cuda()
        self.data_file_name = './data_balance_test.recordio'
        self.lod_data_file_name = './data_balance_with_lod_test.recordio'
        self.total_ins_num = 50
        self.batch_size = 10
        self.prepare_data()
        self.prepare_lod_data()

    def _create_parallel_executor(self, main_prog, startup_prog):
        """Runs ``startup_prog`` and returns a ParallelExecutor for
        ``main_prog``.

        Skips the current test (instead of terminating the interpreter) when
        the executor uses more devices than ``batch_size``.
        """
        place = fluid.CUDAPlace(0) if self.use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_prog)

        parallel_exe = fluid.ParallelExecutor(
            use_cuda=self.use_cuda, main_program=main_prog)
        if parallel_exe.device_count > self.batch_size:
            self.skipTest(self._SKIP_MESSAGE)
        return parallel_exe

    def main(self):
        """Reads the dense data file and checks content and coverage."""
        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        with fluid.program_guard(main_prog, startup_prog):
            data_reader = fluid.layers.io.open_files(
                filenames=[self.data_file_name],
                shapes=[[-1, 3, 4], [-1, 1]],
                lod_levels=[0, 0],
                dtypes=['float32', 'int64'])
            if self.use_cuda:
                data_reader = fluid.layers.double_buffer(data_reader)
            image, label = fluid.layers.read_file(data_reader)

            parallel_exe = self._create_parallel_executor(main_prog,
                                                          startup_prog)
            fetch_list = [image.name, label.name]

            data_appeared = [False] * self.total_ins_num
            while True:
                try:
                    image_val, label_val = parallel_exe.run(
                        fetch_list, return_numpy=True)
                except fluid.core.EnforceNotMet as ex:
                    # The reader signals exhaustion through this exception.
                    self.assertIn("There is no next data.", str(ex))
                    break
                ins_num = image_val.shape[0]
                broadcasted_label = np.ones(
                    (ins_num, 3, 4)) * label_val.reshape((ins_num, 1, 1))
                # Compare element-wise: each image must be filled with its
                # own label value.
                self.assertTrue(np.array_equal(image_val, broadcasted_label))
                for l in label_val:
                    # Each label may be seen only once.
                    self.assertFalse(data_appeared[l[0]])
                    data_appeared[l[0]] = True
            # Every generated instance must have been fetched.
            self.assertTrue(all(data_appeared))

    def main_lod(self):
        """Reads the LoD data file and checks content and coverage."""
        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        with fluid.program_guard(main_prog, startup_prog):
            data_reader = fluid.layers.io.open_files(
                filenames=[self.lod_data_file_name],
                shapes=[[-1, 3], [-1, 1]],
                lod_levels=[1, 0],
                dtypes=['float32', 'int32'],
                thread_num=1)
            ins, label = fluid.layers.read_file(data_reader)

            parallel_exe = self._create_parallel_executor(main_prog,
                                                          startup_prog)
            fetch_list = [ins.name, label.name]

            data_appeared = [False] * self.total_ins_num
            while True:
                try:
                    ins_tensor, label_tensor = parallel_exe.run(
                        fetch_list, return_numpy=False)
                except fluid.core.EnforceNotMet as ex:
                    # The reader signals exhaustion through this exception.
                    self.assertIn("There is no next data.", str(ex))
                    break

                ins_val = np.array(ins_tensor)
                label_val = np.array(label_tensor)
                ins_lod = ins_tensor.lod()[0]

                self.assertEqual(ins_val.shape[1], 3)
                self.assertEqual(label_val.shape[1], 1)
                # One LoD segment per label.
                self.assertEqual(len(ins_lod) - 1, label_val.shape[0])
                for i in range(len(ins_lod) - 1):
                    ins_elem = ins_val[ins_lod[i]:ins_lod[i + 1]]
                    label_elem = label_val[i][0]

                    # Every value of the sequence must equal its label.
                    self.assertTrue((ins_elem == label_elem).all())
                    # Labels are 1-based; each may be seen only once.
                    index = int(label_elem - 1)
                    self.assertFalse(data_appeared[index])
                    data_appeared[index] = True

            # Every generated instance must have been fetched.
            self.assertTrue(all(data_appeared))

    def test_all(self):
        """Runs the dense check followed by the LoD check."""
        self.main()
        self.main_lod()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录