Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Commit 48f41a7f (unverified)
Authored by guofei on Apr 20, 2020; committed via GitHub on Apr 20, 2020.
Support LoDTensorArray in fetch (#23645) (#23968)
cherry-pick #23645
Parent: 5bcf1632

Showing 36 changed files with 599 additions and 247 deletions (+599, -247).
paddle/fluid/framework/details/async_ssa_graph_executor.cc (+20, -6)
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc (+1, -1)
paddle/fluid/framework/details/fetch_op_handle.cc (+90, -45)
paddle/fluid/framework/details/fetch_op_handle.h (+2, -2)
paddle/fluid/framework/details/parallel_ssa_graph_executor.cc (+31, -10)
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc (+1, -1)
paddle/fluid/framework/executor.cc (+3, -3)
paddle/fluid/framework/executor.h (+2, -2)
paddle/fluid/framework/feed_fetch_method.cc (+21, -13)
paddle/fluid/framework/feed_fetch_method.h (+1, -1)
paddle/fluid/framework/feed_fetch_type.h (+23, -4)
paddle/fluid/framework/lod_tensor_array.h (+0, -1)
paddle/fluid/framework/var_type.h (+4, -0)
paddle/fluid/framework/var_type_traits.h (+3, -1)
paddle/fluid/framework/variable_helper.cc (+2, -2)
paddle/fluid/inference/api/analysis_predictor.cc (+4, -3)
paddle/fluid/inference/api/api_impl.cc (+2, -1)
paddle/fluid/inference/api/api_impl_tester.cc (+21, -17)
paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc (+7, -5)
paddle/fluid/inference/tests/book/test_inference_image_classification.cc (+12, -10)
paddle/fluid/inference/tests/book/test_inference_label_semantic_roles.cc (+11, -9)
paddle/fluid/inference/tests/book/test_inference_nlp.cc (+6, -5)
paddle/fluid/inference/tests/book/test_inference_recognize_digits.cc (+9, -7)
paddle/fluid/inference/tests/book/test_inference_recommender_system.cc (+9, -7)
paddle/fluid/inference/tests/book/test_inference_rnn_encoder_decoder.cc (+11, -9)
paddle/fluid/inference/tests/book/test_inference_understand_sentiment.cc (+11, -9)
paddle/fluid/inference/tests/book/test_inference_word2vec.cc (+11, -9)
paddle/fluid/inference/tests/test_helper.h (+3, -2)
paddle/fluid/operators/controlflow/feed_op.cc (+2, -2)
paddle/fluid/operators/controlflow/fetch_op.cc (+46, -29)
paddle/fluid/pybind/pybind.cc (+77, -9)
python/paddle/fluid/executor.py (+15, -11)
python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py (+13, -5)
python/paddle/fluid/tests/unittests/test_executor_and_mul.py (+20, -5)
python/paddle/fluid/tests/unittests/test_feed_fetch_method.py (+3, -1)
python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py (+102, -0)
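The core of this change is the fetch result type: what used to be FeedFetchList (a plain std::vector<LoDTensor>) becomes FetchList, a vector of boost::variant<LoDTensor, LoDTensorArray>, so a fetched variable may now be a whole tensor array. Below is a minimal standalone sketch of the caller-side pattern this introduces; it uses stand-in types rather than Paddle's headers, so treat it as an illustration, not Paddle code.

// Standalone sketch (stand-in types, not Paddle's headers).
#include <boost/variant.hpp>
#include <iostream>
#include <typeinfo>
#include <vector>

struct LoDTensor { int numel = 0; };            // stand-in
using LoDTensorArray = std::vector<LoDTensor>;  // stand-in

// After this commit, each fetched slot can hold either kind of value.
using FetchType = boost::variant<LoDTensor, LoDTensorArray>;
using FetchList = std::vector<FetchType>;

bool data_is_lod_tensor(const FetchType &data) {
  return data.type() == typeid(LoDTensor);
}

int main() {
  FetchList fetched;
  fetched.emplace_back(LoDTensor{6});              // a plain tensor
  fetched.emplace_back(LoDTensorArray{{2}, {4}});  // a tensor array
  for (const auto &slot : fetched) {
    if (data_is_lod_tensor(slot)) {
      std::cout << "tensor, numel=" << boost::get<LoDTensor>(slot).numel << "\n";
    } else {
      std::cout << "array, size=" << boost::get<LoDTensorArray>(slot).size() << "\n";
    }
  }
}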
paddle/fluid/framework/details/async_ssa_graph_executor.cc

@@ -197,13 +197,27 @@ FetchResultType AsyncSSAGraphExecutor::Run(
   HandleException();
 
-  FeedFetchList ret;
-  auto &val = boost::get<FeedFetchList>(fetch_data);
+  FetchList ret;
+  auto &val = boost::get<FetchList>(fetch_data);
   for (size_t fetch_idx = 0; fetch_idx < fetch_tensors.size(); ++fetch_idx) {
-    std::vector<const LoDTensor *> lodtensor_ptrs;
-    lodtensor_ptrs.push_back(&val.at(fetch_idx));
-    ret.emplace_back();
-    ret.back().MergeLoDTensor(lodtensor_ptrs, platform::CPUPlace());
+    if (data_is_lod_tensor(val.at(fetch_idx))) {
+      std::vector<const LoDTensor *> lodtensor_ptrs;
+      lodtensor_ptrs.push_back(&(boost::get<LoDTensor>(val.at(fetch_idx))));
+      LoDTensor var;
+      var.MergeLoDTensor(lodtensor_ptrs, platform::CPUPlace());
+      ret.emplace_back(var);
+    } else {
+      auto array = boost::get<LoDTensorArray>(val.at(fetch_idx));
+      LoDTensorArray item_array;
+      item_array.reserve(array.size());
+      for (size_t i = 0; i < array.size(); ++i) {
+        std::vector<const LoDTensor *> lodtensor_ptrs;
+        lodtensor_ptrs.push_back(&array[i]);
+        item_array.emplace_back();
+        item_array.back().MergeLoDTensor(lodtensor_ptrs, platform::CPUPlace());
+      }
+      ret.emplace_back(item_array);
+    }
   }
   return ret;
 }
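For context: MergeLoDTensor concatenates the per-device partial results of one fetched variable into a single tensor, and for a LoDTensorArray the executors in this commit apply that merge element by element (element i of every device's array is merged into element i of the result). A standalone sketch of that element-wise merge, with a plain vector standing in for LoDTensor and simple concatenation standing in for MergeLoDTensor:

#include <iostream>
#include <vector>

// Stand-in: a "tensor" is a flat vector; "merge" concatenates the
// per-device pieces, the way MergeLoDTensor joins along the batch axis.
using Tensor = std::vector<float>;
using TensorArray = std::vector<Tensor>;

Tensor Merge(const std::vector<const Tensor *> &pieces) {
  Tensor out;
  for (const Tensor *p : pieces) out.insert(out.end(), p->begin(), p->end());
  return out;
}

int main() {
  // Two devices, each holding a 2-element tensor array.
  TensorArray dev0 = {{1, 2}, {5}};
  TensorArray dev1 = {{3, 4}, {6}};

  // Element-wise merge: merged[i] = concat(dev0[i], dev1[i]).
  TensorArray merged(dev0.size());
  for (size_t i = 0; i < dev0.size(); ++i) {
    merged[i] = Merge({&dev0[i], &dev1[i]});
  }
  std::cout << merged[0].size() << " " << merged[1].size() << "\n";  // 4 2
}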
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc

@@ -63,7 +63,7 @@ FetchResultType FastThreadedSSAGraphExecutor::Run(
   FetchResultType fetches;
   if (return_merged) {
-    fetches = FeedFetchList(fetch_tensors.size());
+    fetches = FetchList(fetch_tensors.size());
   } else {
     fetches = FetchUnmergedList(fetch_tensors.size());
   }
paddle/fluid/framework/details/fetch_op_handle.cc

@@ -39,51 +39,98 @@ void FetchOpHandle::RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) {
   PADDLE_THROW("Nobody should wait FetchOp. Unexpceted Error");
 }
 
-void FetchOpHandle::WaitAndMergeCPUTensors() const {
+static void CheckDims(const framework::DDim &tensor_dims,
+                      const framework::DDim &ele_dims, const size_t offset) {
+  PADDLE_ENFORCE_EQ(
+      tensor_dims.size(), ele_dims.size(),
+      platform::errors::Fatal("The dimension sizes of fetched Tensors or "
+                              "the items of fetched LoDTensorArray are "
+                              "different from each other on different "
+                              "devices. And the error is caused by the %zu "
+                              "(th) fetched variable. Please set the "
+                              "parameter `return_merged = False` when you "
+                              "call the `Executor.run()` method.",
+                              offset));
+  for (int j = 1; j < tensor_dims.size(); j++) {
+    PADDLE_ENFORCE_EQ(tensor_dims[j], ele_dims[j],
+                      platform::errors::Fatal(
+                          "The dimensions of fetched Tensors or "
+                          "the items of fetched LoDTensorArray are "
+                          "different from each other on different "
+                          "devices. And the error is caused by the "
+                          "%zu (th) fetched variable. Please set the "
+                          "parameter `return_merged = False` when "
+                          "you call the `Executor.run()` method.",
+                          offset));
+  }
+}
+
+void FetchOpHandle::WaitAndMergeCPUFetchVars() const {
   if (return_merged_) {
-    const auto &tensor_dims = tensors_[0].dims();
-    for (size_t i = 1; i < tensors_.size(); i++) {
-      const auto &ele_dims = tensors_[i].dims();
-      PADDLE_ENFORCE_EQ(
-          tensor_dims.size(), ele_dims.size(),
-          platform::errors::Fatal("The dimension sizes of fetched Tensors are "
-                                  "different from each other on different "
-                                  "devices. And the error is caused by the %zu "
-                                  "(th) fetched variable. Please set the "
-                                  "parameter `return_merged = False` when you "
-                                  "call the `Executor.run()` method.",
-                                  offset_));
-      for (int j = 1; j < tensor_dims.size(); j++) {
-        PADDLE_ENFORCE_EQ(tensor_dims[j], ele_dims[j],
-                          platform::errors::Fatal(
-                              "The dimensions of fetched Tensors are "
-                              "different from each other on different "
-                              "devices. And the error is caused by the "
-                              "%zu (th) fetched variable. Please set the "
-                              "parameter `return_merged = False` when "
-                              "you call the `Executor.run()` method.",
-                              offset_));
-      }
-    }
-    std::vector<const LoDTensor *> tensors_ptr;
-    tensors_ptr.reserve(tensors_.size());
-    for (auto &t : tensors_) {
-      tensors_ptr.emplace_back(&t);
-    }
-    auto &val = boost::get<FeedFetchList>(*data_);
-    val.at(offset_).MergeLoDTensor(tensors_ptr, platform::CPUPlace());
+    if (data_is_lod_tensor(tensors_[0])) {
+      const auto &tensor_dims = boost::get<LoDTensor>(tensors_[0]).dims();
+      for (size_t i = 1; i < tensors_.size(); i++) {
+        const auto &ele_dims = boost::get<LoDTensor>(tensors_[i]).dims();
+        CheckDims(tensor_dims, ele_dims, offset_);
+      }
+      std::vector<const LoDTensor *> tensors_ptr;
+      tensors_ptr.reserve(tensors_.size());
+      for (auto &t : tensors_) {
+        tensors_ptr.emplace_back(&boost::get<LoDTensor>(t));
+      }
+      auto &val = boost::get<FetchList>(*data_);
+      LoDTensor var;
+      var.MergeLoDTensor(tensors_ptr, platform::CPUPlace());
+      val.at(offset_) = std::move(var);
+    } else {
+      auto &array = boost::get<LoDTensorArray>(tensors_[0]);
+      LoDTensorArray tmp_array;
+      tmp_array.reserve(array.size());
+      for (size_t i = 0; i < array.size(); ++i) {
+        const auto &tensor_dims = array[i].dims();
+        std::vector<const LoDTensor *> tensors_ptr;
+        tensors_ptr.reserve(tensors_.size());
+        tensors_ptr.push_back(&array[i]);
+        for (size_t j = 1; j < tensors_.size(); ++j) {
+          auto &element = boost::get<LoDTensorArray>(tensors_[j]);
+          const auto &ele_dims = element[i].dims();
+          CheckDims(tensor_dims, ele_dims, offset_);
+          tensors_ptr.push_back(&element[i]);
+        }
+        tmp_array.emplace_back();
+        tmp_array.back().MergeLoDTensor(tensors_ptr, platform::CPUPlace());
+      }
+      auto &val = boost::get<FetchList>(*data_);
+      val.at(offset_) = std::move(tmp_array);
+    }
   } else {
     auto &val = boost::get<FetchUnmergedList>(*data_);
     val.at(offset_) = std::move(tensors_);
   }
 }
 
+static void TransData(const framework::LoDTensor &src_item,
+                      framework::LoDTensor *dst_item) {
+  if (src_item.IsInitialized() && src_item.numel() > 0) {
+    if (platform::is_gpu_place(src_item.place())) {
+#ifdef PADDLE_WITH_CUDA
+      TensorCopy(src_item, platform::CPUPlace(), dst_item);
+#endif
+    } else {
+      dst_item->ShareDataWith(src_item);
+    }
+  } else {
+    dst_item->clear();
+    dst_item->Resize({0});
+  }
+  dst_item->set_lod(src_item.lod());
+}
+
 void FetchOpHandle::RunImpl() {
   platform::RecordEvent record_event(Name());
   WaitInputVarGenerated(platform::CPUPlace());
 
   tensors_.resize(inputs_.size());
   platform::CPUPlace cpu;
   auto &scopes = *local_exec_scopes_;
 
   for (size_t i = 0; i < inputs_.size(); ++i) {

@@ -93,23 +140,21 @@ void FetchOpHandle::RunImpl() {
     PADDLE_ENFORCE_NOT_NULL(var, "Cannot find variable %s in execution scope",
                             var_handle->name());
 
-    auto &t = var->Get<framework::LoDTensor>();
-    if (t.IsInitialized() && t.numel() > 0) {
-      if (platform::is_gpu_place(t.place())) {
-#ifdef PADDLE_WITH_CUDA
-        TensorCopy(t, cpu, &tensors_[i]);
-#endif
-      } else {
-        tensors_[i].ShareDataWith(t);
-      }
+    if (var->IsType<LoDTensor>()) {
+      auto &t = var->Get<framework::LoDTensor>();
+      auto &item = boost::get<LoDTensor>(tensors_[i]);
+      TransData(t, &item);
     } else {
-      tensors_[i].clear();
-      tensors_[i].Resize({0});
+      auto &t = var->Get<framework::LoDTensorArray>();
+      LoDTensorArray tmp(t.size());
+      tensors_[i] = tmp;
+      auto &item = boost::get<LoDTensorArray>(tensors_[i]);
+      for (size_t j = 0; j < t.size(); ++j) {
+        TransData(t[j], &item[j]);
+      }
     }
-    tensors_[i].set_lod(t.lod());
   }
 
-  this->WaitAndMergeCPUTensors();
+  this->WaitAndMergeCPUFetchVars();
 }
 
 void FetchOpHandle::WaitInputVarGenerated(const platform::Place &place) {
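The new CheckDims helper above guards the merge: every device must report the same rank and the same trailing dimensions, and only dimension 0 (the per-device batch) may differ, since that is the axis the merge concatenates along. A standalone sketch of that check under this reading, with std::vector<int> standing in for framework::DDim:

#include <cassert>
#include <vector>

// Stand-in for framework::DDim: a list of dimension sizes.
using Dims = std::vector<int>;

// All devices must produce the same rank and the same trailing dims;
// only dim 0 (the per-device batch) is allowed to differ.
void CheckDims(const Dims &tensor_dims, const Dims &ele_dims) {
  assert(tensor_dims.size() == ele_dims.size());
  for (size_t j = 1; j < tensor_dims.size(); ++j) {
    assert(tensor_dims[j] == ele_dims[j]);
  }
}

int main() {
  CheckDims({32, 128}, {16, 128});  // fine: only the batch dim differs
  // CheckDims({32, 128}, {32, 64});  // would trip the assert
}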
paddle/fluid/framework/details/fetch_op_handle.h

@@ -36,7 +36,7 @@ struct FetchOpHandle : public OpHandleBase {
   void RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) override;
 
-  void WaitAndMergeCPUTensors() const;
+  void WaitAndMergeCPUFetchVars() const;
 
   std::string Name() const override;

@@ -54,7 +54,7 @@ struct FetchOpHandle : public OpHandleBase {
   size_t offset_;
   std::vector<Scope *> *local_scopes_;
   std::vector<Scope *> *local_exec_scopes_;
-  std::vector<LoDTensor> tensors_;
+  std::vector<FetchType> tensors_;
   bool return_merged_;
 };
paddle/fluid/framework/details/parallel_ssa_graph_executor.cc

@@ -179,7 +179,7 @@ FetchResultType ParallelSSAGraphExecutor::Run(
   }
   if (return_merged) {
-    return FeedFetchList();
+    return FetchList();
   } else {
     return FetchUnmergedList();
   }

@@ -245,22 +245,43 @@ FetchResultType ParallelSSAGraphExecutor::Run(
   }
 
   if (return_merged) {
-    FeedFetchList ret;
+    FetchList ret;
     ret.reserve(fetch_tensors.size());
     for (size_t fetch_idx = 0; fetch_idx < fetch_tensors.size(); ++fetch_idx) {
       std::vector<const LoDTensor *> lodtensor_ptrs;
       lodtensor_ptrs.reserve(place_num);
+      std::vector<const LoDTensorArray *> lodtensorarray_ptrs;
+      lodtensorarray_ptrs.reserve(place_num);
       for (size_t scope_idx = 0; scope_idx < place_num; ++scope_idx) {
         if (!is_valid[scope_idx]) {
           continue;
         }
-        const auto &fetch_list =
-            boost::get<FeedFetchList>(fetch_data[scope_idx]);
-        lodtensor_ptrs.push_back(&fetch_list[fetch_idx]);
+        const auto &fetch_list = boost::get<FetchList>(fetch_data[scope_idx]);
+        if (data_is_lod_tensor(fetch_list[fetch_idx])) {
+          lodtensor_ptrs.push_back(
+              &(boost::get<LoDTensor>(fetch_list[fetch_idx])));
+        } else {
+          lodtensorarray_ptrs.push_back(
+              &(boost::get<LoDTensorArray>(fetch_list[fetch_idx])));
+        }
       }
-      ret.emplace_back();
-      ret.back().MergeLoDTensor(lodtensor_ptrs, platform::CPUPlace());
+      if (lodtensor_ptrs.size() != 0) {
+        LoDTensor var;
+        var.MergeLoDTensor(lodtensor_ptrs, platform::CPUPlace());
+        ret.emplace_back(var);
+      } else {
+        LoDTensorArray var_array(lodtensorarray_ptrs[0]->size());
+        for (size_t i = 0; i < lodtensorarray_ptrs[0]->size(); ++i) {
+          LoDTensor var;
+          std::vector<const LoDTensor *> ptrs;
+          for (size_t j = 0; j < lodtensorarray_ptrs.size(); ++j) {
+            ptrs.push_back(&(lodtensorarray_ptrs[j]->at(i)));
+          }
+          var.MergeLoDTensor(ptrs, platform::CPUPlace());
+          var_array[i] = std::move(var);
+        }
+        ret.emplace_back(var_array);
+      }
     }
     return ret;
   } else {

@@ -277,8 +298,8 @@ FetchResultType ParallelSSAGraphExecutor::Run(
             boost::get<FetchUnmergedList>(fetch_data[scope_idx]);
         PADDLE_ENFORCE_EQ(
             fetch_list[fetch_idx].size(), 1,
-            platform::errors::Fatal(
-                "Each place must have only one fetched LoDTensor!"));
+            platform::errors::Fatal("Each place must have only one fetched "
+                                    "LoDTensor/LoDTensorArray!"));
         ret.back().emplace_back(fetch_list[fetch_idx][0]);
       }
     }
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc

@@ -72,7 +72,7 @@ inline FetchResultType ThreadedSSAGraphExecutor::RunImpl(
   std::unordered_set<VarHandleBase *> fetch_dependencies;
   FetchResultType fetch_data;
   if (return_merged) {
-    fetch_data = FeedFetchList(fetch_tensors.size());
+    fetch_data = FetchList(fetch_tensors.size());
   } else {
     fetch_data = FetchUnmergedList(fetch_tensors.size());
   }
paddle/fluid/framework/executor.cc

@@ -256,7 +256,7 @@ static bool has_feed_operators(
 // Return true if the block has fetch operators and holder of matching info.
 static bool has_fetch_operators(
     const BlockDesc& block,
-    const std::map<std::string, LoDTensor*>& fetch_targets,
+    const std::map<std::string, FetchType*>& fetch_targets,
     const std::string& fetch_holder_name) {
   size_t fetch_count = 0;
   for (auto* op : block.AllOps()) {

@@ -306,7 +306,7 @@ static bool has_fetch_operators(
 void Executor::Run(const ProgramDesc& program, Scope* scope,
                    std::map<std::string, const LoDTensor*>* feed_targets,
-                   std::map<std::string, LoDTensor*>* fetch_targets,
+                   std::map<std::string, FetchType*>* fetch_targets,
                    bool create_local_scope, bool create_vars,
                    const std::string& feed_holder_name,
                    const std::string& fetch_holder_name) {

@@ -504,7 +504,7 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
 void Executor::RunPreparedContext(
     ExecutorPrepareContext* ctx, Scope* scope,
     std::map<std::string, const LoDTensor*>* feed_targets,
-    std::map<std::string, LoDTensor*>* fetch_targets, bool create_local_scope,
+    std::map<std::string, FetchType*>* fetch_targets, bool create_local_scope,
     bool create_vars, const std::string& feed_holder_name,
     const std::string& fetch_holder_name) {
   auto& global_block = ctx->prog_.Block(ctx->block_id_);
paddle/fluid/framework/executor.h

@@ -87,7 +87,7 @@ class Executor {
   // This API is very slow.
   void Run(const ProgramDesc& program, Scope* scope,
            std::map<std::string, const LoDTensor*>* feed_targets,
-           std::map<std::string, LoDTensor*>* fetch_targets,
+           std::map<std::string, FetchType*>* fetch_targets,
            bool create_local_scope = true, bool create_vars = true,
            const std::string& feed_holder_name = "feed",
            const std::string& fetch_holder_name = "fetch");

@@ -95,7 +95,7 @@ class Executor {
   // This API is very slow.
   void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
                           std::map<std::string, const LoDTensor*>* feed_targets,
-                          std::map<std::string, LoDTensor*>* fetch_targets,
+                          std::map<std::string, FetchType*>* fetch_targets,
                           bool create_local_scope = true,
                           bool create_vars = true,
                           const std::string& feed_holder_name = "feed",
paddle/fluid/framework/feed_fetch_method.cc

@@ -29,7 +29,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
   // be created.
   VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
   Variable* g_feed_value = scope->Var(var_name);
-  auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
+  auto& feed_inputs = *(g_feed_value->GetMutable<FeedList>());
   if (index >= feed_inputs.size()) {
     feed_inputs.resize(index + 1);
   }

@@ -39,27 +39,35 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
   feed_inputs[index].set_lod(input.lod());
 }
 
-LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
+FetchType& GetFetchVariable(const Scope& scope, const std::string& var_name,
                             size_t index) {
-  // Since we want to fetch LodTensor from a variable, the variable must
+  // Since we want to fetch FetchType from a variable, the variable must
   // be created alreadly.
   Variable* g_fetch_value = scope.FindVar(var_name);
-  PADDLE_ENFORCE_NOT_NULL(g_fetch_value, "%s is not found.", var_name);
-  PADDLE_ENFORCE(g_fetch_value->IsType<FeedFetchList>(),
-                 "Only %s can be invoked by GetFetchVariable",
-                 typeid(FeedFetchList).name());
-  auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>();
+  PADDLE_ENFORCE_NOT_NULL(g_fetch_value,
+                          platform::errors::NotFound(
+                              "Variable %s is not found in scope.", var_name));
+  PADDLE_ENFORCE_EQ(g_fetch_value->IsType<FetchList>(), true,
+                    platform::errors::InvalidArgument(
+                        "Only %s can be invoked by GetFetchVariable",
+                        typeid(FetchList).name()));
+  auto& fetch_outputs = *g_fetch_value->GetMutable<FetchList>();
   auto& tensor = fetch_outputs[index];
-  VLOG(3) << "Fetch " << var_name << " with index " << index
-          << " shape= " << tensor.dims();
-  PADDLE_ENFORCE_LT(index, fetch_outputs.size());
+  VLOG(3) << "Fetch " << var_name << " with index " << index;
+  PADDLE_ENFORCE_LT(index, fetch_outputs.size(),
+                    platform::errors::InvalidArgument(
+                        "index must less than fetch_outputs size."));
   return tensor;
 }
 
 LoDTensor& GetVariableTensor(const Scope& scope, const std::string& var_name) {
   Variable* var = scope.FindVar(var_name);
-  PADDLE_ENFORCE(var, "%s no in scope", var_name);
-  PADDLE_ENFORCE(var->IsType<LoDTensor>(), "Only support lod tensor now.");
+  PADDLE_ENFORCE_NOT_NULL(var,
+                          platform::errors::NotFound(
+                              "Variable %s is not found in scope.", var_name));
+  PADDLE_ENFORCE_EQ(var->IsType<LoDTensor>(), true,
+                    platform::errors::InvalidArgument(
+                        "Only support lod tensor in GetVariableTensor now."));
   return *var->GetMutable<LoDTensor>();
 }
paddle/fluid/framework/feed_fetch_method.h

@@ -24,7 +24,7 @@ namespace framework {
 void SetFeedVariable(Scope* scope, const LoDTensor& input,
                      const std::string& var_name, size_t index);
 
-LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
+FetchType& GetFetchVariable(const Scope& scope, const std::string& var_name,
                             size_t index);
 
 LoDTensor& GetVariableTensor(const Scope& scope, const std::string& var_name);
paddle/fluid/framework/feed_fetch_type.h

@@ -15,14 +15,33 @@ limitations under the License. */
 #pragma once
 #include <vector>
 #include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/lod_tensor_array.h"
+#include "paddle/fluid/platform/variant.h"
 
 namespace paddle {
 namespace framework {
-using FeedFetchType = LoDTensor;
-using FeedFetchList = std::vector<FeedFetchType>;
-using FetchUnmergedList = std::vector<std::vector<FeedFetchType>>;
-using FetchResultType = boost::variant<FeedFetchList, FetchUnmergedList>;
+using FeedType = LoDTensor;
+using FeedList = std::vector<FeedType>;
+
+using FetchType = boost::variant<LoDTensor, LoDTensorArray>;
+using FetchList = std::vector<FetchType>;
+using FetchUnmergedList = std::vector<std::vector<FetchType>>;
+using FetchResultType = boost::variant<FetchList, FetchUnmergedList>;
+
+inline bool data_is_lod_tensor(const FetchType &data) {
+  if (data.type() == typeid(LoDTensor)) {
+    return true;
+  }
+  return false;
+}
+
+inline bool data_is_lod_tensor_array(const FetchType &data) {
+  if (data.type() == typeid(LoDTensorArray)) {
+    return true;
+  }
+  return false;
+}
 
 static const char kFeedOpType[] = "feed";
 static const char kFetchOpType[] = "fetch";
paddle/fluid/framework/lod_tensor_array.h

@@ -20,7 +20,6 @@ namespace paddle {
 namespace framework {
 
 using LoDTensorArray = std::vector<LoDTensor>;
-using LoDTensor2DArray = std::vector<std::vector<LoDTensor>>;
 
 }  // namespace framework
 }  // namespace paddle
paddle/fluid/framework/var_type.h

@@ -36,6 +36,7 @@ inline proto::VarType::Type ToVarType(int type) {
     case proto::VarType::SELECTED_ROWS:
     case proto::VarType::LOD_RANK_TABLE:
     case proto::VarType::LOD_TENSOR_ARRAY:
+    case proto::VarType::FETCH_LIST:
    case proto::VarType::READER:
       return static_cast<proto::VarType::Type>(type);
     default:

@@ -61,6 +62,9 @@ inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
     case proto::VarType::READER:
       visitor(var.Get<ReaderHolder>());
       return;
+    case proto::VarType::FETCH_LIST:
+      visitor(var.Get<FetchList>());
+      return;
     default:
       PADDLE_THROW("Not supported visit type, %s", ToTypeName(var.Type()));
   }
paddle/fluid/framework/var_type_traits.h

@@ -19,6 +19,7 @@
 #include <tuple>
 #include <typeindex>
 #include <vector>
+#include "paddle/fluid/framework/feed_fetch_type.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/platform/place.h"

@@ -139,7 +140,7 @@ struct VarTypeRegistryImpl {
 using VarTypeRegistry = detail::VarTypeRegistryImpl<
     Tensor, LoDTensor, SelectedRows, std::vector<Scope *>, LoDRankTable,
     LoDTensorArray, platform::PlaceList, ReaderHolder, std::string, Scope *,
-    operators::reader::LoDTensorBlockingQueueHolder,
+    operators::reader::LoDTensorBlockingQueueHolder, FetchList,
     operators::reader::OrderedMultiDeviceLoDTensorBlockingQueueHolder,
 #ifdef PADDLE_WITH_CUDA
 #if defined(PADDLE_WITH_NCCL)

@@ -178,6 +179,7 @@ REG_PROTO_VAR_TYPE_TRAIT(LoDRankTable, proto::VarType::LOD_RANK_TABLE);
 REG_PROTO_VAR_TYPE_TRAIT(LoDTensorArray, proto::VarType::LOD_TENSOR_ARRAY);
 REG_PROTO_VAR_TYPE_TRAIT(platform::PlaceList, proto::VarType::PLACE_LIST);
 REG_PROTO_VAR_TYPE_TRAIT(ReaderHolder, proto::VarType::READER);
+REG_PROTO_VAR_TYPE_TRAIT(FetchList, proto::VarType::FETCH_LIST);
 REG_PROTO_VAR_TYPE_TRAIT(int, proto::VarType::INT32);
 REG_PROTO_VAR_TYPE_TRAIT(float, proto::VarType::FP32);
paddle/fluid/framework/variable_helper.cc

@@ -34,9 +34,9 @@ void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
   } else if (var_type == proto::VarType::SELECTED_ROWS) {
     var->GetMutable<SelectedRows>();
   } else if (var_type == proto::VarType::FEED_MINIBATCH) {
-    var->GetMutable<FeedFetchList>();
+    var->GetMutable<FeedList>();
   } else if (var_type == proto::VarType::FETCH_LIST) {
-    var->GetMutable<FeedFetchList>();
+    var->GetMutable<FetchList>();
   } else if (var_type == proto::VarType::STEP_SCOPES) {
     var->GetMutable<std::vector<framework::Scope *>>();
   } else if (var_type == proto::VarType::LOD_RANK_TABLE) {
paddle/fluid/inference/api/analysis_predictor.cc

@@ -383,8 +383,9 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
   for (size_t i = 0; i < fetches_.size(); ++i) {
     int idx = boost::get<int>(fetches_[i]->GetAttr("col"));
     PADDLE_ENFORCE((size_t)idx == i);
-    framework::LoDTensor &fetch =
+    framework::FetchType &fetch_var =
         framework::GetFetchVariable(*scope, "fetch", idx);
+    auto &fetch = boost::get<framework::LoDTensor>(fetch_var);
     auto type = fetch.type();
     auto output = &(outputs->at(i));
     output->name = fetches_[idx]->Input("X")[0];

@@ -583,9 +584,9 @@ void AnalysisPredictor::PrepareFeedFetch() {
 void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) {
   PADDLE_ENFORCE_NOT_NULL(scope);
   auto *var = scope->Var("feed");
-  var->GetMutable<framework::FeedFetchList>();
+  var->GetMutable<framework::FeedList>();
   var = scope->Var("fetch");
-  var->GetMutable<framework::FeedFetchList>();
+  var->GetMutable<framework::FetchList>();
 }
 
 std::vector<std::string> AnalysisPredictor::GetInputNames() {
paddle/fluid/inference/api/api_impl.cc

@@ -286,8 +286,9 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
   for (size_t i = 0; i < fetchs_.size(); ++i) {
     int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
     PADDLE_ENFORCE((size_t)idx == i);
-    framework::LoDTensor &fetch =
+    framework::FetchType &fetch_var =
         framework::GetFetchVariable(*scope, "fetch", idx);
+    auto fetch = boost::get<framework::LoDTensor>(fetch_var);
     auto type = fetch.type();
     auto output = &(outputs->at(i));
     output->name = fetchs_[idx]->Input("X")[0];
paddle/fluid/inference/api/api_impl_tester.cc

@@ -102,14 +102,15 @@ void MainWord2Vec(bool use_gpu) {
   cpu_feeds.push_back(&third_word);
   cpu_feeds.push_back(&fourth_word);
 
-  framework::LoDTensor output1;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
+  framework::FetchType output1;
+  std::vector<paddle::framework::FetchType*> cpu_fetchs1;
   cpu_fetchs1.push_back(&output1);
 
   TestInference<platform::CPUPlace>(config.model_dir, cpu_feeds, cpu_fetchs1);
 
-  float* lod_data = output1.data<float>();
-  for (int i = 0; i < output1.numel(); ++i) {
+  auto output1_tensor = boost::get<paddle::framework::LoDTensor>(output1);
+  float* lod_data = output1_tensor.data<float>();
+  for (int i = 0; i < output1_tensor.numel(); ++i) {
     EXPECT_LT(lod_data[i] - data[i], ACC_DIFF);
     EXPECT_GT(lod_data[i] - data[i], -ACC_DIFF);
   }

@@ -137,8 +138,8 @@ void MainImageClassification(bool use_gpu) {
   std::vector<framework::LoDTensor*> cpu_feeds;
   cpu_feeds.push_back(&input);
 
-  framework::LoDTensor output1;
-  std::vector<framework::LoDTensor*> cpu_fetchs1;
+  framework::FetchType output1;
+  std::vector<framework::FetchType*> cpu_fetchs1;
   cpu_fetchs1.push_back(&output1);
 
   TestInference<platform::CPUPlace, false, true>(

@@ -153,7 +154,8 @@ void MainImageClassification(bool use_gpu) {
   ASSERT_EQ(outputs.size(), 1UL);
   size_t len = outputs[0].data.length();
   float* data = static_cast<float*>(outputs[0].data.data());
-  float* lod_data = output1.data<float>();
+  float* lod_data =
+      boost::get<paddle::framework::LoDTensor>(output1).data<float>();
   for (size_t j = 0; j < len / sizeof(float); ++j) {
     EXPECT_NEAR(lod_data[j], data[j], ACC_DIFF);
   }

@@ -168,7 +170,7 @@ void MainThreadsWord2Vec(bool use_gpu) {
   constexpr int num_jobs = 3;
   std::vector<std::vector<framework::LoDTensor>> jobs(num_jobs);
   std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
-  std::vector<framework::LoDTensor> refs(num_jobs);
+  std::vector<framework::FetchType> refs(num_jobs);
   for (size_t i = 0; i < jobs.size(); ++i) {
     // each job has 4 words
     jobs[i].resize(4);

@@ -181,7 +183,7 @@ void MainThreadsWord2Vec(bool use_gpu) {
     // get reference result of each job
     std::vector<paddle::framework::LoDTensor*> ref_feeds;
-    std::vector<paddle::framework::LoDTensor*> ref_fetches(1, &refs[i]);
+    std::vector<paddle::framework::FetchType*> ref_fetches(1, &refs[i]);
     for (auto& word : jobs[i]) {
       ref_feeds.push_back(&word);
     }

@@ -207,9 +209,10 @@ void MainThreadsWord2Vec(bool use_gpu) {
       }
 
       // check outputs correctness
-      float* ref_data = refs[tid].data<float>();
-      EXPECT_EQ(refs[tid].numel(), static_cast<int64_t>(len / sizeof(float)));
-      for (int i = 0; i < refs[tid].numel(); ++i) {
+      auto ref_tensor = boost::get<paddle::framework::LoDTensor>(refs[tid]);
+      float* ref_data = ref_tensor.data<float>();
+      EXPECT_EQ(ref_tensor.numel(), static_cast<int64_t>(len / sizeof(float)));
+      for (int i = 0; i < ref_tensor.numel(); ++i) {
         EXPECT_NEAR(ref_data[i], data[i], 2e-3);
       }
     });

@@ -230,7 +233,7 @@ void MainThreadsImageClassification(bool use_gpu) {
   auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
   std::vector<framework::LoDTensor> jobs(num_jobs);
   std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
-  std::vector<framework::LoDTensor> refs(num_jobs);
+  std::vector<framework::FetchType> refs(num_jobs);
   for (size_t i = 0; i < jobs.size(); ++i) {
     // prepare inputs
     std::vector<std::vector<int64_t>> feed_target_shapes =

@@ -242,7 +245,7 @@ void MainThreadsImageClassification(bool use_gpu) {
     // get reference result of each job
     std::vector<framework::LoDTensor*> ref_feeds(1, &jobs[i]);
-    std::vector<framework::LoDTensor*> ref_fetches(1, &refs[i]);
+    std::vector<framework::FetchType*> ref_fetches(1, &refs[i]);
     TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);
   }

@@ -259,9 +262,10 @@ void MainThreadsImageClassification(bool use_gpu) {
       ASSERT_EQ(local_outputs.size(), 1UL);
       const size_t len = local_outputs[0].data.length();
       float* data = static_cast<float*>(local_outputs[0].data.data());
-      float* ref_data = refs[tid].data<float>();
-      EXPECT_EQ((size_t)refs[tid].numel(), len / sizeof(float));
-      for (int i = 0; i < refs[tid].numel(); ++i) {
+      auto ref_tensor = boost::get<paddle::framework::LoDTensor>(refs[tid]);
+      float* ref_data = ref_tensor.data<float>();
+      EXPECT_EQ((size_t)ref_tensor.numel(), len / sizeof(float));
+      for (int i = 0; i < ref_tensor.numel(); ++i) {
         EXPECT_NEAR(ref_data[i], data[i], ACC_DIFF);
       }
     });
paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc

@@ -40,10 +40,10 @@ TEST(inference, fit_a_line) {
     cpu_feeds[i].push_back(input);
   }
 
-  std::vector<std::vector<paddle::framework::LoDTensor*>> cpu_fetchs1;
+  std::vector<std::vector<paddle::framework::FetchType*>> cpu_fetchs1;
   cpu_fetchs1.resize(num_threads);
   for (int i = 0; i < num_threads; ++i) {
-    auto* output = new paddle::framework::LoDTensor();
+    auto* output = new paddle::framework::FetchType();
     cpu_fetchs1[i].push_back(output);
   }

@@ -58,10 +58,10 @@ TEST(inference, fit_a_line) {
   }
 
 #ifdef PADDLE_WITH_CUDA
-  std::vector<std::vector<paddle::framework::LoDTensor*>> cpu_fetchs2;
+  std::vector<std::vector<paddle::framework::FetchType*>> cpu_fetchs2;
   cpu_fetchs2.resize(num_threads);
   for (int i = 0; i < num_threads; ++i) {
-    auto* output = new paddle::framework::LoDTensor();
+    auto* output = new paddle::framework::FetchType();
     cpu_fetchs2[i].push_back(output);
   }

@@ -76,7 +76,9 @@ TEST(inference, fit_a_line) {
   }
 
   for (int i = 0; i < num_threads; ++i) {
-    CheckError<float>(*cpu_fetchs1[i][0], *cpu_fetchs2[i][0]);
+    CheckError<float>(
+        boost::get<paddle::framework::LoDTensor>(*cpu_fetchs1[i][0]),
+        boost::get<paddle::framework::LoDTensor>(*cpu_fetchs2[i][0]));
     delete cpu_fetchs2[i][0];
   }
 #endif
paddle/fluid/inference/tests/book/test_inference_image_classification.cc

@@ -50,9 +50,9 @@ TEST(inference, image_classification) {
   std::vector<paddle::framework::LoDTensor*> cpu_feeds;
   cpu_feeds.push_back(&input);
 
-  paddle::framework::LoDTensor output1;
+  paddle::framework::FetchType output1;
   if (!FLAGS_skip_cpu) {
-    std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
+    std::vector<paddle::framework::FetchType*> cpu_fetchs1;
     cpu_fetchs1.push_back(&output1);
 
     // Run inference on CPU

@@ -60,12 +60,12 @@ TEST(inference, image_classification) {
     LOG(INFO) << "Batch size is " << FLAGS_batch_size;
     TestInference<paddle::platform::CPUPlace, false, true>(
        dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined);
-    LOG(INFO) << output1.dims();
+    LOG(INFO) << boost::get<paddle::framework::LoDTensor>(output1).dims();
   }
 
 #ifdef PADDLE_WITH_CUDA
-  paddle::framework::LoDTensor output2;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
+  paddle::framework::FetchType output2;
+  std::vector<paddle::framework::FetchType*> cpu_fetchs2;
   cpu_fetchs2.push_back(&output2);
 
   // Run inference on CUDA GPU

@@ -73,17 +73,18 @@ TEST(inference, image_classification) {
   LOG(INFO) << "Batch size is " << FLAGS_batch_size;
   TestInference<paddle::platform::CUDAPlace, false, true>(
       dirname, cpu_feeds, cpu_fetchs2, FLAGS_repeat, is_combined);
-  LOG(INFO) << output2.dims();
+  LOG(INFO) << boost::get<paddle::framework::LoDTensor>(output2).dims();
 
   if (!FLAGS_skip_cpu) {
-    CheckError<float>(output1, output2);
+    CheckError<float>(boost::get<paddle::framework::LoDTensor>(output1),
+                      boost::get<paddle::framework::LoDTensor>(output2));
   }
 
   // float16 inference requires cuda GPUs with >= 5.3 compute capability
   if (!FLAGS_fp16_dirname.empty() &&
       paddle::platform::GetCUDAComputeCapability(0) >= 53) {
-    paddle::framework::LoDTensor output3;
-    std::vector<paddle::framework::LoDTensor*> cpu_fetchs3;
+    paddle::framework::FetchType output3;
+    std::vector<paddle::framework::FetchType*> cpu_fetchs3;
     cpu_fetchs3.push_back(&output3);
 
     LOG(INFO) << "--- GPU Runs in float16 mode: ---";

@@ -92,7 +93,8 @@ TEST(inference, image_classification) {
     TestInference<paddle::platform::CUDAPlace, false, true>(
         FLAGS_fp16_dirname, cpu_feeds, cpu_fetchs3, FLAGS_repeat);
 
-    CheckError<float>(output2, output3);
+    CheckError<float>(boost::get<paddle::framework::LoDTensor>(output2),
+                      boost::get<paddle::framework::LoDTensor>(output3));
   }
 #endif
 }
paddle/fluid/inference/tests/book/test_inference_label_semantic_roles.cc

@@ -63,25 +63,27 @@ TEST(inference, label_semantic_roles) {
   cpu_feeds.push_back(&ctx_p2);
   cpu_feeds.push_back(&mark);
 
-  paddle::framework::LoDTensor output1;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
+  paddle::framework::FetchType output1;
+  std::vector<paddle::framework::FetchType*> cpu_fetchs1;
   cpu_fetchs1.push_back(&output1);
 
   // Run inference on CPU
   TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
-  LOG(INFO) << output1.lod();
-  LOG(INFO) << output1.dims();
+  auto output1_tensor = boost::get<paddle::framework::LoDTensor>(output1);
+  LOG(INFO) << output1_tensor.lod();
+  LOG(INFO) << output1_tensor.dims();
 
 #ifdef PADDLE_WITH_CUDA
-  paddle::framework::LoDTensor output2;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
+  paddle::framework::FetchType output2;
+  std::vector<paddle::framework::FetchType*> cpu_fetchs2;
   cpu_fetchs2.push_back(&output2);
 
   // Run inference on CUDA GPU
   TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
-  LOG(INFO) << output2.lod();
-  LOG(INFO) << output2.dims();
+  auto output2_tensor = boost::get<paddle::framework::LoDTensor>(output2);
+  LOG(INFO) << output2_tensor.lod();
+  LOG(INFO) << output2_tensor.dims();
 
-  CheckError<float>(output1, output2);
+  CheckError<float>(output1_tensor, output2_tensor);
 #endif
 }
paddle/fluid/inference/tests/book/test_inference_nlp.cc

@@ -118,8 +118,8 @@ void ThreadRunInfer(
       inference_program->GetFetchTargetNames();
   PADDLE_ENFORCE_EQ(fetch_target_names.size(), 1UL);
-  std::map<std::string, paddle::framework::LoDTensor*> fetch_targets;
-  paddle::framework::LoDTensor outtensor;
+  std::map<std::string, paddle::framework::FetchType*> fetch_targets;
+  paddle::framework::FetchType outtensor;
   fetch_targets[fetch_target_names[0]] = &outtensor;
 
   std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;

@@ -150,7 +150,8 @@ void ThreadRunInfer(
       std::string fetch_target_name = op->Input("X")[0];
       int idx = boost::get<int>(op->GetAttr("col"));
       *fetch_targets[fetch_target_name] =
-          paddle::framework::GetFetchVariable(*scope, "fetch", idx);
+          boost::get<paddle::framework::LoDTensor>(
+              paddle::framework::GetFetchVariable(*scope, "fetch", idx));
     }
   }

@@ -215,8 +216,8 @@ TEST(inference, nlp) {
   const std::vector<std::string>& fetch_target_names =
       inference_program->GetFetchTargetNames();
   PADDLE_ENFORCE_EQ(fetch_target_names.size(), 1UL);
-  std::map<std::string, paddle::framework::LoDTensor*> fetch_targets;
-  paddle::framework::LoDTensor outtensor;
+  std::map<std::string, paddle::framework::FetchType*> fetch_targets;
+  paddle::framework::FetchType outtensor;
   fetch_targets[fetch_target_names[0]] = &outtensor;
 
   // prepare feed
paddle/fluid/inference/tests/book/test_inference_recognize_digits.cc

@@ -41,28 +41,30 @@ TEST(inference, recognize_digits) {
   cpu_feeds.push_back(&input);
 
   for (auto is_combined : {false, true}) {
-    paddle::framework::LoDTensor output1;
-    std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
+    paddle::framework::FetchType output1;
+    std::vector<paddle::framework::FetchType*> cpu_fetchs1;
     cpu_fetchs1.push_back(&output1);
 
     // Run inference on CPU
    LOG(INFO) << "--- CPU Runs: is_combined=" << is_combined << " ---";
     TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1,
                                               FLAGS_repeat, is_combined);
-    LOG(INFO) << output1.dims();
+    auto output1_tensor = boost::get<paddle::framework::LoDTensor>(output1);
+    LOG(INFO) << output1_tensor.dims();
 
 #ifdef PADDLE_WITH_CUDA
-    paddle::framework::LoDTensor output2;
-    std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
+    paddle::framework::FetchType output2;
+    std::vector<paddle::framework::FetchType*> cpu_fetchs2;
     cpu_fetchs2.push_back(&output2);
 
     // Run inference on CUDA GPU
     LOG(INFO) << "--- GPU Runs: is_combined=" << is_combined << " ---";
     TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2,
                                                FLAGS_repeat, is_combined);
-    LOG(INFO) << output2.dims();
+    auto output2_tensor = boost::get<paddle::framework::LoDTensor>(output2);
+    LOG(INFO) << output2_tensor.dims();
 
-    CheckError<float>(output1, output2);
+    CheckError<float>(output1_tensor, output2_tensor);
 #endif
   }
 }
paddle/fluid/inference/tests/book/test_inference_recommender_system.cc

@@ -65,23 +65,25 @@ TEST(inference, recommender_system) {
   cpu_feeds.push_back(&category_id);
   cpu_feeds.push_back(&movie_title);
 
-  paddle::framework::LoDTensor output1;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
+  paddle::framework::FetchType output1;
+  std::vector<paddle::framework::FetchType*> cpu_fetchs1;
   cpu_fetchs1.push_back(&output1);
 
   // Run inference on CPU
   TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
-  LOG(INFO) << output1.dims();
+  auto output1_tensor = boost::get<paddle::framework::LoDTensor>(output1);
+  LOG(INFO) << output1_tensor.dims();
 
 #ifdef PADDLE_WITH_CUDA
-  paddle::framework::LoDTensor output2;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
+  paddle::framework::FetchType output2;
+  std::vector<paddle::framework::FetchType*> cpu_fetchs2;
   cpu_fetchs2.push_back(&output2);
 
   // Run inference on CUDA GPU
   TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
-  LOG(INFO) << output2.dims();
+  auto output2_tensor = boost::get<paddle::framework::LoDTensor>(output2);
+  LOG(INFO) << output2_tensor.dims();
 
-  CheckError<float>(output1, output2);
+  CheckError<float>(output1_tensor, output2_tensor);
 #endif
 }
paddle/fluid/inference/tests/book/test_inference_rnn_encoder_decoder.cc

@@ -41,25 +41,27 @@ TEST(inference, rnn_encoder_decoder) {
   cpu_feeds.push_back(&word_data);
   cpu_feeds.push_back(&trg_word);
 
-  paddle::framework::LoDTensor output1;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
+  paddle::framework::FetchType output1;
+  std::vector<paddle::framework::FetchType*> cpu_fetchs1;
   cpu_fetchs1.push_back(&output1);
 
   // Run inference on CPU
   TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
-  LOG(INFO) << output1.lod();
-  LOG(INFO) << output1.dims();
+  auto output1_tensor = boost::get<paddle::framework::LoDTensor>(output1);
+  LOG(INFO) << output1_tensor.lod();
+  LOG(INFO) << output1_tensor.dims();
 
 #ifdef PADDLE_WITH_CUDA
-  paddle::framework::LoDTensor output2;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
+  paddle::framework::FetchType output2;
+  std::vector<paddle::framework::FetchType*> cpu_fetchs2;
   cpu_fetchs2.push_back(&output2);
 
   // Run inference on CUDA GPU
   TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
-  LOG(INFO) << output2.lod();
-  LOG(INFO) << output2.dims();
+  auto output2_tensor = boost::get<paddle::framework::LoDTensor>(output2);
+  LOG(INFO) << output2_tensor.lod();
+  LOG(INFO) << output2_tensor.dims();
 
-  CheckError<float>(output1, output2);
+  CheckError<float>(output1_tensor, output2_tensor);
 #endif
 }
paddle/fluid/inference/tests/book/test_inference_understand_sentiment.cc

@@ -39,25 +39,27 @@ TEST(inference, understand_sentiment) {
   std::vector<paddle::framework::LoDTensor*> cpu_feeds;
   cpu_feeds.push_back(&words);
 
-  paddle::framework::LoDTensor output1;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
+  paddle::framework::FetchType output1;
+  std::vector<paddle::framework::FetchType*> cpu_fetchs1;
   cpu_fetchs1.push_back(&output1);
 
   // Run inference on CPU
   TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
-  LOG(INFO) << output1.lod();
-  LOG(INFO) << output1.dims();
+  auto output1_tensor = boost::get<paddle::framework::LoDTensor>(output1);
+  LOG(INFO) << output1_tensor.lod();
+  LOG(INFO) << output1_tensor.dims();
 
 #ifdef PADDLE_WITH_CUDA
-  paddle::framework::LoDTensor output2;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
+  paddle::framework::FetchType output2;
+  std::vector<paddle::framework::FetchType*> cpu_fetchs2;
   cpu_fetchs2.push_back(&output2);
 
   // Run inference on CUDA GPU
   TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
-  LOG(INFO) << output2.lod();
-  LOG(INFO) << output2.dims();
+  auto output2_tensor = boost::get<paddle::framework::LoDTensor>(output2);
+  LOG(INFO) << output2_tensor.lod();
+  LOG(INFO) << output2_tensor.dims();
 
-  CheckError<float>(output1, output2);
+  CheckError<float>(output1_tensor, output2_tensor);
 #endif
 }
paddle/fluid/inference/tests/book/test_inference_word2vec.cc

@@ -44,25 +44,27 @@ TEST(inference, word2vec) {
   cpu_feeds.push_back(&third_word);
   cpu_feeds.push_back(&fourth_word);
 
-  paddle::framework::LoDTensor output1;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
+  paddle::framework::FetchType output1;
+  std::vector<paddle::framework::FetchType*> cpu_fetchs1;
   cpu_fetchs1.push_back(&output1);
 
   // Run inference on CPU
   TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
-  LOG(INFO) << output1.lod();
-  LOG(INFO) << output1.dims();
+  auto output1_tensor = boost::get<paddle::framework::LoDTensor>(output1);
+  LOG(INFO) << output1_tensor.lod();
+  LOG(INFO) << output1_tensor.dims();
 
 #ifdef PADDLE_WITH_CUDA
-  paddle::framework::LoDTensor output2;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
+  paddle::framework::FetchType output2;
+  std::vector<paddle::framework::FetchType*> cpu_fetchs2;
   cpu_fetchs2.push_back(&output2);
 
   // Run inference on CUDA GPU
   TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
-  LOG(INFO) << output2.lod();
-  LOG(INFO) << output2.dims();
+  auto output2_tensor = boost::get<paddle::framework::LoDTensor>(output2);
+  LOG(INFO) << output2_tensor.lod();
+  LOG(INFO) << output2_tensor.dims();
 
-  CheckError<float>(output1, output2);
+  CheckError<float>(output1_tensor, output2_tensor);
 #endif
 }
paddle/fluid/inference/tests/test_helper.h

@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
 
 #include <map>
 #include <memory>
+#include <random>
 #include <string>
 #include <vector>

@@ -142,7 +143,7 @@ std::vector<std::vector<int64_t>> GetFeedTargetShapes(
 template <typename Place, bool CreateVars = true, bool PrepareContext = false>
 void TestInference(const std::string& dirname,
                    const std::vector<paddle::framework::LoDTensor*>& cpu_feeds,
-                   const std::vector<paddle::framework::LoDTensor*>& cpu_fetchs,
+                   const std::vector<paddle::framework::FetchType*>& cpu_fetchs,
                    const int repeat = 1, const bool is_combined = false) {
   // 1. Define place, executor, scope
   auto place = Place();

@@ -194,7 +195,7 @@ void TestInference(const std::string& dirname,
   }
 
   // 5. Define Tensor to get the outputs: set up maps for fetch targets
-  std::map<std::string, paddle::framework::LoDTensor*> fetch_targets;
+  std::map<std::string, paddle::framework::FetchType*> fetch_targets;
   for (size_t i = 0; i < fetch_target_names.size(); ++i) {
     fetch_targets[fetch_target_names[i]] = cpu_fetchs[i];
   }
paddle/fluid/operators/controlflow/feed_op.cc

@@ -58,7 +58,7 @@ class FeedOp : public framework::OperatorBase {
     VLOG(3) << "Feed variable " << feed_var_name << "'s " << col
             << " column to variable " << out_name;
 
-    auto &feed_list = feed_var->Get<framework::FeedFetchList>();
+    auto &feed_list = feed_var->Get<framework::FeedList>();
     PADDLE_ENFORCE_LT(
         static_cast<size_t>(col), feed_list.size(),
         platform::errors::InvalidArgument(

@@ -68,7 +68,7 @@ class FeedOp : public framework::OperatorBase {
             col, feed_list.size()));
 
     auto &feed_item = feed_list.at(static_cast<size_t>(col));
-    auto *out_item = out_var->GetMutable<framework::FeedFetchType>();
+    auto *out_item = out_var->GetMutable<framework::FeedType>();
 
     if (platform::is_same_place(feed_item.place(), place)) {
       out_item->ShareDataWith(feed_item);
paddle/fluid/operators/controlflow/fetch_op.cc

@@ -21,6 +21,39 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
+// FIXME(yuyang18): Should we assume the fetch operator always generate
+// CPU outputs?
+static void DataCopy(const framework::LoDTensor &src_item,
+                     const std::string &fetch_var_name,
+                     framework::LoDTensor *dst_item) {
+  if (src_item.IsInitialized() && src_item.numel() > 0) {
+#ifdef PADDLE_WITH_MKLDNN
+    // Conversion from MKL-DNN to Paddle
+    if (src_item.layout() == framework::DataLayout::kMKLDNN) {
+      framework::Tensor out;
+      // Convert to desired Paddle layout, apart from grads of filter
+      // as params are not a subject to paddle's data_format
+      framework::innerTransDataLayoutFromMKLDNN(
+          src_item.layout(),
+          fetch_var_name == framework::GradVarName("Filter")
+              ? framework::DataLayout::kNCHW
+              : paddle::platform::get_cur_paddle_data_layout(),
+          src_item, &out, platform::CPUPlace());
+      TensorCopySync(out, platform::CPUPlace(), dst_item);
+    } else {
+      TensorCopySync(src_item, platform::CPUPlace(), dst_item);
+    }
+#else
+    TensorCopySync(src_item, platform::CPUPlace(), dst_item);
+#endif
+  } else {
+    // Not copy, if the src tensor is empty.
+    dst_item->clear();
+    dst_item->Resize({0});
+  }
+  dst_item->set_lod(src_item.lod());
+}
+
 class FetchOp : public framework::OperatorBase {
  public:
   FetchOp(const std::string &type, const framework::VariableNameMap &inputs,

@@ -66,42 +99,26 @@ class FetchOp : public framework::OperatorBase {
     VLOG(3) << "Fetch variable " << fetch_var_name << " to variable "
             << out_name << "'s " << col << " column.";
 
-    auto *fetch_list = out_var->GetMutable<framework::FeedFetchList>();
-    auto &src_item = fetch_var->Get<framework::FeedFetchType>();
+    auto *fetch_list = out_var->GetMutable<framework::FetchList>();
 
     if (static_cast<size_t>(col) >= fetch_list->size()) {
       fetch_list->resize(col + 1);
     }
-    auto &dst_item = fetch_list->at(col);
 
-    // FIXME(yuyang18): Should we assume the fetch operator always generate
-    // CPU outputs?
-    if (src_item.IsInitialized() && src_item.numel() > 0) {
-#ifdef PADDLE_WITH_MKLDNN
-      // Conversion from MKL-DNN to Paddle
-      if (src_item.layout() == framework::DataLayout::kMKLDNN) {
-        framework::Tensor out;
-        // Convert to desired Paddle layout, apart from grads of filter
-        // as params are not a subject to paddle's data_format
-        framework::innerTransDataLayoutFromMKLDNN(
-            src_item.layout(),
-            fetch_var_name == framework::GradVarName("Filter")
-                ? framework::DataLayout::kNCHW
-                : paddle::platform::get_cur_paddle_data_layout(),
-            src_item, &out, platform::CPUPlace());
-        TensorCopySync(out, platform::CPUPlace(), &dst_item);
-      } else {
-        TensorCopySync(src_item, platform::CPUPlace(), &dst_item);
-      }
-#else
-      TensorCopySync(src_item, platform::CPUPlace(), &dst_item);
-#endif
+    if (fetch_var->IsType<framework::LoDTensor>()) {
+      auto &src_item = fetch_var->Get<framework::LoDTensor>();
+      auto *dst_item = &(boost::get<framework::LoDTensor>(fetch_list->at(col)));
+      DataCopy(src_item, fetch_var_name, dst_item);
     } else {
-      // Not copy, if the src tensor is empty.
-      dst_item.clear();
-      dst_item.Resize({0});
+      auto &src_item = fetch_var->Get<framework::LoDTensorArray>();
+      framework::LoDTensorArray tmp(src_item.size());
+      fetch_list->at(col) = tmp;
+      auto &dst_item =
+          boost::get<framework::LoDTensorArray>(fetch_list->at(col));
+      for (size_t i = 0; i < src_item.size(); ++i) {
+        DataCopy(src_item[i], fetch_var_name, &dst_item[i]);
+      }
     }
-    dst_item.set_lod(src_item.lod());
   }
 };
paddle/fluid/pybind/pybind.cc
浏览文件 @
48f41a7f
...
...
@@ -97,7 +97,9 @@ DECLARE_bool(use_mkldnn);
// disable auto conversion to list in Python
PYBIND11_MAKE_OPAQUE
(
paddle
::
framework
::
LoDTensorArray
);
PYBIND11_MAKE_OPAQUE
(
paddle
::
framework
::
LoDTensor2DArray
);
PYBIND11_MAKE_OPAQUE
(
paddle
::
framework
::
FetchUnmergedList
);
PYBIND11_MAKE_OPAQUE
(
paddle
::
framework
::
FetchList
);
PYBIND11_MAKE_OPAQUE
(
paddle
::
framework
::
FetchType
);
namespace
paddle
{
namespace
pybind
{
...
...
@@ -966,6 +968,9 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"get_lod_tensor_array"
,
[](
Variable
&
self
)
{
return
self
.
GetMutable
<
LoDTensorArray
>
();
},
py
::
return_value_policy
::
reference
)
.
def
(
"get_fetch_list"
,
[](
Variable
&
self
)
{
return
self
.
GetMutable
<
FetchList
>
();
},
py
::
return_value_policy
::
reference
)
#if (defined(PADDLE_WITH_NCCL))
.
def
(
"get_communicator"
,
[](
Variable
&
self
)
->
platform
::
Communicator
*
{
...
...
@@ -1443,7 +1448,7 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"run_prepared_ctx"
,
[](
Executor
&
self
,
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
std
::
map
<
std
::
string
,
const
LoDTensor
*>
*
feed_targets
,
std
::
map
<
std
::
string
,
LoDTensor
*>
*
fetch_targets
,
std
::
map
<
std
::
string
,
FetchType
*>
*
fetch_targets
,
bool
create_local_scope
=
true
,
bool
create_vars
=
true
,
const
std
::
string
&
feed_holder_name
=
"feed"
,
const
std
::
string
&
fetch_holder_name
=
"fetch"
)
{
...
...
@@ -1503,7 +1508,16 @@ All parameter, weight, gradient are variables in Paddle.
#endif
m
.
def
(
"set_feed_variable"
,
framework
::
SetFeedVariable
);
m
.
def
(
"get_fetch_variable"
,
framework
::
GetFetchVariable
);
m
.
def
(
"get_fetch_variable"
,
[](
const
Scope
&
scope
,
const
std
::
string
&
var_name
,
size_t
index
)
->
py
::
object
{
auto
&
var
=
framework
::
GetFetchVariable
(
scope
,
var_name
,
index
);
if
(
data_is_lod_tensor
(
var
))
{
return
py
::
cast
(
boost
::
get
<
LoDTensor
>
(
var
));
}
else
{
return
py
::
cast
(
boost
::
get
<
LoDTensorArray
>
(
var
));
}
});
m
.
def
(
"get_variable_tensor"
,
framework
::
GetVariableTensor
);
m
.
def
(
"_is_program_version_supported"
,
IsProgramVersionSupported
);
...
...
@@ -1583,16 +1597,70 @@ All parameter, weight, gradient are variables in Paddle.
         },
         py::return_value_policy::take_ownership);

-  py::class_<LoDTensor2DArray>(m, "LoDTensor2DArray", R"DOC(
-    LoDTensor2DArray is 2-D array of LoDTensor.
+  py::class_<FetchList>(m, "FetchList", R"DOC( FetchList is a
+        vector of boost::variant<LoDTensor, LoDTensorArray>.
+        )DOC")
+      .def("_move_to_list",
+           [](FetchList &self) -> py::list {
+             py::list res(self.size());
+             for (size_t i = 0; i < self.size(); ++i) {
+               if (data_is_lod_tensor(self[i])) {
+                 auto &data = boost::get<LoDTensor>(self[i]);
+                 res[i] = py::cast(std::move(data));
+               } else {
+                 auto &data = boost::get<LoDTensorArray>(self[i]);
+                 py::list tmp(data.size());
+                 for (size_t j = 0; j < data.size(); ++j) {
+                   tmp[j] = py::cast(std::move(data[j]));
+                 }
+                 res[i] = std::move(tmp);
+               }
+             }
+             self.clear();
+             return res;
+           },
+           py::return_value_policy::take_ownership)
+      .def("append",
+           [](FetchList &self, const LoDTensor &t) {
+             self.emplace_back();
+             auto &lod_tensor = boost::get<LoDTensor>(self.back());
+             lod_tensor.ShareDataWith(t);
+             lod_tensor.set_lod(t.lod());
+           },
+           py::arg("var"))
+      .def("append",
+           [](FetchList &self, const LoDTensorArray &t) {
+             self.emplace_back();
+             auto &lod_tensor_array = boost::get<LoDTensorArray>(self.back());
+             for (size_t i = 0; i < t.size(); ++i) {
+               lod_tensor_array[i].ShareDataWith(t[i]);
+               lod_tensor_array[i].set_lod(t[i].lod());
+             }
+           },
+           py::arg("var"));
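A hedged sketch of how these bindings look from Python, mirroring the updated test_feed_fetch_method.py later in this diff (get_fetch_list, append, and _move_to_list are the methods bound above; the scope setup is illustrative):

    import numpy
    import paddle.fluid.core as core

    scope = core.Scope()
    fetch_list = scope.var("fetch").get_fetch_list()  # bound FetchList, by reference

    t = core.LoDTensor()
    t.set(numpy.zeros((2, 3), dtype='float32'), core.CPUPlace())
    fetch_list.append(t)  # resolves to the LoDTensor overload of append

    items = fetch_list._move_to_list()  # -> [LoDTensor]; the FetchList is cleared
    print(len(items))  # 1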
+  py::class_<FetchUnmergedList>(m, "FetchUnmergedList", R"DOC(
+        FetchUnmergedList is 2-D array of FetchType(boost::variant(LoDTensor, LoDTensorArray)).
+        )DOC")
       .def("_move_to_list",
-           [](LoDTensor2DArray &self) -> py::list {
+           [](FetchUnmergedList &self) -> py::list {
              py::list res(self.size());
              for (size_t i = 0; i < self.size(); ++i) {
                py::list tmp(self[i].size());
                for (size_t j = 0; j < self[i].size(); ++j) {
-                 tmp[j] = py::cast(std::move(self[i][j]));
+                 if (data_is_lod_tensor(self[i][j])) {
+                   auto &var = boost::get<LoDTensor>(self[i][j]);
+                   tmp[j] = py::cast(std::move(var));
+                 } else {
+                   auto &var = boost::get<LoDTensorArray>(self[i][j]);
+                   py::list tmp_array(var.size());
+                   for (size_t k = 0; k < var.size(); ++k) {
+                     tmp_array[k] = std::move(var[k]);
+                   }
+                   tmp[j] = std::move(tmp_array);
+                 }
                }
                res[i] = std::move(tmp);
                self[i].clear();
...
...
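FetchUnmergedList backs the return_merged=False path: results stay indexed per device (or execution step) rather than being merged along the batch dimension, and an array-typed fetch target becomes a nested list. A small illustrative sketch of the resulting Python structure (the shapes and data below are fabricated for illustration, not produced by Paddle):

    import numpy

    # fetched[i][j] is fetch target j from device i; an array-typed target
    # is a nested list of per-element arrays.
    fetched = [
        [numpy.zeros((16, 1)), [numpy.zeros((16, 784)), numpy.zeros((16, 1))]],
        [numpy.zeros((16, 1)), [numpy.zeros((16, 784)), numpy.zeros((16, 1))]],
    ]
    loss_per_device = [dev[0] for dev in fetched]  # target 0 on each device
    assert len(loss_per_device) == 2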
@@ -2326,8 +2394,8 @@ All parameter, weight, gradient are variables in Paddle.
               ret = self.Run(fetch_tensors, return_merged);
             }
             if (return_merged) {
-              return py::cast(std::move(
-                  boost::get<paddle::framework::FeedFetchList>(ret)));
+              return py::cast(
+                  std::move(boost::get<paddle::framework::FetchList>(ret)));
             } else {
               return py::cast(std::move(
                   boost::get<paddle::framework::FetchUnmergedList>(ret)));
...
...
python/paddle/fluid/executor.py
...
...
@@ -931,14 +931,14 @@ class Executor(object):
             return_merged(bool): This parameter indicates whether fetched variables (the variables
                 specified in the fetch list) should be merged according to the execution device dimension.
                 If :code:`return_merged` is False, the type of the return value is a two-dimensional list
-                of :code:`Tensor` ( :code:`return_numpy` is False) or a two-dimensional list of
-                :code:`numpy.ndarray` ( :code:`return_numpy` is True). If :code:`return_merged` is True,
-                the type of the return value is an one-dimensional list of :code:`Tensor` ( :code:`return_numpy`
-                is False) or an one-dimensional list of :code:`numpy.ndarray` ( :code:`return_numpy` is True).
-                Please see Examples 2 for more details. If the lengths of fetched results are variant, please
-                set :code:`return_merged` as False, which denotes that the fetched results will not be merged.
-                The default is True, but it is just for the compatibility, and may use False as default value
-                in the future version.
+                of :code:`Tensor` / :code:`LoDTensorArray` ( :code:`return_numpy` is False) or a two-dimensional
+                list of :code:`numpy.ndarray` ( :code:`return_numpy` is True). If :code:`return_merged` is True,
+                the type of the return value is a one-dimensional list of :code:`Tensor` / :code:`LoDTensorArray`
+                ( :code:`return_numpy` is False) or a one-dimensional list of :code:`numpy.ndarray`
+                ( :code:`return_numpy` is True). Please see Examples 2 for more details. If the lengths of the
+                fetched results vary, please set :code:`return_merged` to False, which means the fetched
+                results will not be merged. The default is True, but this is only for compatibility, and False
+                may become the default value in a future version.
             use_prune(bool): This parameter indicates whether the input :code:`Program` will be pruned.
                 If the parameter is True, the program will be pruned according to the given feed and fetch_list,
                 which means the operators and variables in program that generate :code:`feed` and are not
...
@@ -980,13 +980,17 @@ class Executor(object):
                 loss = fluid.layers.mean(hidden)
                 adam = fluid.optimizer.Adam()
                 adam.minimize(loss)
+                i = fluid.layers.zeros(shape=[1], dtype='int64')
+                array = fluid.layers.array_write(x=loss, i=i)

                 # Run the startup program once and only once.
                 exe.run(fluid.default_startup_program())

                 x = numpy.random.random(size=(10, 1)).astype('float32')
-                outs = exe.run(feed={'X': x},
-                               fetch_list=[loss.name])
+                loss_val, array_val = exe.run(feed={'X': x},
+                                              fetch_list=[loss.name, array.name])
+                print(array_val)
+                # [array([0.02153828], dtype=float32)]
Examples 2:
.. code-block:: python
...
...
@@ -1226,7 +1230,7 @@ class Executor(object):
         else:
             self._default_executor.run_prepared_ctx(ctx, scope, False, False,
                                                     False)
-        arr = scope.find_var(fetch_var_name).get_lod_tensor_array()
+        arr = scope.find_var(fetch_var_name).get_fetch_list()
         tensors = arr._move_to_list()
         if return_numpy:
             return as_numpy(tensors)
...
...
python/paddle/fluid/tests/unittests/test_async_ssa_graph_executor_mnist.py
...
...
@@ -58,8 +58,11 @@ def convolutional_neural_network(use_py_reader):
     loss = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_loss = fluid.layers.mean(loss)
     acc = fluid.layers.accuracy(input=prediction, label=label)
-    return img, label, prediction, avg_loss, acc, py_reader
+    i = fluid.layers.zeros(shape=[1], dtype='int64')
+    array = fluid.layers.array_write(x=prediction, i=i)
+    fluid.layers.increment(i)
+    fluid.layers.array_write(x=acc, i=i, array=array)
+    return array, img, label, prediction, avg_loss, acc, py_reader


 def test():
...
...
@@ -69,7 +72,7 @@ def test():
     test_reader = paddle.batch(
         paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)

-    img, label, prediction, avg_loss, acc, py_reader = convolutional_neural_network(
+    array, img, label, prediction, avg_loss, acc, py_reader = convolutional_neural_network(
         use_py_reader=False)

     feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
...
...
@@ -102,7 +105,7 @@ def train(use_cuda, thread_num, cpu_num):
         print("paddle is not compiled with cuda, exit!")
         return

-    img, label, prediction, avg_loss, acc, py_reader = convolutional_neural_network(
+    array, img, label, prediction, avg_loss, acc, py_reader = convolutional_neural_network(
         use_py_reader=True)

     print("build convolutional neural network done.")
...
...
@@ -150,7 +153,12 @@ def train(use_cuda, thread_num, cpu_num):
     py_reader.start()
     try:
         while True:
-            loss_val = pe.run(fetch_list=[avg_loss.name])
+            array_v, acc_v, prediction_v, loss_val = pe.run(
+                fetch_list=[array, acc, prediction, avg_loss.name])
+            assert numpy.allclose(array_v[0], prediction_v) == True
+            assert numpy.allclose(array_v[1], acc_v) == True
             loss_val = numpy.mean(loss_val)

             if step % 10 == 0:
                 print("Pass %d, Batch %d, Cost %f, queue size %d" %
...
...
python/paddle/fluid/tests/unittests/test_executor_and_mul.py
...
...
@@ -19,25 +19,40 @@ import unittest
 import numpy

 import paddle.fluid.core as core
 from paddle.fluid.executor import Executor
-from paddle.fluid.layers import mul, data
+from paddle.fluid.layers import mul, data, zeros, array_write, increment


 class TestExecutor(unittest.TestCase):
     def test_mul(self):
+        i = zeros(shape=[1], dtype='int64')
         a = data(name='a', shape=[784], dtype='float32')
+        array = array_write(x=a, i=i)
+        i = increment(i)
         b = data(
             name='b', shape=[784, 100], dtype='float32', append_batch_size=False)
+        array_write(x=b, i=i, array=array)
+        i = increment(i)
         out = mul(x=a, y=b)
+        array_write(x=out, i=i, array=array)

         a_np = numpy.random.random((100, 784)).astype('float32')
         b_np = numpy.random.random((784, 100)).astype('float32')

         exe = Executor()
-        outs = exe.run(feed={'a': a_np, 'b': b_np}, fetch_list=[out])
-        out = outs[0]
-        self.assertEqual((100, 100), out.shape)
-        self.assertTrue(numpy.allclose(out, numpy.dot(a_np, b_np)))
+        res, res_array = exe.run(feed={'a': a_np, 'b': b_np},
+                                 fetch_list=[out, array])
+        self.assertEqual((100, 100), res.shape)
+        self.assertTrue(numpy.allclose(res, numpy.dot(a_np, b_np)))
+        self.assertTrue(numpy.allclose(res_array[0], a_np))
+        self.assertTrue(numpy.allclose(res_array[1], b_np))
+        self.assertTrue(numpy.allclose(res_array[2], res))


 if __name__ == '__main__':
...
...
python/paddle/fluid/tests/unittests/test_feed_fetch_method.py
...
...
@@ -31,7 +31,9 @@ class TestFeedFetch(unittest.TestCase):
         core.set_feed_variable(scope, input_tensor, "feed", 0)

-        output_tensor = core.get_fetch_variable(scope, "feed", 0)
+        output = scope.var("fetch").get_fetch_list()
+        output.append(input_tensor)
+        output_tensor = core.get_fetch_variable(scope, "fetch", 0)

         output_lod = output_tensor.recursive_sequence_lengths()
         self.assertEqual(2, output_lod[0][0])
...
...
python/paddle/fluid/tests/unittests/test_fetch_lod_tensor_array.py
0 → 100644
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import unittest
import random

import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from simple_nets import simple_fc_net_with_inputs, simple_fc_net


class TestFetchLoDTensorArray(unittest.TestCase):
    def build_program(self, main_program, startup_program):
        with fluid.unique_name.guard():
            with fluid.program_guard(main_program, startup_program):
                i = layers.zeros(shape=[1], dtype='int64')
                img = fluid.data(name='image', shape=[-1, 784], dtype='float32')
                label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
                loss = simple_fc_net_with_inputs(img, label, class_num=10)
                loss = simple_fc_net()
                opt = fluid.optimizer.SGD(learning_rate=0.001)
                opt.minimize(loss)
                array = layers.array_write(x=img, i=i)
                i = layers.increment(i)
                layers.array_write(x=label, i=i, array=array)
                i = layers.increment(i)
                layers.array_write(x=loss, i=i, array=array)
                return loss, array

    def check_network(self, use_cuda=True):
        os.environ["CPU_NUM"] = str(2)
        main_program = fluid.Program()
        startup_program = fluid.Program()
        loss, array = self.build_program(main_program, startup_program)

        batch_size = 32
        image = np.random.normal(size=(batch_size, 784)).astype('float32')
        label = np.random.randint(0, 10, (batch_size, 1), dtype="int64")

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_program)
        feed_dict = {'image': image, 'label': label}

        build_strategy = fluid.BuildStrategy()
        binary = fluid.CompiledProgram(main_program).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)

        device_num = fluid.core.get_cuda_device_count() if use_cuda else 2
        for _ in range(3):
            loss_v, array_v = exe.run(binary,
                                      feed=feed_dict,
                                      fetch_list=[loss, array],
                                      return_merged=False)
            self.assertEqual(np.array(loss_v).shape, (device_num, 1))
            self.assertEqual(
                np.array(array_v[0][0]).shape, (batch_size / device_num, 784))
            self.assertEqual(
                np.array(array_v[0][1]).shape, (batch_size / device_num, 1))
            self.assertEqual(np.array(array_v[0][2]).shape, (1, ))

        for _ in range(3):
            loss_v, array_v = exe.run(binary,
                                      feed=feed_dict,
                                      fetch_list=[loss, array],
                                      return_merged=True)
            self.assertEqual(np.array(loss_v).shape, (device_num, ))
            self.assertEqual(np.array(array_v[0]).shape, (batch_size, 784))
            self.assertEqual(np.array(array_v[1]).shape, (batch_size, 1))
            self.assertTrue(np.allclose(loss_v, array_v[2]))

    def test_fetch_lod_tensor_array(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_network(use_cuda=True)
        self.check_network(use_cuda=False)

    def test_fetch_unmerged_parallel_graph(self):
        fluid.core.globals()['FLAGS_enable_parallel_graph'] = True
        if fluid.core.is_compiled_with_cuda():
            self.check_network(use_cuda=True)
        self.check_network(use_cuda=False)
        fluid.core.globals()['FLAGS_enable_parallel_graph'] = False


if __name__ == '__main__':
    unittest.main()