PaddlePaddle / Paddle

Commit c8744d11 (unverified)
Authored by Yan Chunwei on Sep 28, 2018; committed via GitHub on Sep 28, 2018.
Parent commit: 10a13f9c

fea/infer executor and concurrency performance issue bug fix (#13451)

- add naive executor
- fix concurrency performance issue
Showing 31 changed files, with 1,387 additions and 106 deletions (+1387 -106).
cmake/external/anakin.cmake                                        +1    -0
paddle/fluid/framework/CMakeLists.txt                              +5    -2
paddle/fluid/framework/ir/CMakeLists.txt                           +6    -6
paddle/fluid/framework/naive_executor.cc                           +150  -0
paddle/fluid/framework/naive_executor.h                            +63   -0
paddle/fluid/framework/naive_executor_test.cc                      +70   -0
paddle/fluid/framework/operator.cc                                 +8    -2
paddle/fluid/framework/scope.cc                                    +31   -0
paddle/fluid/inference/CMakeLists.txt                              +1    -1
paddle/fluid/inference/analysis/CMakeLists.txt                     +1    -1
paddle/fluid/inference/api/CMakeLists.txt                          +13   -7
paddle/fluid/inference/api/analysis_predictor.cc                   +211  -31
paddle/fluid/inference/api/analysis_predictor.h                    +49   -10
paddle/fluid/inference/api/analysis_predictor_tester.cc            +67   -0
paddle/fluid/inference/api/api.cc                                  +22   -16
paddle/fluid/inference/api/api_impl.cc                             +1    -1
paddle/fluid/inference/api/api_impl.h                              +13   -9
paddle/fluid/inference/api/api_impl_tester.cc                      +3    -3
paddle/fluid/inference/api/details/zero_copy_tensor.cc             +111  -0
paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc       +46   -0
paddle/fluid/inference/api/helper.h                                +138  -0
paddle/fluid/inference/api/paddle_inference_api.h                  +53   -1
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc            +6    -1
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc            +4    -1
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc           +282  -3
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc      +2    -1
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc            +3    -1
paddle/fluid/inference/tests/api/tester_helper.h                   +2    -4
paddle/fluid/memory/malloc.cc                                      +21   -0
paddle/fluid/string/pretty_log.h                                   +4    -4
python/paddle/fluid/tests/unittests/CMakeLists.txt                 +0    -1
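To show how the pieces of this change fit together, here is a minimal sketch of the zero-copy inference flow the commit introduces, distilled from the analysis_predictor_tester.cc added below. It is an illustrative sketch, not code from the commit itself; the model directory and the tensor names ("firstw", "fc_1.tmp_2") are simply the ones that test uses, and any real model would supply its own.

// Minimal sketch of the zero-copy path: feed/fetch ops are disabled, the
// inputs are written directly into the tensors owned by the predictor's
// scope, and the outputs are read in place without copying to PaddleTensor.
#include <string>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void ZeroCopyExample(const std::string &model_dir) {
  paddle::contrib::AnalysisConfig config;
  config.model_dir = model_dir;        // e.g. ".../word2vec.inference.model"
  config.use_feed_fetch_ops = false;   // let NaiveExecutor skip feed/fetch ops

  auto predictor =
      paddle::CreatePaddlePredictor<paddle::contrib::AnalysisConfig,
                                    paddle::PaddleEngineKind::kAnalysis>(config);

  // Write the input directly into the scope-owned tensor.
  auto input = predictor->GetInputTensor("firstw");
  input->Reshape({4, 1});
  auto *in_data = input->mutable_data<int64_t>(paddle::PaddlePlace::kCPU);
  for (int i = 0; i < 4; i++) in_data[i] = i;

  predictor->ZeroCopyRun();  // runs the NaiveExecutor, no feed/fetch copies

  // Read the output buffer in place.
  auto output = predictor->GetOutputTensor("fc_1.tmp_2");
  paddle::PaddlePlace place;
  int size = 0;
  auto *out_data = output->data<float>(&place, &size);
  (void)out_data;
  (void)size;
}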
cmake/external/anakin.cmake

@@ -52,6 +52,7 @@ ExternalProject_Add(
    PREFIX          ${ANAKIN_SOURCE_DIR}
    UPDATE_COMMAND  ""
    CMAKE_ARGS      ${CMAKE_ARGS_PREFIX}
                    -DUSE_LOGGER=YES
                    -DUSE_X86_PLACE=YES
                    -DBUILD_WITH_UNIT_TEST=NO
                    -DPROTOBUF_ROOT=${THIRD_PARTY_PATH}/install/protobuf
...
paddle/fluid/framework/CMakeLists.txt

@@ -56,9 +56,9 @@ else()
    cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor)
endif()
if(NOT WIN32)
    cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version)
else()
    cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version)
endif(NOT WIN32)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
...

@@ -141,12 +141,15 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
if(WITH_DISTRIBUTE)
    cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr graph_to_program_pass)
    set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
    set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
else()
    cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
    cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass elementwise_add_op)
endif()
if(NOT WIN32)
...
paddle/fluid/framework/ir/CMakeLists.txt

@@ -28,9 +28,9 @@ cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph grap
pass_library(graph_to_program_pass base)
pass_library(graph_viz_pass base)
pass_library(fc_fuse_pass inference)
if(WITH_MKLDNN)
    pass_library(conv_relu_mkldnn_fuse_pass inference)
endif()
pass_library(attention_lstm_fuse_pass inference)
pass_library(infer_clean_graph_pass inference)
pass_library(fc_lstm_fuse_pass inference)
...

@@ -49,6 +49,6 @@ cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_r
cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass)
cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector)
cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
if(WITH_MKLDNN)
    cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
endif()
paddle/fluid/framework/naive_executor.cc  (new file)

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/string/pretty_log.h"

namespace paddle {
namespace framework {

// These code can be shared with Executor.
static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
  if (var_type == proto::VarType::LOD_TENSOR) {
    var->GetMutable<LoDTensor>();
  } else if (var_type == proto::VarType::SELECTED_ROWS) {
    var->GetMutable<SelectedRows>();
  } else if (var_type == proto::VarType::FEED_MINIBATCH) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarType::FETCH_LIST) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarType::STEP_SCOPES) {
    var->GetMutable<std::vector<framework::Scope>>();
  } else if (var_type == proto::VarType::LOD_RANK_TABLE) {
    var->GetMutable<LoDRankTable>();
  } else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {
    var->GetMutable<LoDTensorArray>();
  } else if (var_type == proto::VarType::PLACE_LIST) {
    var->GetMutable<platform::PlaceList>();
  } else if (var_type == proto::VarType::READER) {
    var->GetMutable<ReaderHolder>();
  } else if (var_type == proto::VarType::CHANNEL) {
    var->GetMutable<ChannelHolder>();
  } else if (var_type == proto::VarType::RAW) {
    // GetMutable will be called in operator
  } else {
    PADDLE_THROW(
        "Variable type %d is not in "
        "[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
        "LOD_RANK_TABLE, PLACE_LIST, READER, CHANNEL, RAW]",
        var_type);
  }
}

void NaiveExecutor::Prepare(Scope *parent_scope,
                            const ProgramDesc &program_desc, int block_id,
                            bool with_feed_fetch_ops) {
  if (!parent_scope) {
    scope_ = new framework::Scope;
  } else {
    scope_ = &parent_scope->NewScope();
  }
  CreateVariables(program_desc, scope_, block_id);
  CreateOps(program_desc, block_id, with_feed_fetch_ops);
}

void NaiveExecutor::Run() {
  for (auto &op : ops_) {
    VLOG(4) << "run " << op->Type();
    op->Run(*scope_, place_);
  }
}

void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope,
                                    int block_id) {
  PADDLE_ENFORCE(scope);
  auto &global_block = desc.Block(block_id);

  const Scope *ancestor_scope = scope;
  while (ancestor_scope->parent()) {
    ancestor_scope = ancestor_scope->parent();
  }

  if (ancestor_scope != scope) {
    for (auto &var : global_block.AllVars()) {
      if (var->Name() == framework::kEmptyVarName) {
        continue;
      }
      // Create persistable vars in ancestor scope.
      if (var->Persistable()) {
        auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name());
        InitializeVariable(ptr, var->GetType());
        VLOG(3) << "Create Variable " << var->Name()
                << " global, which pointer is " << ptr;
      } else {
        // Create temporary variables in local scope.
        auto *ptr = scope->Var(var->Name());
        InitializeVariable(ptr, var->GetType());
        VLOG(3) << "Create Variable " << var->Name()
                << " locally, which pointer is " << ptr;
      }
    }
  } else {
    for (auto &var : global_block.AllVars()) {
      auto *ptr = scope->Var(var->Name());
      InitializeVariable(ptr, var->GetType());
      VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
              << ptr;
    }
  }
}

void NaiveExecutor::CreateOps(const ProgramDesc &desc, int block_id,
                              bool with_feed_fetch_ops) {
  for (const auto &op_desc : desc.Block(block_id).AllOps()) {
    if (!with_feed_fetch_ops &&
        (op_desc->Type() == "feed" || op_desc->Type() == "fetch")) {
      string::PrettyLogEndl(string::Style::detail(), "--- skip [%s], %s -> %s",
                            op_desc->Input("X")[0], op_desc->Type(),
                            op_desc->Output("Out")[0]);
      continue;
    }
    ops_.emplace_back(OpRegistry::CreateOp(*op_desc));
  }
}

LoDTensor *NaiveExecutor::FindTensor(const std::string &name) {
  PADDLE_ENFORCE(scope_, "Need to init scope first");
  auto *var = scope_->FindVar(name);
  PADDLE_ENFORCE(var, "No variable [%s] in the scope");
  auto *tensor = const_cast<LoDTensor *>(&var->Get<LoDTensor>());
  return tensor;
}

void NaiveExecutor::CleanFeedFetchOps() {
  std::vector<std::unique_ptr<OperatorBase>> ops;
  for (auto &op : ops_) {
    if (op->Type() != "feed" && op->Type() != "fetch") {
      ops.emplace_back(std::move(op));
    }
  }
  ops_.swap(ops);
}

}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/naive_executor.h  (new file)

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"

namespace paddle {
namespace framework {

/*
 * Simple, intuitive and effective. Only single thread is supported, and
 * currently designed for inference.
 */
class NaiveExecutor {
 public:
  explicit NaiveExecutor(const platform::Place &place) : place_(place) {}

  // Create child scope.
  // Create variables.
  // @with_feed_fetch_ops: whether to work with the feed and fetch operators.
  void Prepare(Scope *parent_scope, const ProgramDesc &program_desc,
               int block_id, bool with_feed_fetch_ops);

  // Run all the operators.
  void Run();

  // Get an tensor to operating directly, without the need for feed_ops.
  LoDTensor *FindTensor(const std::string &name);

  Scope *scope() { return scope_; }

  void CleanFeedFetchOps();

 protected:
  void CreateVariables(const ProgramDesc &desc, Scope *scope, int block_id);
  void CreateOps(const ProgramDesc &desc, int block_id,
                 bool with_feed_fetch_ops);

 private:
  const platform::Place place_;
  // Catch the required resource to avoid recreate.
  std::vector<std::unique_ptr<OperatorBase>> ops_;
  Scope *scope_;
};

}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/naive_executor_test.cc  (new file)

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/naive_executor.h"
#include <gtest/gtest.h>
#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"

namespace paddle {
namespace framework {

TEST(NaiveExecutor, Basic) {
  ProgramDesc program;
  auto* main_block = program.MutableBlock(0);
  auto* a = main_block->Var("a");  // input
  auto* b = main_block->Var("b");  // input
  auto* c = main_block->Var("c");  // input
  a->SetType(proto::VarType::LOD_TENSOR);
  b->SetType(proto::VarType::LOD_TENSOR);
  c->SetType(proto::VarType::LOD_TENSOR);

  auto* add = main_block->AppendOp();
  add->SetType("elementwise_add");
  add->SetInput("X", {"a"});
  add->SetInput("Y", {"b"});
  add->SetOutput("Out", {"c"});

  auto place = platform::CPUPlace();
  NaiveExecutor exe(place);
  exe.Prepare(nullptr, program, 0, false /*with feed fetch ops*/);
  auto* a_tensor = exe.FindTensor("a");
  auto* b_tensor = exe.FindTensor("b");
  auto* c_tensor = exe.FindTensor("c");

  a_tensor->Resize({1, 4});
  b_tensor->Resize({1, 4});
  c_tensor->Resize({1, 4});
  b_tensor->mutable_data<float>(place);
  a_tensor->mutable_data<float>(place);

  float a_arr[] = {0, 1, 2, 3};
  float b_arr[] = {0.0, .1, .2, .3};

  std::copy_n(a_arr, 4, a_tensor->mutable_data<float>(place));
  std::copy_n(b_arr, 4, b_tensor->mutable_data<float>(place));

  exe.Run();

  auto* c_data = c_tensor->mutable_data<float>(place);
  for (int i = 0; i < 4; i++) {
    EXPECT_NEAR(c_data[i], 1.1 * i, 1e-3);
  }
}

}  // namespace framework
}  // namespace paddle

USE_OP(elementwise_add);
paddle/fluid/framework/operator.cc

@@ -154,9 +154,15 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
    platform::SetDeviceId(dev_id);
#endif
  }

  if (platform::IsProfileEnabled()) {
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    platform::RecordEvent record_event(Type(), pool.Get(place));
  }
  RunImpl(scope, place);
  if (VLOG_IS_ON(3)) {
    VLOG(3) << place << " " << DebugStringEx(&scope);
  }
...
paddle/fluid/framework/scope.cc

@@ -20,6 +20,13 @@ limitations under the License. */
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/string/printf.h"

// The mutex is not needed by training and inference, only for distribution.
#if PADDLE_WITH_DISTRIBUTE
#define WITH_LOCK 1
#else
#define WITH_LOCK 0
#endif

DEFINE_bool(benchmark, false,
            "Doing memory benchmark. It will make deleting scope synchronized, "
            "and add some memory usage logs."
...

@@ -49,18 +56,24 @@ int64_t GetEagerDeletionThreshold() {
Scope::~Scope() { DropKids(); }

Scope& Scope::NewScope() const {
#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
#endif
  kids_.push_back(new Scope(this));
  return *kids_.back();
}

Variable* Scope::Var(const std::string& name) {
#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
#endif
  return VarInternal(name);
}

Variable* Scope::Var(std::string* name) {
#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
#endif
  auto new_name = string::Sprintf("%p.%d", this, vars_.size());
  if (name != nullptr) {
    *name = new_name;
...

@@ -69,29 +82,39 @@ Variable* Scope::Var(std::string* name) {
}

Variable* Scope::FindVar(const std::string& name) const {
#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
#endif
  return FindVarInternal(name);
}

const Scope* Scope::FindScope(const Variable* var) const {
#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
#endif
  return FindScopeInternal(var);
}

void Scope::DropKids() {
#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
#endif
  for (Scope* s : kids_) delete s;
  kids_.clear();
}

bool Scope::HasKid(const Scope* scope) const {
#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
#endif
  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
  return it != this->kids_.end();
}

std::vector<std::string> Scope::LocalVarNames() const {
#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
#endif
  std::vector<std::string> known_vars;
  known_vars.reserve(this->vars_.size());
  for (auto& p : vars_) {
...

@@ -101,7 +124,9 @@ std::vector<std::string> Scope::LocalVarNames() const {
}

void Scope::DeleteScope(Scope* scope) const {
#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
#endif
  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
  PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
  this->kids_.erase(it);
...

@@ -114,7 +139,9 @@ void Scope::DeleteScope(Scope* scope) const {
}

void Scope::EraseVars(const std::vector<std::string>& var_names) {
#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
#endif
  std::set<std::string> var_set(var_names.begin(), var_names.end());
  for (auto it = vars_.begin(); it != vars_.end();) {
    if (var_set.find(it->first) != var_set.end()) {
...

@@ -127,12 +154,16 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void Scope::Rename(const std::string& origin_name,
                   const std::string& new_name) const {
#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
#endif
  RenameInternal(origin_name, new_name);
}

std::string Scope::Rename(const std::string& origin_name) const {
#if WITH_LOCK
  std::unique_lock<std::mutex> lock(mutex_);
#endif
  auto new_name = string::Sprintf("%p.%d", this, vars_.size());
  RenameInternal(origin_name, new_name);
  return new_name;
...
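Every Scope method above acquires the same conditionally compiled lock. A standalone sketch of that pattern follows; the Registry class, its member names, and the method are illustrative stand-ins rather than Paddle code. The point of the design is that the mutex is only taken when the distributed build flag is set, so plain single-process training and inference pay no locking cost.

// Illustrative sketch (not Paddle code) of the WITH_LOCK pattern used above:
// the guard is compiled in only for distributed builds.
#include <map>
#include <mutex>
#include <string>

#if defined(PADDLE_WITH_DISTRIBUTE)
#define WITH_LOCK 1
#else
#define WITH_LOCK 0
#endif

class Registry {
 public:
  int *Var(const std::string &name) {
#if WITH_LOCK
    // Guards concurrent mutation of vars_ when several threads register vars.
    std::unique_lock<std::mutex> lock(mutex_);
#endif
    return &vars_[name];
  }

 private:
  std::map<std::string, int> vars_;
#if WITH_LOCK
  std::mutex mutex_;
#endif
};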
paddle/fluid/inference/CMakeLists.txt  (diff not expanded)

paddle/fluid/inference/analysis/CMakeLists.txt  (diff not expanded)
paddle/fluid/inference/api/CMakeLists.txt

@@ -18,10 +18,10 @@ if(APPLE)
endif(APPLE)

set(inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager naive_executor ${GLOB_PASS_LIB})

if(WITH_GPU AND TENSORRT_FOUND)
    set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine analysis_predictor)
endif()

function(inference_api_test TARGET_NAME)
...

@@ -43,8 +43,10 @@ function(inference_api_test TARGET_NAME)
  endif(WITH_TESTING)
endfunction(inference_api_test)

cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope)
cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor)
cc_library(zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api)
cc_library(zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api)

cc_test(test_paddle_inference_api
        SRCS api_tester.cc
        DEPS paddle_inference_api)
...

@@ -52,18 +54,22 @@ cc_test(test_paddle_inference_api
inference_api_test(test_api_impl SRC api_impl_tester.cc
                   ARGS test_word2vec test_image_classification)

set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests)
cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_predictor ${inference_deps} paddle_inference_api
        ARGS --dirname=${PYTHON_TESTS_DIR}/book)

if(WITH_GPU AND TENSORRT_FOUND)
    cc_library(paddle_inference_tensorrt_subgraph_engine
        SRCS api_tensorrt_subgraph_engine.cc
        DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api tensorrt_converter zero_copy_tensor_dummy)

    inference_api_test(test_api_tensorrt_subgraph_engine SRC api_tensorrt_subgraph_engine_tester.cc ARGS test_word2vec)
endif()

if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
    # compile the libinference_anakin_api.a and anakin.so.
    cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml scope zero_copy_tensor_dummy)
    cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber scope)
    function(anakin_target target_name)
        target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
    endfunction()
...
paddle/fluid/inference/api/analysis_predictor.cc

@@ -16,11 +16,15 @@
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/profiler.h"
...

@@ -28,8 +32,11 @@ DECLARE_bool(profile);
namespace paddle {

using contrib::AnalysisConfig;

bool AnalysisPredictor::Init(
    const std::shared_ptr<framework::Scope> &parent_scope,
    const std::shared_ptr<framework::ProgramDesc> &program) {
  VLOG(3) << "Predictor::init()";
#if !defined(_WIN32)
  if (FLAGS_profile) {
...

@@ -43,7 +50,8 @@ bool AnalysisPredictor::Init(
  if (config_.use_gpu) {
    place_ = paddle::platform::CUDAPlace(config_.device);
    LOG(WARNING) << "ir optimize only supports CPU currently, enable_ir_optim "
                    "is turned false.";
    config_.enable_ir_optim = false;
  } else {
    place_ = paddle::platform::CPUPlace();
...

@@ -56,37 +64,134 @@ bool AnalysisPredictor::Init(
    scope_.reset(new paddle::framework::Scope());
  }

  executor_.reset(new paddle::framework::NaiveExecutor(place_));

  if (!program) {
    if (!LoadProgramDesc()) return false;
    OptimizeInferenceProgram();
  } else {
    inference_program_ = program;
  }
  executor_->Prepare(scope_.get(), *inference_program_, 0,
                     config_.use_feed_fetch_ops);

  // Get the feed_target_names and fetch_target_names
  PrepareFeedFetch();
  return true;
}

bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
                            std::vector<PaddleTensor> *output_data,
                            int batch_size) {
  VLOG(3) << "Predictor::predict";
  inference::Timer timer;
  timer.tic();
  // set feed variable
  std::vector<framework::LoDTensor> feeds;
  framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get();
  if (!SetFeed(inputs, scope)) {
    LOG(ERROR) << "fail to set feed";
    return false;
  }
  // Run the inference program
  // if share variables, we need not create variables
  executor_->Run();
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
    LOG(ERROR) << "fail to get fetches";
    return false;
  }
  VLOG(3) << "predict cost: " << timer.toc() << "ms";
  return true;
}

bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                framework::Scope *scope) {
  VLOG(3) << "Predictor::set_feed";
  if (inputs.size() != feeds_.size()) {
    LOG(ERROR) << "wrong feed input size, need " << feeds_.size()
               << " but get " << inputs.size();
    return false;
  }

  // Cache the inputs memory for better concurrency performance.
  feed_tensors_.resize(inputs.size());

  for (size_t i = 0; i < inputs.size(); ++i) {
    auto &input = feed_tensors_[i];
    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
    void *input_ptr;
    if (inputs[i].dtype == PaddleDType::INT64) {
      input_ptr = input.mutable_data<int64_t>(ddim, platform::CPUPlace());
    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
      input_ptr = input.mutable_data<float>(ddim, platform::CPUPlace());
    } else {
      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
      return false;
    }

    // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
    std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
                inputs[i].data.length());
    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
    framework::LoD lod;
    for (auto &level : inputs[i].lod) {
      lod.emplace_back(level);
    }
    input.set_lod(lod);
    int idx = -1;
    if (config_.specify_input_name) {
      idx = feed_names_[inputs[i].name];
    } else {
      idx = boost::get<int>(feeds_[i]->GetAttr("col"));
    }
    framework::SetFeedVariable(scope, input, "feed", idx);
  }
  return true;
}

template <typename T>
void AnalysisPredictor::GetFetchOne(const framework::LoDTensor &fetch,
                                    PaddleTensor *output) {
  // set shape.
  auto shape = framework::vectorize(fetch.dims());
  output->shape.assign(shape.begin(), shape.end());
  // set data.
  const T *data = fetch.data<T>();
  int num_elems = inference::VecReduceToInt(shape);
  output->data.Resize(num_elems * sizeof(T));
  // The fetched tensor output by fetch op, should always in CPU memory, so just
  // copy.
  memcpy(output->data.data(), data, num_elems * sizeof(T));
  // set lod
  output->lod.clear();
  for (auto &level : fetch.lod()) {
    output->lod.emplace_back(level.begin(), level.end());
  }
}

bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                 framework::Scope *scope) {
  VLOG(3) << "Predictor::get_fetch";
  outputs->resize(fetchs_.size());
  for (size_t i = 0; i < fetchs_.size(); ++i) {
    int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
    PADDLE_ENFORCE((size_t)idx == i);
    framework::LoDTensor &fetch =
        framework::GetFetchVariable(*scope, "fetch", idx);
    auto type = fetch.type();
    auto output = &(outputs->at(i));
    if (type == typeid(float)) {
      GetFetchOne<float>(fetch, output);
      output->dtype = PaddleDType::FLOAT32;
    } else if (type == typeid(int64_t)) {
      GetFetchOne<int64_t>(fetch, output);
      output->dtype = PaddleDType::INT64;
    } else {
      LOG(ERROR) << "unknown type, only support float32 and int64 now.";
    }
  }
  return true;
}
...

@@ -107,6 +212,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
        new std::string(config_.prog_file));
    argument_.fluid_model_param_path.reset(new std::string(config_.param_file));
  }

  argument_.origin_program_desc.reset(
      new ProgramDesc(*inference_program_->Proto()));
  PADDLE_ENFORCE(
...

@@ -127,9 +233,8 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}

template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
    const AnalysisConfig &config) {
  VLOG(3) << "create AnalysisConfig";
  if (config.use_gpu) {
    // 1. GPU memeroy
...

@@ -150,15 +255,90 @@ CreatePaddlePredictor<contrib::AnalysisConfig, PaddleEngineKind::kAnalysis>(
  }

  std::unique_ptr<PaddlePredictor> predictor(new AnalysisPredictor(config));
  if (!dynamic_cast<AnalysisPredictor *>(predictor.get())->Init(nullptr)) {
    return nullptr;
  }
  return predictor;
}

void AnalysisPredictor::PrepareFeedFetch() {
  for (auto *op : inference_program_->Block(0).AllOps()) {
    if (op->Type() == "feed") {
      int idx = boost::get<int>(op->GetAttr("col"));
      if (feeds_.size() <= static_cast<size_t>(idx)) {
        feeds_.resize(idx + 1);
      }
      feeds_[idx] = op;
      feed_names_[op->Output("Out")[0]] = idx;
    } else if (op->Type() == "fetch") {
      int idx = boost::get<int>(op->GetAttr("col"));
      if (fetchs_.size() <= static_cast<size_t>(idx)) {
        fetchs_.resize(idx + 1);
      }
      fetchs_[idx] = op;
    }
  }
}

std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
    const std::string &name) {
  PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name);
  std::unique_ptr<ZeroCopyTensor> res(
      new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
  res->input_or_output_ = true;
  res->SetName(name);
  return res;
}

std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
    const std::string &name) {
  PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name);
  std::unique_ptr<ZeroCopyTensor> res(
      new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
  res->input_or_output_ = false;
  res->SetName(name);
  return res;
}

bool AnalysisPredictor::ZeroCopyRun() {
  executor_->Run();
  return true;
}

bool AnalysisPredictor::LoadProgramDesc() {
  // Initialize the inference program
  std::unique_ptr<framework::Executor> tmp_exe(
      new framework::Executor(platform::CPUPlace()));
  if (!config_.model_dir.empty()) {
    // Parameters are saved in separate files sited in
    // the specified `dirname`.
    inference_program_ = paddle::inference::Load(
        static_cast<framework::Executor *>(tmp_exe.get()), scope_.get(),
        config_.model_dir);
  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
    // All parameters are saved in a single file.
    // The file names should be consistent with that used
    // in Python API `fluid.io.save_inference_model`.
    inference_program_ = paddle::inference::Load(
        static_cast<framework::Executor *>(tmp_exe.get()), scope_.get(),
        config_.prog_file, config_.param_file);
  } else {
    LOG(ERROR) << string::Sprintf(
        "not valid model path '%s' or program path '%s'.", config_.model_dir,
        config_.param_file);
    return false;
  }
  return true;
}

std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone() {
  auto *x = new AnalysisPredictor(config_);
  x->Init(scope_, inference_program_);
  return std::unique_ptr<PaddlePredictor>(x);
}

template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<contrib::AnalysisConfig>(
    const contrib::AnalysisConfig &config) {
  return CreatePaddlePredictor<contrib::AnalysisConfig,
                               PaddleEngineKind::kAnalysis>(config);
}
...
paddle/fluid/inference/api/analysis_predictor.h

@@ -12,42 +12,81 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/string/printf.h"

namespace paddle {

using inference::analysis::Argument;
using inference::analysis::Analyzer;
using framework::proto::ProgramDesc;
using framework::NaiveExecutor;
using contrib::AnalysisConfig;

/* This predictor is based on the original native predictor with IR and Analysis
 * support. It will optimize IR and Parameters in the runtime.
 * TODO(Superjomn) Replace the Navive predictor?
 */
class AnalysisPredictor : public PaddlePredictor {
 public:
  explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {}

  bool Init(const std::shared_ptr<framework::Scope> &parent_scope,
            const std::shared_ptr<framework::ProgramDesc> &program = nullptr);

  bool Run(const std::vector<PaddleTensor> &inputs,
           std::vector<PaddleTensor> *output_data,
           int batch_size = -1) override;

  std::unique_ptr<ZeroCopyTensor> GetInputTensor(
      const std::string &name) override;
  std::unique_ptr<ZeroCopyTensor> GetOutputTensor(
      const std::string &name) override;

  bool ZeroCopyRun() override;

  void PrepareFeedFetch();

  void OptimizeInferenceProgram();

  Argument &analysis_argument() { return argument_; }

  std::unique_ptr<PaddlePredictor> Clone() override;

  framework::Scope *scope() { return executor_->scope(); }

  framework::ProgramDesc &program() { return *inference_program_; }

 protected:
  bool LoadProgramDesc();

  bool SetFeed(const std::vector<PaddleTensor> &input_datas,
               framework::Scope *scope);
  bool GetFetch(std::vector<PaddleTensor> *output_data,
                framework::Scope *scope);
  template <typename T>
  void GetFetchOne(const framework::LoDTensor &fetchs,
                   PaddleTensor *output_data);

 private:
  contrib::AnalysisConfig config_;
  Argument argument_;
  std::unique_ptr<NaiveExecutor> executor_;
  platform::Place place_;
  std::shared_ptr<framework::Scope> scope_;
  framework::Scope *sub_scope_{nullptr};
  std::shared_ptr<framework::ProgramDesc> inference_program_;
  std::vector<framework::OpDesc *> feeds_;
  std::map<std::string, size_t> feed_names_;
  std::vector<framework::OpDesc *> fetchs_;
  // Memory buffer for feed inputs. The temporary LoDTensor will cause serious
  // concurrency problems, so cache them.
  std::vector<framework::LoDTensor> feed_tensors_;
};

}  // namespace paddle
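The cached feed_tensors_ above exist because Run() used to create a temporary LoDTensor per feed, which hurt concurrent callers; together with the new Clone(), the intended multi-threaded pattern looks roughly like the sketch below. This is an illustrative sketch, not code from the commit: the helper function name and the shared-input setup are assumptions, while Clone() and Run() are the APIs shown above.

// Illustrative sketch of multi-threaded inference with AnalysisPredictor:
// clone the analyzed predictor once per worker so each thread runs on its own
// executor while sharing the parent's scope and program (that is what
// Clone() does via Init(scope_, inference_program_)).
#include <thread>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void RunConcurrently(paddle::PaddlePredictor *main_predictor,
                     const std::vector<paddle::PaddleTensor> &inputs,
                     int num_threads) {
  std::vector<std::thread> workers;
  for (int tid = 0; tid < num_threads; ++tid) {
    workers.emplace_back([main_predictor, &inputs] {
      auto predictor = main_predictor->Clone();  // shares model weights
      std::vector<paddle::PaddleTensor> outputs;
      predictor->Run(inputs, &outputs);          // feed buffers cached per predictor
    });
  }
  for (auto &t : workers) {
    t.join();
  }
}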
paddle/fluid/inference/api/analysis_predictor_tester.cc  (new file)

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

DEFINE_string(dirname, "", "dirname to tests.");

namespace paddle {
namespace inference {

using contrib::AnalysisConfig;

TEST(AnalysisPredictor, ZeroCopy) {
  AnalysisConfig config;
  config.model_dir = FLAGS_dirname + "/word2vec.inference.model";
  config.use_feed_fetch_ops = false;
  auto predictor =
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
          config);

  auto w0 = predictor->GetInputTensor("firstw");
  auto w1 = predictor->GetInputTensor("secondw");
  auto w2 = predictor->GetInputTensor("thirdw");
  auto w3 = predictor->GetInputTensor("forthw");

  w0->Reshape({4, 1});
  w1->Reshape({4, 1});
  w2->Reshape({4, 1});
  w3->Reshape({4, 1});

  auto* w0_data = w0->mutable_data<int64_t>(PaddlePlace::kCPU);
  auto* w1_data = w1->mutable_data<int64_t>(PaddlePlace::kCPU);
  auto* w2_data = w2->mutable_data<int64_t>(PaddlePlace::kCPU);
  auto* w3_data = w3->mutable_data<int64_t>(PaddlePlace::kCPU);

  for (int i = 0; i < 4; i++) {
    w0_data[i] = i;
    w1_data[i] = i;
    w2_data[i] = i;
    w3_data[i] = i;
  }

  predictor->ZeroCopyRun();

  auto out = predictor->GetOutputTensor("fc_1.tmp_2");
  PaddlePlace place;
  int size = 0;
  auto* out_data = out->data<float>(&place, &size);
  LOG(INFO) << "output size: " << size / sizeof(float);
  LOG(INFO) << "output_data: " << out_data;
}

}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/api/api.cc

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
...

@@ -26,7 +32,7 @@ int PaddleDtypeSize(PaddleDType dtype) {
  }
}

PaddleBuf::PaddleBuf(PaddleBuf &&other)
    : data_(other.data_),
      length_(other.length_),
      memory_owned_(other.memory_owned_) {
...

@@ -35,9 +41,9 @@ PaddleBuf::PaddleBuf(PaddleBuf&& other)
  other.length_ = 0;
}

PaddleBuf::PaddleBuf(const PaddleBuf &other) { *this = other; }

PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) {
  if (!other.memory_owned_) {
    data_ = other.data_;
    length_ = other.length_;
...

@@ -51,7 +57,7 @@ PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
  return *this;
}

PaddleBuf &PaddleBuf::operator=(PaddleBuf &&other) {
  // only the buffer with external memory can be copied
  data_ = other.data_;
  length_ = other.length_;
...

@@ -75,7 +81,7 @@ void PaddleBuf::Resize(size_t length) {
  }
}

void PaddleBuf::Reset(void *data, size_t length) {
  Free();
  memory_owned_ = false;
  data_ = data;
...

@@ -85,7 +91,7 @@ void PaddleBuf::Reset(void* data, size_t length) {
void PaddleBuf::Free() {
  if (memory_owned_ && data_) {
    PADDLE_ENFORCE_GT(length_, 0);
    free(static_cast<char *>(data_));
    data_ = nullptr;
    length_ = 0;
  }
...
paddle/fluid/inference/api/api_impl.cc

@@ -145,7 +145,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
  VLOG(4) << "Run prepared context";
  executor_->RunPreparedContext(ctx_.get(), scope,
                                false, /* don't create local scope each time*/
                                false /* don't create variable each time */);
  VLOG(4) << "Finish prepared context";
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
...
paddle/fluid/inference/api/api_impl.h

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
...

@@ -30,6 +30,8 @@
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/profiler.h"
...

@@ -52,6 +54,8 @@ class NativePaddlePredictor : public PaddlePredictor {
  ~NativePaddlePredictor() override;

  framework::Scope *scope() { return sub_scope_ ? sub_scope_ : scope_.get(); }

 protected:
  bool SetFeed(const std::vector<PaddleTensor> &input_datas,
               framework::Scope *scope);
...
paddle/fluid/inference/api/api_impl_tester.cc

@@ -43,7 +43,7 @@ PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
NativeConfig GetConfig() {
  NativeConfig config;
  config.model_dir = FLAGS_dirname + "/word2vec.inference.model";
  LOG(INFO) << "dirname " << config.model_dir;
  config.fraction_of_gpu_memory = 0.15;
#ifdef PADDLE_WITH_CUDA
...

@@ -110,7 +110,7 @@ void MainImageClassification(bool use_gpu) {
  NativeConfig config = GetConfig();
  config.use_gpu = use_gpu;
  config.model_dir =
      FLAGS_dirname + "/image_classification_resnet.inference.model";

  const bool is_combined = false;
  std::vector<std::vector<int64_t>> feed_target_shapes =
...

@@ -214,7 +214,7 @@ void MainThreadsImageClassification(bool use_gpu) {
  NativeConfig config = GetConfig();
  config.use_gpu = use_gpu;
  config.model_dir =
      FLAGS_dirname + "/image_classification_resnet.inference.model";

  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
  std::vector<framework::LoDTensor> jobs(num_jobs);
...
paddle/fluid/inference/api/details/zero_copy_tensor.cc  (new file)

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {

void ZeroCopyTensor::Reshape(const std::vector<int> &shape) {
  PADDLE_ENFORCE(!name_.empty(),
                 "Need to SetName first, so that the corresponding tensor can "
                 "be retrieved.");
  PADDLE_ENFORCE(input_or_output_,
                 "Can't reshape the output tensor, it is readonly");
  PADDLE_ENFORCE(scope_);
  auto *scope = static_cast<framework::Scope *>(scope_);
  auto *var = scope->FindVar(name_);
  PADDLE_ENFORCE(var, "No tensor called [%s] in the runtime scope", name_);
  auto *tensor = var->GetMutable<framework::LoDTensor>();
  tensor->Resize(framework::make_ddim(shape));
}

template <typename T>
T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
  auto *tensor = static_cast<framework::LoDTensor *>(FindTensor());
  switch (static_cast<int>(place)) {
    case static_cast<int>(PaddlePlace::kCPU): {
      return tensor->mutable_data<T>(platform::CPUPlace());
    }
    case static_cast<int>(PaddlePlace::kGPU): {
      return tensor->mutable_data<T>(platform::CUDAPlace());
    }
    default:
      PADDLE_THROW("Unsupported place: %d", static_cast<int>(place));
      break;
  }
  return nullptr;
}

template <typename T>
T *ZeroCopyTensor::data(PaddlePlace *place, int *size) {
  auto *tensor = static_cast<framework::LoDTensor *>(FindTensor());
  auto *res = tensor->data<T>();

  if (platform::is_cpu_place(tensor->place())) {
    *place = PaddlePlace::kCPU;
  } else if (platform::is_gpu_place(tensor->place())) {
    *place = PaddlePlace::kGPU;
  } else {
    *place = PaddlePlace::kUNK;
  }

  *size = tensor->numel();
  return res;
}

template float *ZeroCopyTensor::data<float>(PaddlePlace *place, int *size);
template int64_t *ZeroCopyTensor::data<int64_t>(PaddlePlace *place, int *size);
template float *ZeroCopyTensor::mutable_data<float>(PaddlePlace place);
template int64_t *ZeroCopyTensor::mutable_data<int64_t>(PaddlePlace place);

void *ZeroCopyTensor::FindTensor() const {
  PADDLE_ENFORCE(!name_.empty(),
                 "Need to SetName first, so that the corresponding tensor can "
                 "be retrieved.");
  PADDLE_ENFORCE(scope_);
  auto *scope = static_cast<framework::Scope *>(scope_);
  auto *var = scope->FindVar(name_);
  PADDLE_ENFORCE(var, "No tensor called [%s] in the runtime scope", name_);
  auto *tensor = var->GetMutable<framework::LoDTensor>();
  return tensor;
}

std::vector<int64_t> ZeroCopyTensor::shape() {
  auto *tensor = static_cast<framework::LoDTensor *>(FindTensor());
  PADDLE_ENFORCE(tensor, "not found tensor called %s in the scope", name_);
  return framework::vectorize(tensor->dims());
}

void ZeroCopyTensor::SetLoD(const std::vector<std::vector<size_t>> &x) {
  auto *tensor = static_cast<framework::LoDTensor *>(FindTensor());
  framework::LoD lod;
  for (auto &level : x) {
    lod.emplace_back(level);
  }
  tensor->set_lod(lod);
}

std::vector<std::vector<size_t>> ZeroCopyTensor::lod() const {
  std::vector<std::vector<size_t>> res;
  auto *tensor = static_cast<framework::LoDTensor *>(FindTensor());
  for (auto &level : tensor->lod()) {
    res.emplace_back(level);
  }
  return res;
}

}  // namespace paddle
paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc  (new file)

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/api/paddle_inference_api.h"

namespace paddle {

void ZeroCopyTensor::Reshape(const std::vector<int> &shape) {}

template <typename T>
T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
  return nullptr;
}

template <typename T>
T *ZeroCopyTensor::data(PaddlePlace *place, int *size) {
  return nullptr;
}

template float *ZeroCopyTensor::data<float>(PaddlePlace *place, int *size);
template int64_t *ZeroCopyTensor::data<int64_t>(PaddlePlace *place, int *size);
template float *ZeroCopyTensor::mutable_data(PaddlePlace place);
template int64_t *ZeroCopyTensor::mutable_data(PaddlePlace place);

void *ZeroCopyTensor::FindTensor() const { return nullptr; }

std::vector<int64_t> ZeroCopyTensor::shape() { return {}; }

void ZeroCopyTensor::SetLoD(const std::vector<std::vector<size_t>> &x) {}

std::vector<std::vector<size_t>> ZeroCopyTensor::lod() const {
  return std::vector<std::vector<size_t>>();
}

}  // namespace paddle
paddle/fluid/inference/api/helper.h
浏览文件 @
c8744d11
...
...
@@ -21,8 +21,10 @@
#include <sstream>
#include <string>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/string/printf.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -93,6 +95,20 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
template
<
typename
T
>
static
int
ZeroCopyTensorAssignData
(
ZeroCopyTensor
*
tensor
,
const
std
::
vector
<
std
::
vector
<
T
>>
&
data
)
{
int
size
{
0
};
auto
*
ptr
=
tensor
->
mutable_data
<
T
>
(
PaddlePlace
::
kCPU
);
int
c
=
0
;
for
(
const
auto
&
f
:
data
)
{
for
(
T
v
:
f
)
{
ptr
[
c
++
]
=
v
;
}
}
return
size
;
}
static
std
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
std
::
stringstream
os
;
os
<<
"Tensor ["
<<
tensor
.
name
<<
"]
\n
"
;
...
...
@@ -138,5 +154,127 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
}
}
template
<
typename
T
>
std
::
string
LoDTensorSummary
(
const
framework
::
LoDTensor
&
tensor
)
{
std
::
stringstream
ss
;
ss
<<
"
\n
---- tensor ---"
<<
'\n'
;
ss
<<
"lod: ["
;
for
(
const
auto
&
level
:
tensor
.
lod
())
{
ss
<<
"[ "
;
for
(
auto
i
:
level
)
{
ss
<<
i
<<
", "
;
}
ss
<<
"]"
;
}
ss
<<
"]
\n
"
;
ss
<<
"shape: ["
;
int
size
=
1
;
for
(
int
i
=
0
;
i
<
tensor
.
dims
().
size
();
i
++
)
{
int
dim
=
tensor
.
dims
()[
i
];
ss
<<
dim
<<
", "
;
size
*=
dim
;
}
ss
<<
"]
\n
"
;
ss
<<
"data: "
;
for
(
int
i
=
0
;
i
<
std
::
min
(
20
,
size
);
i
++
)
{
ss
<<
tensor
.
data
<
T
>
()[
i
]
<<
" "
;
}
ss
<<
"
\n
"
;
return
ss
.
str
();
}
static
bool
CompareLoD
(
const
framework
::
LoD
&
a
,
const
framework
::
LoD
&
b
)
{
if
(
a
.
size
()
!=
b
.
size
())
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"lod size not match %d != %d"
,
a
.
size
(),
b
.
size
());
return
false
;
}
for
(
size_t
i
=
0
;
i
<
a
.
size
();
i
++
)
{
auto
&
al
=
a
[
i
];
auto
&
bl
=
b
[
i
];
if
(
al
.
size
()
!=
bl
.
size
())
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"level size %d != %d"
,
al
.
size
(),
bl
.
size
());
return
false
;
}
}
return
true
;
}
static
bool
CompareShape
(
const
std
::
vector
<
int64_t
>
&
a
,
const
std
::
vector
<
int64_t
>
&
b
)
{
if
(
a
.
size
()
!=
b
.
size
())
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"shape size not match %d != %d"
,
a
.
size
(),
b
.
size
());
return
false
;
}
for
(
size_t
i
=
0
;
i
<
a
.
size
();
i
++
)
{
if
(
a
[
i
]
!=
b
[
i
])
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"shape %d-th element not match %d != %d"
,
i
,
a
[
i
],
b
[
i
]);
return
false
;
}
}
return
true
;
}
static bool CompareTensorData(const framework::LoDTensor &a,
                              const framework::LoDTensor &b) {
  auto a_shape = framework::vectorize(a.dims());
  auto b_shape = framework::vectorize(b.dims());
  size_t a_size = std::accumulate(a_shape.begin(), a_shape.end(), 1,
                                  [](int a, int b) { return a * b; });
  size_t b_size = std::accumulate(b_shape.begin(), b_shape.end(), 1,
                                  [](int a, int b) { return a * b; });
  if (a_size != b_size) {
    LOG(ERROR) << string::Sprintf("tensor data size not match, %d != %d",
                                  a_size, b_size);
  }
  for (size_t i = 0; i < a_size; i++) {
    if (a.type() == typeid(float)) {
      const auto *a_data = a.data<float>();
      const auto *b_data = b.data<float>();
      if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
        LOG(ERROR) << string::Sprintf(
            "tensor data %d-th element not match, %f != %f", i, a_data[i],
            b_data[i]);
        return false;
      }
    } else if (a.type() == typeid(int64_t)) {
      const auto *a_data = a.data<int64_t>();
      const auto *b_data = b.data<int64_t>();
      if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
        LOG(ERROR) << string::Sprintf(
            "tensor data %d-th element not match, %f != %f", i, a_data[i],
            b_data[i]);
        return false;
      }
    }
  }
  return true;
}
static bool CompareTensor(const framework::LoDTensor &a,
                          const framework::LoDTensor &b) {
  if (!CompareLoD(a.lod(), b.lod())) {
    return false;
  }
  if (!CompareShape(framework::vectorize(a.dims()),
                    framework::vectorize(b.dims()))) {
    return false;
  }
  if (!CompareTensorData(a, b)) {
    return false;
  }
  return true;
}

}  // namespace inference
}  // namespace paddle
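A brief sketch of how these comparison helpers compose in a test: CompareTensor checks LoD, then shape, then element data with a 1e-3 tolerance. The scope variables and the output name "out" below are illustrative only:

// Hedged sketch: compare the same output variable taken from two scopes.
// Assumes both scopes hold a framework::LoDTensor named "out".
const auto &t1 = scope1.FindVar("out")->Get<framework::LoDTensor>();
const auto &t2 = scope2.FindVar("out")->Get<framework::LoDTensor>();
if (!inference::CompareTensor(t1, t2)) {
  LOG(ERROR) << "outputs diverge beyond the 1e-3 tolerance";
}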
paddle/fluid/inference/api/paddle_inference_api.h
...
...
@@ -101,6 +101,40 @@ struct PaddleTensor {
  std::vector<std::vector<size_t>> lod;  // Tensor+LoD equals LoDTensor
};
enum class PaddlePlace { kUNK = -1, kCPU, kGPU };
// Tensor without copy, currently only supports AnalysisPredictor.
class ZeroCopyTensor {
 public:
  void Reshape(const std::vector<int>& shape);

  // Get the memory in CPU or GPU with a specific data type; call Reshape first
  // to tell the data size.
  // One can write directly through the returned pointer to feed the data.
  // This is for writing the input tensor.
  template <typename T>
  T* mutable_data(PaddlePlace place);
  // Get the memory directly; the place and memory size are returned by pointer.
  // This is for reading the output tensor.
  template <typename T>
  T* data(PaddlePlace* place, int* size);

  std::vector<int64_t> shape();

  void SetLoD(const std::vector<std::vector<size_t>>& x);
  std::vector<std::vector<size_t>> lod() const;

 protected:
  ZeroCopyTensor(void* scope) : scope_{scope} {}
  void SetName(const std::string& name) { name_ = name; }
  void* FindTensor() const;

 private:
  std::string name_;
  bool input_or_output_;
  friend class AnalysisPredictor;
  void* scope_{nullptr};
};
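To make the intended calling sequence concrete, a minimal sketch (not part of the header): an input tensor is Reshape()-d and then written through mutable_data<T>(), while an output tensor is read through data<T>(), which reports the place and element count. The `input` and `output` variables are assumed to have been obtained from a predictor (see the PaddlePredictor additions below):

// Hedged sketch of the intended usage.
input->Reshape({1, 4});                                    // set the size first
float *in = input->mutable_data<float>(PaddlePlace::kCPU);
for (int i = 0; i < 4; i++) in[i] = 1.0f;                  // write the feed data

PaddlePlace place;
int size = 0;
float *out = output->data<float>(&place, &size);           // read the result
for (int i = 0; i < size; i++) { /* consume out[i] */ }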
/*
* A simple Inference API for Paddle.
*/
...
...
@@ -120,6 +154,19 @@ class PaddlePredictor {
                   std::vector<PaddleTensor>* output_data,
                   int batch_size = -1) = 0;

  // Zero copy input and output optimization.
  // Get the input or output tensors, and operate on their memory directly,
  // without copy.
  virtual std::unique_ptr<ZeroCopyTensor> GetInputTensor(
      const std::string& name) {
    return nullptr;
  }
  virtual std::unique_ptr<ZeroCopyTensor> GetOutputTensor(
      const std::string& name) {
    return nullptr;
  }
  virtual bool ZeroCopyRun() { return false; }

  // Clone a predictor that share the model weights, the Cloned predictor should
  // be thread-safe.
  virtual std::unique_ptr<PaddlePredictor> Clone() = 0;
...
...
@@ -218,7 +265,12 @@ struct AnalysisConfig : public NativeConfig {
  IrPassMode ir_mode{IrPassMode::kExclude};
  std::vector<std::string> ir_passes;

  // NOTE this is just for internal development, please do not use it.
  // NOT stable yet.
  bool use_feed_fetch_ops{true};
  // NOTE this is just for internal development, please do not use it.
  // NOT stable yet.
  bool _use_mkldnn{false};
};
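Putting the new pieces together, a hedged end-to-end sketch of the zero-copy path (the model path and the tensor names "data" and "out" are placeholders; the flow follows the rnn1 test further down): turn off the feed/fetch ops in the config, create an analysis predictor, fill the input tensors in place, call ZeroCopyRun(), then read the outputs in place.

// Hedged sketch, assuming a CPU model with an input named "data" and an
// output named "out" (placeholder names and path).
contrib::AnalysisConfig config;
config.model_dir = "./some_model";        // placeholder path
config.use_gpu = false;
config.use_feed_fetch_ops = false;        // required for the zero-copy path

auto predictor =
    CreatePaddlePredictor<contrib::AnalysisConfig,
                          PaddleEngineKind::kAnalysis>(config);

auto in = predictor->GetInputTensor("data");
in->Reshape({1, 3});
auto *in_data = in->mutable_data<float>(PaddlePlace::kCPU);
in_data[0] = in_data[1] = in_data[2] = 1.0f;

predictor->ZeroCopyRun();

PaddlePlace place;
int out_num = 0;
auto out = predictor->GetOutputTensor("out");
auto *out_data = out->data<float>(&place, &out_num);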
...
...
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
...
...
@@ -18,6 +18,8 @@ namespace paddle {
namespace inference {
namespace analysis {
using contrib::AnalysisConfig;

struct DataRecord {
  std::vector<int64_t> data;
  std::vector<size_t> lod;
...
...
@@ -78,6 +80,7 @@ struct DataRecord {
}
}
}
  DataRecord NextBatch() {
    DataRecord data;
    data.data = batched_datas[batch_iter];
...
...
@@ -155,7 +158,9 @@ TEST(Analyzer_LAC, fuse_statis) {
  SetConfig(&cfg);

  int num_ops;
-  auto fuse_statis = GetFuseStatis(cfg, &num_ops);
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
  ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
...
...
@@ -16,6 +16,7 @@
namespace paddle {
namespace inference {
using contrib::AnalysisConfig;

struct DataRecord {
  std::vector<std::vector<int64_t>> word_data_all, mention_data_all;
...
...
@@ -145,7 +146,9 @@ TEST(Analyzer_Chinese_ner, fuse_statis) {
  SetConfig(&cfg);

  int num_ops;
-  auto fuse_statis = GetFuseStatis(cfg, &num_ops);
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
  ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
...
...
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
...
...
@@ -12,12 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
DEFINE_bool(with_precision_check, true, "turn on test");

namespace paddle {
namespace inference {

using namespace framework;  // NOLINT
using namespace contrib;    // NOLINT

struct DataRecord {
  std::vector<std::vector<std::vector<float>>> link_step_data_all;
...
...
@@ -29,10 +33,12 @@ struct DataRecord {
  size_t batch_iter{0};
  size_t batch_size{1};
  DataRecord() = default;

  explicit DataRecord(const std::string &path, int batch_size = 1)
      : batch_size(batch_size) {
    Load(path);
  }

  DataRecord NextBatch() {
    DataRecord data;
    size_t batch_end = batch_iter + batch_size;
...
...
@@ -101,6 +107,7 @@ struct DataRecord {
    num_samples = num_lines;
  }
};

void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
                   int batch_size) {
  PaddleTensor lod_attention_tensor, init_zero_tensor, lod_tensor_tensor,
...
...
@@ -149,7 +156,55 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
  }
}

-void SetConfig(contrib::AnalysisConfig *cfg) {
void PrepareZeroCopyInputs(ZeroCopyTensor *lod_attention_tensor,
                           ZeroCopyTensor *cell_init_tensor,
                           ZeroCopyTensor *data_tensor,
                           ZeroCopyTensor *hidden_init_tensor,
                           ZeroCopyTensor *week_tensor,
                           ZeroCopyTensor *minute_tensor,
                           DataRecord *data_record, int batch_size) {
  auto one_batch = data_record->NextBatch();
  std::vector<int> rnn_link_data_shape(
      {static_cast<int>(one_batch.rnn_link_data.size()),
       static_cast<int>(one_batch.rnn_link_data.front().size())});
  lod_attention_tensor->Reshape({1, 2});
  lod_attention_tensor->SetLoD({one_batch.lod1, one_batch.lod2});

  cell_init_tensor->Reshape({batch_size, 15});
  cell_init_tensor->SetLoD({one_batch.lod3});

  hidden_init_tensor->Reshape({batch_size, 15});
  hidden_init_tensor->SetLoD({one_batch.lod3});

  data_tensor->Reshape(rnn_link_data_shape);
  data_tensor->SetLoD({one_batch.lod1});

  week_tensor->Reshape(
      {static_cast<int>(one_batch.rnn_week_datas.size()),
       static_cast<int>(one_batch.rnn_week_datas.front().size())});
  week_tensor->SetLoD({one_batch.lod3});

  minute_tensor->Reshape(
      {static_cast<int>(one_batch.rnn_minute_datas.size()),
       static_cast<int>(one_batch.rnn_minute_datas.front().size())});
  minute_tensor->SetLoD({one_batch.lod3});

  // assign data
  float arr0[] = {0, 0};
  std::vector<float> zeros(batch_size * 15, 0);
  std::copy_n(arr0, 2,
              lod_attention_tensor->mutable_data<float>(PaddlePlace::kCPU));
  std::copy_n(arr0, 2, data_tensor->mutable_data<float>(PaddlePlace::kCPU));
  std::copy_n(zeros.begin(), zeros.size(),
              cell_init_tensor->mutable_data<float>(PaddlePlace::kCPU));
  std::copy_n(zeros.begin(), zeros.size(),
              hidden_init_tensor->mutable_data<float>(PaddlePlace::kCPU));
  ZeroCopyTensorAssignData(data_tensor, one_batch.rnn_link_data);
  ZeroCopyTensorAssignData(week_tensor, one_batch.rnn_week_datas);
  ZeroCopyTensorAssignData(minute_tensor, one_batch.rnn_minute_datas);
}

+void SetConfig(AnalysisConfig *cfg) {
  cfg->prog_file = FLAGS_infer_model + "/__model__";
  cfg->param_file = FLAGS_infer_model + "/param";
  cfg->use_gpu = false;
...
...
@@ -187,7 +242,9 @@ TEST(Analyzer_rnn1, fuse_statis) {
  SetConfig(&cfg);

  int num_ops;
-  auto fuse_statis = GetFuseStatis(cfg, &num_ops);
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
  EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2);  // bi-directional LSTM
...
...
@@ -214,7 +271,229 @@ TEST(Analyzer_rnn1, multi_thread) {
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, 4 /* num_threads */);
+  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
}
bool CompareTensors(framework::Scope &a_scope, framework::Scope &b_scope,
                    const std::vector<std::string> &tensors) {
  for (auto &x : tensors) {
    auto *a_var = a_scope.FindVar(x);
    auto *b_var = b_scope.FindVar(x);
    if (a_var && b_var) {
      if (a_var->Type() == typeid(framework::LoDTensor) ||
          a_var->Type() == typeid(framework::Tensor)) {
        LOG(INFO) << "comparing tensor " << x;
        auto &a_t = a_var->Get<framework::LoDTensor>();
        auto &b_t = b_var->Get<framework::LoDTensor>();
        if (!inference::CompareTensor(a_t, b_t)) {
          LOG(ERROR) << string::Sprintf("tensor %s not match in two scopes", x);
        }
      } else {
        LOG(INFO) << "skip no tensor " << x;
      }
    } else {
      LOG(INFO) << "skip tensor " << x;
    }
  }
  return true;
}
// Validate that the AnalysisPredictor + ZeroCopyTensor really works by testing
// on the complex RNN1 model.
TEST(Analyzer_rnn1, ZeroCopy) {
  AnalysisConfig config;
  SetConfig(&config);
  config.use_feed_fetch_ops = false;

  PaddlePlace place;
  int output_size{0};

  auto predictor =
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
          config);
  config.use_feed_fetch_ops = true;
  auto native_predictor =
      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
  config.use_feed_fetch_ops = true;  // the analysis predictor needs feed/fetch.
  auto analysis_predictor =
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
          config);

#define NEW_TENSOR(name__) \
  auto name__##_tensor = predictor->GetInputTensor(#name__);
  NEW_TENSOR(data_lod_attention);
  NEW_TENSOR(cell_init);
  NEW_TENSOR(data);
  NEW_TENSOR(week);
  NEW_TENSOR(minute);
  NEW_TENSOR(hidden_init);

  // Prepare data for AnalysisPredictor
  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
  PrepareZeroCopyInputs(data_lod_attention_tensor.get(), cell_init_tensor.get(),
                        data_tensor.get(), hidden_init_tensor.get(),
                        week_tensor.get(), minute_tensor.get(), &data,
                        FLAGS_batch_size);

  // Prepare data for NativePredictor
  std::vector<std::vector<PaddleTensor>> native_inputs;
  SetInput(&native_inputs);
  std::vector<PaddleTensor> native_outputs;
  std::vector<PaddleTensor> analysis_outputs;

  auto output_tensor = predictor->GetOutputTensor("final_output.tmp_1");
  // Run analysis predictor
  int num_ops;
  auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);
  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
  ASSERT_EQ(fuse_statis.at("fc_fuse"), 1);
  ASSERT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2);  // bi-directional LSTM
  ASSERT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1);
  ASSERT_EQ(num_ops, 13);  // After graph optimization, only 13 operators exists.

  Timer timer;
  double total_time{0};
  double native_total_time{0};
  double analysis_total_time{0.};

  for (int i = 0; i < FLAGS_repeat; i++) {
    timer.tic();
    predictor->ZeroCopyRun();
    total_time += timer.toc();
  }

  auto *output_data = output_tensor->data<float>(&place, &output_size);
  ASSERT_GT(output_size, 0);  // more than one output!

  for (int i = 0; i < FLAGS_repeat; i++) {
    // Run native predictor.
    timer.tic();
    ASSERT_TRUE(native_predictor->Run(native_inputs.front(), &native_outputs));
    native_total_time += timer.toc();
  }

  for (int i = 0; i < FLAGS_repeat; i++) {
    timer.tic();
    ASSERT_TRUE(
        analysis_predictor->Run(native_inputs.front(), &analysis_outputs));
    analysis_total_time += timer.toc();
  }

  if (!FLAGS_with_precision_check) {
    return;
  }
  int native_output_size = VecReduceToInt(native_outputs.front().shape);

  EXPECT_EQ(native_output_size, output_size);

  // Compare tensors between analysis and zerocopy
  auto *p0 = static_cast<AnalysisPredictor *>(predictor.get());
  auto *p1 = static_cast<AnalysisPredictor *>(analysis_predictor.get());
  auto *p2 = static_cast<NativePaddlePredictor *>(native_predictor.get());

  std::vector<std::string> tensor_names;
  for (auto &var_desc : p0->program().Block(0).AllVars()) {
    tensor_names.push_back(var_desc->Name());
  }

  LOG(INFO) << "Comparing tensors";
  ASSERT_TRUE(
      CompareTensors(*p0->scope(), *p1->scope(), {"final_output.tmp_1"}));
  ASSERT_TRUE(
      CompareTensors(*p0->scope(), *p2->scope(), {"final_output.tmp_1"}));

  LOG(INFO) << "output1 "
            << inference::LoDTensorSummary<float>(
                   p0->scope()
                       ->FindVar("final_output.tmp_1")
                       ->Get<framework::LoDTensor>());
  LOG(INFO) << "output2 "
            << inference::LoDTensorSummary<float>(
                   p1->scope()
                       ->FindVar("final_output.tmp_1")
                       ->Get<framework::LoDTensor>());
  LOG(INFO) << "output3 "
            << inference::LoDTensorSummary<float>(
                   p2->scope()
                       ->FindVar("final_output.tmp_1")
                       ->Get<framework::LoDTensor>());

  for (int i = 0; i < output_size; i++) {
    LOG(INFO) << output_data[i] << " "
              << static_cast<float *>(native_outputs.front().data.data())[i]
              << " "
              << static_cast<float *>(analysis_outputs.front().data.data())[i];
    EXPECT_NEAR(output_data[i],
                static_cast<float *>(native_outputs.front().data.data())[i],
                1e-3);
  }

  LOG(INFO) << "batch_size: " << FLAGS_batch_size;
  LOG(INFO) << "zero average time: "
            << total_time / (FLAGS_repeat * FLAGS_batch_size);
  LOG(INFO) << "analysis average time: "
            << analysis_total_time / (FLAGS_repeat * FLAGS_batch_size);
  LOG(INFO) << "native average time: "
            << native_total_time / (FLAGS_repeat * FLAGS_batch_size);
}
TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
  AnalysisConfig config;
  SetConfig(&config);
  config.use_feed_fetch_ops = false;

#define NEW_TENSOR(name__) \
  auto name__##_tensor = predictor->GetInputTensor(#name__);

  auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
  double total_time_of_threads{0};
  std::vector<std::thread> threads;
  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
    predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
  }

  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
    threads.emplace_back([config, &total_time_of_threads, &predictors, tid] {
      // auto predictor = base_predictor->Clone();
      auto &predictor = predictors[tid];
      NEW_TENSOR(data_lod_attention);
      NEW_TENSOR(cell_init);
      NEW_TENSOR(data);
      NEW_TENSOR(week);
      NEW_TENSOR(minute);
      NEW_TENSOR(hidden_init);

      // Prepare data for AnalysisPredictor
      DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
      Timer timer;
      double total_time{0};

      for (int i = 0; i < FLAGS_repeat; i++) {
        PrepareZeroCopyInputs(data_lod_attention_tensor.get(),
                              cell_init_tensor.get(), data_tensor.get(),
                              hidden_init_tensor.get(), week_tensor.get(),
                              minute_tensor.get(), &data, FLAGS_batch_size);

        timer.tic();
        predictor->ZeroCopyRun();
        total_time += timer.toc();
      }

      total_time_of_threads += total_time;

      LOG(INFO) << "thread time: " << total_time / FLAGS_repeat;
    });
  }

  for (auto &t : threads) {
    t.join();
  }

  LOG(INFO) << "average time: "
            << total_time_of_threads / FLAGS_num_threads / FLAGS_repeat;
}

}  // namespace inference
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...
...
@@ -182,7 +182,8 @@ TEST(Analyzer_seq_conv1, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  int num_ops;
-  auto fuse_statis = GetFuseStatis(cfg, &num_ops);
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
+  GetFuseStatis(predictor.get(), &num_ops);
}
// Compare result of NativeConfig and AnalysisConfig
...
...
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
namespace paddle {
namespace inference {
namespace analysis {
using contrib::AnalysisConfig;

struct Record {
  std::vector<float> data;
...
...
@@ -114,7 +115,8 @@ TEST(Analyzer_vis, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  int num_ops;
-  GetFuseStatis(cfg, &num_ops);
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
+  GetFuseStatis(predictor.get(), &num_ops);
}
// Compare result of NativeConfig and AnalysisConfig
...
...
paddle/fluid/inference/tests/api/tester_helper.h
...
...
@@ -86,11 +86,9 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); }

-std::unordered_map<std::string, int> GetFuseStatis(AnalysisConfig config,
+std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
                                                    int *num_ops) {
-  auto predictor = CreateTestPredictor(config);
-  AnalysisPredictor *analysis_predictor =
-      dynamic_cast<AnalysisPredictor *>(predictor.get());
+  auto *analysis_predictor = static_cast<AnalysisPredictor *>(predictor);
  auto &fuse_statis = analysis_predictor->analysis_argument()
                          .Get<std::unordered_map<std::string, int>>(
                              framework::ir::kFuseStatisAttr);
...
...
paddle/fluid/memory/malloc.cc
...
...
@@ -36,6 +36,8 @@ namespace memory {
using BuddyAllocator = detail::BuddyAllocator;

BuddyAllocator* GetCPUBuddyAllocator() {
  // We tried thread_local for the inference::RNN1 model, but it does not help
  // much for the multi-thread test.
  static std::once_flag init_flag;
  static detail::BuddyAllocator* a = nullptr;
...
...
@@ -48,6 +50,25 @@ BuddyAllocator* GetCPUBuddyAllocator() {
  return a;
}

// We compared the NaiveAllocator with BuddyAllocator in CPU memory allocation,
// seems they are almost the same overhead.
struct NaiveAllocator {
  void* Alloc(size_t size) { return malloc(size); }

  void Free(void* p) {
    PADDLE_ENFORCE(p);
    free(p);
  }

  static NaiveAllocator* Instance() {
    static NaiveAllocator x;
    return &x;
  }

 private:
  std::mutex lock_;
};
template <>
void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
  VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
...
...
paddle/fluid/string/pretty_log.h
...
...
@@ -56,13 +56,13 @@ struct Style {
};
template <typename... Args>
-static void PrettyLogEndl(const std::string &style, const char *fmt,
-                          const Args &... args) {
+static void PrettyLogEndl(const std::string &style, const char *fmt,
+                          const Args &... args) {
  std::cerr << style << Sprintf(fmt, args...) << reset() << std::endl;
}
template <typename... Args>
-static void PrettyLog(const std::string &style, const char *fmt,
-                      const Args &... args) {
+static void PrettyLog(const std::string &style, const char *fmt,
+                      const Args &... args) {
  std::cerr << style << Sprintf(fmt, args...) << reset();
}
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
...
...
@@ -28,7 +28,6 @@ list(REMOVE_ITEM TEST_OPS test_cond_op) # FIXME(qijun): https://github.com/Paddl
list(REMOVE_ITEM TEST_OPS op_test)    # op_test is a helper python file, not a test
list(REMOVE_ITEM TEST_OPS decorators) # decorators is a helper python file, not a test
if(APPLE)
    if(NOT WITH_DISTRIBUTE)
        list(REMOVE_ITEM TEST_OPS test_desc_clone)
...
...