Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
0a42986c
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
0a42986c
编写于
11月 08, 2020
作者:
W
Wilber
提交者:
GitHub
11月 09, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Cherry-pick. (#28454)
上级
78d68d59
变更
19
隐藏空白更改
内联
并排
Showing
19 changed file
with
308 addition
and
111 deletion
+308
-111
cmake/external/lite.cmake
cmake/external/lite.cmake
+71
-25
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+1
-1
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+4
-0
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+2
-0
paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
.../fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
+10
-4
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+1
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+31
-5
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+11
-0
paddle/fluid/inference/lite/CMakeLists.txt
paddle/fluid/inference/lite/CMakeLists.txt
+1
-1
paddle/fluid/inference/lite/engine.cc
paddle/fluid/inference/lite/engine.cc
+31
-19
paddle/fluid/inference/lite/engine.h
paddle/fluid/inference/lite/engine.h
+16
-11
paddle/fluid/inference/lite/tensor_utils.cc
paddle/fluid/inference/lite/tensor_utils.cc
+72
-26
paddle/fluid/inference/lite/test_engine.cc
paddle/fluid/inference/lite/test_engine.cc
+4
-4
paddle/fluid/inference/lite/test_tensor_utils.cc
paddle/fluid/inference/lite/test_tensor_utils.cc
+33
-4
paddle/fluid/inference/tests/api/lite_resnet50_test.cc
paddle/fluid/inference/tests/api/lite_resnet50_test.cc
+6
-2
paddle/fluid/operators/lite/lite_engine_op.h
paddle/fluid/operators/lite/lite_engine_op.h
+4
-4
paddle/fluid/operators/lite/lite_engine_op_test.cc
paddle/fluid/operators/lite/lite_engine_op_test.cc
+3
-3
paddle/fluid/pybind/inference_api.cc
paddle/fluid/pybind/inference_api.cc
+3
-1
python/setup.py.in
python/setup.py.in
+4
-0
未找到文件。
cmake/external/lite.cmake
浏览文件 @
0a42986c
...
@@ -12,8 +12,8 @@
...
@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
if
(
NOT LINUX
OR NOT WITH_MKL
)
if
(
NOT LINUX
)
message
(
"Paddle-lite will not build because the required Linux
and MKL
do not exist."
)
message
(
"Paddle-lite will not build because the required Linux do not exist."
)
set
(
WITH_LITE OFF
)
set
(
WITH_LITE OFF
)
return
()
return
()
endif
()
endif
()
...
@@ -22,9 +22,11 @@ if(XPU_SDK_ROOT)
...
@@ -22,9 +22,11 @@ if(XPU_SDK_ROOT)
set
(
LITE_WITH_XPU ON
)
set
(
LITE_WITH_XPU ON
)
include_directories
(
"
${
XPU_SDK_ROOT
}
/XTDK/include"
)
include_directories
(
"
${
XPU_SDK_ROOT
}
/XTDK/include"
)
include_directories
(
"
${
XPU_SDK_ROOT
}
/XTCL/include"
)
include_directories
(
"
${
XPU_SDK_ROOT
}
/XTCL/include"
)
add_definitions
(
-D
PADDLE
_WITH_XPU
)
add_definitions
(
-D
LITE_SUBGRAPH
_WITH_XPU
)
LINK_DIRECTORIES
(
"
${
XPU_SDK_ROOT
}
/XTDK/shlib/"
)
LINK_DIRECTORIES
(
"
${
XPU_SDK_ROOT
}
/XTDK/shlib/"
)
LINK_DIRECTORIES
(
"
${
XPU_SDK_ROOT
}
/XTDK/runtime/shlib/"
)
LINK_DIRECTORIES
(
"
${
XPU_SDK_ROOT
}
/XTDK/runtime/shlib/"
)
set
(
XPURT_LIB
${
XPU_SDK_ROOT
}
/XTDK/runtime/shlib/libxpurt.so
)
set
(
XPUAPI_LIB
${
XPU_SDK_ROOT
}
/XTDK/shlib/libxpuapi.so
)
endif
()
endif
()
if
(
NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR
)
if
(
NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR
)
...
@@ -42,30 +44,30 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
...
@@ -42,30 +44,30 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
endif
()
endif
()
# No quotes, so cmake can resolve it as a command with arguments.
# No quotes, so cmake can resolve it as a command with arguments.
set
(
LITE_BUILD_COMMAND $
(
MAKE
)
publish_inference -j
)
if
(
WITH_ARM
)
set
(
LITE_OPTIONAL_ARGS -DWITH_MKL=ON
set
(
LITE_BUILD_COMMAND $
(
MAKE
)
publish_inference -j
)
-DLITE_WITH_CUDA=
${
WITH_GPU
}
message
(
WARNING
"BUILD_COMMAND:
${
LITE_BUILD_COMMAND
}
"
)
-DWITH_MKLDNN=OFF
set
(
LITE_OPTIONAL_ARGS -DWITH_MKL=OFF
-DLITE_WITH_X86=ON
-DLITE_WITH_CUDA=OFF
-DLITE_WITH_PROFILE=OFF
-DWITH_MKLDNN=OFF
-DWITH_LITE=OFF
-DLITE_WITH_X86=OFF
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON
-DWITH_PYTHON=OFF
-DLITE_WITH_PROFILE=OFF
-DWITH_TESTING=OFF
-DARM_TARGET_OS=armlinux
-DLITE_BUILD_EXTRA=ON
-DWITH_LITE=ON
-DCUDNN_ROOT=
${
CUDNN_ROOT
}
-DWITH_PYTHON=OFF
-DLITE_WITH_STATIC_CUDA=OFF
-DWITH_TESTING=OFF
-DCUDA_ARCH_NAME=
${
CUDA_ARCH_NAME
}
-DLITE_BUILD_EXTRA=ON
-DLITE_WITH_XPU=
${
LITE_WITH_XPU
}
-DLITE_WITH_XPU=
${
LITE_WITH_XPU
}
-DXPU_SDK_ROOT=
${
XPU_SDK_ROOT
}
-DXPU_SDK_ROOT=
${
XPU_SDK_ROOT
}
-DLITE_WITH_ARM=OFF
)
-DLITE_WITH_ARM=ON
)
ExternalProject_Add
(
ExternalProject_Add
(
${
LITE_PROJECT
}
${
LITE_PROJECT
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
GIT_REPOSITORY
"https://github.com/PaddlePaddle/Paddle-Lite.git"
GIT_REPOSITORY
"https://github.com/PaddlePaddle/Paddle-Lite.git"
GIT_TAG
${
LITE_GIT_TAG
}
GIT_TAG
${
LITE_GIT_TAG
}
PREFIX
${
LITE_SOURCES_DIR
}
PREFIX
${
LITE_SOURCES_DIR
}
PATCH_COMMAND mkdir -p
${
LITE_SOURCES_DIR
}
/src/extern_lite-build/lite/gen_code && touch
${
LITE_SOURCES_DIR
}
/src/extern_lite-build/lite/gen_code/__generated_code__.cc
UPDATE_COMMAND
""
UPDATE_COMMAND
""
BUILD_COMMAND
${
LITE_BUILD_COMMAND
}
BUILD_COMMAND
${
LITE_BUILD_COMMAND
}
INSTALL_COMMAND
""
INSTALL_COMMAND
""
...
@@ -81,7 +83,51 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
...
@@ -81,7 +83,51 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DCMAKE_BUILD_TYPE=
${
THIRD_PARTY_BUILD_TYPE
}
-DCMAKE_BUILD_TYPE=
${
THIRD_PARTY_BUILD_TYPE
}
${
EXTERNAL_OPTIONAL_ARGS
}
${
EXTERNAL_OPTIONAL_ARGS
}
${
LITE_OPTIONAL_ARGS
}
${
LITE_OPTIONAL_ARGS
}
)
)
set
(
LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8
)
else
()
set
(
LITE_BUILD_COMMAND $
(
MAKE
)
publish_inference -j
)
set
(
LITE_OUTPUT_BIN_DIR inference_lite_lib
)
set
(
LITE_OPTIONAL_ARGS -DWITH_MKL=ON
-DLITE_WITH_CUDA=
${
WITH_GPU
}
-DWITH_MKLDNN=OFF
-DLITE_WITH_X86=ON
-DLITE_WITH_PROFILE=OFF
-DWITH_LITE=OFF
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF
-DWITH_PYTHON=OFF
-DWITH_TESTING=OFF
-DLITE_BUILD_EXTRA=ON
-DCUDNN_ROOT=
${
CUDNN_ROOT
}
-DLITE_WITH_STATIC_CUDA=OFF
-DCUDA_ARCH_NAME=
${
CUDA_ARCH_NAME
}
-DLITE_WITH_XPU=
${
LITE_WITH_XPU
}
-DXPU_SDK_ROOT=
${
XPU_SDK_ROOT
}
-DLITE_WITH_ARM=OFF
)
ExternalProject_Add
(
${
LITE_PROJECT
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
GIT_REPOSITORY
"https://github.com/PaddlePaddle/Paddle-Lite.git"
GIT_TAG
${
LITE_GIT_TAG
}
PREFIX
${
LITE_SOURCES_DIR
}
UPDATE_COMMAND
""
BUILD_COMMAND
${
LITE_BUILD_COMMAND
}
INSTALL_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_CXX_FLAGS=
${
LITE_CMAKE_CXX_FLAGS
}
-DCMAKE_CXX_FLAGS_RELEASE=
${
CMAKE_CXX_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS_DEBUG=
${
CMAKE_CXX_FLAGS_DEBUG
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_C_FLAGS_DEBUG=
${
CMAKE_C_FLAGS_DEBUG
}
-DCMAKE_C_FLAGS_RELEASE=
${
CMAKE_C_FLAGS_RELEASE
}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=
${
THIRD_PARTY_BUILD_TYPE
}
${
EXTERNAL_OPTIONAL_ARGS
}
${
LITE_OPTIONAL_ARGS
}
)
endif
()
ExternalProject_Get_property
(
${
LITE_PROJECT
}
BINARY_DIR
)
ExternalProject_Get_property
(
${
LITE_PROJECT
}
BINARY_DIR
)
ExternalProject_Get_property
(
${
LITE_PROJECT
}
SOURCE_DIR
)
ExternalProject_Get_property
(
${
LITE_PROJECT
}
SOURCE_DIR
)
set
(
LITE_BINARY_DIR
${
BINARY_DIR
}
)
set
(
LITE_BINARY_DIR
${
BINARY_DIR
}
)
...
@@ -103,8 +149,8 @@ function(external_lite_libs alias path)
...
@@ -103,8 +149,8 @@ function(external_lite_libs alias path)
endif
()
endif
()
endfunction
()
endfunction
()
external_lite_libs
(
lite_full_static
${
LITE_BINARY_DIR
}
/
inference_lite_lib
/cxx/lib/libpaddle_full_api_shared.so
)
external_lite_libs
(
lite_full_static
${
LITE_BINARY_DIR
}
/
${
LITE_OUTPUT_BIN_DIR
}
/cxx/lib/libpaddle_full_api_shared.so
)
set
(
LITE_SHARED_LIB
${
LITE_BINARY_DIR
}
/
inference_lite_lib
/cxx/lib/libpaddle_full_api_shared.so
)
set
(
LITE_SHARED_LIB
${
LITE_BINARY_DIR
}
/
${
LITE_OUTPUT_BIN_DIR
}
/cxx/lib/libpaddle_full_api_shared.so
)
add_definitions
(
-DPADDLE_WITH_LITE
)
add_definitions
(
-DPADDLE_WITH_LITE
)
add_definitions
(
-DLITE_WITH_LOG
)
add_definitions
(
-DLITE_WITH_LOG
)
cmake/inference_lib.cmake
浏览文件 @
0a42986c
...
@@ -125,7 +125,7 @@ function(copy_part_of_thrid_party TARGET DST)
...
@@ -125,7 +125,7 @@ function(copy_part_of_thrid_party TARGET DST)
if
(
LITE_BINARY_DIR
)
if
(
LITE_BINARY_DIR
)
set
(
dst_dir
"
${
DST
}
/third_party/install/lite"
)
set
(
dst_dir
"
${
DST
}
/third_party/install/lite"
)
copy
(
${
TARGET
}
copy
(
${
TARGET
}
SRCS
${
LITE_BINARY_DIR
}
/
inference_lite_lib
/*
SRCS
${
LITE_BINARY_DIR
}
/
${
LITE_OUTPUT_BIN_DIR
}
/*
DSTS
${
dst_dir
}
)
DSTS
${
dst_dir
}
)
endif
()
endif
()
endfunction
()
endfunction
()
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
0a42986c
...
@@ -219,6 +219,10 @@ struct Argument {
...
@@ -219,6 +219,10 @@ struct Argument {
DECL_ARGUMENT_FIELD
(
fusion_statis
,
FusionStatis
,
fusion_statis_t
);
DECL_ARGUMENT_FIELD
(
fusion_statis
,
FusionStatis
,
fusion_statis_t
);
// Only used in paddle-lite subgraph.
DECL_ARGUMENT_FIELD
(
cpu_math_library_num_threads
,
CpuMathLibraryNumThreads
,
int
);
private:
private:
std
::
unordered_set
<
std
::
string
>
valid_fields_
;
std
::
unordered_set
<
std
::
string
>
valid_fields_
;
};
};
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
0a42986c
...
@@ -151,6 +151,8 @@ void IRPassManager::CreatePasses(Argument *argument,
...
@@ -151,6 +151,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
"use_xpu"
,
new
bool
(
argument
->
use_xpu
()));
pass
->
Set
(
"use_xpu"
,
new
bool
(
argument
->
use_xpu
()));
pass
->
Set
(
"xpu_l3_workspace_size"
,
pass
->
Set
(
"xpu_l3_workspace_size"
,
new
int
(
argument
->
xpu_l3_workspace_size
()));
new
int
(
argument
->
xpu_l3_workspace_size
()));
pass
->
Set
(
"cpu_math_library_num_threads"
,
new
int
(
argument
->
cpu_math_library_num_threads
()));
}
}
disable_logs_
=
argument
->
disable_logs
();
disable_logs_
=
argument
->
disable_logs
();
if
(
pass_name
==
"fc_fuse_pass"
)
{
if
(
pass_name
==
"fc_fuse_pass"
)
{
...
...
paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
浏览文件 @
0a42986c
...
@@ -244,6 +244,7 @@ void LiteSubgraphPass::SetUpEngine(
...
@@ -244,6 +244,7 @@ void LiteSubgraphPass::SetUpEngine(
bool
enable_int8
=
Get
<
bool
>
(
"enable_int8"
);
bool
enable_int8
=
Get
<
bool
>
(
"enable_int8"
);
bool
use_xpu
=
Get
<
bool
>
(
"use_xpu"
);
bool
use_xpu
=
Get
<
bool
>
(
"use_xpu"
);
int
xpu_l3_workspace_size
=
Get
<
int
>
(
"xpu_l3_workspace_size"
);
int
xpu_l3_workspace_size
=
Get
<
int
>
(
"xpu_l3_workspace_size"
);
int
cpu_math_library_num_threads
=
Get
<
int
>
(
"cpu_math_library_num_threads"
);
lite_api
::
TargetType
target_type
;
lite_api
::
TargetType
target_type
;
if
(
use_gpu
)
{
if
(
use_gpu
)
{
...
@@ -251,7 +252,11 @@ void LiteSubgraphPass::SetUpEngine(
...
@@ -251,7 +252,11 @@ void LiteSubgraphPass::SetUpEngine(
}
else
if
(
use_xpu
)
{
}
else
if
(
use_xpu
)
{
target_type
=
TARGET
(
kXPU
);
target_type
=
TARGET
(
kXPU
);
}
else
{
}
else
{
#ifdef PADDLE_WITH_ARM
target_type
=
TARGET
(
kARM
);
#else
target_type
=
TARGET
(
kX86
);
target_type
=
TARGET
(
kX86
);
#endif
}
}
paddle
::
lite_api
::
PrecisionType
precision_type
=
paddle
::
lite_api
::
PrecisionType
precision_type
=
...
@@ -263,11 +268,12 @@ void LiteSubgraphPass::SetUpEngine(
...
@@ -263,11 +268,12 @@ void LiteSubgraphPass::SetUpEngine(
// Notice: The ordering here determines the device where the
// Notice: The ordering here determines the device where the
// input tensor of the Lite engine is located, and then affects
// input tensor of the Lite engine is located, and then affects
// whether tensor sharing is feasible.
// whether tensor sharing is feasible.
paddle
::
lite
::
Place
({
target_type
,
precision_type
}),
paddle
::
lite
_api
::
Place
({
target_type
,
precision_type
}),
paddle
::
lite
::
Place
({
target_type
,
PRECISION
(
kInt64
)}),
paddle
::
lite
_api
::
Place
({
target_type
,
PRECISION
(
kInt64
)}),
paddle
::
lite
::
Place
({
target_type
,
PRECISION
(
kFloat
)}),
paddle
::
lite
_api
::
Place
({
target_type
,
PRECISION
(
kFloat
)}),
paddle
::
lite
::
Place
({
TARGET
(
kHost
),
PRECISION
(
kFloat
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kHost
),
PRECISION
(
kFloat
)}),
};
};
config
.
cpu_math_library_num_threads
=
cpu_math_library_num_threads
;
config
.
xpu_l3_workspace_size
=
xpu_l3_workspace_size
;
config
.
xpu_l3_workspace_size
=
xpu_l3_workspace_size
;
if
(
dump_model
)
{
if
(
dump_model
)
{
lite
::
StrToBinaryFile
(
"./model.bin"
,
config
.
model
);
lite
::
StrToBinaryFile
(
"./model.bin"
,
config
.
model
);
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
0a42986c
...
@@ -356,7 +356,7 @@ void AnalysisConfig::Update() {
...
@@ -356,7 +356,7 @@ void AnalysisConfig::Update() {
}
}
if
(
use_xpu_
)
{
if
(
use_xpu_
)
{
#ifndef
PADDLE
_WITH_XPU
#ifndef
LITE_SUBGRAPH
_WITH_XPU
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"You tried to use an XPU device, but Paddle was not compiled "
"You tried to use an XPU device, but Paddle was not compiled "
"with XPU-runtime."
));
"with XPU-runtime."
));
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
0a42986c
...
@@ -232,8 +232,17 @@ bool AnalysisPredictor::PrepareExecutor() {
...
@@ -232,8 +232,17 @@ bool AnalysisPredictor::PrepareExecutor() {
void
AnalysisPredictor
::
MkldnnPreSet
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
)
{
void
AnalysisPredictor
::
MkldnnPreSet
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
)
{
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
VLOG
(
2
)
<<
"AnalysisPredictor::Run get_cur_mkldnn_session_id="
std
::
vector
<
std
::
vector
<
int
>>
inputs_shape
;
<<
platform
::
get_cur_mkldnn_session_id
();
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
inputs_shape
.
emplace_back
(
inputs
[
i
].
shape
);
}
MkldnnPreSet
(
inputs_shape
);
#endif
}
void
AnalysisPredictor
::
MkldnnPreSet
(
const
std
::
vector
<
std
::
vector
<
int
>>
&
inputs_shape
)
{
#ifdef PADDLE_WITH_MKLDNN
// In cache clearing mode.
// In cache clearing mode.
if
(
config_
.
mkldnn_cache_capacity_
>
0
)
{
if
(
config_
.
mkldnn_cache_capacity_
>
0
)
{
VLOG
(
2
)
<<
"In mkldnn cache clear mode."
;
VLOG
(
2
)
<<
"In mkldnn cache clear mode."
;
...
@@ -243,9 +252,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
...
@@ -243,9 +252,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
config_
.
mkldnn_cache_capacity_
);
config_
.
mkldnn_cache_capacity_
);
// Set current_input_shape for caching dynamic shape.
// Set current_input_shape for caching dynamic shape.
std
::
stringstream
ss
;
std
::
stringstream
ss
;
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
inputs
_shape
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
inputs
[
i
].
shape
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
inputs
_shape
[
i
]
.
size
();
++
j
)
{
ss
<<
inputs
[
i
].
shape
[
j
]
<<
"-"
;
ss
<<
inputs
_shape
[
i
]
[
j
]
<<
"-"
;
}
}
}
}
VLOG
(
2
)
<<
"Set input shape="
<<
ss
.
str
();
VLOG
(
2
)
<<
"Set input shape="
<<
ss
.
str
();
...
@@ -445,6 +454,8 @@ void AnalysisPredictor::PrepareArgument() {
...
@@ -445,6 +454,8 @@ void AnalysisPredictor::PrepareArgument() {
}
}
if
(
config_
.
lite_engine_enabled
())
{
if
(
config_
.
lite_engine_enabled
())
{
argument_
.
SetCpuMathLibraryNumThreads
(
config_
.
cpu_math_library_num_threads
());
argument_
.
SetLitePrecisionMode
(
config_
.
lite_precision_mode_
);
argument_
.
SetLitePrecisionMode
(
config_
.
lite_precision_mode_
);
argument_
.
SetLitePassesFilter
(
config_
.
lite_passes_filter_
);
argument_
.
SetLitePassesFilter
(
config_
.
lite_passes_filter_
);
argument_
.
SetLiteOpsFilter
(
config_
.
lite_ops_filter_
);
argument_
.
SetLiteOpsFilter
(
config_
.
lite_ops_filter_
);
...
@@ -656,6 +667,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
...
@@ -656,6 +667,18 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
bool
AnalysisPredictor
::
ZeroCopyRun
()
{
bool
AnalysisPredictor
::
ZeroCopyRun
()
{
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
#ifdef PADDLE_WITH_MKLDNN
if
(
config_
.
use_mkldnn_
)
{
std
::
vector
<
std
::
vector
<
int
>>
shape_vector
;
auto
names
=
GetInputNames
();
for
(
size_t
i
=
0
;
i
<
names
.
size
();
++
i
)
{
auto
in_tensor
=
GetInputTensor
(
names
[
i
]);
shape_vector
.
emplace_back
(
in_tensor
->
shape
());
}
MkldnnPreSet
(
shape_vector
);
}
#endif
executor_
->
Run
();
executor_
->
Run
();
// Fix TensorArray reuse not cleaned bug.
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_
.
CollectTensorArrays
(
sub_scope_
);
tensor_array_batch_cleaner_
.
CollectTensorArrays
(
sub_scope_
);
...
@@ -664,6 +687,9 @@ bool AnalysisPredictor::ZeroCopyRun() {
...
@@ -664,6 +687,9 @@ bool AnalysisPredictor::ZeroCopyRun() {
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
// conflict when integrating it into deployment service.
paddle
::
platform
::
SetNumThreads
(
1
);
paddle
::
platform
::
SetNumThreads
(
1
);
#ifdef PADDLE_WITH_MKLDNN
if
(
config_
.
use_mkldnn_
)
MkldnnPostReset
();
#endif
#if defined(PADDLE_WITH_MKLML) && defined(_LINUX)
#if defined(PADDLE_WITH_MKLML) && defined(_LINUX)
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See:
// avoid memory leak. See:
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
0a42986c
...
@@ -311,6 +311,17 @@ class AnalysisPredictor : public PaddlePredictor {
...
@@ -311,6 +311,17 @@ class AnalysisPredictor : public PaddlePredictor {
/// \param[in] inputs tensors
/// \param[in] inputs tensors
///
///
void
MkldnnPreSet
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
);
void
MkldnnPreSet
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
);
///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
///
/// Used in AnalysisPredictor::Run(), do not support
/// AnalysisPredictor::ZeroCopyRun() now.
///
/// \param[in] inputs tensor shape
///
void
MkldnnPreSet
(
const
std
::
vector
<
std
::
vector
<
int
>>
&
inputs_shape
);
///
///
/// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
/// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
///
///
...
...
paddle/fluid/inference/lite/CMakeLists.txt
浏览文件 @
0a42986c
...
@@ -4,6 +4,6 @@ endif()
...
@@ -4,6 +4,6 @@ endif()
cc_library
(
lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash
)
cc_library
(
lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash
)
cc_library
(
lite_engine SRCS engine.cc DEPS lite_full_static framework_proto
${
XPU_DEPS
}
)
cc_library
(
lite_engine SRCS engine.cc DEPS lite_full_static framework_proto
${
XPU_DEPS
}
)
cc_library
(
lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost device_context
)
cc_library
(
lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost device_context
${
XPU_DEPS
}
)
cc_test
(
test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis
)
cc_test
(
test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis
)
cc_test
(
test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils
)
cc_test
(
test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils
)
paddle/fluid/inference/lite/engine.cc
浏览文件 @
0a42986c
...
@@ -16,12 +16,16 @@
...
@@ -16,12 +16,16 @@
#define LITE_WITH_CUDA 1
#define LITE_WITH_CUDA 1
#endif
#endif
#ifdef
PADDLE
_WITH_XPU
#ifdef
LITE_SUBGRAPH
_WITH_XPU
#define LITE_WITH_XPU 1
#define LITE_WITH_XPU 1
#endif
#endif
#ifndef PADDLE_WITH_ARM
#define LITE_WITH_X86 1
#endif
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/lite/engine.h"
#include
"lite/api/paddle_use_passes.h"
#include
<utility>
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
...
@@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const {
...
@@ -36,32 +40,40 @@ bool EngineManager::Has(const std::string& name) const {
return
engines_
.
at
(
name
).
get
()
!=
nullptr
;
return
engines_
.
at
(
name
).
get
()
!=
nullptr
;
}
}
paddle
::
lite
::
Predictor
*
EngineManager
::
Get
(
const
std
::
string
&
name
)
const
{
paddle
::
lite_api
::
PaddlePredictor
*
EngineManager
::
Get
(
const
std
::
string
&
name
)
const
{
return
engines_
.
at
(
name
).
get
();
return
engines_
.
at
(
name
).
get
();
}
}
paddle
::
lite
::
Predictor
*
EngineManager
::
Create
(
const
std
::
string
&
name
,
paddle
::
lite_api
::
PaddlePredictor
*
EngineManager
::
Create
(
const
EngineConfig
&
cfg
)
{
const
std
::
string
&
name
,
const
EngineConfig
&
cfg
)
{
if
(
cfg
.
valid_places
.
front
().
target
==
TARGET
(
kCUDA
))
{
// config info for predictor.
#ifdef PADDLE_WITH_CUDA
paddle
::
lite_api
::
CxxConfig
lite_cxx_config
;
paddle
::
lite
::
Env
<
TARGET
(
kCUDA
)
>::
Init
();
lite_cxx_config
.
set_model_buffer
(
cfg
.
model
.
c_str
(),
cfg
.
model
.
size
(),
cfg
.
param
.
c_str
(),
cfg
.
param
.
size
());
lite_cxx_config
.
set_valid_places
(
cfg
.
valid_places
);
#ifdef PADDLE_WITH_ARM
set_threads
.
set_threads
(
cfg
.
cpu_math_library_num_threads
);
#else
lite_cxx_config
.
set_x86_math_library_num_threads
(
cfg
.
cpu_math_library_num_threads
);
#endif
#endif
}
else
if
(
cfg
.
valid_places
.
front
().
target
==
TARGET
(
kXPU
))
{
#ifdef
PADDLE
_WITH_XPU
#ifdef
LITE_SUBGRAPH
_WITH_XPU
paddle
::
lite
::
TargetWrapper
<
TARGET
(
kXPU
)
>::
workspace_l3_size_per_thread
=
lite_cxx_config
.
set_xpu_workspace_l3_size_per_thread
(
cfg
.
xpu_l3_workspace_size
;
cfg
.
xpu_l3_workspace_size
)
;
#endif
#endif
}
auto
*
p
=
new
paddle
::
lite
::
Predictor
();
// create predictor
p
->
Build
(
""
,
cfg
.
model
,
cfg
.
param
,
cfg
.
valid_places
,
cfg
.
neglected_passes
,
std
::
shared_ptr
<
paddle
::
lite_api
::
PaddlePredictor
>
p
=
cfg
.
model_type
,
cfg
.
model_from_memory
);
paddle
::
lite_api
::
CreatePaddlePredictor
(
lite_cxx_config
);
engines_
[
name
]
.
reset
(
p
);
engines_
[
name
]
=
std
::
move
(
p
);
return
p
;
return
engines_
[
name
].
get
()
;
}
}
void
EngineManager
::
DeleteAll
()
{
void
EngineManager
::
DeleteAll
()
{
for
(
auto
&
item
:
engines_
)
{
for
(
auto
&
item
:
engines_
)
{
item
.
second
.
reset
(
nullptr
);
item
.
second
.
reset
();
}
}
}
}
...
...
paddle/fluid/inference/lite/engine.h
浏览文件 @
0a42986c
...
@@ -23,12 +23,9 @@
...
@@ -23,12 +23,9 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wall"
#pragma GCC diagnostic ignored "-Wall"
#include "lite/api/cxx_api.h"
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_place.h"
#include "lite/api/paddle_place.h"
#include "lite/core/context.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/core/device_info.h"
#include "lite/core/memory.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#pragma GCC diagnostic pop
#pragma GCC diagnostic pop
namespace
paddle
{
namespace
paddle
{
...
@@ -38,25 +35,33 @@ namespace lite {
...
@@ -38,25 +35,33 @@ namespace lite {
struct
EngineConfig
{
struct
EngineConfig
{
std
::
string
model
;
std
::
string
model
;
std
::
string
param
;
std
::
string
param
;
paddle
::
lite
::
Place
prefer_place
;
std
::
vector
<
paddle
::
lite_api
::
Place
>
valid_places
;
std
::
vector
<
paddle
::
lite
::
Place
>
valid_places
;
std
::
vector
<
std
::
string
>
neglected_passes
;
std
::
vector
<
std
::
string
>
neglected_passes
;
lite_api
::
LiteModelType
model_type
{
lite_api
::
LiteModelType
::
kProtobuf
};
lite_api
::
LiteModelType
model_type
{
lite_api
::
LiteModelType
::
kProtobuf
};
bool
model_from_memory
{
true
};
bool
model_from_memory
{
true
};
// for xpu
size_t
xpu_l3_workspace_size
;
size_t
xpu_l3_workspace_size
;
// for x86 or arm
int
cpu_math_library_num_threads
{
1
};
// for cuda
bool
use_multi_stream
{
false
};
};
};
class
EngineManager
{
class
EngineManager
{
public:
public:
bool
Empty
()
const
;
bool
Empty
()
const
;
bool
Has
(
const
std
::
string
&
name
)
const
;
bool
Has
(
const
std
::
string
&
name
)
const
;
paddle
::
lite
::
Predictor
*
Get
(
const
std
::
string
&
name
)
const
;
paddle
::
lite
_api
::
Paddle
Predictor
*
Get
(
const
std
::
string
&
name
)
const
;
paddle
::
lite
::
Predictor
*
Create
(
const
std
::
string
&
name
,
paddle
::
lite
_api
::
Paddle
Predictor
*
Create
(
const
std
::
string
&
name
,
const
EngineConfig
&
cfg
);
const
EngineConfig
&
cfg
);
void
DeleteAll
();
void
DeleteAll
();
private:
private:
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
paddle
::
lite
::
Predictor
>>
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
paddle
::
lite_api
::
PaddlePredictor
>>
engines_
;
engines_
;
};
};
...
...
paddle/fluid/inference/lite/tensor_utils.cc
浏览文件 @
0a42986c
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include <functional>
#include <map>
#include <map>
#include <memory>
#include <memory>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/data_type.h"
...
@@ -45,6 +46,7 @@ platform::Place GetNativePlace(const TargetType& type, int id = 0) {
...
@@ -45,6 +46,7 @@ platform::Place GetNativePlace(const TargetType& type, int id = 0) {
switch
(
type
)
{
switch
(
type
)
{
case
TargetType
::
kHost
:
case
TargetType
::
kHost
:
case
TargetType
::
kX86
:
case
TargetType
::
kX86
:
case
TargetType
::
kARM
:
return
platform
::
CPUPlace
();
return
platform
::
CPUPlace
();
case
TargetType
::
kCUDA
:
case
TargetType
::
kCUDA
:
return
platform
::
CUDAPlace
(
id
);
return
platform
::
CUDAPlace
(
id
);
...
@@ -134,16 +136,55 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data,
...
@@ -134,16 +136,55 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data,
}
}
}
}
void
InitDstTensor
(
paddle
::
lite
::
Tensor
*
dst
,
const
framework
::
LoDTensor
&
src
)
{
void
*
GetLiteTensorDataPtr
(
paddle
::
lite_api
::
Tensor
*
src
,
PrecisionType
precision_type
,
TargetType
target_type
)
{
void
*
res
{
nullptr
};
switch
(
precision_type
)
{
case
PrecisionType
::
kFloat
:
res
=
static_cast
<
void
*>
(
src
->
mutable_data
<
float
>
(
target_type
));
break
;
case
PrecisionType
::
kInt8
:
res
=
static_cast
<
void
*>
(
src
->
mutable_data
<
int8_t
>
(
target_type
));
break
;
case
PrecisionType
::
kInt32
:
res
=
static_cast
<
void
*>
(
src
->
mutable_data
<
int32_t
>
(
target_type
));
break
;
case
PrecisionType
::
kInt64
:
res
=
static_cast
<
void
*>
(
src
->
mutable_data
<
int64_t
>
(
target_type
));
break
;
default:
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Unsupported precision type. Now only supports FP32, INT8, INT32 and "
"INT64."
));
break
;
}
return
res
;
}
int64_t
GetLiteTensorNumel
(
const
paddle
::
lite_api
::
Tensor
&
tensor
)
{
auto
shape
=
tensor
.
shape
();
int64_t
numel
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
int64_t
>
());
return
numel
;
}
void
InitDstTensor
(
paddle
::
lite_api
::
Tensor
*
dst
,
const
framework
::
LoDTensor
&
src
)
{
// Currently, Lite needs to explicitly specify the target type of
// Currently, Lite needs to explicitly specify the target type of
// the input tensor.
// the input tensor.
constexpr
int
empty_size
=
0
;
constexpr
int
empty_size
=
0
;
dst
->
mutable_data
(
GetLiteTargetType
(
src
.
place
()),
empty_size
);
dst
->
Resize
({
empty_size
});
dst
->
set_precision
(
GetLitePrecisionType
(
src
.
type
()));
GetLiteTensorDataPtr
(
dst
,
GetLitePrecisionType
(
src
.
type
()),
SetLoD
(
dst
->
mutable_lod
(),
src
.
lod
());
GetLiteTargetType
(
src
.
place
()));
dst
->
SetPrecision
(
GetLitePrecisionType
(
src
.
type
()));
paddle
::
lite
::
LoD
lite_lod
;
SetLoD
(
&
lite_lod
,
src
.
lod
());
dst
->
SetLoD
(
lite_lod
);
}
}
void
InitDstTensor
(
framework
::
LoDTensor
*
dst
,
const
paddle
::
lite
::
Tensor
&
src
)
{
void
InitDstTensor
(
framework
::
LoDTensor
*
dst
,
const
paddle
::
lite_api
::
Tensor
&
src
)
{
constexpr
framework
::
proto
::
VarType
::
Type
dtype
=
constexpr
framework
::
proto
::
VarType
::
Type
dtype
=
framework
::
proto
::
VarType_Type_FP32
;
framework
::
proto
::
VarType_Type_FP32
;
dst
->
mutable_data
(
inference
::
lite
::
utils
::
GetNativePlace
(
src
.
target
()),
dst
->
mutable_data
(
inference
::
lite
::
utils
::
GetNativePlace
(
src
.
target
()),
...
@@ -152,7 +193,8 @@ void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
...
@@ -152,7 +193,8 @@ void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
}
}
template
<
>
template
<
>
void
TensorCopyAsync
(
paddle
::
lite
::
Tensor
*
dst
,
const
framework
::
LoDTensor
&
src
,
void
TensorCopyAsync
(
paddle
::
lite_api
::
Tensor
*
dst
,
const
framework
::
LoDTensor
&
src
,
const
platform
::
DeviceContext
&
ctx
)
{
const
platform
::
DeviceContext
&
ctx
)
{
InitDstTensor
(
dst
,
src
);
InitDstTensor
(
dst
,
src
);
const
platform
::
Place
&
src_place
=
src
.
place
();
const
platform
::
Place
&
src_place
=
src
.
place
();
...
@@ -161,52 +203,56 @@ void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
...
@@ -161,52 +203,56 @@ void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
static_cast
<
size_t
>
(
src
.
numel
())
*
framework
::
SizeOfType
(
src
.
type
());
static_cast
<
size_t
>
(
src
.
numel
())
*
framework
::
SizeOfType
(
src
.
type
());
dst
->
Resize
(
framework
::
vectorize
(
src
.
dims
()));
dst
->
Resize
(
framework
::
vectorize
(
src
.
dims
()));
const
void
*
src_data
=
src
.
data
<
void
>
();
const
void
*
src_data
=
src
.
data
<
void
>
();
void
*
dst_data
=
dst
->
mutable_data
(
bytes
);
void
*
dst_data
{
nullptr
};
dst_data
=
GetLiteTensorDataPtr
(
dst
,
GetLitePrecisionType
(
src
.
type
()),
GetLiteTargetType
(
src
.
place
()));
VLOG
(
3
)
<<
"[CopyAsync fluid -> lite] Bytes = "
<<
bytes
<<
", src = "
<<
&
src
VLOG
(
3
)
<<
"[CopyAsync fluid -> lite] Bytes = "
<<
bytes
<<
", src = "
<<
&
src
<<
", dst = "
<<
dst
<<
", src_type = "
<<
src
.
type
();
<<
", dst = "
<<
dst
<<
", src_type = "
<<
src
.
type
();
MemoryCopyAsync
(
dst_place
,
dst_data
,
src_place
,
src_data
,
bytes
,
ctx
);
MemoryCopyAsync
(
dst_place
,
dst_data
,
src_place
,
src_data
,
bytes
,
ctx
);
VLOG
(
3
)
<<
"[Lite memory size] Bytes = "
<<
dst
->
memory_size
()
;
VLOG
(
3
)
<<
"[Lite memory size] Bytes = "
<<
bytes
;
}
}
template
<
>
template
<
>
void
TensorCopyAsync
(
framework
::
LoDTensor
*
dst
,
const
paddle
::
lite
::
Tensor
&
src
,
void
TensorCopyAsync
(
framework
::
LoDTensor
*
dst
,
const
paddle
::
lite_api
::
Tensor
&
src
,
const
platform
::
DeviceContext
&
ctx
)
{
const
platform
::
DeviceContext
&
ctx
)
{
dst
->
Resize
(
paddle
::
framework
::
make_ddim
(
src
.
dims
().
Vectoriz
e
()));
dst
->
Resize
(
paddle
::
framework
::
make_ddim
(
src
.
shap
e
()));
InitDstTensor
(
dst
,
src
);
InitDstTensor
(
dst
,
src
);
const
platform
::
Place
&
src_place
=
GetNativePlace
(
src
.
target
());
const
platform
::
Place
&
src_place
=
GetNativePlace
(
src
.
target
());
const
platform
::
Place
&
dst_place
=
dst
->
place
();
const
platform
::
Place
&
dst_place
=
dst
->
place
();
const
size_t
bytes
=
int64_t
src_numel
=
GetLiteTensorNumel
(
src
);
static_cast
<
size_t
>
(
src
.
numel
())
*
framework
::
SizeOfType
(
dst
->
type
());
const
size_t
bytes
=
src_numel
*
framework
::
SizeOfType
(
dst
->
type
());
const
void
*
src_data
=
src
.
raw_data
();
const
void
*
src_data
=
src
.
data
<
void
>
();
// When Lite is ready, the source type needs to be modified here.
// When Lite is ready, the source type needs to be modified here.
void
*
dst_data
=
dst
->
mutable_data
(
dst_place
,
dst
->
type
());
void
*
dst_data
=
dst
->
mutable_data
(
dst_place
,
dst
->
type
());
VLOG
(
3
)
<<
"[CopyAsync lite -> fluid] Bytes = "
<<
bytes
<<
", src = "
<<
&
src
VLOG
(
3
)
<<
"[CopyAsync lite -> fluid] Bytes = "
<<
bytes
<<
", src = "
<<
&
src
<<
", dst = "
<<
dst
<<
", src_type = "
<<
dst
->
type
();
<<
", dst = "
<<
dst
<<
", src_type = "
<<
dst
->
type
();
MemoryCopyAsync
(
dst_place
,
dst_data
,
src_place
,
src_data
,
bytes
,
ctx
);
MemoryCopyAsync
(
dst_place
,
dst_data
,
src_place
,
src_data
,
bytes
,
ctx
);
VLOG
(
3
)
<<
"[Lite memory size] Bytes = "
<<
src
.
memory_size
()
;
VLOG
(
3
)
<<
"[Lite memory size] Bytes = "
<<
bytes
;
}
}
template
<
>
template
<
>
void
TensorDataShare
(
paddle
::
lite
::
Tensor
*
dst
,
framework
::
LoDTensor
*
src
)
{
void
TensorDataShare
(
paddle
::
lite_api
::
Tensor
*
dst
,
framework
::
LoDTensor
*
src
)
{
const
size_t
bytes
=
static_cast
<
size_t
>
(
src
->
numel
())
*
framework
::
SizeOfType
(
src
->
type
());
auto
buf
=
std
::
make_shared
<
paddle
::
lite
::
Buffer
>
(
paddle
::
lite
::
Buffer
(
src
->
data
<
void
>
(),
GetLiteTargetType
(
src
->
place
()),
src
->
memory_size
()));
dst
->
Resize
(
framework
::
vectorize
(
src
->
dims
()));
dst
->
Resize
(
framework
::
vectorize
(
src
->
dims
()));
dst
->
set_precision
(
GetLitePrecisionType
(
src
->
type
()));
dst
->
ShareExternalMemory
(
src
->
data
<
void
>
(),
src
->
memory_size
(),
SetLoD
(
dst
->
mutable_lod
(),
src
->
lod
());
GetLiteTargetType
(
src
->
place
()));
dst
->
ResetBuffer
(
buf
,
bytes
);
dst
->
SetPrecision
(
GetLitePrecisionType
(
src
->
type
()));
paddle
::
lite
::
LoD
lite_lod
;
SetLoD
(
&
lite_lod
,
src
->
lod
());
dst
->
SetLoD
(
lite_lod
);
}
}
template
<
>
template
<
>
void
TensorDataShare
(
framework
::
LoDTensor
*
dst
,
paddle
::
lite
::
Tensor
*
src
)
{
void
TensorDataShare
(
framework
::
LoDTensor
*
dst
,
paddle
::
lite
_api
::
Tensor
*
src
)
{
constexpr
framework
::
proto
::
VarType
::
Type
dtype
=
constexpr
framework
::
proto
::
VarType
::
Type
dtype
=
framework
::
proto
::
VarType_Type_FP32
;
framework
::
proto
::
VarType_Type_FP32
;
void
*
src_raw_data
=
src
->
raw_data
();
void
*
src_raw_data
=
GetLiteTensorDataPtr
(
src
,
GetLitePrecisionType
(
dtype
),
src
->
target
());
size_t
memory_size
=
GetLiteTensorNumel
(
*
src
)
*
sizeof
(
float
);
std
::
shared_ptr
<
memory
::
allocation
::
Allocation
>
holder
(
std
::
shared_ptr
<
memory
::
allocation
::
Allocation
>
holder
(
new
memory
::
allocation
::
Allocation
(
src_raw_data
,
src
->
memory_size
()
,
new
memory
::
allocation
::
Allocation
(
src_raw_data
,
memory_size
,
GetNativePlace
(
src
->
target
())));
GetNativePlace
(
src
->
target
())));
dst
->
Resize
(
paddle
::
framework
::
make_ddim
(
src
->
dims
().
Vectoriz
e
()));
dst
->
Resize
(
paddle
::
framework
::
make_ddim
(
src
->
shap
e
()));
SetLoD
(
dst
->
mutable_lod
(),
src
->
lod
());
SetLoD
(
dst
->
mutable_lod
(),
src
->
lod
());
dst
->
ResetHolderWithType
(
holder
,
dtype
);
dst
->
ResetHolderWithType
(
holder
,
dtype
);
}
}
...
...
paddle/fluid/inference/lite/test_engine.cc
浏览文件 @
0a42986c
...
@@ -101,10 +101,10 @@ TEST(EngineManager, engine) {
...
@@ -101,10 +101,10 @@ TEST(EngineManager, engine) {
config
.
model_from_memory
=
true
;
config
.
model_from_memory
=
true
;
config
.
valid_places
=
{
config
.
valid_places
=
{
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
paddle
::
lite
::
Place
({
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)}),
#endif
#endif
paddle
::
lite
::
Place
({
TARGET
(
kX86
),
PRECISION
(
kFloat
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kX86
),
PRECISION
(
kFloat
)}),
paddle
::
lite
::
Place
({
TARGET
(
kHost
),
PRECISION
(
kAny
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kHost
),
PRECISION
(
kAny
)}),
};
};
LOG
(
INFO
)
<<
"Create EngineManager"
;
LOG
(
INFO
)
<<
"Create EngineManager"
;
...
@@ -117,7 +117,7 @@ TEST(EngineManager, engine) {
...
@@ -117,7 +117,7 @@ TEST(EngineManager, engine) {
ASSERT_EQ
(
inference
::
Singleton
<
inference
::
lite
::
EngineManager
>::
Global
().
Has
(
ASSERT_EQ
(
inference
::
Singleton
<
inference
::
lite
::
EngineManager
>::
Global
().
Has
(
unique_key
),
unique_key
),
true
);
true
);
paddle
::
lite
::
Predictor
*
engine_0
=
paddle
::
lite
_api
::
Paddle
Predictor
*
engine_0
=
inference
::
Singleton
<
inference
::
lite
::
EngineManager
>::
Global
().
Get
(
inference
::
Singleton
<
inference
::
lite
::
EngineManager
>::
Global
().
Get
(
unique_key
);
unique_key
);
CHECK_NOTNULL
(
engine_0
);
CHECK_NOTNULL
(
engine_0
);
...
...
paddle/fluid/inference/lite/test_tensor_utils.cc
浏览文件 @
0a42986c
...
@@ -73,6 +73,33 @@ TEST(LiteEngineOp, GetNativeLayoutType) {
...
@@ -73,6 +73,33 @@ TEST(LiteEngineOp, GetNativeLayoutType) {
EXPECT_ANY_THROW
(
GetNativeLayoutType
(
DataLayoutType
::
kNHWC
));
EXPECT_ANY_THROW
(
GetNativeLayoutType
(
DataLayoutType
::
kNHWC
));
}
}
template
<
typename
T
>
void
test_lite_tensor_data_ptr
(
PrecisionType
precision_type
)
{
void
*
GetLiteTensorDataPtr
(
paddle
::
lite_api
::
Tensor
*
src
,
PrecisionType
precision_type
,
TargetType
target_type
);
const
int
count
=
4
;
paddle
::
lite
::
Tensor
lite_tensor
;
lite_tensor
.
Resize
({
count
});
auto
*
lite_tensor_data
=
lite_tensor
.
mutable_data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
lite_tensor_data
[
i
]
=
i
;
}
paddle
::
lite_api
::
Tensor
lite_api_tensor
(
&
lite_tensor
);
T
*
data
=
static_cast
<
T
*>
(
GetLiteTensorDataPtr
(
&
lite_api_tensor
,
precision_type
,
TargetType
::
kHost
));
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
CHECK_EQ
(
data
[
i
],
static_cast
<
T
>
(
i
))
<<
"the i-th num is not correct."
;
}
}
TEST
(
LiteEngineOp
,
GetLiteTensorDataPtr
)
{
test_lite_tensor_data_ptr
<
int64_t
>
(
PrecisionType
::
kInt64
);
test_lite_tensor_data_ptr
<
int32_t
>
(
PrecisionType
::
kInt32
);
test_lite_tensor_data_ptr
<
int8_t
>
(
PrecisionType
::
kInt8
);
EXPECT_ANY_THROW
(
test_lite_tensor_data_ptr
<
double
>
(
PrecisionType
::
kUnk
));
}
void
test_tensor_copy
(
const
platform
::
DeviceContext
&
ctx
)
{
void
test_tensor_copy
(
const
platform
::
DeviceContext
&
ctx
)
{
// Create LoDTensor.
// Create LoDTensor.
std
::
vector
<
float
>
vector
({
1
,
2
,
3
,
4
});
std
::
vector
<
float
>
vector
({
1
,
2
,
3
,
4
});
...
@@ -83,10 +110,11 @@ void test_tensor_copy(const platform::DeviceContext& ctx) {
...
@@ -83,10 +110,11 @@ void test_tensor_copy(const platform::DeviceContext& ctx) {
lod_tensor
.
set_lod
(
lod
);
lod_tensor
.
set_lod
(
lod
);
// Create lite::Tensor and copy.
// Create lite::Tensor and copy.
paddle
::
lite
::
Tensor
lite_tensor
;
paddle
::
lite
::
Tensor
lite_tensor
;
TensorCopyAsync
(
&
lite_tensor
,
lod_tensor
,
ctx
);
paddle
::
lite_api
::
Tensor
lite_api_tensor
(
&
lite_tensor
);
TensorCopyAsync
(
&
lite_api_tensor
,
lod_tensor
,
ctx
);
// Copy to LoDTensor.
// Copy to LoDTensor.
framework
::
LoDTensor
lod_tensor_n
;
framework
::
LoDTensor
lod_tensor_n
;
TensorCopyAsync
(
&
lod_tensor_n
,
lite_tensor
,
ctx
);
TensorCopyAsync
(
&
lod_tensor_n
,
lite_
api_
tensor
,
ctx
);
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
platform
::
GpuStreamSync
(
platform
::
GpuStreamSync
(
...
@@ -108,10 +136,11 @@ void test_tensor_share(const platform::DeviceContext& ctx) {
...
@@ -108,10 +136,11 @@ void test_tensor_share(const platform::DeviceContext& ctx) {
lod_tensor
.
set_lod
(
lod
);
lod_tensor
.
set_lod
(
lod
);
// Create lite::Tensor and share.
// Create lite::Tensor and share.
paddle
::
lite
::
Tensor
lite_tensor
;
paddle
::
lite
::
Tensor
lite_tensor
;
TensorDataShare
(
&
lite_tensor
,
&
lod_tensor
);
paddle
::
lite_api
::
Tensor
lite_api_tensor
(
&
lite_tensor
);
TensorDataShare
(
&
lite_api_tensor
,
&
lod_tensor
);
// Copy to LoDTensor.
// Copy to LoDTensor.
framework
::
LoDTensor
lod_tensor_n
;
framework
::
LoDTensor
lod_tensor_n
;
TensorCopyAsync
(
&
lod_tensor_n
,
lite_tensor
,
ctx
);
TensorCopyAsync
(
&
lod_tensor_n
,
lite_
api_
tensor
,
ctx
);
std
::
vector
<
float
>
result
;
std
::
vector
<
float
>
result
;
TensorToVector
(
lod_tensor_n
,
ctx
,
&
result
);
TensorToVector
(
lod_tensor_n
,
ctx
,
&
result
);
ASSERT_EQ
(
result
,
vector
);
ASSERT_EQ
(
result
,
vector
);
...
...
paddle/fluid/inference/tests/api/lite_resnet50_test.cc
浏览文件 @
0a42986c
...
@@ -25,9 +25,13 @@ namespace inference {
...
@@ -25,9 +25,13 @@ namespace inference {
TEST
(
AnalysisPredictor
,
use_gpu
)
{
TEST
(
AnalysisPredictor
,
use_gpu
)
{
std
::
string
model_dir
=
FLAGS_infer_model
+
"/"
+
"model"
;
std
::
string
model_dir
=
FLAGS_infer_model
+
"/"
+
"model"
;
AnalysisConfig
config
;
AnalysisConfig
config
;
#if defined(PADDLE_WITH_CUDA)
config
.
EnableUseGpu
(
100
,
0
);
config
.
EnableUseGpu
(
100
,
0
);
#elif defined(LITE_SUBGRAPH_WITH_XPU)
config
.
EnableXpu
(
100
);
#endif
config
.
SetModel
(
model_dir
+
"/model"
,
model_dir
+
"/params"
);
config
.
SetModel
(
model_dir
+
"/model"
,
model_dir
+
"/params"
);
config
.
EnableLiteEngine
(
paddle
::
AnalysisConfig
::
Precision
::
kFloat32
);
config
.
EnableLiteEngine
(
paddle
::
AnalysisConfig
::
Precision
::
kFloat32
,
true
);
std
::
vector
<
PaddleTensor
>
inputs
;
std
::
vector
<
PaddleTensor
>
inputs
;
auto
predictor
=
CreatePaddlePredictor
(
config
);
auto
predictor
=
CreatePaddlePredictor
(
config
);
...
@@ -39,7 +43,7 @@ TEST(AnalysisPredictor, use_gpu) {
...
@@ -39,7 +43,7 @@ TEST(AnalysisPredictor, use_gpu) {
std
::
vector
<
float
>
input
(
input_num
,
1
);
std
::
vector
<
float
>
input
(
input_num
,
1
);
PaddleTensor
in
;
PaddleTensor
in
;
in
.
shape
=
{
1
,
3
,
318
,
318
};
in
.
shape
=
{
batch
,
channel
,
height
,
width
};
in
.
data
=
in
.
data
=
PaddleBuf
(
static_cast
<
void
*>
(
input
.
data
()),
input_num
*
sizeof
(
float
));
PaddleBuf
(
static_cast
<
void
*>
(
input
.
data
()),
input_num
*
sizeof
(
float
));
in
.
dtype
=
PaddleDType
::
FLOAT32
;
in
.
dtype
=
PaddleDType
::
FLOAT32
;
...
...
paddle/fluid/operators/lite/lite_engine_op.h
浏览文件 @
0a42986c
...
@@ -39,7 +39,7 @@ class LiteEngineOp : public framework::OperatorBase {
...
@@ -39,7 +39,7 @@ class LiteEngineOp : public framework::OperatorBase {
private:
private:
std
::
vector
<
std
::
string
>
in_names_
;
std
::
vector
<
std
::
string
>
in_names_
;
std
::
vector
<
std
::
string
>
out_names_
;
std
::
vector
<
std
::
string
>
out_names_
;
paddle
::
lite
::
Predictor
*
engine_
;
paddle
::
lite
_api
::
Paddle
Predictor
*
engine_
;
framework
::
proto
::
VarType
::
Type
precision_
;
framework
::
proto
::
VarType
::
Type
precision_
;
bool
use_gpu_
;
bool
use_gpu_
;
bool
zero_copy_
;
bool
zero_copy_
;
...
@@ -78,10 +78,10 @@ class LiteEngineOp : public framework::OperatorBase {
...
@@ -78,10 +78,10 @@ class LiteEngineOp : public framework::OperatorBase {
framework
::
LoDTensor
src_t
=
framework
::
LoDTensor
src_t
=
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope
,
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope
,
in_names_
[
i
]);
in_names_
[
i
]);
paddle
::
lite
::
Tensor
*
dst_t
=
engine_
->
GetInput
(
i
);
paddle
::
lite
_api
::
Tensor
dst_t
=
*
(
engine_
->
GetInput
(
i
)
);
VLOG
(
3
)
<<
"== fluid -> lite ("
<<
in_names_
[
i
]
<<
" -> "
VLOG
(
3
)
<<
"== fluid -> lite ("
<<
in_names_
[
i
]
<<
" -> "
<<
engine_
->
GetInputNames
()[
i
]
<<
")"
;
<<
engine_
->
GetInputNames
()[
i
]
<<
")"
;
inference
::
lite
::
utils
::
TensorCopy
(
dst_t
,
&
src_t
,
*
ctx
,
zero_copy_
);
inference
::
lite
::
utils
::
TensorCopy
(
&
dst_t
,
&
src_t
,
*
ctx
,
zero_copy_
);
}
}
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
is_gpu_place
(
dev_place
))
{
if
(
platform
::
is_gpu_place
(
dev_place
))
{
...
@@ -93,7 +93,7 @@ class LiteEngineOp : public framework::OperatorBase {
...
@@ -93,7 +93,7 @@ class LiteEngineOp : public framework::OperatorBase {
engine_
->
Run
();
engine_
->
Run
();
VLOG
(
3
)
<<
"lite engine run done"
;
VLOG
(
3
)
<<
"lite engine run done"
;
for
(
size_t
i
=
0
;
i
<
out_names_
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
out_names_
.
size
();
i
++
)
{
paddle
::
lite
::
Tensor
src_t
=
*
(
engine_
->
GetOutput
(
i
));
paddle
::
lite
_api
::
Tensor
src_t
=
*
(
engine_
->
GetOutput
(
i
));
framework
::
LoDTensor
*
dst_t
=
framework
::
LoDTensor
*
dst_t
=
&
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
&
inference
::
analysis
::
GetFromScope
<
framework
::
LoDTensor
>
(
scope
,
out_names_
[
i
]);
scope
,
out_names_
[
i
]);
...
...
paddle/fluid/operators/lite/lite_engine_op_test.cc
浏览文件 @
0a42986c
...
@@ -84,10 +84,10 @@ TEST(LiteEngineOp, engine_op) {
...
@@ -84,10 +84,10 @@ TEST(LiteEngineOp, engine_op) {
inference
::
lite
::
EngineConfig
config
;
inference
::
lite
::
EngineConfig
config
;
config
.
valid_places
=
{
config
.
valid_places
=
{
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
paddle
::
lite
::
Place
({
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)}),
#endif
#endif
paddle
::
lite
::
Place
({
TARGET
(
kHost
),
PRECISION
(
kAny
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kX86
),
PRECISION
(
kFloat
)}),
paddle
::
lite
::
Place
({
TARGET
(
kX86
),
PRECISION
(
kFloat
)}),
paddle
::
lite
_api
::
Place
({
TARGET
(
kHost
),
PRECISION
(
kAny
)}),
};
};
serialize_params
(
&
(
config
.
param
),
&
scope
,
repetitive_params
);
serialize_params
(
&
(
config
.
param
),
&
scope
,
repetitive_params
);
config
.
model
=
program
.
Proto
()
->
SerializeAsString
();
config
.
model
=
program
.
Proto
()
->
SerializeAsString
();
...
...
paddle/fluid/pybind/inference_api.cc
浏览文件 @
0a42986c
...
@@ -387,6 +387,8 @@ void BindAnalysisConfig(py::module *m) {
...
@@ -387,6 +387,8 @@ void BindAnalysisConfig(py::module *m) {
.
def
(
"params_file"
,
&
AnalysisConfig
::
params_file
)
.
def
(
"params_file"
,
&
AnalysisConfig
::
params_file
)
.
def
(
"enable_use_gpu"
,
&
AnalysisConfig
::
EnableUseGpu
,
.
def
(
"enable_use_gpu"
,
&
AnalysisConfig
::
EnableUseGpu
,
py
::
arg
(
"memory_pool_init_size_mb"
),
py
::
arg
(
"device_id"
)
=
0
)
py
::
arg
(
"memory_pool_init_size_mb"
),
py
::
arg
(
"device_id"
)
=
0
)
.
def
(
"enable_xpu"
,
&
AnalysisConfig
::
EnableXpu
,
py
::
arg
(
"l3_workspace_size"
))
.
def
(
"disable_gpu"
,
&
AnalysisConfig
::
DisableGpu
)
.
def
(
"disable_gpu"
,
&
AnalysisConfig
::
DisableGpu
)
.
def
(
"use_gpu"
,
&
AnalysisConfig
::
use_gpu
)
.
def
(
"use_gpu"
,
&
AnalysisConfig
::
use_gpu
)
.
def
(
"gpu_device_id"
,
&
AnalysisConfig
::
gpu_device_id
)
.
def
(
"gpu_device_id"
,
&
AnalysisConfig
::
gpu_device_id
)
...
@@ -427,8 +429,8 @@ void BindAnalysisConfig(py::module *m) {
...
@@ -427,8 +429,8 @@ void BindAnalysisConfig(py::module *m) {
.
def
(
"tensorrt_oss_enabled"
,
&
AnalysisConfig
::
tensorrt_oss_enabled
)
.
def
(
"tensorrt_oss_enabled"
,
&
AnalysisConfig
::
tensorrt_oss_enabled
)
.
def
(
"tensorrt_engine_enabled"
,
&
AnalysisConfig
::
tensorrt_engine_enabled
)
.
def
(
"tensorrt_engine_enabled"
,
&
AnalysisConfig
::
tensorrt_engine_enabled
)
.
def
(
"enable_lite_engine"
,
&
AnalysisConfig
::
EnableLiteEngine
,
.
def
(
"enable_lite_engine"
,
&
AnalysisConfig
::
EnableLiteEngine
,
py
::
arg
(
"zero_copy"
)
=
false
,
py
::
arg
(
"precision_mode"
)
=
AnalysisConfig
::
Precision
::
kFloat32
,
py
::
arg
(
"precision_mode"
)
=
AnalysisConfig
::
Precision
::
kFloat32
,
py
::
arg
(
"zero_copy"
)
=
false
,
py
::
arg
(
"passes_filter"
)
=
std
::
vector
<
std
::
string
>
(),
py
::
arg
(
"passes_filter"
)
=
std
::
vector
<
std
::
string
>
(),
py
::
arg
(
"ops_filter"
)
=
std
::
vector
<
std
::
string
>
())
py
::
arg
(
"ops_filter"
)
=
std
::
vector
<
std
::
string
>
())
.
def
(
"lite_engine_enabled"
,
&
AnalysisConfig
::
lite_engine_enabled
)
.
def
(
"lite_engine_enabled"
,
&
AnalysisConfig
::
lite_engine_enabled
)
...
...
python/setup.py.in
浏览文件 @
0a42986c
...
@@ -261,6 +261,10 @@ else:
...
@@ -261,6 +261,10 @@ else:
if '${WITH_LITE}' == 'ON':
if '${WITH_LITE}' == 'ON':
shutil.copy('${LITE_SHARED_LIB}', libs_path)
shutil.copy('${LITE_SHARED_LIB}', libs_path)
package_data['paddle.libs']+=['libpaddle_full_api_shared' + ext_name]
package_data['paddle.libs']+=['libpaddle_full_api_shared' + ext_name]
if '${XPU_SDK_ROOT}':
shutil.copy('${XPUAPI_LIB}', libs_path)
shutil.copy('${XPURT_LIB}', libs_path)
package_data['paddle.libs'] += ['libxpuapi.so', 'libxpurt.so']
if '${WITH_PSLIB}' == 'ON':
if '${WITH_PSLIB}' == 'ON':
shutil.copy('${PSLIB_LIB}', libs_path)
shutil.copy('${PSLIB_LIB}', libs_path)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录