Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
d199edd8
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
d199edd8
编写于
2月 04, 2021
作者:
石
石晓伟
提交者:
GitHub
2月 04, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support xpu with analysis predictor, test=develop (#30832) (#30863)
上级
d1ae7b98
变更
19
显示空白变更内容
内联
并排
Showing
19 changed file
with
379 addition
and
106 deletion
+379
-106
cmake/external/xpu.cmake
cmake/external/xpu.cmake
+52
-37
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+34
-15
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+36
-3
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+0
-1
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+20
-1
paddle/fluid/inference/api/api_impl_tester.cc
paddle/fluid/inference/api/api_impl_tester.cc
+33
-21
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+15
-2
paddle/fluid/inference/api/paddle_api.h
paddle/fluid/inference/api/paddle_api.h
+2
-1
paddle/fluid/inference/api/paddle_pass_builder.h
paddle/fluid/inference/api/paddle_pass_builder.h
+13
-0
paddle/fluid/inference/capi/paddle_c_api.h
paddle/fluid/inference/capi/paddle_c_api.h
+7
-0
paddle/fluid/inference/capi/pd_config.cc
paddle/fluid/inference/capi/pd_config.cc
+24
-0
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+4
-1
paddle/fluid/inference/tests/api/analyzer_capi_xpu_tester.cc
paddle/fluid/inference/tests/api/analyzer_capi_xpu_tester.cc
+61
-0
paddle/fluid/inference/tests/api/lite_mul_model_test.cc
paddle/fluid/inference/tests/api/lite_mul_model_test.cc
+18
-0
paddle/fluid/inference/tests/test_helper.h
paddle/fluid/inference/tests/test_helper.h
+12
-0
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
+2
-2
paddle/fluid/platform/CMakeLists.txt
paddle/fluid/platform/CMakeLists.txt
+1
-1
paddle/fluid/pybind/inference_api.cc
paddle/fluid/pybind/inference_api.cc
+5
-1
python/paddle/fluid/tests/book/test_word2vec.py
python/paddle/fluid/tests/book/test_word2vec.py
+40
-20
未找到文件。
cmake/external/xpu.cmake
浏览文件 @
d199edd8
...
@@ -5,36 +5,35 @@ endif()
...
@@ -5,36 +5,35 @@ endif()
INCLUDE
(
ExternalProject
)
INCLUDE
(
ExternalProject
)
SET
(
XPU_PROJECT
"extern_xpu"
)
SET
(
XPU_PROJECT
"extern_xpu"
)
if
(
WITH_AARCH64
)
if
(
NOT XPU_SDK_ROOT
)
if
(
WITH_AARCH64
)
SET
(
XPU_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/aarch64/xpu_2021_01_13.tar.gz"
CACHE STRING
""
FORCE
)
SET
(
XPU_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/aarch64/xpu_2021_01_13.tar.gz"
CACHE STRING
""
FORCE
)
elseif
(
WITH_SUNWAY
)
elseif
(
WITH_SUNWAY
)
SET
(
XPU_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/sunway/xpu_2021_01_13.tar.gz"
CACHE STRING
""
FORCE
)
SET
(
XPU_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/sunway/xpu_2021_01_13.tar.gz"
CACHE STRING
""
FORCE
)
else
()
else
()
SET
(
XPU_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_01_13.tar.gz"
CACHE STRING
""
FORCE
)
SET
(
XPU_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_01_13.tar.gz"
CACHE STRING
""
FORCE
)
endif
()
endif
()
SET
(
XPU_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/xpu"
)
SET
(
XPU_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/xpu"
)
SET
(
XPU_DOWNLOAD_DIR
"
${
XPU_SOURCE_DIR
}
/src/
${
XPU_PROJECT
}
"
)
SET
(
XPU_DOWNLOAD_DIR
"
${
XPU_SOURCE_DIR
}
/src/
${
XPU_PROJECT
}
"
)
SET
(
XPU_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/install/xpu"
)
SET
(
XPU_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/install/xpu"
)
SET
(
XPU_API_INC_DIR
"
${
THIRD_PARTY_PATH
}
/install/xpu/include"
)
SET
(
XPU_API_INC_DIR
"
${
THIRD_PARTY_PATH
}
/install/xpu/include"
)
SET
(
XPU_LIB_DIR
"
${
THIRD_PARTY_PATH
}
/install/xpu/lib"
)
SET
(
XPU_LIB_DIR
"
${
THIRD_PARTY_PATH
}
/install/xpu/lib"
)
SET
(
XPU_API_LIB_NAME
"libxpuapi.so"
)
SET
(
XPU_API_LIB_NAME
"libxpuapi.so"
)
SET
(
XPU_RT_LIB_NAME
"libxpurt.so"
)
SET
(
XPU_RT_LIB_NAME
"libxpurt.so"
)
SET
(
XPU_API_LIB
"
${
XPU_LIB_DIR
}
/
${
XPU_API_LIB_NAME
}
"
)
SET
(
XPU_API_LIB
"
${
XPU_LIB_DIR
}
/
${
XPU_API_LIB_NAME
}
"
)
SET
(
XPU_RT_LIB
"
${
XPU_LIB_DIR
}
/
${
XPU_RT_LIB_NAME
}
"
)
SET
(
XPU_RT_LIB
"
${
XPU_LIB_DIR
}
/
${
XPU_RT_LIB_NAME
}
"
)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
XPU_INSTALL_DIR
}
/lib"
)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
XPU_INSTALL_DIR
}
/lib"
)
INCLUDE_DIRECTORIES
(
${
XPU_API_INC_DIR
}
)
FILE
(
WRITE
${
XPU_DOWNLOAD_DIR
}
/CMakeLists.txt
FILE
(
WRITE
${
XPU_DOWNLOAD_DIR
}
/CMakeLists.txt
"PROJECT(XPU)
\n
"
"PROJECT(XPU)
\n
"
"cmake_minimum_required(VERSION 3.0)
\n
"
"cmake_minimum_required(VERSION 3.0)
\n
"
"install(DIRECTORY xpu/include xpu/lib
\n
"
"install(DIRECTORY xpu/include xpu/lib
\n
"
" DESTINATION
${
XPU_INSTALL_DIR
}
)
\n
"
)
" DESTINATION
${
XPU_INSTALL_DIR
}
)
\n
"
)
ExternalProject_Add
(
ExternalProject_Add
(
${
XPU_PROJECT
}
${
XPU_PROJECT
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
PREFIX
${
XPU_SOURCE_DIR
}
PREFIX
${
XPU_SOURCE_DIR
}
...
@@ -45,8 +44,14 @@ ExternalProject_Add(
...
@@ -45,8 +44,14 @@ ExternalProject_Add(
UPDATE_COMMAND
""
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
XPU_INSTALL_ROOT
}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
XPU_INSTALL_ROOT
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
XPU_INSTALL_ROOT
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
XPU_INSTALL_ROOT
}
)
)
else
()
SET
(
XPU_API_INC_DIR
"
${
XPU_SDK_ROOT
}
/XTDK/include/"
)
SET
(
XPU_API_LIB
"
${
XPU_SDK_ROOT
}
/XTDK/shlib/libxpuapi.so"
)
SET
(
XPU_RT_LIB
"
${
XPU_SDK_ROOT
}
/XTDK/runtime/shlib/libxpurt.so"
)
endif
()
INCLUDE_DIRECTORIES
(
${
XPU_API_INC_DIR
}
)
ADD_LIBRARY
(
shared_xpuapi SHARED IMPORTED GLOBAL
)
ADD_LIBRARY
(
shared_xpuapi SHARED IMPORTED GLOBAL
)
set_property
(
TARGET shared_xpuapi PROPERTY IMPORTED_LOCATION
"
${
XPU_API_LIB
}
"
)
set_property
(
TARGET shared_xpuapi PROPERTY IMPORTED_LOCATION
"
${
XPU_API_LIB
}
"
)
...
@@ -69,4 +74,14 @@ else(WITH_XPU_BKCL)
...
@@ -69,4 +74,14 @@ else(WITH_XPU_BKCL)
TARGET_LINK_LIBRARIES
(
xpulib
${
XPU_API_LIB
}
${
XPU_RT_LIB
}
)
TARGET_LINK_LIBRARIES
(
xpulib
${
XPU_API_LIB
}
${
XPU_RT_LIB
}
)
endif
(
WITH_XPU_BKCL
)
endif
(
WITH_XPU_BKCL
)
ADD_DEPENDENCIES
(
xpulib
${
XPU_PROJECT
}
)
if
(
NOT XPU_SDK_ROOT
)
ADD_DEPENDENCIES
(
xpulib
${
XPU_PROJECT
}
)
else
()
ADD_CUSTOM_TARGET
(
extern_xpu DEPENDS xpulib
)
endif
()
# Ensure that xpu/api.h can be included without dependency errors.
file
(
GENERATE OUTPUT
${
CMAKE_CURRENT_BINARY_DIR
}
/.xpu_headers_dummy.cc CONTENT
""
)
add_library
(
xpu_headers_dummy STATIC
${
CMAKE_CURRENT_BINARY_DIR
}
/.xpu_headers_dummy.cc
)
add_dependencies
(
xpu_headers_dummy extern_xpu
)
link_libraries
(
xpu_headers_dummy
)
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
d199edd8
...
@@ -33,6 +33,8 @@ PassStrategy *AnalysisConfig::pass_builder() const {
...
@@ -33,6 +33,8 @@ PassStrategy *AnalysisConfig::pass_builder() const {
if
(
use_gpu_
)
{
if
(
use_gpu_
)
{
LOG
(
INFO
)
<<
"Create GPU IR passes"
;
LOG
(
INFO
)
<<
"Create GPU IR passes"
;
pass_builder_
.
reset
(
new
GpuPassStrategy
);
pass_builder_
.
reset
(
new
GpuPassStrategy
);
}
else
if
(
use_xpu_
)
{
pass_builder_
.
reset
(
new
XpuPassStrategy
);
}
else
{
}
else
{
LOG
(
INFO
)
<<
"Create CPU IR passes"
;
LOG
(
INFO
)
<<
"Create CPU IR passes"
;
pass_builder_
.
reset
(
new
CpuPassStrategy
);
pass_builder_
.
reset
(
new
CpuPassStrategy
);
...
@@ -73,7 +75,7 @@ void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
...
@@ -73,7 +75,7 @@ void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
use_gpu_
=
true
;
use_gpu_
=
true
;
memory_pool_init_size_mb_
=
memory_pool_init_size_mb
;
memory_pool_init_size_mb_
=
memory_pool_init_size_mb
;
FLAGS_initial_gpu_memory_in_mb
=
memory_pool_init_size_mb_
;
FLAGS_initial_gpu_memory_in_mb
=
memory_pool_init_size_mb_
;
device_id_
=
device_id
;
gpu_
device_id_
=
device_id
;
#else
#else
LOG
(
ERROR
)
<<
"Please compile with gpu to EnableGpu()"
;
LOG
(
ERROR
)
<<
"Please compile with gpu to EnableGpu()"
;
use_gpu_
=
false
;
use_gpu_
=
false
;
...
@@ -115,7 +117,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
...
@@ -115,7 +117,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// GPU related.
// GPU related.
CP_MEMBER
(
use_gpu_
);
CP_MEMBER
(
use_gpu_
);
CP_MEMBER
(
use_cudnn_
);
CP_MEMBER
(
use_cudnn_
);
CP_MEMBER
(
device_id_
);
CP_MEMBER
(
gpu_device_id_
);
CP_MEMBER
(
xpu_device_id_
);
CP_MEMBER
(
memory_pool_init_size_mb_
);
CP_MEMBER
(
memory_pool_init_size_mb_
);
CP_MEMBER
(
enable_memory_optim_
);
CP_MEMBER
(
enable_memory_optim_
);
...
@@ -174,8 +177,14 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
...
@@ -174,8 +177,14 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER
(
thread_local_stream_
);
CP_MEMBER
(
thread_local_stream_
);
if
(
use_gpu_
)
{
if
(
use_gpu_
)
{
PADDLE_ENFORCE_EQ
(
use_xpu_
,
false
,
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between CPU and XPU."
));
pass_builder_
.
reset
(
new
GpuPassStrategy
(
pass_builder_
.
reset
(
new
GpuPassStrategy
(
*
static_cast
<
GpuPassStrategy
*>
(
other
.
pass_builder
())));
*
static_cast
<
GpuPassStrategy
*>
(
other
.
pass_builder
())));
}
else
if
(
use_xpu_
)
{
pass_builder_
.
reset
(
new
XpuPassStrategy
(
*
static_cast
<
XpuPassStrategy
*>
(
other
.
pass_builder
())));
}
else
{
}
else
{
pass_builder_
.
reset
(
new
CpuPassStrategy
(
pass_builder_
.
reset
(
new
CpuPassStrategy
(
*
static_cast
<
CpuPassStrategy
*>
(
other
.
pass_builder
())));
*
static_cast
<
CpuPassStrategy
*>
(
other
.
pass_builder
())));
...
@@ -333,6 +342,12 @@ void AnalysisConfig::Update() {
...
@@ -333,6 +342,12 @@ void AnalysisConfig::Update() {
// Append after the Affine_channel_conv_fuse pass.
// Append after the Affine_channel_conv_fuse pass.
pass_builder
()
->
InsertPass
(
3
,
"tensorrt_subgraph_pass"
);
pass_builder
()
->
InsertPass
(
3
,
"tensorrt_subgraph_pass"
);
}
}
}
else
if
(
use_xpu
())
{
PADDLE_ENFORCE_EQ
(
use_gpu
(),
false
,
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between CPU and XPU."
));
pass_builder_
.
reset
(
new
XpuPassStrategy
);
}
else
{
}
else
{
pass_builder_
.
reset
(
new
CpuPassStrategy
);
pass_builder_
.
reset
(
new
CpuPassStrategy
);
}
}
...
@@ -341,7 +356,13 @@ void AnalysisConfig::Update() {
...
@@ -341,7 +356,13 @@ void AnalysisConfig::Update() {
if
(
use_gpu
())
{
if
(
use_gpu
())
{
pass_builder_
.
reset
(
new
GpuPassStrategy
(
pass_builder_
.
reset
(
new
GpuPassStrategy
(
*
static_cast
<
GpuPassStrategy
*>
(
pass_builder_
.
get
())));
*
static_cast
<
GpuPassStrategy
*>
(
pass_builder_
.
get
())));
}
else
if
(
use_xpu
())
{
PADDLE_ENFORCE_EQ
(
use_gpu
(),
false
,
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between CPU and XPU."
));
pass_builder_
.
reset
(
new
XpuPassStrategy
(
*
static_cast
<
XpuPassStrategy
*>
(
pass_builder_
.
get
())));
}
else
{
}
else
{
pass_builder_
.
reset
(
new
CpuPassStrategy
(
pass_builder_
.
reset
(
new
CpuPassStrategy
(
*
static_cast
<
CpuPassStrategy
*>
(
pass_builder_
.
get
())));
*
static_cast
<
CpuPassStrategy
*>
(
pass_builder_
.
get
())));
...
@@ -420,19 +441,16 @@ void AnalysisConfig::Update() {
...
@@ -420,19 +441,16 @@ void AnalysisConfig::Update() {
}
}
if
(
use_xpu_
)
{
if
(
use_xpu_
)
{
#ifndef LITE_SUBGRAPH_WITH_XPU
#if (defined LITE_SUBGRAPH_WITH_XPU) || (defined PADDLE_WITH_XPU)
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"You tried to use an XPU device, but Paddle was not compiled "
"with XPU-runtime."
));
#endif
if
(
!
use_lite_
)
{
LOG
(
WARNING
)
<<
"Because XPU currently only works in Paddle-Lite "
"subgraph mode, please make sure you have enabled it."
;
}
PADDLE_ENFORCE_EQ
(
use_gpu_
,
false
,
PADDLE_ENFORCE_EQ
(
use_gpu_
,
false
,
platform
::
errors
::
Unavailable
(
platform
::
errors
::
Unavailable
(
"Currently, XPU and GPU cannot be enabled in the "
"Currently, XPU and GPU cannot be enabled in the "
"same analysis configuration."
));
"same analysis configuration."
));
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"You tried to use an XPU device, but Paddle was not compiled "
"with XPU-runtime."
));
#endif
}
}
if
(
ir_debug_
)
{
if
(
ir_debug_
)
{
...
@@ -448,7 +466,8 @@ std::string AnalysisConfig::SerializeInfoCache() {
...
@@ -448,7 +466,8 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss
<<
use_gpu_
;
ss
<<
use_gpu_
;
ss
<<
use_fc_padding_
;
ss
<<
use_fc_padding_
;
ss
<<
device_id_
;
ss
<<
gpu_device_id_
;
ss
<<
xpu_device_id_
;
ss
<<
memory_pool_init_size_mb_
;
ss
<<
memory_pool_init_size_mb_
;
ss
<<
use_tensorrt_
;
ss
<<
use_tensorrt_
;
...
@@ -507,7 +526,7 @@ float AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
...
@@ -507,7 +526,7 @@ float AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
// Get the GPU memory details and calculate the fraction of memory for the
// Get the GPU memory details and calculate the fraction of memory for the
// GPU memory pool.
// GPU memory pool.
size_t
gpu_total
,
gpu_available
;
size_t
gpu_total
,
gpu_available
;
platform
::
SetDeviceId
(
device_id_
);
platform
::
SetDeviceId
(
gpu_
device_id_
);
platform
::
GpuMemoryUsage
(
&
gpu_available
,
&
gpu_total
);
platform
::
GpuMemoryUsage
(
&
gpu_available
,
&
gpu_total
);
double
total_gpu_memory
=
gpu_total
/
1024.
/
1024.
;
double
total_gpu_memory
=
gpu_total
/
1024.
/
1024.
;
float
fraction_of_gpu_memory
=
float
fraction_of_gpu_memory
=
...
@@ -548,7 +567,7 @@ NativeConfig AnalysisConfig::ToNativeConfig() const {
...
@@ -548,7 +567,7 @@ NativeConfig AnalysisConfig::ToNativeConfig() const {
config
.
prog_file
=
prog_file_
;
config
.
prog_file
=
prog_file_
;
config
.
param_file
=
params_file_
;
config
.
param_file
=
params_file_
;
config
.
use_gpu
=
use_gpu_
;
config
.
use_gpu
=
use_gpu_
;
config
.
device
=
device_id_
;
config
.
device
=
gpu_
device_id_
;
config
.
fraction_of_gpu_memory
=
fraction_of_gpu_memory_for_pool
();
config
.
fraction_of_gpu_memory
=
fraction_of_gpu_memory_for_pool
();
config
.
specify_input_name
=
specify_input_name_
;
config
.
specify_input_name
=
specify_input_name_
;
return
config
;
return
config
;
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
d199edd8
...
@@ -103,7 +103,10 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt, framework::LoDTensor *t,
...
@@ -103,7 +103,10 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt, framework::LoDTensor *t,
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std
::
memcpy
(
static_cast
<
void
*>
(
input_ptr
),
pt
.
data
.
data
(),
std
::
memcpy
(
static_cast
<
void
*>
(
input_ptr
),
pt
.
data
.
data
(),
pt
.
data
.
length
());
pt
.
data
.
length
());
}
else
{
}
else
if
(
platform
::
is_gpu_place
(
place
))
{
PADDLE_ENFORCE_EQ
(
platform
::
is_xpu_place
(
place
),
false
,
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between CPU and XPU."
));
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
auto
*
dev_ctx
=
...
@@ -116,6 +119,18 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt, framework::LoDTensor *t,
...
@@ -116,6 +119,18 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt, framework::LoDTensor *t,
PADDLE_THROW
(
paddle
::
platform
::
errors
::
Fatal
(
PADDLE_THROW
(
paddle
::
platform
::
errors
::
Fatal
(
"Not compile with CUDA, should not reach here."
));
"Not compile with CUDA, should not reach here."
));
#endif
#endif
}
else
if
(
platform
::
is_xpu_place
(
place
))
{
#ifdef PADDLE_WITH_XPU
auto
dst_xpu_place
=
BOOST_GET_CONST
(
platform
::
XPUPlace
,
place
);
memory
::
Copy
(
dst_xpu_place
,
static_cast
<
void
*>
(
input_ptr
),
platform
::
CPUPlace
(),
pt
.
data
.
data
(),
pt
.
data
.
length
());
#else
PADDLE_THROW
(
paddle
::
platform
::
errors
::
Fatal
(
"Not compile with XPU, should not reach here."
));
#endif
}
else
{
PADDLE_THROW
(
paddle
::
platform
::
errors
::
InvalidArgument
(
"The analysis predictor supports CPU, GPU and XPU now."
));
}
}
// TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
// TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
framework
::
LoD
lod
;
framework
::
LoD
lod
;
...
@@ -182,6 +197,12 @@ bool AnalysisPredictor::PrepareScope(
...
@@ -182,6 +197,12 @@ bool AnalysisPredictor::PrepareScope(
++
dev_id
)
{
++
dev_id
)
{
memory
::
Release
(
platform
::
CUDAPlace
(
dev_id
));
memory
::
Release
(
platform
::
CUDAPlace
(
dev_id
));
}
}
#endif
#ifdef PADDLE_WITH_XPU
for
(
int
dev_id
=
0
;
dev_id
<
paddle
::
platform
::
GetXPUDeviceCount
();
++
dev_id
)
{
memory
::
Release
(
platform
::
XPUPlace
(
dev_id
));
}
#endif
#endif
memory
::
Release
(
platform
::
CPUPlace
());
memory
::
Release
(
platform
::
CPUPlace
());
});
});
...
@@ -219,7 +240,9 @@ bool AnalysisPredictor::PrepareProgram(
...
@@ -219,7 +240,9 @@ bool AnalysisPredictor::PrepareProgram(
}
}
bool
AnalysisPredictor
::
CreateExecutor
()
{
bool
AnalysisPredictor
::
CreateExecutor
()
{
if
(
config_
.
use_gpu
())
{
if
(
config_
.
use_gpu
())
{
status_use_gpu_
=
true
;
PADDLE_ENFORCE_EQ
(
config_
.
use_xpu
(),
false
,
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between CPU and XPU."
));
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
gpu_device_id
());
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
gpu_device_id
());
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
if
(
config_
.
thread_local_stream_enabled
())
{
if
(
config_
.
thread_local_stream_enabled
())
{
...
@@ -230,6 +253,8 @@ bool AnalysisPredictor::CreateExecutor() {
...
@@ -230,6 +253,8 @@ bool AnalysisPredictor::CreateExecutor() {
ctx
->
ResetThreadContext
(
platform
::
stream
::
Priority
::
kNormal
);
ctx
->
ResetThreadContext
(
platform
::
stream
::
Priority
::
kNormal
);
}
}
#endif
#endif
}
else
if
(
config_
.
use_xpu
())
{
place_
=
paddle
::
platform
::
XPUPlace
(
config_
.
xpu_device_id
());
}
else
{
}
else
{
place_
=
paddle
::
platform
::
CPUPlace
();
place_
=
paddle
::
platform
::
CPUPlace
();
}
}
...
@@ -734,11 +759,16 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
...
@@ -734,11 +759,16 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
res
->
SetName
(
name
);
res
->
SetName
(
name
);
if
(
platform
::
is_cpu_place
(
place_
))
{
if
(
platform
::
is_cpu_place
(
place_
))
{
res
->
SetPlace
(
PaddlePlace
::
kCPU
);
res
->
SetPlace
(
PaddlePlace
::
kCPU
);
}
else
if
(
platform
::
is_xpu_place
(
place_
))
{
PADDLE_ENFORCE_EQ
(
config_
.
use_gpu
(),
false
,
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between CPU and XPU."
));
auto
xpu_place
=
BOOST_GET_CONST
(
platform
::
XPUPlace
,
place_
);
res
->
SetPlace
(
PaddlePlace
::
kXPU
,
xpu_place
.
GetDeviceId
());
}
else
{
}
else
{
auto
gpu_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place_
);
auto
gpu_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place_
);
res
->
SetPlace
(
PaddlePlace
::
kGPU
,
gpu_place
.
GetDeviceId
());
res
->
SetPlace
(
PaddlePlace
::
kGPU
,
gpu_place
.
GetDeviceId
());
}
}
return
res
;
return
res
;
}
}
...
@@ -755,6 +785,9 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
...
@@ -755,6 +785,9 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
res
->
SetName
(
name
);
res
->
SetName
(
name
);
if
(
platform
::
is_cpu_place
(
place_
))
{
if
(
platform
::
is_cpu_place
(
place_
))
{
res
->
SetPlace
(
PaddlePlace
::
kCPU
);
res
->
SetPlace
(
PaddlePlace
::
kCPU
);
}
else
if
(
platform
::
is_xpu_place
(
place_
))
{
auto
xpu_place
=
BOOST_GET_CONST
(
platform
::
XPUPlace
,
place_
);
res
->
SetPlace
(
PaddlePlace
::
kXPU
,
xpu_place
.
GetDeviceId
());
}
else
{
}
else
{
auto
gpu_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place_
);
auto
gpu_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place_
);
res
->
SetPlace
(
PaddlePlace
::
kGPU
,
gpu_place
.
GetDeviceId
());
res
->
SetPlace
(
PaddlePlace
::
kGPU
,
gpu_place
.
GetDeviceId
());
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
d199edd8
...
@@ -415,7 +415,6 @@ class AnalysisPredictor : public PaddlePredictor {
...
@@ -415,7 +415,6 @@ class AnalysisPredictor : public PaddlePredictor {
private:
private:
// Some status here that help to determine the status inside the predictor.
// Some status here that help to determine the status inside the predictor.
bool
status_is_cloned_
{
false
};
bool
status_is_cloned_
{
false
};
bool
status_use_gpu_
{
false
};
};
};
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
d199edd8
...
@@ -80,7 +80,12 @@ bool NativePaddlePredictor::Init(
...
@@ -80,7 +80,12 @@ bool NativePaddlePredictor::Init(
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
if
(
config_
.
use_gpu
)
{
if
(
config_
.
use_gpu
)
{
PADDLE_ENFORCE_EQ
(
config_
.
use_xpu
,
false
,
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between CPU and XPU."
));
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
}
else
if
(
config_
.
use_xpu
)
{
place_
=
paddle
::
platform
::
XPUPlace
(
config_
.
device
);
}
else
{
}
else
{
place_
=
paddle
::
platform
::
CPUPlace
();
place_
=
paddle
::
platform
::
CPUPlace
();
}
}
...
@@ -240,7 +245,11 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
...
@@ -240,7 +245,11 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std
::
memcpy
(
static_cast
<
void
*>
(
input_ptr
),
inputs
[
i
].
data
.
data
(),
std
::
memcpy
(
static_cast
<
void
*>
(
input_ptr
),
inputs
[
i
].
data
.
data
(),
inputs
[
i
].
data
.
length
());
inputs
[
i
].
data
.
length
());
}
else
{
}
else
if
(
platform
::
is_gpu_place
(
place_
))
{
PADDLE_ENFORCE_EQ
(
platform
::
is_xpu_place
(
place_
),
false
,
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between CPU and XPU."
));
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
DeviceContextPool
::
Instance
();
...
@@ -253,6 +262,16 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
...
@@ -253,6 +262,16 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
#else
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Not compile with CUDA, should not reach here."
));
"Not compile with CUDA, should not reach here."
));
#endif
}
else
{
#ifdef PADDLE_WITH_XPU
auto
dst_xpu_place
=
BOOST_GET_CONST
(
platform
::
XPUPlace
,
place_
);
memory
::
Copy
(
dst_xpu_place
,
static_cast
<
void
*>
(
input_ptr
),
platform
::
CPUPlace
(),
inputs
[
i
].
data
.
data
(),
inputs
[
i
].
data
.
length
());
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Not compile with XPU, should not reach here."
));
#endif
#endif
}
}
...
...
paddle/fluid/inference/api/api_impl_tester.cc
浏览文件 @
d199edd8
...
@@ -58,19 +58,15 @@ NativeConfig GetConfig() {
...
@@ -58,19 +58,15 @@ NativeConfig GetConfig() {
config
.
model_dir
=
FLAGS_word2vec_dirname
;
config
.
model_dir
=
FLAGS_word2vec_dirname
;
LOG
(
INFO
)
<<
"dirname "
<<
config
.
model_dir
;
LOG
(
INFO
)
<<
"dirname "
<<
config
.
model_dir
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
fraction_of_gpu_memory
=
0.15
;
#ifdef PADDLE_WITH_CUDA
config
.
use_gpu
=
true
;
#else
config
.
use_gpu
=
false
;
#endif
config
.
device
=
0
;
config
.
device
=
0
;
return
config
;
return
config
;
}
}
void
MainWord2Vec
(
bool
use_gpu
)
{
void
MainWord2Vec
(
const
paddle
::
PaddlePlace
&
place
)
{
NativeConfig
config
=
GetConfig
();
NativeConfig
config
=
GetConfig
();
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
config
.
use_gpu
=
use_gpu
;
config
.
use_gpu
=
paddle
::
gpu_place_used
(
place
);
config
.
use_xpu
=
paddle
::
xpu_place_used
(
place
);
framework
::
LoDTensor
first_word
,
second_word
,
third_word
,
fourth_word
;
framework
::
LoDTensor
first_word
,
second_word
,
third_word
,
fourth_word
;
framework
::
LoD
lod
{{
0
,
1
}};
framework
::
LoD
lod
{{
0
,
1
}};
...
@@ -117,11 +113,12 @@ void MainWord2Vec(bool use_gpu) {
...
@@ -117,11 +113,12 @@ void MainWord2Vec(bool use_gpu) {
}
}
}
}
void
MainImageClassification
(
bool
use_gpu
)
{
void
MainImageClassification
(
const
paddle
::
PaddlePlace
&
place
)
{
int
batch_size
=
2
;
int
batch_size
=
2
;
bool
repeat
=
false
;
bool
repeat
=
false
;
NativeConfig
config
=
GetConfig
();
NativeConfig
config
=
GetConfig
();
config
.
use_gpu
=
use_gpu
;
config
.
use_gpu
=
paddle
::
gpu_place_used
(
place
);
config
.
use_xpu
=
paddle
::
xpu_place_used
(
place
);
config
.
model_dir
=
config
.
model_dir
=
FLAGS_book_dirname
+
"/image_classification_resnet.inference.model"
;
FLAGS_book_dirname
+
"/image_classification_resnet.inference.model"
;
...
@@ -162,9 +159,10 @@ void MainImageClassification(bool use_gpu) {
...
@@ -162,9 +159,10 @@ void MainImageClassification(bool use_gpu) {
}
}
}
}
void
MainThreadsWord2Vec
(
bool
use_gpu
)
{
void
MainThreadsWord2Vec
(
const
paddle
::
PaddlePlace
&
place
)
{
NativeConfig
config
=
GetConfig
();
NativeConfig
config
=
GetConfig
();
config
.
use_gpu
=
use_gpu
;
config
.
use_gpu
=
paddle
::
gpu_place_used
(
place
);
config
.
use_xpu
=
paddle
::
xpu_place_used
(
place
);
auto
main_predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
auto
main_predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
// prepare inputs data and reference results
// prepare inputs data and reference results
...
@@ -223,11 +221,12 @@ void MainThreadsWord2Vec(bool use_gpu) {
...
@@ -223,11 +221,12 @@ void MainThreadsWord2Vec(bool use_gpu) {
}
}
}
}
void
MainThreadsImageClassification
(
bool
use_gpu
)
{
void
MainThreadsImageClassification
(
const
paddle
::
PaddlePlace
&
place
)
{
constexpr
int
num_jobs
=
4
;
// each job run 1 batch
constexpr
int
num_jobs
=
4
;
// each job run 1 batch
constexpr
int
batch_size
=
1
;
constexpr
int
batch_size
=
1
;
NativeConfig
config
=
GetConfig
();
NativeConfig
config
=
GetConfig
();
config
.
use_gpu
=
use_gpu
;
config
.
use_gpu
=
paddle
::
gpu_place_used
(
place
);
config
.
use_xpu
=
paddle
::
xpu_place_used
(
place
);
config
.
model_dir
=
config
.
model_dir
=
FLAGS_book_dirname
+
"/image_classification_resnet.inference.model"
;
FLAGS_book_dirname
+
"/image_classification_resnet.inference.model"
;
...
@@ -276,29 +275,42 @@ void MainThreadsImageClassification(bool use_gpu) {
...
@@ -276,29 +275,42 @@ void MainThreadsImageClassification(bool use_gpu) {
}
}
}
}
TEST
(
inference_api_native
,
word2vec_cpu
)
{
MainWord2Vec
(
false
/*use_gpu*/
);
}
TEST
(
inference_api_native
,
word2vec_cpu
)
{
MainWord2Vec
(
paddle
::
PaddlePlace
::
kCPU
);
}
TEST
(
inference_api_native
,
word2vec_cpu_threads
)
{
TEST
(
inference_api_native
,
word2vec_cpu_threads
)
{
MainThreadsWord2Vec
(
false
/*use_gpu*/
);
MainThreadsWord2Vec
(
paddle
::
PaddlePlace
::
kCPU
);
}
}
TEST
(
inference_api_native
,
image_classification_cpu
)
{
TEST
(
inference_api_native
,
image_classification_cpu
)
{
MainImageClassification
(
false
/*use_gpu*/
);
MainImageClassification
(
paddle
::
PaddlePlace
::
kCPU
);
}
}
TEST
(
inference_api_native
,
image_classification_cpu_threads
)
{
TEST
(
inference_api_native
,
image_classification_cpu_threads
)
{
MainThreadsImageClassification
(
false
/*use_gpu*/
);
MainThreadsImageClassification
(
paddle
::
PaddlePlace
::
kCPU
);
}
}
#ifdef PADDLE_WITH_XPU
TEST
(
inference_api_native
,
word2vec_xpu
)
{
MainWord2Vec
(
paddle
::
PaddlePlace
::
kXPU
);
}
TEST
(
inference_api_native
,
image_classification_xpu
)
{
MainImageClassification
(
paddle
::
PaddlePlace
::
kXPU
);
}
#endif
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
TEST
(
inference_api_native
,
word2vec_gpu
)
{
MainWord2Vec
(
true
/*use_gpu*/
);
}
TEST
(
inference_api_native
,
word2vec_gpu
)
{
MainWord2Vec
(
paddle
::
PaddlePlace
::
kGPU
);
}
// Turn off temporarily for the unstable result.
// Turn off temporarily for the unstable result.
// TEST(inference_api_native, word2vec_gpu_threads) {
// TEST(inference_api_native, word2vec_gpu_threads) {
// MainThreadsWord2Vec(
true /*use_gpu*/
);
// MainThreadsWord2Vec(
paddle::PaddlePlace::kGPU
);
// }
// }
TEST
(
inference_api_native
,
image_classification_gpu
)
{
TEST
(
inference_api_native
,
image_classification_gpu
)
{
MainImageClassification
(
true
/*use_gpu*/
);
MainImageClassification
(
paddle
::
PaddlePlace
::
kGPU
);
}
}
// Turn off temporarily for the unstable result.
// Turn off temporarily for the unstable result.
// TEST(inference_api_native, image_classification_gpu_threads) {
// TEST(inference_api_native, image_classification_gpu_threads) {
// MainThreadsImageClassification(
true /*use_gpu*/
);
// MainThreadsImageClassification(
paddle::PaddlePlace::kGPU
);
// }
// }
#endif
#endif
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
d199edd8
...
@@ -185,11 +185,23 @@ struct PD_INFER_DECL AnalysisConfig {
...
@@ -185,11 +185,23 @@ struct PD_INFER_DECL AnalysisConfig {
///
///
bool
use_gpu
()
const
{
return
use_gpu_
;
}
bool
use_gpu
()
const
{
return
use_gpu_
;
}
///
///
/// \brief A boolean state telling whether the XPU is turned on.
///
/// \return bool Whether the XPU is turned on.
///
bool
use_xpu
()
const
{
return
use_xpu_
;
}
///
/// \brief Get the GPU device id.
///
/// \return int The GPU device id.
///
int
gpu_device_id
()
const
{
return
gpu_device_id_
;
}
///
/// \brief Get the GPU device id.
/// \brief Get the GPU device id.
///
///
/// \return int The GPU device id.
/// \return int The GPU device id.
///
///
int
gpu_device_id
()
const
{
return
device_id_
;
}
int
xpu_device_id
()
const
{
return
xpu_
device_id_
;
}
///
///
/// \brief Get the initial size in MB of the GPU memory pool.
/// \brief Get the initial size in MB of the GPU memory pool.
///
///
...
@@ -579,7 +591,8 @@ struct PD_INFER_DECL AnalysisConfig {
...
@@ -579,7 +591,8 @@ struct PD_INFER_DECL AnalysisConfig {
// GPU related.
// GPU related.
bool
use_gpu_
{
false
};
bool
use_gpu_
{
false
};
int
device_id_
{
0
};
int
gpu_device_id_
{
0
};
int
xpu_device_id_
{
0
};
uint64_t
memory_pool_init_size_mb_
{
100
};
// initial size is 100MB.
uint64_t
memory_pool_init_size_mb_
{
100
};
// initial size is 100MB.
bool
use_cudnn_
{
false
};
bool
use_cudnn_
{
false
};
...
...
paddle/fluid/inference/api/paddle_api.h
浏览文件 @
d199edd8
...
@@ -161,7 +161,7 @@ struct PD_INFER_DECL PaddleTensor {
...
@@ -161,7 +161,7 @@ struct PD_INFER_DECL PaddleTensor {
std
::
vector
<
std
::
vector
<
size_t
>>
lod
;
///< Tensor+LoD equals LoDTensor
std
::
vector
<
std
::
vector
<
size_t
>>
lod
;
///< Tensor+LoD equals LoDTensor
};
};
enum
class
PaddlePlace
{
kUNK
=
-
1
,
kCPU
,
kGPU
};
enum
class
PaddlePlace
{
kUNK
=
-
1
,
kCPU
,
kGPU
,
kXPU
};
/// \brief Represents an n-dimensional array of values.
/// \brief Represents an n-dimensional array of values.
/// The ZeroCopyTensor is used to store the input or output of the network.
/// The ZeroCopyTensor is used to store the input or output of the network.
...
@@ -360,6 +360,7 @@ class PD_INFER_DECL PaddlePredictor {
...
@@ -360,6 +360,7 @@ class PD_INFER_DECL PaddlePredictor {
struct
PD_INFER_DECL
NativeConfig
:
public
PaddlePredictor
::
Config
{
struct
PD_INFER_DECL
NativeConfig
:
public
PaddlePredictor
::
Config
{
NativeConfig
();
NativeConfig
();
/// GPU related fields.
/// GPU related fields.
bool
use_xpu
{
false
};
bool
use_gpu
{
false
};
bool
use_gpu
{
false
};
int
device
{
0
};
int
device
{
0
};
float
fraction_of_gpu_memory
{
float
fraction_of_gpu_memory
{
...
...
paddle/fluid/inference/api/paddle_pass_builder.h
浏览文件 @
d199edd8
...
@@ -140,11 +140,16 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
...
@@ -140,11 +140,16 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
/// \return A bool variable implying whether we are in gpu mode.
/// \return A bool variable implying whether we are in gpu mode.
bool
use_gpu
()
const
{
return
use_gpu_
;
}
bool
use_gpu
()
const
{
return
use_gpu_
;
}
/// \brief Check if we are using xpu.
/// \return A bool variable implying whether we are in xpu mode.
bool
use_xpu
()
const
{
return
use_xpu_
;
}
/// \brief Default destructor.
/// \brief Default destructor.
virtual
~
PassStrategy
()
=
default
;
virtual
~
PassStrategy
()
=
default
;
protected:
protected:
/// \cond Protected
/// \cond Protected
bool
use_xpu_
{
false
};
bool
use_gpu_
{
false
};
bool
use_gpu_
{
false
};
bool
use_mkldnn_
{
false
};
bool
use_mkldnn_
{
false
};
/// \endcond
/// \endcond
...
@@ -226,6 +231,14 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
...
@@ -226,6 +231,14 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
/// \endcond
/// \endcond
};
};
/// \class XpuPassStrategy
/// \brief The XPU passes controller, it is used in AnalysisPredictor with XPU
/// mode.
class
PD_INFER_DECL
XpuPassStrategy
final
:
public
PassStrategy
{
public:
XpuPassStrategy
()
:
PassStrategy
({})
{}
};
/// \brief List of tensorRT subgraph passes.
/// \brief List of tensorRT subgraph passes.
PD_INFER_DECL
extern
const
std
::
vector
<
std
::
string
>
kTRTSubgraphPasses
;
PD_INFER_DECL
extern
const
std
::
vector
<
std
::
string
>
kTRTSubgraphPasses
;
...
...
paddle/fluid/inference/capi/paddle_c_api.h
浏览文件 @
d199edd8
...
@@ -165,12 +165,19 @@ PADDLE_CAPI_EXPORT extern void PD_EnableUseGpu(PD_AnalysisConfig* config,
...
@@ -165,12 +165,19 @@ PADDLE_CAPI_EXPORT extern void PD_EnableUseGpu(PD_AnalysisConfig* config,
int
memory_pool_init_size_mb
,
int
memory_pool_init_size_mb
,
int
device_id
);
int
device_id
);
PADDLE_CAPI_EXPORT
extern
void
PD_EnableXpu
(
PD_AnalysisConfig
*
config
,
int
l3_workspace_size
);
PADDLE_CAPI_EXPORT
extern
void
PD_DisableGpu
(
PD_AnalysisConfig
*
config
);
PADDLE_CAPI_EXPORT
extern
void
PD_DisableGpu
(
PD_AnalysisConfig
*
config
);
PADDLE_CAPI_EXPORT
extern
bool
PD_UseGpu
(
const
PD_AnalysisConfig
*
config
);
PADDLE_CAPI_EXPORT
extern
bool
PD_UseGpu
(
const
PD_AnalysisConfig
*
config
);
PADDLE_CAPI_EXPORT
extern
bool
PD_UseXpu
(
const
PD_AnalysisConfig
*
config
);
PADDLE_CAPI_EXPORT
extern
int
PD_GpuDeviceId
(
const
PD_AnalysisConfig
*
config
);
PADDLE_CAPI_EXPORT
extern
int
PD_GpuDeviceId
(
const
PD_AnalysisConfig
*
config
);
PADDLE_CAPI_EXPORT
extern
int
PD_XpuDeviceId
(
const
PD_AnalysisConfig
*
config
);
PADDLE_CAPI_EXPORT
extern
int
PD_MemoryPoolInitSizeMb
(
PADDLE_CAPI_EXPORT
extern
int
PD_MemoryPoolInitSizeMb
(
const
PD_AnalysisConfig
*
config
);
const
PD_AnalysisConfig
*
config
);
...
...
paddle/fluid/inference/capi/pd_config.cc
浏览文件 @
d199edd8
...
@@ -111,6 +111,14 @@ void PD_EnableUseGpu(PD_AnalysisConfig* config, int memory_pool_init_size_mb,
...
@@ -111,6 +111,14 @@ void PD_EnableUseGpu(PD_AnalysisConfig* config, int memory_pool_init_size_mb,
device_id
);
device_id
);
}
}
void
PD_EnableXpu
(
PD_AnalysisConfig
*
config
,
int
l3_workspace_size
)
{
PADDLE_ENFORCE_NOT_NULL
(
config
,
paddle
::
platform
::
errors
::
InvalidArgument
(
"The pointer of analysis configuration shouldn't be nullptr"
));
config
->
config
.
EnableXpu
(
l3_workspace_size
);
}
void
PD_DisableGpu
(
PD_AnalysisConfig
*
config
)
{
void
PD_DisableGpu
(
PD_AnalysisConfig
*
config
)
{
PADDLE_ENFORCE_NOT_NULL
(
PADDLE_ENFORCE_NOT_NULL
(
config
,
config
,
...
@@ -127,6 +135,14 @@ bool PD_UseGpu(const PD_AnalysisConfig* config) {
...
@@ -127,6 +135,14 @@ bool PD_UseGpu(const PD_AnalysisConfig* config) {
return
config
->
config
.
use_gpu
();
return
config
->
config
.
use_gpu
();
}
}
bool
PD_UseXpu
(
const
PD_AnalysisConfig
*
config
)
{
PADDLE_ENFORCE_NOT_NULL
(
config
,
paddle
::
platform
::
errors
::
InvalidArgument
(
"The pointer of analysis configuration shouldn't be nullptr"
));
return
config
->
config
.
use_xpu
();
}
int
PD_GpuDeviceId
(
const
PD_AnalysisConfig
*
config
)
{
int
PD_GpuDeviceId
(
const
PD_AnalysisConfig
*
config
)
{
PADDLE_ENFORCE_NOT_NULL
(
PADDLE_ENFORCE_NOT_NULL
(
config
,
config
,
...
@@ -135,6 +151,14 @@ int PD_GpuDeviceId(const PD_AnalysisConfig* config) {
...
@@ -135,6 +151,14 @@ int PD_GpuDeviceId(const PD_AnalysisConfig* config) {
return
config
->
config
.
gpu_device_id
();
return
config
->
config
.
gpu_device_id
();
}
}
int
PD_XpuDeviceId
(
const
PD_AnalysisConfig
*
config
)
{
PADDLE_ENFORCE_NOT_NULL
(
config
,
paddle
::
platform
::
errors
::
InvalidArgument
(
"The pointer of analysis configuration shouldn't be nullptr"
));
return
config
->
config
.
xpu_device_id
();
}
int
PD_MemoryPoolInitSizeMb
(
const
PD_AnalysisConfig
*
config
)
{
int
PD_MemoryPoolInitSizeMb
(
const
PD_AnalysisConfig
*
config
)
{
PADDLE_ENFORCE_NOT_NULL
(
PADDLE_ENFORCE_NOT_NULL
(
config
,
config
,
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
d199edd8
...
@@ -499,6 +499,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
...
@@ -499,6 +499,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
inference_analysis_test
(
test_analyzer_capi_gpu SRCS analyzer_capi_gpu_tester.cc
inference_analysis_test
(
test_analyzer_capi_gpu SRCS analyzer_capi_gpu_tester.cc
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
paddle_fluid_c
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
paddle_fluid_c
ARGS --infer_model=
${
TRT_MODEL_INSTALL_DIR
}
/trt_inference_test_models
)
ARGS --infer_model=
${
TRT_MODEL_INSTALL_DIR
}
/trt_inference_test_models
)
inference_analysis_test
(
test_analyzer_capi_xpu SRCS analyzer_capi_xpu_tester.cc
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
paddle_fluid_c
ARGS --infer_model=
${
TRT_MODEL_INSTALL_DIR
}
/trt_inference_test_models
)
set
(
TRT_MODEL_QUANT_RESNET_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/small_quant_model"
)
set
(
TRT_MODEL_QUANT_RESNET_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/small_quant_model"
)
if
(
NOT EXISTS
${
TRT_MODEL_QUANT_RESNET_DIR
}
/small_quant_model.tgz
)
if
(
NOT EXISTS
${
TRT_MODEL_QUANT_RESNET_DIR
}
/small_quant_model.tgz
)
...
...
paddle/fluid/inference/tests/api/analyzer_capi_xpu_tester.cc
0 → 100644
浏览文件 @
d199edd8
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
#ifdef PADDLE_WITH_XPU
TEST
(
PD_AnalysisConfig
,
use_xpu
)
{
std
::
string
model_dir
=
FLAGS_infer_model
+
"/mobilenet"
;
PD_AnalysisConfig
*
config
=
PD_NewAnalysisConfig
();
PD_SwitchUseFeedFetchOps
(
config
,
false
);
PD_SwitchSpecifyInputNames
(
config
,
true
);
PD_SwitchIrDebug
(
config
,
true
);
PD_SetModel
(
config
,
model_dir
.
c_str
(),
nullptr
);
PD_SetOptimCacheDir
(
config
,
(
FLAGS_infer_model
+
"/OptimCacheDir"
).
c_str
());
const
char
*
model_dir_
=
PD_ModelDir
(
config
);
LOG
(
INFO
)
<<
model_dir_
;
PD_EnableXpu
(
config
,
0xfffc00
);
bool
use_xpu
=
PD_UseXpu
(
config
);
CHECK
(
use_xpu
)
<<
"NO"
;
int
device
=
PD_XpuDeviceId
(
config
);
CHECK
(
0
==
device
)
<<
"NO"
;
PD_SwitchIrOptim
(
config
,
true
);
bool
ir_optim
=
PD_IrOptim
(
config
);
CHECK
(
ir_optim
)
<<
"NO"
;
PD_EnableMemoryOptim
(
config
);
bool
memory_optim_enable
=
PD_MemoryOptimEnabled
(
config
);
CHECK
(
memory_optim_enable
)
<<
"NO"
;
PD_EnableProfile
(
config
);
bool
profiler_enable
=
PD_ProfileEnabled
(
config
);
CHECK
(
profiler_enable
)
<<
"NO"
;
PD_SetInValid
(
config
);
bool
is_valid
=
PD_IsValid
(
config
);
CHECK
(
!
is_valid
)
<<
"NO"
;
PD_DeleteAnalysisConfig
(
config
);
}
#endif
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/lite_mul_model_test.cc
浏览文件 @
d199edd8
...
@@ -58,6 +58,24 @@ int test_main(const AnalysisConfig& config, Barrier* barrier = nullptr) {
...
@@ -58,6 +58,24 @@ int test_main(const AnalysisConfig& config, Barrier* barrier = nullptr) {
return
0
;
return
0
;
}
}
#ifdef PADDLE_WITH_XPU
TEST
(
AnalysisPredictor
,
native_xpu
)
{
AnalysisConfig
config
;
config
.
EnableXpu
();
config
.
SetModel
(
FLAGS_infer_model
+
"/"
+
"mul_model"
);
test_main
(
config
);
}
#endif
#ifdef LITE_SUBGRAPH_WITH_XPU
TEST
(
AnalysisPredictor
,
lite_xpu
)
{
AnalysisConfig
config
;
config
.
EnableXpu
();
config
.
SetModel
(
FLAGS_infer_model
+
"/"
+
"mul_model"
);
config
.
EnableLiteEngine
(
paddle
::
AnalysisConfig
::
Precision
::
kFloat32
);
}
#endif
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
TEST
(
AnalysisPredictor
,
thread_local_stream
)
{
TEST
(
AnalysisPredictor
,
thread_local_stream
)
{
const
size_t
thread_num
=
5
;
const
size_t
thread_num
=
5
;
...
...
paddle/fluid/inference/tests/test_helper.h
浏览文件 @
d199edd8
...
@@ -27,6 +27,18 @@ limitations under the License. */
...
@@ -27,6 +27,18 @@ limitations under the License. */
DECLARE_bool
(
use_mkldnn
);
DECLARE_bool
(
use_mkldnn
);
namespace
paddle
{
bool
gpu_place_used
(
const
paddle
::
PaddlePlace
&
place
)
{
return
place
==
paddle
::
PaddlePlace
::
kGPU
;
}
bool
xpu_place_used
(
const
paddle
::
PaddlePlace
&
place
)
{
return
place
==
paddle
::
PaddlePlace
::
kXPU
;
}
bool
cpu_place_used
(
const
paddle
::
PaddlePlace
&
place
)
{
return
place
==
paddle
::
PaddlePlace
::
kCPU
;
}
}
// namespace paddle
template
<
typename
T
>
template
<
typename
T
>
void
SetupTensor
(
paddle
::
framework
::
LoDTensor
*
input
,
void
SetupTensor
(
paddle
::
framework
::
LoDTensor
*
input
,
paddle
::
framework
::
DDim
dims
,
T
lower
,
T
upper
)
{
paddle
::
framework
::
DDim
dims
,
T
lower
,
T
upper
)
{
...
...
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
浏览文件 @
d199edd8
...
@@ -197,12 +197,12 @@ void Free<platform::XPUPlace>(const platform::XPUPlace &place, void *p,
...
@@ -197,12 +197,12 @@ void Free<platform::XPUPlace>(const platform::XPUPlace &place, void *p,
template
<
>
template
<
>
uint64_t
Release
<
platform
::
XPUPlace
>
(
const
platform
::
XPUPlace
&
place
)
{
uint64_t
Release
<
platform
::
XPUPlace
>
(
const
platform
::
XPUPlace
&
place
)
{
#ifdef PADDLE_WITH_XPU
#ifdef PADDLE_WITH_XPU
PADDLE_THROW
(
LOG
(
WARNING
)
<<
"Release XPU pool is not supported now, no action here."
;
platform
::
errors
::
PermissionDenied
(
"Release XPU pool is not supported."
));
#else
#else
PADDLE_THROW
(
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
"'XPUPlace' is not supported."
));
platform
::
errors
::
PermissionDenied
(
"'XPUPlace' is not supported."
));
#endif
#endif
return
-
1
;
}
}
template
<
>
template
<
>
...
...
paddle/fluid/platform/CMakeLists.txt
浏览文件 @
d199edd8
...
@@ -58,7 +58,7 @@ cc_library(place SRCS place.cc DEPS enforce boost)
...
@@ -58,7 +58,7 @@ cc_library(place SRCS place.cc DEPS enforce boost)
cc_test
(
place_test SRCS place_test.cc DEPS place glog gflags
)
cc_test
(
place_test SRCS place_test.cc DEPS place glog gflags
)
if
(
WITH_XPU
)
if
(
WITH_XPU
)
cc_library
(
xpu_info SRCS xpu_info.cc DEPS gflags glog enforce
)
cc_library
(
xpu_info SRCS xpu_info.cc DEPS gflags glog enforce
xpulib
)
endif
()
endif
()
add_subdirectory
(
dynload
)
add_subdirectory
(
dynload
)
...
...
paddle/fluid/pybind/inference_api.cc
浏览文件 @
d199edd8
...
@@ -369,7 +369,8 @@ void BindPaddlePlace(py::module *m) {
...
@@ -369,7 +369,8 @@ void BindPaddlePlace(py::module *m) {
py
::
enum_
<
PaddlePlace
>
(
*
m
,
"PaddlePlace"
)
py
::
enum_
<
PaddlePlace
>
(
*
m
,
"PaddlePlace"
)
.
value
(
"UNK"
,
PaddlePlace
::
kUNK
)
.
value
(
"UNK"
,
PaddlePlace
::
kUNK
)
.
value
(
"CPU"
,
PaddlePlace
::
kCPU
)
.
value
(
"CPU"
,
PaddlePlace
::
kCPU
)
.
value
(
"GPU"
,
PaddlePlace
::
kGPU
);
.
value
(
"GPU"
,
PaddlePlace
::
kGPU
)
.
value
(
"XPU"
,
PaddlePlace
::
kXPU
);
}
}
void
BindPaddlePredictor
(
py
::
module
*
m
)
{
void
BindPaddlePredictor
(
py
::
module
*
m
)
{
...
@@ -398,6 +399,7 @@ void BindNativeConfig(py::module *m) {
...
@@ -398,6 +399,7 @@ void BindNativeConfig(py::module *m) {
py
::
class_
<
NativeConfig
,
PaddlePredictor
::
Config
>
(
*
m
,
"NativeConfig"
)
py
::
class_
<
NativeConfig
,
PaddlePredictor
::
Config
>
(
*
m
,
"NativeConfig"
)
.
def
(
py
::
init
<>
())
.
def
(
py
::
init
<>
())
.
def_readwrite
(
"use_gpu"
,
&
NativeConfig
::
use_gpu
)
.
def_readwrite
(
"use_gpu"
,
&
NativeConfig
::
use_gpu
)
.
def_readwrite
(
"use_xpu"
,
&
NativeConfig
::
use_xpu
)
.
def_readwrite
(
"device"
,
&
NativeConfig
::
device
)
.
def_readwrite
(
"device"
,
&
NativeConfig
::
device
)
.
def_readwrite
(
"fraction_of_gpu_memory"
,
.
def_readwrite
(
"fraction_of_gpu_memory"
,
&
NativeConfig
::
fraction_of_gpu_memory
)
&
NativeConfig
::
fraction_of_gpu_memory
)
...
@@ -459,7 +461,9 @@ void BindAnalysisConfig(py::module *m) {
...
@@ -459,7 +461,9 @@ void BindAnalysisConfig(py::module *m) {
py
::
arg
(
"l3_workspace_size"
))
py
::
arg
(
"l3_workspace_size"
))
.
def
(
"disable_gpu"
,
&
AnalysisConfig
::
DisableGpu
)
.
def
(
"disable_gpu"
,
&
AnalysisConfig
::
DisableGpu
)
.
def
(
"use_gpu"
,
&
AnalysisConfig
::
use_gpu
)
.
def
(
"use_gpu"
,
&
AnalysisConfig
::
use_gpu
)
.
def
(
"use_xpu"
,
&
AnalysisConfig
::
use_xpu
)
.
def
(
"gpu_device_id"
,
&
AnalysisConfig
::
gpu_device_id
)
.
def
(
"gpu_device_id"
,
&
AnalysisConfig
::
gpu_device_id
)
.
def
(
"xpu_device_id"
,
&
AnalysisConfig
::
xpu_device_id
)
.
def
(
"memory_pool_init_size_mb"
,
.
def
(
"memory_pool_init_size_mb"
,
&
AnalysisConfig
::
memory_pool_init_size_mb
)
&
AnalysisConfig
::
memory_pool_init_size_mb
)
.
def
(
"fraction_of_gpu_memory_for_pool"
,
.
def
(
"fraction_of_gpu_memory_for_pool"
,
...
...
python/paddle/fluid/tests/book/test_word2vec.py
浏览文件 @
d199edd8
...
@@ -26,7 +26,20 @@ import sys
...
@@ -26,7 +26,20 @@ import sys
paddle
.
enable_static
()
paddle
.
enable_static
()
def
train
(
use_cuda
,
is_sparse
,
is_parallel
,
save_dirname
,
is_local
=
True
):
def
get_place
(
target
):
if
target
==
"cuda"
:
return
fluid
.
CUDAPlace
(
0
)
elif
target
==
"xpu"
:
return
fluid
.
XPUPlace
(
0
)
elif
target
==
"cpu"
:
return
fluid
.
CPUPlace
()
else
:
raise
ValueError
(
"Target `{0}` is not on the support list: `cuda`, `xpu` and `cpu`."
.
format
(
target
))
def
train
(
target
,
is_sparse
,
is_parallel
,
save_dirname
,
is_local
=
True
):
PASS_NUM
=
100
PASS_NUM
=
100
EMBED_SIZE
=
32
EMBED_SIZE
=
32
HIDDEN_SIZE
=
256
HIDDEN_SIZE
=
256
...
@@ -93,7 +106,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
...
@@ -93,7 +106,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
train_reader
=
paddle
.
batch
(
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
imikolov
.
train
(
word_dict
,
N
),
BATCH_SIZE
)
paddle
.
dataset
.
imikolov
.
train
(
word_dict
,
N
),
BATCH_SIZE
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
(
)
place
=
get_place
(
target
)
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
feeder
=
fluid
.
DataFeeder
(
feeder
=
fluid
.
DataFeeder
(
feed_list
=
[
first_word
,
second_word
,
third_word
,
forth_word
,
next_word
],
feed_list
=
[
first_word
,
second_word
,
third_word
,
forth_word
,
next_word
],
...
@@ -143,13 +156,12 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
...
@@ -143,13 +156,12 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
train_loop
(
t
.
get_trainer_program
())
train_loop
(
t
.
get_trainer_program
())
def
infer
(
use_cuda
,
save_dirname
=
None
):
def
infer
(
target
,
save_dirname
=
None
):
if
save_dirname
is
None
:
if
save_dirname
is
None
:
return
return
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
(
)
place
=
get_place
(
target
)
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
inference_scope
=
fluid
.
core
.
Scope
()
inference_scope
=
fluid
.
core
.
Scope
()
with
fluid
.
scope_guard
(
inference_scope
):
with
fluid
.
scope_guard
(
inference_scope
):
# Use fluid.io.load_inference_model to obtain the inference program desc,
# Use fluid.io.load_inference_model to obtain the inference program desc,
...
@@ -211,10 +223,12 @@ def infer(use_cuda, save_dirname=None):
...
@@ -211,10 +223,12 @@ def infer(use_cuda, save_dirname=None):
infer_config
=
fluid
.
core
.
NativeConfig
()
infer_config
=
fluid
.
core
.
NativeConfig
()
infer_config
.
model_dir
=
'word2vec.inference.model'
infer_config
.
model_dir
=
'word2vec.inference.model'
i
nfer_config
.
use_gpu
=
use_cuda
i
f
target
==
"cuda"
:
if
use_cuda
:
infer_config
.
use_gpu
=
True
infer_config
.
device
=
0
infer_config
.
device
=
0
infer_config
.
fraction_of_gpu_memory
=
0.15
infer_config
.
fraction_of_gpu_memory
=
0.15
elif
target
==
"xpu"
:
infer_config
.
use_xpu
=
True
compiled_program
=
fluid
.
compiler
.
CompiledProgram
(
inference_program
)
compiled_program
=
fluid
.
compiler
.
CompiledProgram
(
inference_program
)
compiled_program
.
_with_inference_optimize
(
infer_config
)
compiled_program
.
_with_inference_optimize
(
infer_config
)
assert
compiled_program
.
_is_inference
is
True
assert
compiled_program
.
_is_inference
is
True
...
@@ -222,11 +236,13 @@ def infer(use_cuda, save_dirname=None):
...
@@ -222,11 +236,13 @@ def infer(use_cuda, save_dirname=None):
np_data
=
np
.
array
(
results
[
0
])
np_data
=
np
.
array
(
results
[
0
])
infer_out
=
infer_outputs
[
0
].
data
.
float_data
()
infer_out
=
infer_outputs
[
0
].
data
.
float_data
()
for
a
,
b
in
zip
(
np_data
[
0
],
infer_out
):
for
a
,
b
in
zip
(
np_data
[
0
],
infer_out
):
assert
np
.
isclose
(
a
,
b
),
"a: {}, b: {}"
.
format
(
a
,
b
)
assert
np
.
isclose
(
a
,
b
,
rtol
=
5e-5
),
"a: {}, b: {}"
.
format
(
a
,
b
)
def
main
(
use_cuda
,
is_sparse
,
is_parallel
):
def
main
(
target
,
is_sparse
,
is_parallel
):
if
use_cuda
and
not
fluid
.
core
.
is_compiled_with_cuda
():
if
target
==
"cuda"
and
not
fluid
.
core
.
is_compiled_with_cuda
():
return
if
target
==
"xpu"
and
not
fluid
.
core
.
is_compiled_with_xpu
():
return
return
if
not
is_parallel
:
if
not
is_parallel
:
...
@@ -234,8 +250,13 @@ def main(use_cuda, is_sparse, is_parallel):
...
@@ -234,8 +250,13 @@ def main(use_cuda, is_sparse, is_parallel):
else
:
else
:
save_dirname
=
None
save_dirname
=
None
train
(
use_cuda
,
is_sparse
,
is_parallel
,
save_dirname
)
if
target
==
"xpu"
:
infer
(
use_cuda
,
save_dirname
)
# This model cannot be trained with xpu temporarily,
# so only inference is turned on.
train
(
"cpu"
,
is_sparse
,
is_parallel
,
save_dirname
)
else
:
train
(
target
,
is_sparse
,
is_parallel
,
save_dirname
)
infer
(
target
,
save_dirname
)
FULL_TEST
=
os
.
getenv
(
'FULL_TEST'
,
FULL_TEST
=
os
.
getenv
(
'FULL_TEST'
,
...
@@ -247,8 +268,8 @@ class W2VTest(unittest.TestCase):
...
@@ -247,8 +268,8 @@ class W2VTest(unittest.TestCase):
pass
pass
def
inject_test_method
(
use_cuda
,
is_sparse
,
is_parallel
):
def
inject_test_method
(
target
,
is_sparse
,
is_parallel
):
fn_name
=
"test_{0}_{1}_{2}"
.
format
(
"cuda"
if
use_cuda
else
"cpu"
,
"sparse"
fn_name
=
"test_{0}_{1}_{2}"
.
format
(
target
,
"sparse"
if
is_sparse
else
"dense"
,
"parallel"
if
is_sparse
else
"dense"
,
"parallel"
if
is_parallel
else
"normal"
)
if
is_parallel
else
"normal"
)
...
@@ -259,11 +280,10 @@ def inject_test_method(use_cuda, is_sparse, is_parallel):
...
@@ -259,11 +280,10 @@ def inject_test_method(use_cuda, is_sparse, is_parallel):
with
fluid
.
scope_guard
(
scope
):
with
fluid
.
scope_guard
(
scope
):
with
fluid
.
program_guard
(
prog
,
startup_prog
):
with
fluid
.
program_guard
(
prog
,
startup_prog
):
main
(
main
(
use_cuda
=
use_cuda
,
target
=
target
,
is_sparse
=
is_sparse
,
is_parallel
=
is_parallel
)
is_sparse
=
is_sparse
,
is_parallel
=
is_parallel
)
if
(
not
fluid
.
core
.
is_compiled_with_cuda
()
or
use_cuda
)
and
is_sparse
:
if
(
not
fluid
.
core
.
is_compiled_with_cuda
()
or
target
==
"cuda"
)
and
is_sparse
:
fn
=
__impl__
fn
=
__impl__
else
:
else
:
# skip the other test when on CI server
# skip the other test when on CI server
...
@@ -273,10 +293,10 @@ def inject_test_method(use_cuda, is_sparse, is_parallel):
...
@@ -273,10 +293,10 @@ def inject_test_method(use_cuda, is_sparse, is_parallel):
setattr
(
W2VTest
,
fn_name
,
fn
)
setattr
(
W2VTest
,
fn_name
,
fn
)
for
use_cuda
in
(
False
,
True
):
for
target
in
(
"cuda"
,
"cpu"
,
"xpu"
):
for
is_sparse
in
(
False
,
True
):
for
is_sparse
in
(
False
,
True
):
for
is_parallel
in
(
False
,
):
for
is_parallel
in
(
False
,
):
inject_test_method
(
use_cuda
,
is_sparse
,
is_parallel
)
inject_test_method
(
target
,
is_sparse
,
is_parallel
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录