机器未来 / Paddle (forked from PaddlePaddle / Paddle)
Commit d199edd8 (unverified)
Authored on Feb 04, 2021 by 石晓伟; committed via GitHub on Feb 04, 2021.

Commit message:
support xpu with analysis predictor, test=develop (#30832) (#30863)
Parent commit: d1ae7b98
Showing 19 changed files with 379 additions and 106 deletions (+379 -106):
- cmake/external/xpu.cmake  (+52 -37)
- paddle/fluid/inference/api/analysis_config.cc  (+34 -15)
- paddle/fluid/inference/api/analysis_predictor.cc  (+36 -3)
- paddle/fluid/inference/api/analysis_predictor.h  (+0 -1)
- paddle/fluid/inference/api/api_impl.cc  (+20 -1)
- paddle/fluid/inference/api/api_impl_tester.cc  (+33 -21)
- paddle/fluid/inference/api/paddle_analysis_config.h  (+15 -2)
- paddle/fluid/inference/api/paddle_api.h  (+2 -1)
- paddle/fluid/inference/api/paddle_pass_builder.h  (+13 -0)
- paddle/fluid/inference/capi/paddle_c_api.h  (+7 -0)
- paddle/fluid/inference/capi/pd_config.cc  (+24 -0)
- paddle/fluid/inference/tests/api/CMakeLists.txt  (+4 -1)
- paddle/fluid/inference/tests/api/analyzer_capi_xpu_tester.cc  (+61 -0, new file)
- paddle/fluid/inference/tests/api/lite_mul_model_test.cc  (+18 -0)
- paddle/fluid/inference/tests/test_helper.h  (+12 -0)
- paddle/fluid/memory/allocation/naive_best_fit_allocator.cc  (+2 -2)
- paddle/fluid/platform/CMakeLists.txt  (+1 -1)
- paddle/fluid/pybind/inference_api.cc  (+5 -1)
- python/paddle/fluid/tests/book/test_word2vec.py  (+40 -20)
cmake/external/xpu.cmake  (+52 -37)

```diff
@@ -5,36 +5,35 @@ endif()
 INCLUDE(ExternalProject)
 SET(XPU_PROJECT "extern_xpu")
-if (WITH_AARCH64)
-  SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/aarch64/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE)
-elseif(WITH_SUNWAY)
-  SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/sunway/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE)
-else()
-  SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE)
-endif()
+if(NOT XPU_SDK_ROOT)
+  if (WITH_AARCH64)
+    SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/aarch64/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE)
+  elseif(WITH_SUNWAY)
+    SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/sunway/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE)
+  else()
+    SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu_2021_01_13.tar.gz" CACHE STRING "" FORCE)
+  endif()

-SET(XPU_SOURCE_DIR   "${THIRD_PARTY_PATH}/xpu")
-SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
-SET(XPU_INSTALL_DIR  "${THIRD_PARTY_PATH}/install/xpu")
-SET(XPU_API_INC_DIR  "${THIRD_PARTY_PATH}/install/xpu/include")
-SET(XPU_LIB_DIR      "${THIRD_PARTY_PATH}/install/xpu/lib")
+  SET(XPU_SOURCE_DIR   "${THIRD_PARTY_PATH}/xpu")
+  SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
+  SET(XPU_INSTALL_DIR  "${THIRD_PARTY_PATH}/install/xpu")
+  SET(XPU_API_INC_DIR  "${THIRD_PARTY_PATH}/install/xpu/include")
+  SET(XPU_LIB_DIR      "${THIRD_PARTY_PATH}/install/xpu/lib")

-SET(XPU_API_LIB_NAME "libxpuapi.so")
-SET(XPU_RT_LIB_NAME  "libxpurt.so")
-SET(XPU_API_LIB      "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}")
-SET(XPU_RT_LIB       "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}")
+  SET(XPU_API_LIB_NAME "libxpuapi.so")
+  SET(XPU_RT_LIB_NAME  "libxpurt.so")
+  SET(XPU_API_LIB      "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}")
+  SET(XPU_RT_LIB       "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}")

-SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib")
+  SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib")

-INCLUDE_DIRECTORIES(${XPU_API_INC_DIR})
-
-FILE(WRITE ${XPU_DOWNLOAD_DIR}/CMakeLists.txt
-  "PROJECT(XPU)\n"
-  "cmake_minimum_required(VERSION 3.0)\n"
-  "install(DIRECTORY xpu/include xpu/lib \n"
-  "        DESTINATION ${XPU_INSTALL_DIR})\n")
+  FILE(WRITE ${XPU_DOWNLOAD_DIR}/CMakeLists.txt
+    "PROJECT(XPU)\n"
+    "cmake_minimum_required(VERSION 3.0)\n"
+    "install(DIRECTORY xpu/include xpu/lib \n"
+    "        DESTINATION ${XPU_INSTALL_DIR})\n")

-ExternalProject_Add(
+  ExternalProject_Add(
     ${XPU_PROJECT}
     ${EXTERNAL_PROJECT_LOG_ARGS}
     PREFIX                ${XPU_SOURCE_DIR}
@@ -45,8 +44,14 @@ ExternalProject_Add(
     UPDATE_COMMAND        ""
     CMAKE_ARGS            -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT}
     CMAKE_CACHE_ARGS      -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_ROOT}
-)
+  )
+else()
+  SET(XPU_API_INC_DIR "${XPU_SDK_ROOT}/XTDK/include/")
+  SET(XPU_API_LIB     "${XPU_SDK_ROOT}/XTDK/shlib/libxpuapi.so")
+  SET(XPU_RT_LIB      "${XPU_SDK_ROOT}/XTDK/runtime/shlib/libxpurt.so")
+endif()
+
+INCLUDE_DIRECTORIES(${XPU_API_INC_DIR})

 ADD_LIBRARY(shared_xpuapi SHARED IMPORTED GLOBAL)
 set_property(TARGET shared_xpuapi PROPERTY IMPORTED_LOCATION "${XPU_API_LIB}")
@@ -69,4 +74,14 @@ else(WITH_XPU_BKCL)
   TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
 endif(WITH_XPU_BKCL)

-ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
+if(NOT XPU_SDK_ROOT)
+  ADD_DEPENDENCIES(xpulib ${XPU_PROJECT})
+else()
+  ADD_CUSTOM_TARGET(extern_xpu DEPENDS xpulib)
+endif()
+
+# Ensure that xpu/api.h can be included without dependency errors.
+file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc CONTENT "")
+add_library(xpu_headers_dummy STATIC ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc)
+add_dependencies(xpu_headers_dummy extern_xpu)
+link_libraries(xpu_headers_dummy)
```
paddle/fluid/inference/api/analysis_config.cc  (+34 -15)

```diff
@@ -33,6 +33,8 @@ PassStrategy *AnalysisConfig::pass_builder() const {
     if (use_gpu_) {
       LOG(INFO) << "Create GPU IR passes";
       pass_builder_.reset(new GpuPassStrategy);
+    } else if (use_xpu_) {
+      pass_builder_.reset(new XpuPassStrategy);
     } else {
       LOG(INFO) << "Create CPU IR passes";
       pass_builder_.reset(new CpuPassStrategy);
@@ -73,7 +75,7 @@ void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
   use_gpu_ = true;
   memory_pool_init_size_mb_ = memory_pool_init_size_mb;
   FLAGS_initial_gpu_memory_in_mb = memory_pool_init_size_mb_;
-  device_id_ = device_id;
+  gpu_device_id_ = device_id;
 #else
   LOG(ERROR) << "Please compile with gpu to EnableGpu()";
   use_gpu_ = false;
@@ -115,7 +117,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   // GPU related.
   CP_MEMBER(use_gpu_);
   CP_MEMBER(use_cudnn_);
-  CP_MEMBER(device_id_);
+  CP_MEMBER(gpu_device_id_);
+  CP_MEMBER(xpu_device_id_);
   CP_MEMBER(memory_pool_init_size_mb_);

   CP_MEMBER(enable_memory_optim_);
@@ -174,8 +177,14 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(thread_local_stream_);

   if (use_gpu_) {
+    PADDLE_ENFORCE_EQ(use_xpu_, false,
+                      platform::errors::InvalidArgument(
+                          "Only one choice can be made between CPU and XPU."));
     pass_builder_.reset(new GpuPassStrategy(
         *static_cast<GpuPassStrategy *>(other.pass_builder())));
+  } else if (use_xpu_) {
+    pass_builder_.reset(new XpuPassStrategy(
+        *static_cast<XpuPassStrategy *>(other.pass_builder())));
   } else {
     pass_builder_.reset(new CpuPassStrategy(
         *static_cast<CpuPassStrategy *>(other.pass_builder())));
@@ -333,6 +342,12 @@ void AnalysisConfig::Update() {
         // Append after the Affine_channel_conv_fuse pass.
         pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
       }
     }
+  } else if (use_xpu()) {
+    PADDLE_ENFORCE_EQ(
+        use_gpu(), false,
+        platform::errors::InvalidArgument(
+            "Only one choice can be made between CPU and XPU."));
+    pass_builder_.reset(new XpuPassStrategy);
   } else {
     pass_builder_.reset(new CpuPassStrategy);
   }
@@ -341,7 +356,13 @@ void AnalysisConfig::Update() {
     if (use_gpu()) {
       pass_builder_.reset(new GpuPassStrategy(
           *static_cast<GpuPassStrategy *>(pass_builder_.get())));
+    } else if (use_xpu()) {
+      PADDLE_ENFORCE_EQ(
+          use_gpu(), false,
+          platform::errors::InvalidArgument(
+              "Only one choice can be made between CPU and XPU."));
+      pass_builder_.reset(new XpuPassStrategy(
+          *static_cast<XpuPassStrategy *>(pass_builder_.get())));
     } else {
       pass_builder_.reset(new CpuPassStrategy(
           *static_cast<CpuPassStrategy *>(pass_builder_.get())));
@@ -420,19 +441,16 @@ void AnalysisConfig::Update() {
   }

   if (use_xpu_) {
-#ifndef LITE_SUBGRAPH_WITH_XPU
-    PADDLE_THROW(platform::errors::Unavailable(
-        "You tried to use an XPU device, but Paddle was not compiled "
-        "with XPU-runtime."));
-#endif
-    if (!use_lite_) {
-      LOG(WARNING) << "Because XPU currently only works in Paddle-Lite "
-                      "subgraph mode, please make sure you have enabled it.";
-    }
+#if (defined LITE_SUBGRAPH_WITH_XPU) || (defined PADDLE_WITH_XPU)
+    PADDLE_ENFORCE_EQ(use_gpu_, false,
+                      platform::errors::Unavailable(
+                          "Currently, XPU and GPU cannot be enabled in the "
+                          "same analysis configuration."));
+#else
+    PADDLE_THROW(platform::errors::Unavailable(
+        "You tried to use an XPU device, but Paddle was not compiled "
+        "with XPU-runtime."));
+#endif
   }

   if (ir_debug_) {
@@ -448,7 +466,8 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << use_gpu_;
   ss << use_fc_padding_;
-  ss << device_id_;
+  ss << gpu_device_id_;
+  ss << xpu_device_id_;
   ss << memory_pool_init_size_mb_;

   ss << use_tensorrt_;
@@ -507,7 +526,7 @@ float AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
   // Get the GPU memory details and calculate the fraction of memory for the
   // GPU memory pool.
   size_t gpu_total, gpu_available;
-  platform::SetDeviceId(device_id_);
+  platform::SetDeviceId(gpu_device_id_);
   platform::GpuMemoryUsage(&gpu_available, &gpu_total);
   double total_gpu_memory = gpu_total / 1024. / 1024.;
   float fraction_of_gpu_memory =
@@ -548,7 +567,7 @@ NativeConfig AnalysisConfig::ToNativeConfig() const {
   config.prog_file = prog_file_;
   config.param_file = params_file_;
   config.use_gpu = use_gpu_;
-  config.device = device_id_;
+  config.device = gpu_device_id_;
   config.fraction_of_gpu_memory = fraction_of_gpu_memory_for_pool();
   config.specify_input_name = specify_input_name_;
   return config;
```
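Net effect of the config changes: GPU and XPU are mutually exclusive, the device id is tracked per backend (`gpu_device_id_` / `xpu_device_id_`), and an XPU config routes pass selection to the new `XpuPassStrategy`. A minimal usage sketch, not taken from the commit (the model path is hypothetical; 0xfffc00 is the L3 workspace size the new C-API test passes below):

```cpp
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

// Sketch: build an AnalysisConfig that selects the XPU backend.
paddle::AnalysisConfig MakeXpuConfig() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/model");                 // hypothetical model dir
  config.EnableXpu(/*l3_workspace_size=*/0xfffc00);  // turns use_xpu() on
  // Calling EnableUseGpu() on the same config would now trip the
  // "Only one choice can be made between CPU and XPU." enforcement.
  return config;
}
```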
paddle/fluid/inference/api/analysis_predictor.cc  (+36 -3)

```diff
@@ -103,7 +103,10 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt, framework::LoDTensor *t,
     // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
     std::memcpy(static_cast<void *>(input_ptr), pt.data.data(),
                 pt.data.length());
-  } else {
+  } else if (platform::is_gpu_place(place)) {
+    PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), false,
+                      platform::errors::InvalidArgument(
+                          "Only one choice can be made between CPU and XPU."));
 #ifdef PADDLE_WITH_CUDA
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto *dev_ctx =
@@ -116,6 +119,18 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt, framework::LoDTensor *t,
     PADDLE_THROW(paddle::platform::errors::Fatal(
         "Not compile with CUDA, should not reach here."));
 #endif
+  } else if (platform::is_xpu_place(place)) {
+#ifdef PADDLE_WITH_XPU
+    auto dst_xpu_place = BOOST_GET_CONST(platform::XPUPlace, place);
+    memory::Copy(dst_xpu_place, static_cast<void *>(input_ptr),
+                 platform::CPUPlace(), pt.data.data(), pt.data.length());
+#else
+    PADDLE_THROW(paddle::platform::errors::Fatal(
+        "Not compile with XPU, should not reach here."));
+#endif
+  } else {
+    PADDLE_THROW(paddle::platform::errors::InvalidArgument(
+        "The analysis predictor supports CPU, GPU and XPU now."));
   }
   // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
   framework::LoD lod;
@@ -182,6 +197,12 @@ bool AnalysisPredictor::PrepareScope(
            ++dev_id) {
         memory::Release(platform::CUDAPlace(dev_id));
       }
 #endif
+#ifdef PADDLE_WITH_XPU
+      for (int dev_id = 0; dev_id < paddle::platform::GetXPUDeviceCount();
+           ++dev_id) {
+        memory::Release(platform::XPUPlace(dev_id));
+      }
+#endif
       memory::Release(platform::CPUPlace());
     });
@@ -219,7 +240,9 @@ bool AnalysisPredictor::PrepareProgram(
 }
 bool AnalysisPredictor::CreateExecutor() {
   if (config_.use_gpu()) {
-    status_use_gpu_ = true;
+    PADDLE_ENFORCE_EQ(config_.use_xpu(), false,
+                      platform::errors::InvalidArgument(
+                          "Only one choice can be made between CPU and XPU."));
     place_ = paddle::platform::CUDAPlace(config_.gpu_device_id());
 #ifdef PADDLE_WITH_CUDA
     if (config_.thread_local_stream_enabled()) {
@@ -230,6 +253,8 @@ bool AnalysisPredictor::CreateExecutor() {
       ctx->ResetThreadContext(platform::stream::Priority::kNormal);
     }
 #endif
+  } else if (config_.use_xpu()) {
+    place_ = paddle::platform::XPUPlace(config_.xpu_device_id());
   } else {
     place_ = paddle::platform::CPUPlace();
   }
@@ -734,11 +759,16 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
   res->SetName(name);
   if (platform::is_cpu_place(place_)) {
     res->SetPlace(PaddlePlace::kCPU);
+  } else if (platform::is_xpu_place(place_)) {
+    PADDLE_ENFORCE_EQ(config_.use_gpu(), false,
+                      platform::errors::InvalidArgument(
+                          "Only one choice can be made between CPU and XPU."));
+    auto xpu_place = BOOST_GET_CONST(platform::XPUPlace, place_);
+    res->SetPlace(PaddlePlace::kXPU, xpu_place.GetDeviceId());
   } else {
     auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, place_);
     res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
   }
   return res;
 }
@@ -755,6 +785,9 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
   res->SetName(name);
   if (platform::is_cpu_place(place_)) {
     res->SetPlace(PaddlePlace::kCPU);
+  } else if (platform::is_xpu_place(place_)) {
+    auto xpu_place = BOOST_GET_CONST(platform::XPUPlace, place_);
+    res->SetPlace(PaddlePlace::kXPU, xpu_place.GetDeviceId());
   } else {
     auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, place_);
     res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
```
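With these predictor changes, an XPU config places the executor on `XPUPlace` and zero-copy tensors report the new place. A behavioral sketch, not from the commit (the input name "x" is made up):

```cpp
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/api/paddle_api.h"

// Sketch: query the place of an input tensor on an XPU predictor.
void ShowInputPlace(const paddle::AnalysisConfig &config) {
  auto predictor = paddle::CreatePaddlePredictor(config);
  auto input = predictor->GetInputTensor("x");  // "x" is a hypothetical name
  // After this commit the tensor carries PaddlePlace::kXPU plus the device
  // id, and feeding host data copies it onto the device via memory::Copy.
}
```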
paddle/fluid/inference/api/analysis_predictor.h  (+0 -1)

```diff
@@ -415,7 +415,6 @@ class AnalysisPredictor : public PaddlePredictor {
  private:
   // Some status here that help to determine the status inside the predictor.
   bool status_is_cloned_{false};
-  bool status_use_gpu_{false};
 };

 }  // namespace paddle
```
paddle/fluid/inference/api/api_impl.cc  (+20 -1)

```diff
@@ -80,7 +80,12 @@ bool NativePaddlePredictor::Init(
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
   if (config_.use_gpu) {
+    PADDLE_ENFORCE_EQ(config_.use_xpu, false,
+                      platform::errors::InvalidArgument(
+                          "Only one choice can be made between CPU and XPU."));
     place_ = paddle::platform::CUDAPlace(config_.device);
+  } else if (config_.use_xpu) {
+    place_ = paddle::platform::XPUPlace(config_.device);
   } else {
     place_ = paddle::platform::CPUPlace();
   }
@@ -240,7 +245,11 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
       // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
       std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
                   inputs[i].data.length());
-    } else {
+    } else if (platform::is_gpu_place(place_)) {
+      PADDLE_ENFORCE_EQ(
+          platform::is_xpu_place(place_), false,
+          platform::errors::InvalidArgument(
+              "Only one choice can be made between CPU and XPU."));
 #ifdef PADDLE_WITH_CUDA
       platform::DeviceContextPool &pool =
           platform::DeviceContextPool::Instance();
@@ -253,6 +262,16 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
 #else
       PADDLE_THROW(platform::errors::Unavailable(
           "Not compile with CUDA, should not reach here."));
 #endif
+    } else {
+#ifdef PADDLE_WITH_XPU
+      auto dst_xpu_place = BOOST_GET_CONST(platform::XPUPlace, place_);
+      memory::Copy(dst_xpu_place, static_cast<void *>(input_ptr),
+                   platform::CPUPlace(), inputs[i].data.data(),
+                   inputs[i].data.length());
+#else
+      PADDLE_THROW(platform::errors::Unavailable(
+          "Not compile with XPU, should not reach here."));
+#endif
     }
```
paddle/fluid/inference/api/api_impl_tester.cc  (+33 -21)

```diff
@@ -58,19 +58,15 @@ NativeConfig GetConfig() {
   config.model_dir = FLAGS_word2vec_dirname;
   LOG(INFO) << "dirname " << config.model_dir;
   config.fraction_of_gpu_memory = 0.15;
-#ifdef PADDLE_WITH_CUDA
-  config.use_gpu = true;
-#else
-  config.use_gpu = false;
-#endif
   config.device = 0;
   return config;
 }

-void MainWord2Vec(bool use_gpu) {
+void MainWord2Vec(const paddle::PaddlePlace& place) {
   NativeConfig config = GetConfig();
   auto predictor = CreatePaddlePredictor<NativeConfig>(config);
-  config.use_gpu = use_gpu;
+  config.use_gpu = paddle::gpu_place_used(place);
+  config.use_xpu = paddle::xpu_place_used(place);

   framework::LoDTensor first_word, second_word, third_word, fourth_word;
   framework::LoD lod{{0, 1}};
@@ -117,11 +113,12 @@ void MainWord2Vec(bool use_gpu) {
   }
 }

-void MainImageClassification(bool use_gpu) {
+void MainImageClassification(const paddle::PaddlePlace& place) {
   int batch_size = 2;
   bool repeat = false;
   NativeConfig config = GetConfig();
-  config.use_gpu = use_gpu;
+  config.use_gpu = paddle::gpu_place_used(place);
+  config.use_xpu = paddle::xpu_place_used(place);
   config.model_dir =
       FLAGS_book_dirname + "/image_classification_resnet.inference.model";
@@ -162,9 +159,10 @@ void MainImageClassification(bool use_gpu) {
   }
 }

-void MainThreadsWord2Vec(bool use_gpu) {
+void MainThreadsWord2Vec(const paddle::PaddlePlace& place) {
   NativeConfig config = GetConfig();
-  config.use_gpu = use_gpu;
+  config.use_gpu = paddle::gpu_place_used(place);
+  config.use_xpu = paddle::xpu_place_used(place);
   auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);

   // prepare inputs data and reference results
@@ -223,11 +221,12 @@ void MainThreadsWord2Vec(bool use_gpu) {
   }
 }

-void MainThreadsImageClassification(bool use_gpu) {
+void MainThreadsImageClassification(const paddle::PaddlePlace& place) {
   constexpr int num_jobs = 4;  // each job run 1 batch
   constexpr int batch_size = 1;
   NativeConfig config = GetConfig();
-  config.use_gpu = use_gpu;
+  config.use_gpu = paddle::gpu_place_used(place);
+  config.use_xpu = paddle::xpu_place_used(place);
   config.model_dir =
       FLAGS_book_dirname + "/image_classification_resnet.inference.model";
@@ -276,29 +275,42 @@ void MainThreadsImageClassification(bool use_gpu) {
   }
 }

-TEST(inference_api_native, word2vec_cpu) { MainWord2Vec(false /*use_gpu*/); }
+TEST(inference_api_native, word2vec_cpu) {
+  MainWord2Vec(paddle::PaddlePlace::kCPU);
+}
 TEST(inference_api_native, word2vec_cpu_threads) {
-  MainThreadsWord2Vec(false /*use_gpu*/);
+  MainThreadsWord2Vec(paddle::PaddlePlace::kCPU);
 }
 TEST(inference_api_native, image_classification_cpu) {
-  MainImageClassification(false /*use_gpu*/);
+  MainImageClassification(paddle::PaddlePlace::kCPU);
 }
 TEST(inference_api_native, image_classification_cpu_threads) {
-  MainThreadsImageClassification(false /*use_gpu*/);
+  MainThreadsImageClassification(paddle::PaddlePlace::kCPU);
 }

+#ifdef PADDLE_WITH_XPU
+TEST(inference_api_native, word2vec_xpu) {
+  MainWord2Vec(paddle::PaddlePlace::kXPU);
+}
+TEST(inference_api_native, image_classification_xpu) {
+  MainImageClassification(paddle::PaddlePlace::kXPU);
+}
+#endif
+
 #ifdef PADDLE_WITH_CUDA
-TEST(inference_api_native, word2vec_gpu) { MainWord2Vec(true /*use_gpu*/); }
+TEST(inference_api_native, word2vec_gpu) {
+  MainWord2Vec(paddle::PaddlePlace::kGPU);
+}
 // Turn off temporarily for the unstable result.
 // TEST(inference_api_native, word2vec_gpu_threads) {
-//   MainThreadsWord2Vec(true /*use_gpu*/);
+//   MainThreadsWord2Vec(paddle::PaddlePlace::kGPU);
 // }
 TEST(inference_api_native, image_classification_gpu) {
-  MainImageClassification(true /*use_gpu*/);
+  MainImageClassification(paddle::PaddlePlace::kGPU);
 }
 // Turn off temporarily for the unstable result.
 // TEST(inference_api_native, image_classification_gpu_threads) {
-//   MainThreadsImageClassification(true /*use_gpu*/);
+//   MainThreadsImageClassification(paddle::PaddlePlace::kGPU);
 // }
 #endif
```
paddle/fluid/inference/api/paddle_analysis_config.h  (+15 -2)

```diff
@@ -185,11 +185,23 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   bool use_gpu() const { return use_gpu_; }
   ///
+  /// \brief A boolean state telling whether the XPU is turned on.
+  ///
+  /// \return bool Whether the XPU is turned on.
+  ///
+  bool use_xpu() const { return use_xpu_; }
+  ///
   /// \brief Get the GPU device id.
   ///
   /// \return int The GPU device id.
   ///
-  int gpu_device_id() const { return device_id_; }
+  int gpu_device_id() const { return gpu_device_id_; }
+  int xpu_device_id() const { return xpu_device_id_; }
   ///
   /// \brief Get the initial size in MB of the GPU memory pool.
   ///
@@ -579,7 +591,8 @@ struct PD_INFER_DECL AnalysisConfig {
   // GPU related.
   bool use_gpu_{false};
-  int device_id_{0};
+  int gpu_device_id_{0};
+  int xpu_device_id_{0};
   uint64_t memory_pool_init_size_mb_{100};  // initial size is 100MB.

   bool use_cudnn_{false};
```
paddle/fluid/inference/api/paddle_api.h  (+2 -1)

```diff
@@ -161,7 +161,7 @@ struct PD_INFER_DECL PaddleTensor {
   std::vector<std::vector<size_t>> lod;  ///< Tensor+LoD equals LoDTensor
 };

-enum class PaddlePlace { kUNK = -1, kCPU, kGPU };
+enum class PaddlePlace { kUNK = -1, kCPU, kGPU, kXPU };

 /// \brief Represents an n-dimensional array of values.
 /// The ZeroCopyTensor is used to store the input or output of the network.
@@ -360,6 +360,7 @@ class PD_INFER_DECL PaddlePredictor {
 struct PD_INFER_DECL NativeConfig : public PaddlePredictor::Config {
   NativeConfig();
   /// GPU related fields.
+  bool use_xpu{false};
   bool use_gpu{false};
   int device{0};
   float fraction_of_gpu_memory{
```
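`PaddlePlace` is a plain enum class, so downstream code that dispatches on it needs one more case for `kXPU`. A standalone sketch, not part of the commit:

```cpp
#include "paddle/fluid/inference/api/paddle_api.h"

// Maps the extended enum to a printable name; kXPU is the new member.
const char *PlaceName(paddle::PaddlePlace p) {
  switch (p) {
    case paddle::PaddlePlace::kCPU: return "CPU";
    case paddle::PaddlePlace::kGPU: return "GPU";
    case paddle::PaddlePlace::kXPU: return "XPU";
    default:                        return "UNK";
  }
}
```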
paddle/fluid/inference/api/paddle_pass_builder.h  (+13 -0)

```diff
@@ -140,11 +140,16 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
   /// \return A bool variable implying whether we are in gpu mode.
   bool use_gpu() const { return use_gpu_; }

+  /// \brief Check if we are using xpu.
+  /// \return A bool variable implying whether we are in xpu mode.
+  bool use_xpu() const { return use_xpu_; }
+
   /// \brief Default destructor.
   virtual ~PassStrategy() = default;

  protected:
   /// \cond Protected
+  bool use_xpu_{false};
   bool use_gpu_{false};
   bool use_mkldnn_{false};
   /// \endcond
@@ -226,6 +231,14 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
   /// \endcond
 };

+/// \class XpuPassStrategy
+/// \brief The XPU passes controller, it is used in AnalysisPredictor with XPU
+/// mode.
+class PD_INFER_DECL XpuPassStrategy final : public PassStrategy {
+ public:
+  XpuPassStrategy() : PassStrategy({}) {}
+};
+
 /// \brief List of tensorRT subgraph passes.
 PD_INFER_DECL extern const std::vector<std::string> kTRTSubgraphPasses;
```
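Note that `XpuPassStrategy` is constructed from an empty pass list (`PassStrategy({})`), unlike the CPU and GPU strategies, which seed long default lists; as of this commit no IR passes run for XPU. A sketch of what that means for callers (not from the commit):

```cpp
#include "paddle/fluid/inference/api/paddle_pass_builder.h"

// Sketch: the new strategy starts with no passes registered.
void InspectXpuPasses() {
  paddle::XpuPassStrategy xpu_passes;
  // AllPasses() is the inherited PaddlePassBuilder accessor; for the XPU
  // strategy it should return an empty vector at this point in history.
  auto passes = xpu_passes.AllPasses();
  (void)passes;
}
```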
paddle/fluid/inference/capi/paddle_c_api.h  (+7 -0)

```diff
@@ -165,12 +165,19 @@ PADDLE_CAPI_EXPORT extern void PD_EnableUseGpu(PD_AnalysisConfig* config,
                                                int memory_pool_init_size_mb,
                                                int device_id);

+PADDLE_CAPI_EXPORT extern void PD_EnableXpu(PD_AnalysisConfig* config,
+                                            int l3_workspace_size);
+
 PADDLE_CAPI_EXPORT extern void PD_DisableGpu(PD_AnalysisConfig* config);

 PADDLE_CAPI_EXPORT extern bool PD_UseGpu(const PD_AnalysisConfig* config);

+PADDLE_CAPI_EXPORT extern bool PD_UseXpu(const PD_AnalysisConfig* config);
+
 PADDLE_CAPI_EXPORT extern int PD_GpuDeviceId(const PD_AnalysisConfig* config);

+PADDLE_CAPI_EXPORT extern int PD_XpuDeviceId(const PD_AnalysisConfig* config);
+
 PADDLE_CAPI_EXPORT extern int PD_MemoryPoolInitSizeMb(
     const PD_AnalysisConfig* config);
```
paddle/fluid/inference/capi/pd_config.cc  (+24 -0)

```diff
@@ -111,6 +111,14 @@ void PD_EnableUseGpu(PD_AnalysisConfig* config, int memory_pool_init_size_mb,
                                  device_id);
 }

+void PD_EnableXpu(PD_AnalysisConfig* config, int l3_workspace_size) {
+  PADDLE_ENFORCE_NOT_NULL(
+      config,
+      paddle::platform::errors::InvalidArgument(
+          "The pointer of analysis configuration shouldn't be nullptr"));
+  config->config.EnableXpu(l3_workspace_size);
+}
+
 void PD_DisableGpu(PD_AnalysisConfig* config) {
   PADDLE_ENFORCE_NOT_NULL(
       config,
@@ -127,6 +135,14 @@ bool PD_UseGpu(const PD_AnalysisConfig* config) {
   return config->config.use_gpu();
 }

+bool PD_UseXpu(const PD_AnalysisConfig* config) {
+  PADDLE_ENFORCE_NOT_NULL(
+      config,
+      paddle::platform::errors::InvalidArgument(
+          "The pointer of analysis configuration shouldn't be nullptr"));
+  return config->config.use_xpu();
+}
+
 int PD_GpuDeviceId(const PD_AnalysisConfig* config) {
   PADDLE_ENFORCE_NOT_NULL(
       config,
@@ -135,6 +151,14 @@ int PD_GpuDeviceId(const PD_AnalysisConfig* config) {
   return config->config.gpu_device_id();
 }

+int PD_XpuDeviceId(const PD_AnalysisConfig* config) {
+  PADDLE_ENFORCE_NOT_NULL(
+      config,
+      paddle::platform::errors::InvalidArgument(
+          "The pointer of analysis configuration shouldn't be nullptr"));
+  return config->config.xpu_device_id();
+}
+
 int PD_MemoryPoolInitSizeMb(const PD_AnalysisConfig* config) {
   PADDLE_ENFORCE_NOT_NULL(
       config,
```
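The three new C entry points wrap the C++ getters one-to-one. A minimal round trip, as a sketch assuming an XPU-enabled build (0xfffc00 mirrors the new tester below):

```cpp
#include "paddle/fluid/inference/capi/paddle_c_api.h"

// Sketch: enable XPU through the C API and read the state back.
void CApiXpuRoundTrip() {
  PD_AnalysisConfig *config = PD_NewAnalysisConfig();
  PD_EnableXpu(config, 0xfffc00);       // l3_workspace_size in bytes
  bool use_xpu = PD_UseXpu(config);     // true after PD_EnableXpu
  int device = PD_XpuDeviceId(config);  // 0 unless set elsewhere
  (void)use_xpu;
  (void)device;
  PD_DeleteAnalysisConfig(config);
}
```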
paddle/fluid/inference/tests/api/CMakeLists.txt  (+4 -1)

```diff
@@ -499,6 +499,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
   inference_analysis_test(test_analyzer_capi_gpu SRCS analyzer_capi_gpu_tester.cc
           EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
           ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
+  inference_analysis_test(test_analyzer_capi_xpu SRCS analyzer_capi_xpu_tester.cc
+          EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
+          ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
   set(TRT_MODEL_QUANT_RESNET_DIR "${INFERENCE_DEMO_INSTALL_DIR}/small_quant_model")
   if (NOT EXISTS ${TRT_MODEL_QUANT_RESNET_DIR}/small_quant_model.tgz)
```
paddle/fluid/inference/tests/api/analyzer_capi_xpu_tester.cc  (new file, +61 -0)

```cpp
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"

namespace paddle {
namespace inference {
namespace analysis {

#ifdef PADDLE_WITH_XPU
TEST(PD_AnalysisConfig, use_xpu) {
  std::string model_dir = FLAGS_infer_model + "/mobilenet";
  PD_AnalysisConfig *config = PD_NewAnalysisConfig();
  PD_SwitchUseFeedFetchOps(config, false);
  PD_SwitchSpecifyInputNames(config, true);
  PD_SwitchIrDebug(config, true);
  PD_SetModel(config, model_dir.c_str(), nullptr);
  PD_SetOptimCacheDir(config, (FLAGS_infer_model + "/OptimCacheDir").c_str());
  const char *model_dir_ = PD_ModelDir(config);
  LOG(INFO) << model_dir_;
  PD_EnableXpu(config, 0xfffc00);
  bool use_xpu = PD_UseXpu(config);
  CHECK(use_xpu) << "NO";
  int device = PD_XpuDeviceId(config);
  CHECK(0 == device) << "NO";
  PD_SwitchIrOptim(config, true);
  bool ir_optim = PD_IrOptim(config);
  CHECK(ir_optim) << "NO";
  PD_EnableMemoryOptim(config);
  bool memory_optim_enable = PD_MemoryOptimEnabled(config);
  CHECK(memory_optim_enable) << "NO";
  PD_EnableProfile(config);
  bool profiler_enable = PD_ProfileEnabled(config);
  CHECK(profiler_enable) << "NO";
  PD_SetInValid(config);
  bool is_valid = PD_IsValid(config);
  CHECK(!is_valid) << "NO";
  PD_DeleteAnalysisConfig(config);
}
#endif

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
```
paddle/fluid/inference/tests/api/lite_mul_model_test.cc  (+18 -0)

```diff
@@ -58,6 +58,24 @@ int test_main(const AnalysisConfig& config, Barrier* barrier = nullptr) {
   return 0;
 }

+#ifdef PADDLE_WITH_XPU
+TEST(AnalysisPredictor, native_xpu) {
+  AnalysisConfig config;
+  config.EnableXpu();
+  config.SetModel(FLAGS_infer_model + "/" + "mul_model");
+  test_main(config);
+}
+#endif
+
+#ifdef LITE_SUBGRAPH_WITH_XPU
+TEST(AnalysisPredictor, lite_xpu) {
+  AnalysisConfig config;
+  config.EnableXpu();
+  config.SetModel(FLAGS_infer_model + "/" + "mul_model");
+  config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);
+}
+#endif
+
 #ifdef PADDLE_WITH_CUDA
 TEST(AnalysisPredictor, thread_local_stream) {
   const size_t thread_num = 5;
```
paddle/fluid/inference/tests/test_helper.h  (+12 -0)

```diff
@@ -27,6 +27,18 @@ limitations under the License. */
 DECLARE_bool(use_mkldnn);

+namespace paddle {
+bool gpu_place_used(const paddle::PaddlePlace& place) {
+  return place == paddle::PaddlePlace::kGPU;
+}
+bool xpu_place_used(const paddle::PaddlePlace& place) {
+  return place == paddle::PaddlePlace::kXPU;
+}
+bool cpu_place_used(const paddle::PaddlePlace& place) {
+  return place == paddle::PaddlePlace::kCPU;
+}
+}  // namespace paddle
+
 template <typename T>
 void SetupTensor(paddle::framework::LoDTensor* input,
                  paddle::framework::DDim dims, T lower, T upper) {
```
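These helpers let a test derive both `NativeConfig` flags from a single `PaddlePlace`, which is how the reworked `api_impl_tester.cc` above consumes them. Usage sketch:

```cpp
#include "paddle/fluid/inference/api/paddle_api.h"
#include "paddle/fluid/inference/tests/test_helper.h"

// Sketch: one place value drives both backend flags.
void ApplyPlace(paddle::NativeConfig *config, paddle::PaddlePlace place) {
  config->use_gpu = paddle::gpu_place_used(place);  // true only for kGPU
  config->use_xpu = paddle::xpu_place_used(place);  // true only for kXPU
}
```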
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc  (+2 -2)

```diff
@@ -197,12 +197,12 @@ void Free<platform::XPUPlace>(const platform::XPUPlace &place, void *p,
 template <>
 uint64_t Release<platform::XPUPlace>(const platform::XPUPlace &place) {
 #ifdef PADDLE_WITH_XPU
-  PADDLE_THROW(platform::errors::PermissionDenied(
-      "Release XPU pool is not supported."));
+  LOG(WARNING)
+      << "Release XPU pool is not supported now, no action here.";
 #else
   PADDLE_THROW(
       platform::errors::PermissionDenied("'XPUPlace' is not supported."));
 #endif
   return -1;
 }
 template <>
```
paddle/fluid/platform/CMakeLists.txt  (+1 -1)

```diff
@@ -58,7 +58,7 @@ cc_library(place SRCS place.cc DEPS enforce boost)
 cc_test(place_test SRCS place_test.cc DEPS place glog gflags)

 if(WITH_XPU)
-cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce)
+cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib)
 endif()

 add_subdirectory(dynload)
```
paddle/fluid/pybind/inference_api.cc  (+5 -1)

```diff
@@ -369,7 +369,8 @@ void BindPaddlePlace(py::module *m) {
   py::enum_<PaddlePlace>(*m, "PaddlePlace")
       .value("UNK", PaddlePlace::kUNK)
       .value("CPU", PaddlePlace::kCPU)
-      .value("GPU", PaddlePlace::kGPU);
+      .value("GPU", PaddlePlace::kGPU)
+      .value("XPU", PaddlePlace::kXPU);
 }

 void BindPaddlePredictor(py::module *m) {
@@ -398,6 +399,7 @@ void BindNativeConfig(py::module *m) {
   py::class_<NativeConfig, PaddlePredictor::Config>(*m, "NativeConfig")
       .def(py::init<>())
       .def_readwrite("use_gpu", &NativeConfig::use_gpu)
+      .def_readwrite("use_xpu", &NativeConfig::use_xpu)
       .def_readwrite("device", &NativeConfig::device)
       .def_readwrite("fraction_of_gpu_memory",
                      &NativeConfig::fraction_of_gpu_memory)
@@ -459,7 +461,9 @@ void BindAnalysisConfig(py::module *m) {
            py::arg("l3_workspace_size"))
       .def("disable_gpu", &AnalysisConfig::DisableGpu)
       .def("use_gpu", &AnalysisConfig::use_gpu)
+      .def("use_xpu", &AnalysisConfig::use_xpu)
       .def("gpu_device_id", &AnalysisConfig::gpu_device_id)
+      .def("xpu_device_id", &AnalysisConfig::xpu_device_id)
       .def("memory_pool_init_size_mb",
            &AnalysisConfig::memory_pool_init_size_mb)
       .def("fraction_of_gpu_memory_for_pool",
```
python/paddle/fluid/tests/book/test_word2vec.py  (+40 -20)

```diff
@@ -26,7 +26,20 @@ import sys
 paddle.enable_static()


-def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
+def get_place(target):
+    if target == "cuda":
+        return fluid.CUDAPlace(0)
+    elif target == "xpu":
+        return fluid.XPUPlace(0)
+    elif target == "cpu":
+        return fluid.CPUPlace()
+    else:
+        raise ValueError(
+            "Target `{0}` is not on the support list: `cuda`, `xpu` and `cpu`.".
+            format(target))
+
+
+def train(target, is_sparse, is_parallel, save_dirname, is_local=True):
     PASS_NUM = 100
     EMBED_SIZE = 32
     HIDDEN_SIZE = 256
@@ -93,7 +106,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
     train_reader = paddle.batch(
         paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    place = get_place(target)
     exe = fluid.Executor(place)
     feeder = fluid.DataFeeder(
         feed_list=[first_word, second_word, third_word, forth_word, next_word],
@@ -143,13 +156,12 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
         train_loop(t.get_trainer_program())


-def infer(use_cuda, save_dirname=None):
+def infer(target, save_dirname=None):
     if save_dirname is None:
         return

-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    place = get_place(target)
     exe = fluid.Executor(place)
     inference_scope = fluid.core.Scope()
     with fluid.scope_guard(inference_scope):
         # Use fluid.io.load_inference_model to obtain the inference program desc,
@@ -211,10 +223,12 @@ def infer(use_cuda, save_dirname=None):
         infer_config = fluid.core.NativeConfig()
         infer_config.model_dir = 'word2vec.inference.model'
-        infer_config.use_gpu = use_cuda
-        if use_cuda:
+        if target == "cuda":
+            infer_config.use_gpu = True
             infer_config.device = 0
             infer_config.fraction_of_gpu_memory = 0.15
+        elif target == "xpu":
+            infer_config.use_xpu = True
         compiled_program = fluid.compiler.CompiledProgram(inference_program)
         compiled_program._with_inference_optimize(infer_config)
         assert compiled_program._is_inference is True
@@ -222,11 +236,13 @@ def infer(use_cuda, save_dirname=None):
         np_data = np.array(results[0])
         infer_out = infer_outputs[0].data.float_data()
         for a, b in zip(np_data[0], infer_out):
-            assert np.isclose(a, b), "a: {}, b: {}".format(a, b)
+            assert np.isclose(a, b, rtol=5e-5), "a: {}, b: {}".format(a, b)


-def main(use_cuda, is_sparse, is_parallel):
-    if use_cuda and not fluid.core.is_compiled_with_cuda():
+def main(target, is_sparse, is_parallel):
+    if target == "cuda" and not fluid.core.is_compiled_with_cuda():
+        return
+    if target == "xpu" and not fluid.core.is_compiled_with_xpu():
         return

     if not is_parallel:
@@ -234,8 +250,13 @@ def main(use_cuda, is_sparse, is_parallel):
     else:
         save_dirname = None

-    train(use_cuda, is_sparse, is_parallel, save_dirname)
-    infer(use_cuda, save_dirname)
+    if target == "xpu":
+        # This model cannot be trained with xpu temporarily,
+        # so only inference is turned on.
+        train("cpu", is_sparse, is_parallel, save_dirname)
+    else:
+        train(target, is_sparse, is_parallel, save_dirname)
+    infer(target, save_dirname)


 FULL_TEST = os.getenv('FULL_TEST',
@@ -247,8 +268,8 @@ class W2VTest(unittest.TestCase):
     pass


-def inject_test_method(use_cuda, is_sparse, is_parallel):
-    fn_name = "test_{0}_{1}_{2}".format("cuda" if use_cuda else "cpu", "sparse"
+def inject_test_method(target, is_sparse, is_parallel):
+    fn_name = "test_{0}_{1}_{2}".format(target, "sparse"
                                         if is_sparse else "dense", "parallel"
                                         if is_parallel else "normal")
@@ -259,11 +280,10 @@ def inject_test_method(use_cuda, is_sparse, is_parallel):
         with fluid.scope_guard(scope):
             with fluid.program_guard(prog, startup_prog):
-                main(use_cuda=use_cuda, is_sparse=is_sparse,
-                     is_parallel=is_parallel)
+                main(target=target, is_sparse=is_sparse,
+                     is_parallel=is_parallel)

-    if (not fluid.core.is_compiled_with_cuda() or use_cuda) and is_sparse:
+    if (not fluid.core.is_compiled_with_cuda() or
+            target == "cuda") and is_sparse:
         fn = __impl__
     else:
         # skip the other test when on CI server
@@ -273,10 +293,10 @@ def inject_test_method(use_cuda, is_sparse, is_parallel):
     setattr(W2VTest, fn_name, fn)


-for use_cuda in (False, True):
+for target in ("cuda", "cpu", "xpu"):
     for is_sparse in (False, True):
         for is_parallel in (False, ):
-            inject_test_method(use_cuda, is_sparse, is_parallel)
+            inject_test_method(target, is_sparse, is_parallel)

 if __name__ == '__main__':
     unittest.main()
```