Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
4e3fb219
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4e3fb219
编写于
7月 14, 2021
作者:
W
Wilber
提交者:
GitHub
7月 14, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Inference support Ascend910 (#34101)
上级
a4028b4b
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
257 addition
and
10 deletion
+257
-10
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+55
-2
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+14
-0
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+18
-1
paddle/fluid/inference/api/api_impl_tester.cc
paddle/fluid/inference/api/api_impl_tester.cc
+13
-0
paddle/fluid/inference/api/details/zero_copy_tensor.cc
paddle/fluid/inference/api/details/zero_copy_tensor.cc
+35
-2
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+25
-2
paddle/fluid/inference/api/paddle_api.h
paddle/fluid/inference/api/paddle_api.h
+1
-0
paddle/fluid/inference/api/paddle_pass_builder.h
paddle/fluid/inference/api/paddle_pass_builder.h
+21
-1
paddle/fluid/inference/api/paddle_tensor.h
paddle/fluid/inference/api/paddle_tensor.h
+1
-1
paddle/fluid/inference/capi_exp/pd_config.cc
paddle/fluid/inference/capi_exp/pd_config.cc
+14
-0
paddle/fluid/inference/capi_exp/pd_config.h
paddle/fluid/inference/capi_exp/pd_config.h
+24
-0
paddle/fluid/inference/goapi/config.go
paddle/fluid/inference/goapi/config.go
+27
-0
paddle/fluid/inference/tests/test_helper.h
paddle/fluid/inference/tests/test_helper.h
+3
-0
paddle/fluid/pybind/inference_api.cc
paddle/fluid/pybind/inference_api.cc
+6
-1
未找到文件。
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
4e3fb219
...
...
@@ -36,6 +36,8 @@ PassStrategy *AnalysisConfig::pass_builder() const {
pass_builder_
.
reset
(
new
GpuPassStrategy
);
}
else
if
(
use_xpu_
)
{
pass_builder_
.
reset
(
new
XpuPassStrategy
);
}
else
if
(
use_npu_
)
{
pass_builder_
.
reset
(
new
NpuPassStrategy
);
}
else
{
LOG
(
INFO
)
<<
"Create CPU IR passes"
;
pass_builder_
.
reset
(
new
CpuPassStrategy
);
...
...
@@ -110,6 +112,18 @@ void AnalysisConfig::EnableXpu(int l3_workspace_size, bool locked,
Update
();
}
void
AnalysisConfig
::
EnableNpu
(
int
device_id
)
{
#ifdef PADDLE_WITH_ASCEND_CL
use_npu_
=
true
;
npu_device_id_
=
device_id
;
#else
LOG
(
ERROR
)
<<
"Please compile with npu to EnableNpu()"
;
use_npu_
=
false
;
#endif
Update
();
}
AnalysisConfig
::
AnalysisConfig
(
const
AnalysisConfig
&
other
)
{
#define CP_MEMBER(member__) member__ = other.member__;
...
...
@@ -127,7 +141,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER
(
use_gpu_
);
CP_MEMBER
(
use_cudnn_
);
CP_MEMBER
(
gpu_device_id_
);
CP_MEMBER
(
xpu_device_id_
);
CP_MEMBER
(
memory_pool_init_size_mb_
);
CP_MEMBER
(
enable_memory_optim_
);
...
...
@@ -167,7 +180,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER
(
lite_ops_filter_
);
CP_MEMBER
(
lite_zero_copy_
);
// XPU related.
CP_MEMBER
(
use_xpu_
);
CP_MEMBER
(
xpu_device_id_
);
CP_MEMBER
(
xpu_l3_workspace_size_
);
CP_MEMBER
(
xpu_locked_
);
CP_MEMBER
(
xpu_autotune_
);
...
...
@@ -175,6 +190,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER
(
xpu_precision_
);
CP_MEMBER
(
xpu_adaptive_seqlen_
);
// NPU related.
CP_MEMBER
(
use_npu_
);
CP_MEMBER
(
npu_device_id_
);
// profile related.
CP_MEMBER
(
with_profile_
);
...
...
@@ -202,6 +221,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
}
else
if
(
use_xpu_
)
{
pass_builder_
.
reset
(
new
XpuPassStrategy
(
*
static_cast
<
XpuPassStrategy
*>
(
other
.
pass_builder
())));
}
else
if
(
use_npu_
)
{
pass_builder_
.
reset
(
new
NpuPassStrategy
(
*
static_cast
<
NpuPassStrategy
*>
(
other
.
pass_builder
())));
}
else
{
pass_builder_
.
reset
(
new
CpuPassStrategy
(
*
static_cast
<
CpuPassStrategy
*>
(
other
.
pass_builder
())));
...
...
@@ -376,7 +398,9 @@ void AnalysisConfig::Update() {
if
(
info
==
serialized_info_cache_
)
return
;
// Transfer pass_builder and copy the existing compatible passes.
if
(
!
pass_builder_
||
((
use_gpu
()
^
pass_builder_
->
use_gpu
())))
{
if
(
!
pass_builder_
||
((
use_gpu
()
^
pass_builder_
->
use_gpu
()))
||
((
use_xpu
()
^
pass_builder_
->
use_xpu
()))
||
((
use_npu
()
^
pass_builder_
->
use_npu
())))
{
if
(
use_gpu
())
{
pass_builder_
.
reset
(
new
GpuPassStrategy
);
...
...
@@ -390,6 +414,12 @@ void AnalysisConfig::Update() {
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between CPU and XPU."
));
pass_builder_
.
reset
(
new
XpuPassStrategy
);
}
else
if
(
use_npu
())
{
PADDLE_ENFORCE_EQ
(
use_gpu
(),
false
,
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between GPU and NPU."
));
pass_builder_
.
reset
(
new
NpuPassStrategy
);
}
else
{
pass_builder_
.
reset
(
new
CpuPassStrategy
);
}
...
...
@@ -405,6 +435,13 @@ void AnalysisConfig::Update() {
"Only one choice can be made between CPU and XPU."
));
pass_builder_
.
reset
(
new
XpuPassStrategy
(
*
static_cast
<
XpuPassStrategy
*>
(
pass_builder_
.
get
())));
}
else
if
(
use_npu
())
{
PADDLE_ENFORCE_EQ
(
use_gpu
(),
false
,
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between GPU and NPU."
));
pass_builder_
.
reset
(
new
NpuPassStrategy
(
*
static_cast
<
NpuPassStrategy
*>
(
pass_builder_
.
get
())));
}
else
{
pass_builder_
.
reset
(
new
CpuPassStrategy
(
*
static_cast
<
CpuPassStrategy
*>
(
pass_builder_
.
get
())));
...
...
@@ -502,6 +539,19 @@ void AnalysisConfig::Update() {
#endif
}
if
(
use_npu_
)
{
#ifdef PADDLE_WITH_ASCEND_CL
PADDLE_ENFORCE_EQ
(
use_gpu_
,
false
,
platform
::
errors
::
Unavailable
(
"Currently, NPU and GPU cannot be enabled in the "
"same analysis configuration."
));
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"You tried to use an NPU device, but Paddle was not compiled "
"with NPU-runtime."
));
#endif
}
if
(
ir_debug_
)
{
pass_builder
()
->
TurnOnDebug
();
}
...
...
@@ -566,6 +616,9 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss
<<
xpu_precision_
;
ss
<<
xpu_adaptive_seqlen_
;
ss
<<
use_npu_
;
ss
<<
npu_device_id_
;
ss
<<
thread_local_stream_
;
return
ss
.
str
();
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
4e3fb219
...
...
@@ -264,6 +264,14 @@ bool AnalysisPredictor::CreateExecutor() {
"with WITH_XPU."
));
#endif // PADDLE_WITH_XPU
}
}
else
if
(
config_
.
use_npu
())
{
#ifdef PADDLE_WITH_ASCEND_CL
place_
=
paddle
::
platform
::
NPUPlace
(
config_
.
npu_device_id
());
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"You tried to use NPU forward propagation, but Paddle was not compiled "
"with WITH_ASCEND_CL."
));
#endif
}
else
{
place_
=
paddle
::
platform
::
CPUPlace
();
}
...
...
@@ -847,6 +855,9 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
auto
xpu_place
=
BOOST_GET_CONST
(
platform
::
XPUPlace
,
place_
);
res
->
SetPlace
(
PaddlePlace
::
kXPU
,
xpu_place
.
GetDeviceId
());
}
}
else
if
(
platform
::
is_npu_place
(
place_
))
{
auto
npu_place
=
BOOST_GET_CONST
(
platform
::
NPUPlace
,
place_
);
res
->
SetPlace
(
PaddlePlace
::
kNPU
,
npu_place
.
GetDeviceId
());
}
else
{
auto
gpu_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place_
);
res
->
SetPlace
(
PaddlePlace
::
kGPU
,
gpu_place
.
GetDeviceId
());
...
...
@@ -879,6 +890,9 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
auto
xpu_place
=
BOOST_GET_CONST
(
platform
::
XPUPlace
,
place_
);
res
->
SetPlace
(
PaddlePlace
::
kXPU
,
xpu_place
.
GetDeviceId
());
}
}
else
if
(
platform
::
is_npu_place
(
place_
))
{
auto
npu_place
=
BOOST_GET_CONST
(
platform
::
NPUPlace
,
place_
);
res
->
SetPlace
(
PaddlePlace
::
kNPU
,
npu_place
.
GetDeviceId
());
}
else
{
auto
gpu_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place_
);
res
->
SetPlace
(
PaddlePlace
::
kGPU
,
gpu_place
.
GetDeviceId
());
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
4e3fb219
...
...
@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_bool
(
profile
,
false
,
"Turn on profiler for fluid"
);
...
...
@@ -78,6 +79,8 @@ bool NativePaddlePredictor::Init(
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
}
else
if
(
config_
.
use_xpu
)
{
place_
=
paddle
::
platform
::
XPUPlace
(
config_
.
device
);
}
else
if
(
config_
.
use_npu
)
{
place_
=
paddle
::
platform
::
NPUPlace
(
config_
.
device
);
}
else
{
place_
=
paddle
::
platform
::
CPUPlace
();
}
...
...
@@ -255,7 +258,7 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Not compile with CUDA, should not reach here."
));
#endif
}
else
{
}
else
if
(
platform
::
is_xpu_place
(
place_
))
{
#ifdef PADDLE_WITH_XPU
auto
dst_xpu_place
=
BOOST_GET_CONST
(
platform
::
XPUPlace
,
place_
);
memory
::
Copy
(
dst_xpu_place
,
static_cast
<
void
*>
(
input_ptr
),
...
...
@@ -264,6 +267,20 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Not compile with XPU, should not reach here."
));
#endif
}
else
{
#ifdef PADDLE_WITH_ASCEND_CL
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
static_cast
<
const
platform
::
NPUDeviceContext
*>
(
pool
.
Get
(
place_
));
auto
dst_npu_place
=
BOOST_GET_CONST
(
platform
::
NPUPlace
,
place_
);
memory
::
Copy
(
dst_npu_place
,
static_cast
<
void
*>
(
input_ptr
),
platform
::
CPUPlace
(),
inputs
[
i
].
data
.
data
(),
inputs
[
i
].
data
.
length
(),
dev_ctx
->
stream
());
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Not compile with NPU, should not reach here."
));
#endif
}
...
...
paddle/fluid/inference/api/api_impl_tester.cc
浏览文件 @
4e3fb219
...
...
@@ -67,6 +67,7 @@ void MainWord2Vec(const paddle::PaddlePlace& place) {
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
config
.
use_gpu
=
paddle
::
gpu_place_used
(
place
);
config
.
use_xpu
=
paddle
::
xpu_place_used
(
place
);
config
.
use_npu
=
paddle
::
npu_place_used
(
place
);
framework
::
LoDTensor
first_word
,
second_word
,
third_word
,
fourth_word
;
framework
::
LoD
lod
{{
0
,
1
}};
...
...
@@ -119,6 +120,7 @@ void MainImageClassification(const paddle::PaddlePlace& place) {
NativeConfig
config
=
GetConfig
();
config
.
use_gpu
=
paddle
::
gpu_place_used
(
place
);
config
.
use_xpu
=
paddle
::
xpu_place_used
(
place
);
config
.
use_npu
=
paddle
::
npu_place_used
(
place
);
config
.
model_dir
=
FLAGS_book_dirname
+
"/image_classification_resnet.inference.model"
;
...
...
@@ -163,6 +165,7 @@ void MainThreadsWord2Vec(const paddle::PaddlePlace& place) {
NativeConfig
config
=
GetConfig
();
config
.
use_gpu
=
paddle
::
gpu_place_used
(
place
);
config
.
use_xpu
=
paddle
::
xpu_place_used
(
place
);
config
.
use_npu
=
paddle
::
npu_place_used
(
place
);
auto
main_predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
// prepare inputs data and reference results
...
...
@@ -227,6 +230,7 @@ void MainThreadsImageClassification(const paddle::PaddlePlace& place) {
NativeConfig
config
=
GetConfig
();
config
.
use_gpu
=
paddle
::
gpu_place_used
(
place
);
config
.
use_xpu
=
paddle
::
xpu_place_used
(
place
);
config
.
use_npu
=
paddle
::
npu_place_used
(
place
);
config
.
model_dir
=
FLAGS_book_dirname
+
"/image_classification_resnet.inference.model"
;
...
...
@@ -297,6 +301,15 @@ TEST(inference_api_native, image_classification_xpu) {
}
#endif
#ifdef PADDLE_WITH_ASCEND_CL
TEST
(
inference_api_native
,
word2vec_npu
)
{
MainWord2Vec
(
paddle
::
PaddlePlace
::
kNPU
);
}
// TEST(inference_api_native, image_classification_npu) {
// MainImageClassification(paddle::PaddlePlace::kNPU);
// }
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TEST
(
inference_api_native
,
word2vec_gpu
)
{
MainWord2Vec
(
paddle
::
PaddlePlace
::
kGPU
);
...
...
paddle/fluid/inference/api/details/zero_copy_tensor.cc
浏览文件 @
4e3fb219
...
...
@@ -16,6 +16,7 @@
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/enforce.h"
...
...
@@ -150,10 +151,26 @@ void Tensor::CopyFromCpu(const T *data) {
PADDLE_THROW
(
paddle
::
platform
::
errors
::
Unavailable
(
"Can not create tensor with XPU place because paddle is not compiled "
"with XPU."
));
#endif
}
else
if
(
place_
==
PlaceType
::
kNPU
)
{
#ifdef PADDLE_WITH_ASCEND_CL
paddle
::
platform
::
DeviceContextPool
&
pool
=
paddle
::
platform
::
DeviceContextPool
::
Instance
();
paddle
::
platform
::
NPUPlace
npu_place
(
device_
);
auto
*
t_data
=
tensor
->
mutable_data
<
T
>
(
npu_place
);
auto
*
dev_ctx
=
static_cast
<
const
paddle
::
platform
::
NPUDeviceContext
*>
(
pool
.
Get
(
npu_place
));
paddle
::
memory
::
Copy
(
npu_place
,
static_cast
<
void
*>
(
t_data
),
paddle
::
platform
::
CPUPlace
(),
data
,
ele_size
,
dev_ctx
->
stream
());
#else
PADDLE_THROW
(
paddle
::
platform
::
errors
::
Unavailable
(
"Can not create tensor with NPU place because paddle is not compiled "
"with NPU."
));
#endif
}
else
{
PADDLE_THROW
(
paddle
::
platform
::
errors
::
InvalidArgument
(
"The analysis predictor supports CPU, GPU and XPU now."
));
"The analysis predictor supports CPU, GPU
, NPU
and XPU now."
));
}
}
...
...
@@ -212,10 +229,26 @@ void Tensor::CopyToCpu(T *data) {
PADDLE_THROW
(
paddle
::
platform
::
errors
::
Unavailable
(
"Can not create tensor with XPU place because paddle is not compiled "
"with XPU."
));
#endif
}
else
if
(
place_
==
PlaceType
::
kNPU
)
{
#ifdef PADDLE_WITH_ASCEND_CL
paddle
::
platform
::
DeviceContextPool
&
pool
=
paddle
::
platform
::
DeviceContextPool
::
Instance
();
auto
npu_place
=
BOOST_GET_CONST
(
paddle
::
platform
::
NPUPlace
,
t_place
);
auto
*
dev_ctx
=
static_cast
<
const
paddle
::
platform
::
NPUDeviceContext
*>
(
pool
.
Get
(
npu_place
));
paddle
::
memory
::
Copy
(
paddle
::
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
data
),
npu_place
,
t_data
,
ele_num
*
sizeof
(
T
),
dev_ctx
->
stream
());
aclrtSynchronizeStream
(
dev_ctx
->
stream
());
#else
PADDLE_THROW
(
paddle
::
platform
::
errors
::
Unavailable
(
"Can not create tensor with NPU place because paddle is not compiled "
"with NPU."
));
#endif
}
else
{
PADDLE_THROW
(
paddle
::
platform
::
errors
::
InvalidArgument
(
"The analysis predictor supports CPU, GPU and XPU now."
));
"The analysis predictor supports CPU, GPU
, NPU
and XPU now."
));
}
}
template
PD_INFER_DECL
void
Tensor
::
CopyFromCpu
<
float
>(
const
float
*
data
);
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
4e3fb219
...
...
@@ -203,6 +203,12 @@ struct PD_INFER_DECL AnalysisConfig {
const
std
::
string
&
precision
=
"int16"
,
bool
adaptive_seqlen
=
false
);
///
/// \brief Turn on NPU.
///
/// \param device_id device_id the NPU card to use (default is 0).
///
void
EnableNpu
(
int
device_id
=
0
);
///
/// \brief A boolean state telling whether the GPU is turned on.
///
/// \return bool Whether the GPU is turned on.
...
...
@@ -215,6 +221,12 @@ struct PD_INFER_DECL AnalysisConfig {
///
bool
use_xpu
()
const
{
return
use_xpu_
;
}
///
/// \brief A boolean state telling whether the NPU is turned on.
///
/// \return bool Whether the NPU is turned on.
///
bool
use_npu
()
const
{
return
use_npu_
;
}
///
/// \brief Get the GPU device id.
///
/// \return int The GPU device id.
...
...
@@ -227,6 +239,12 @@ struct PD_INFER_DECL AnalysisConfig {
///
int
xpu_device_id
()
const
{
return
xpu_device_id_
;
}
///
/// \brief Get the NPU device id.
///
/// \return int The NPU device id.
///
int
npu_device_id
()
const
{
return
npu_device_id_
;
}
///
/// \brief Get the initial size in MB of the GPU memory pool.
///
/// \return int The initial size in MB of the GPU memory pool.
...
...
@@ -619,11 +637,15 @@ struct PD_INFER_DECL AnalysisConfig {
// GPU related.
bool
use_gpu_
{
false
};
int
gpu_device_id_
{
0
};
int
xpu_device_id_
{
0
};
uint64_t
memory_pool_init_size_mb_
{
100
};
// initial size is 100MB.
bool
thread_local_stream_
{
false
};
bool
use_cudnn_
{
false
};
// NPU related
bool
use_npu_
{
false
};
int
npu_device_id_
{
0
};
// Padding related
bool
use_fc_padding_
{
true
};
...
...
@@ -689,8 +711,9 @@ struct PD_INFER_DECL AnalysisConfig {
Precision
lite_precision_mode_
;
bool
lite_zero_copy_
;
bool
thread_local_stream_
{
false
};
// XPU related.
bool
use_xpu_
{
false
};
int
xpu_device_id_
{
0
};
int
xpu_l3_workspace_size_
;
bool
xpu_locked_
;
bool
xpu_autotune_
;
...
...
paddle/fluid/inference/api/paddle_api.h
浏览文件 @
4e3fb219
...
...
@@ -303,6 +303,7 @@ struct PD_INFER_DECL NativeConfig : public PaddlePredictor::Config {
/// GPU related fields.
bool
use_xpu
{
false
};
bool
use_gpu
{
false
};
bool
use_npu
{
false
};
int
device
{
0
};
float
fraction_of_gpu_memory
{
-
1.
f
};
///< Change to a float in (0,1] if needed.
...
...
paddle/fluid/inference/api/paddle_pass_builder.h
浏览文件 @
4e3fb219
...
...
@@ -144,6 +144,10 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
/// \return A bool variable implying whether we are in xpu mode.
bool
use_xpu
()
const
{
return
use_xpu_
;
}
/// \brief Check if we are using npu.
/// \return A bool variable implying whether we are in npu mode.
bool
use_npu
()
const
{
return
use_npu_
;
}
/// \brief Default destructor.
virtual
~
PassStrategy
()
=
default
;
...
...
@@ -151,6 +155,7 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
/// \cond Protected
bool
use_xpu_
{
false
};
bool
use_gpu_
{
false
};
bool
use_npu_
{
false
};
bool
use_mkldnn_
{
false
};
/// \endcond
};
...
...
@@ -236,7 +241,22 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
/// mode.
class
PD_INFER_DECL
XpuPassStrategy
final
:
public
PassStrategy
{
public:
XpuPassStrategy
()
:
PassStrategy
({})
{}
XpuPassStrategy
()
:
PassStrategy
({})
{
use_xpu_
=
true
;
}
};
/// \class NpuPassStrategy
/// \brief The NPU passes controller, it is used in AnalysisPredictor with NPU
/// mode.
class
PD_INFER_DECL
NpuPassStrategy
final
:
public
PassStrategy
{
public:
NpuPassStrategy
()
:
PassStrategy
({})
{
use_npu_
=
true
;
}
/// \brief Construct by copying another NpuPassStrategy object.
/// \param[in] other The NpuPassStrategy object we want to copy.
explicit
NpuPassStrategy
(
const
NpuPassStrategy
&
other
)
:
PassStrategy
(
other
.
AllPasses
())
{
use_npu_
=
true
;
}
};
/// \brief List of tensorRT subgraph passes.
...
...
paddle/fluid/inference/api/paddle_tensor.h
浏览文件 @
4e3fb219
...
...
@@ -28,7 +28,7 @@ enum DataType {
// TODO(Superjomn) support more data types if needed.
};
enum
class
PlaceType
{
kUNK
=
-
1
,
kCPU
,
kGPU
,
kXPU
};
enum
class
PlaceType
{
kUNK
=
-
1
,
kCPU
,
kGPU
,
kXPU
,
kNPU
};
/// \brief Represents an n-dimensional array of values.
/// The Tensor is used to store the input or output of the network.
...
...
paddle/fluid/inference/capi_exp/pd_config.cc
浏览文件 @
4e3fb219
...
...
@@ -135,11 +135,21 @@ void PD_ConfigEnableXpu(__pd_keep PD_Config* pd_config,
precision
,
adaptive_seqlen
);
}
void
PD_ConfigEnableNpu
(
__pd_keep
PD_Config
*
pd_config
,
int32_t
device_id
)
{
CHECK_AND_CONVERT_PD_CONFIG
;
config
->
EnableNpu
(
device_id
);
}
PD_Bool
PD_ConfigUseXpu
(
__pd_keep
PD_Config
*
pd_config
)
{
CHECK_AND_CONVERT_PD_CONFIG
;
return
config
->
use_xpu
();
}
PD_Bool
PD_ConfigUseNpu
(
__pd_keep
PD_Config
*
pd_config
)
{
CHECK_AND_CONVERT_PD_CONFIG
;
return
config
->
use_npu
();
}
int32_t
PD_ConfigGpuDeviceId
(
__pd_keep
PD_Config
*
pd_config
)
{
CHECK_AND_CONVERT_PD_CONFIG
;
return
config
->
gpu_device_id
();
...
...
@@ -148,6 +158,10 @@ int32_t PD_ConfigXpuDeviceId(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG
;
return
config
->
xpu_device_id
();
}
int32_t
PD_ConfigNpuDeviceId
(
__pd_keep
PD_Config
*
pd_config
)
{
CHECK_AND_CONVERT_PD_CONFIG
;
return
config
->
npu_device_id
();
}
int32_t
PD_ConfigMemoryPoolInitSizeMb
(
__pd_keep
PD_Config
*
pd_config
)
{
CHECK_AND_CONVERT_PD_CONFIG
;
return
config
->
memory_pool_init_size_mb
();
...
...
paddle/fluid/inference/capi_exp/pd_config.h
浏览文件 @
4e3fb219
...
...
@@ -177,6 +177,14 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableXpu(
PD_Bool
autotune
,
const
char
*
autotune_file
,
const
char
*
precision
,
PD_Bool
adaptive_seqlen
);
///
/// \brief Turn on NPU.
///
/// \param[in] pd_onfig config
/// \param[in] device_id device_id the NPU card to use.
///
PADDLE_CAPI_EXPORT
extern
void
PD_ConfigEnableNpu
(
__pd_keep
PD_Config
*
pd_config
,
int32_t
device_id
);
///
/// \brief A boolean state telling whether the XPU is turned on.
///
/// \param[in] pd_onfig config
...
...
@@ -185,6 +193,14 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableXpu(
PADDLE_CAPI_EXPORT
extern
PD_Bool
PD_ConfigUseXpu
(
__pd_keep
PD_Config
*
pd_config
);
///
/// \brief A boolean state telling whether the NPU is turned on.
///
/// \param[in] pd_onfig config
/// \return Whether the NPU is turned on.
///
PADDLE_CAPI_EXPORT
extern
PD_Bool
PD_ConfigUseNpu
(
__pd_keep
PD_Config
*
pd_config
);
///
/// \brief Get the GPU device id.
///
/// \param[in] pd_onfig config
...
...
@@ -201,6 +217,14 @@ PADDLE_CAPI_EXPORT extern int32_t PD_ConfigGpuDeviceId(
PADDLE_CAPI_EXPORT
extern
int32_t
PD_ConfigXpuDeviceId
(
__pd_keep
PD_Config
*
pd_config
);
///
/// \brief Get the NPU device id.
///
/// \param[in] pd_onfig config
/// \return The NPU device id.
///
PADDLE_CAPI_EXPORT
extern
int32_t
PD_ConfigNpuDeviceId
(
__pd_keep
PD_Config
*
pd_config
);
///
/// \brief Get the initial size in MB of the GPU memory pool.
///
/// \param[in] pd_onfig config
...
...
paddle/fluid/inference/goapi/config.go
浏览文件 @
4e3fb219
...
...
@@ -181,6 +181,15 @@ func (config *Config) EnableXpu(l3WorkspaceSize int32, locked bool, autotune boo
cAutotuneFile
,
cPrecision
,
cvtGoBoolToPD
(
adaptiveSeqlen
))
}
///
/// \brief Turn on NPU.
///
/// \param deviceId the NPU card to use.
///
func
(
config
*
Config
)
EnableNpu
(
deviceId
int32
)
{
C
.
PD_ConfigEnableNpu
(
config
.
c
,
C
.
int32_t
(
deviceId
))
}
///
/// \brief A boolean state telling whether the GPU is turned on.
///
...
...
@@ -199,6 +208,15 @@ func (config *Config) UseXpu() bool {
return
cvtPDBoolToGo
(
C
.
PD_ConfigUseXpu
(
config
.
c
))
}
///
/// \brief A boolean state telling whether the NPU is turned on.
///
/// \return bool Whether the NPU is turned on.
///
func
(
config
*
Config
)
UseNpu
()
bool
{
return
cvtPDBoolToGo
(
C
.
PD_ConfigUseNpu
(
config
.
c
))
}
///
/// \brief Get the GPU device id.
///
...
...
@@ -217,6 +235,15 @@ func (config *Config) XpuDeviceId() int32 {
return
int32
(
C
.
PD_ConfigXpuDeviceId
(
config
.
c
))
}
///
/// \brief Get the NPU device id.
///
/// \return int32 The NPU device id.
///
func
(
config
*
Config
)
NpuDeviceId
()
int32
{
return
int32
(
C
.
PD_ConfigNpuDeviceId
(
config
.
c
))
}
///
/// \brief Get the initial size in MB of the GPU memory pool.
///
...
...
paddle/fluid/inference/tests/test_helper.h
浏览文件 @
4e3fb219
...
...
@@ -34,6 +34,9 @@ bool gpu_place_used(const paddle::PaddlePlace& place) {
bool
xpu_place_used
(
const
paddle
::
PaddlePlace
&
place
)
{
return
place
==
paddle
::
PaddlePlace
::
kXPU
;
}
bool
npu_place_used
(
const
paddle
::
PaddlePlace
&
place
)
{
return
place
==
paddle
::
PaddlePlace
::
kNPU
;
}
bool
cpu_place_used
(
const
paddle
::
PaddlePlace
&
place
)
{
return
place
==
paddle
::
PaddlePlace
::
kCPU
;
}
...
...
paddle/fluid/pybind/inference_api.cc
浏览文件 @
4e3fb219
...
...
@@ -379,7 +379,8 @@ void BindPaddlePlace(py::module *m) {
.
value
(
"UNK"
,
PaddlePlace
::
kUNK
)
.
value
(
"CPU"
,
PaddlePlace
::
kCPU
)
.
value
(
"GPU"
,
PaddlePlace
::
kGPU
)
.
value
(
"XPU"
,
PaddlePlace
::
kXPU
);
.
value
(
"XPU"
,
PaddlePlace
::
kXPU
)
.
value
(
"NPU"
,
PaddlePlace
::
kNPU
);
}
void
BindPaddlePredictor
(
py
::
module
*
m
)
{
...
...
@@ -409,6 +410,7 @@ void BindNativeConfig(py::module *m) {
.
def
(
py
::
init
<>
())
.
def_readwrite
(
"use_gpu"
,
&
NativeConfig
::
use_gpu
)
.
def_readwrite
(
"use_xpu"
,
&
NativeConfig
::
use_xpu
)
.
def_readwrite
(
"use_npu"
,
&
NativeConfig
::
use_npu
)
.
def_readwrite
(
"device"
,
&
NativeConfig
::
device
)
.
def_readwrite
(
"fraction_of_gpu_memory"
,
&
NativeConfig
::
fraction_of_gpu_memory
)
...
...
@@ -471,11 +473,14 @@ void BindAnalysisConfig(py::module *m) {
py
::
arg
(
"locked"
)
=
false
,
py
::
arg
(
"autotune"
)
=
true
,
py
::
arg
(
"autotune_file"
)
=
""
,
py
::
arg
(
"precision"
)
=
"int16"
,
py
::
arg
(
"adaptive_seqlen"
)
=
false
)
.
def
(
"enable_npu"
,
&
AnalysisConfig
::
EnableNpu
,
py
::
arg
(
"device_id"
)
=
0
)
.
def
(
"disable_gpu"
,
&
AnalysisConfig
::
DisableGpu
)
.
def
(
"use_gpu"
,
&
AnalysisConfig
::
use_gpu
)
.
def
(
"use_xpu"
,
&
AnalysisConfig
::
use_xpu
)
.
def
(
"use_npu"
,
&
AnalysisConfig
::
use_npu
)
.
def
(
"gpu_device_id"
,
&
AnalysisConfig
::
gpu_device_id
)
.
def
(
"xpu_device_id"
,
&
AnalysisConfig
::
xpu_device_id
)
.
def
(
"npu_device_id"
,
&
AnalysisConfig
::
npu_device_id
)
.
def
(
"memory_pool_init_size_mb"
,
&
AnalysisConfig
::
memory_pool_init_size_mb
)
.
def
(
"fraction_of_gpu_memory_for_pool"
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录