Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
c1e5a393
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c1e5a393
编写于
1月 25, 2022
作者:
W
Wilber
提交者:
GitHub
1月 25, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[PTEN] Add xpu context. (#39098)
上级
b2a7261d
变更
28
隐藏空白更改
内联
并排
Showing
28 changed file
with
958 addition
and
448 deletion
+958
-448
paddle/fluid/framework/pten_utils.h
paddle/fluid/framework/pten_utils.h
+7
-0
paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
...le/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
+2
-2
paddle/fluid/operators/dropout_op_xpu.cc
paddle/fluid/operators/dropout_op_xpu.cc
+2
-2
paddle/fluid/operators/reshape_op.cc
paddle/fluid/operators/reshape_op.cc
+7
-3
paddle/fluid/operators/softmax_op_xpu.cc
paddle/fluid/operators/softmax_op_xpu.cc
+2
-2
paddle/fluid/platform/CMakeLists.txt
paddle/fluid/platform/CMakeLists.txt
+3
-0
paddle/fluid/platform/device/xpu/CMakeLists.txt
paddle/fluid/platform/device/xpu/CMakeLists.txt
+1
-1
paddle/fluid/platform/device/xpu/enforce_xpu.h
paddle/fluid/platform/device/xpu/enforce_xpu.h
+8
-149
paddle/fluid/platform/device/xpu/xpu_header.h
paddle/fluid/platform/device/xpu/xpu_header.h
+1
-38
paddle/fluid/platform/device/xpu/xpu_info.cc
paddle/fluid/platform/device/xpu/xpu_info.cc
+17
-103
paddle/fluid/platform/device/xpu/xpu_info.h
paddle/fluid/platform/device/xpu/xpu_info.h
+3
-24
paddle/fluid/platform/device/xpu/xpu_op_list.cc
paddle/fluid/platform/device/xpu/xpu_op_list.cc
+8
-6
paddle/fluid/platform/device/xpu/xpu_op_list.h
paddle/fluid/platform/device/xpu/xpu_op_list.h
+3
-3
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+3
-41
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+2
-28
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+13
-10
paddle/pten/backends/CMakeLists.txt
paddle/pten/backends/CMakeLists.txt
+9
-1
paddle/pten/backends/cpu/cpu_context.cc
paddle/pten/backends/cpu/cpu_context.cc
+9
-13
paddle/pten/backends/xpu/CMakeLists.txt
paddle/pten/backends/xpu/CMakeLists.txt
+2
-0
paddle/pten/backends/xpu/enforce_xpu.h
paddle/pten/backends/xpu/enforce_xpu.h
+194
-0
paddle/pten/backends/xpu/forwards.h
paddle/pten/backends/xpu/forwards.h
+28
-0
paddle/pten/backends/xpu/xpu_context.cc
paddle/pten/backends/xpu/xpu_context.cc
+169
-0
paddle/pten/backends/xpu/xpu_context.h
paddle/pten/backends/xpu/xpu_context.h
+53
-6
paddle/pten/backends/xpu/xpu_header.h
paddle/pten/backends/xpu/xpu_header.h
+56
-0
paddle/pten/backends/xpu/xpu_info.cc
paddle/pten/backends/xpu/xpu_info.cc
+199
-0
paddle/pten/backends/xpu/xpu_info.h
paddle/pten/backends/xpu/xpu_info.h
+93
-0
paddle/pten/core/device_context.cc
paddle/pten/core/device_context.cc
+40
-11
paddle/pten/core/device_context.h
paddle/pten/core/device_context.h
+24
-5
未找到文件。
paddle/fluid/framework/pten_utils.h
浏览文件 @
c1e5a393
...
...
@@ -86,5 +86,12 @@ struct ConvertToPtenContext<platform::CPUDeviceContext> {
using
TYPE
=
pten
::
CPUContext
;
};
#ifdef PADDLE_WITH_XPU
template
<
>
struct
ConvertToPtenContext
<
platform
::
XPUDeviceContext
>
{
using
TYPE
=
pten
::
XPUContext
;
};
#endif
}
// namespace framework
}
// namespace paddle
paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
浏览文件 @
c1e5a393
...
...
@@ -94,11 +94,11 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
inverse_scale
=
0.0
;
}
paddle
::
platform
::
XPUVersion
version
=
dev_ctx
.
xpu_version
();
auto
version
=
dev_ctx
.
xpu_version
();
framework
::
Tensor
float_x
;
framework
::
Tensor
float_out
;
if
(
std
::
is_same
<
T
,
paddle
::
platform
::
float16
>::
value
&&
(
version
==
p
addle
::
platform
::
XPUVersion
::
XPU1
))
{
(
version
==
p
ten
::
backends
::
xpu
::
XPUVersion
::
XPU1
))
{
float_x
.
mutable_data
<
MPDType
>
(
dev_ctx
.
GetPlace
(),
x
->
numel
()
*
sizeof
(
MPDType
));
float_out
.
mutable_data
<
MPDType
>
(
dev_ctx
.
GetPlace
(),
...
...
paddle/fluid/operators/dropout_op_xpu.cc
浏览文件 @
c1e5a393
...
...
@@ -107,8 +107,8 @@ class DropoutGradXPUKernel : public framework::OpKernel<T> {
return
;
}
paddle
::
platform
::
XPUVersion
version
=
dev_ctx
.
xpu_version
();
if
(
version
==
p
addle
::
platform
::
XPUVersion
::
XPU1
)
{
auto
version
=
dev_ctx
.
xpu_version
();
if
(
version
==
p
ten
::
backends
::
xpu
::
XPUVersion
::
XPU1
)
{
xpu
::
ctx_guard
RAII_GUARD
(
dev_ctx
.
x_context
());
XPUType
*
mask_new
=
RAII_GUARD
.
alloc_l3_or_gm
<
XPUType
>
(
mask
->
numel
());
float
scale
=
...
...
paddle/fluid/operators/reshape_op.cc
浏览文件 @
c1e5a393
...
...
@@ -448,7 +448,8 @@ class ReshapeKernel {
#ifdef PADDLE_WITH_XPU
if
(
platform
::
is_xpu_place
(
ctx
.
GetPlace
()))
{
auto
&
dev_ctx
=
ctx
.
device_context
<
platform
::
XPUDeviceContext
>
();
pten
::
ReshapeKernel
(
dev_ctx
,
*
pt_x
.
get
(),
pt_scalar_shape
,
pt_out
);
pten
::
ReshapeKernel
(
static_cast
<
const
pten
::
XPUContext
&>
(
dev_ctx
),
*
pt_x
.
get
(),
pt_scalar_shape
,
pt_out
);
}
#endif
// non-inplace need move all result from pt_out to out, inplace need set
...
...
@@ -485,7 +486,8 @@ class ReshapeGradKernel {
#ifdef PADDLE_WITH_XPU
if
(
platform
::
is_xpu_place
(
ctx
.
GetPlace
()))
{
auto
&
dev_ctx
=
ctx
.
device_context
<
platform
::
XPUDeviceContext
>
();
pten
::
ReshapeGradKernel
(
dev_ctx
,
*
pt_d_out
.
get
(),
pt_d_x
.
get
());
pten
::
ReshapeGradKernel
(
static_cast
<
const
pten
::
XPUContext
&>
(
dev_ctx
),
*
pt_d_out
.
get
(),
pt_d_x
.
get
());
}
#endif
}
...
...
@@ -516,7 +518,9 @@ class ReshapeDoubleGradKernel {
#ifdef PADDLE_WITH_XPU
if
(
platform
::
is_xpu_place
(
ctx
.
GetPlace
()))
{
auto
&
dev_ctx
=
ctx
.
device_context
<
platform
::
XPUDeviceContext
>
();
pten
::
ReshapeDoubleGradKernel
(
dev_ctx
,
*
pt_dd_x
.
get
(),
pt_dd_out
.
get
());
pten
::
ReshapeDoubleGradKernel
(
static_cast
<
const
pten
::
XPUContext
&>
(
dev_ctx
),
*
pt_dd_x
.
get
(),
pt_dd_out
.
get
());
}
#endif
}
...
...
paddle/fluid/operators/softmax_op_xpu.cc
浏览文件 @
c1e5a393
...
...
@@ -45,8 +45,8 @@ class SoftmaxXPUKernel : public framework::OpKernel<T> {
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
int
r
=
XPU_SUCCESS
;
paddle
::
platform
::
XPUVersion
version
=
dev_ctx
.
xpu_version
();
if
(
version
==
p
addle
::
platform
::
XPUVersion
::
XPU1
)
{
auto
version
=
dev_ctx
.
xpu_version
();
if
(
version
==
p
ten
::
backends
::
xpu
::
XPUVersion
::
XPU1
)
{
xpu
::
ctx_guard
RAII_GUARD
(
dev_ctx
.
x_context
());
XPUType
*
clip_x_data_l3
=
RAII_GUARD
.
alloc_l3_or_gm
<
XPUType
>
(
x
->
numel
());
r
=
xpu
::
clip_v2
(
dev_ctx
.
x_context
(),
...
...
paddle/fluid/platform/CMakeLists.txt
浏览文件 @
c1e5a393
...
...
@@ -121,6 +121,9 @@ cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc DEPS boost)
cc_library
(
device_context SRCS device_context.cc init.cc DEPS simple_threadpool malloc xxhash
${
STREAM_CALLBACK_DEPS
}
place pten_place eigen3 stringpiece cpu_helper cpu_info framework_proto
${
IPU_CTX_DEPS
}
${
GPU_CTX_DEPS
}
${
NPU_CTX_DEPS
}
${
MKLDNN_CTX_DEPS
}
${
dgc_deps
}
dlpack cudnn_workspace_helper
${
XPU_CTX_DEPS
}
${
MLU_CTX_DEPS
}
cpu_context
)
if
(
WITH_XPU
)
target_link_libraries
(
device_context xpu_context
)
endif
()
cc_library
(
collective_helper SRCS collective_helper.cc gen_comm_id_helper.cc DEPS framework_proto device_context enforce
)
if
(
WITH_ASCEND_CL
)
...
...
paddle/fluid/platform/device/xpu/CMakeLists.txt
浏览文件 @
c1e5a393
...
...
@@ -4,7 +4,7 @@ endif()
set
(
XPU_CTX_DEPS xpulib ssl crypto rt z resolv dl
)
cc_library
(
xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib device_context place
)
cc_library
(
xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib device_context place
pten_xpu_info
)
cc_library
(
xpu_op_list SRCS xpu_op_list.cc DEPS gflags glog enforce xpulib device_context
)
add_subdirectory
(
tests
)
paddle/fluid/platform/device/xpu/enforce_xpu.h
浏览文件 @
c1e5a393
...
...
@@ -15,177 +15,36 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
#include "paddle/fluid/platform/enforce.h"
#include "
xpu/bkcl
.h"
#include "
paddle/pten/backends/xpu/enforce_xpu
.h"
namespace
paddle
{
namespace
platform
{
// Note: XPU runtime api return int, not XPUError_t
inline
const
char
*
xpuGetErrorString
(
int
stat
)
{
switch
(
stat
)
{
case
XPU_SUCCESS
:
return
"Success"
;
case
XPUERR_INVALID_DEVICE
:
return
"Invalid XPU device"
;
case
XPUERR_UNINIT
:
return
"XPU runtime not properly inited"
;
case
XPUERR_NOMEM
:
return
"Device memory not enough"
;
case
XPUERR_NOCPUMEM
:
return
"CPU memory not enough"
;
case
XPUERR_INVALID_PARAM
:
return
"Invalid parameter"
;
case
XPUERR_NOXPUFUNC
:
return
"Cannot get XPU Func"
;
case
XPUERR_LDSO
:
return
"Error loading dynamic library"
;
case
XPUERR_LDSYM
:
return
"Error loading func from dynamic library"
;
case
XPUERR_SIMULATOR
:
return
"Error from XPU Simulator"
;
case
XPUERR_NOSUPPORT
:
return
"Operation not supported"
;
case
XPUERR_ABNORMAL
:
return
"Device abnormal due to previous error"
;
case
XPUERR_KEXCEPTION
:
return
"Exception in kernel execution"
;
case
XPUERR_TIMEOUT
:
return
"Kernel execution timed out"
;
case
XPUERR_BUSY
:
return
"Resource busy"
;
case
XPUERR_USEAFCLOSE
:
return
"Use a stream after closed"
;
case
XPUERR_UCECC
:
return
"Uncorrectable ECC"
;
case
XPUERR_OVERHEAT
:
return
"Overheat"
;
case
XPUERR_UNEXPECT
:
return
"Execution error, reach unexpected control flow"
;
case
XPUERR_DEVRESET
:
return
"Device is being reset, try again later"
;
case
XPUERR_HWEXCEPTION
:
return
"Hardware module exception"
;
case
XPUERR_HBM_INIT
:
return
"Error init HBM"
;
case
XPUERR_DEVINIT
:
return
"Error init device"
;
case
XPUERR_PEERRESET
:
return
"Device is being reset, try again later"
;
case
XPUERR_MAXDEV
:
return
"Device count exceed limit"
;
case
XPUERR_NOIOC
:
return
"Unknown IOCTL command"
;
case
XPUERR_DMATIMEOUT
:
return
"DMA timed out, a reboot maybe needed"
;
case
XPUERR_DMAABORT
:
return
"DMA aborted due to error, possibly wrong address or hardware "
"state"
;
case
XPUERR_MCUUNINIT
:
return
"Firmware not initialized"
;
case
XPUERR_OLDFW
:
return
"Firmware version too old (<15), please update."
;
case
XPUERR_PCIE
:
return
"Error in PCIE"
;
case
XPUERR_FAULT
:
return
"Error copy between kernel and user space"
;
case
XPUERR_INTERRUPTED
:
return
"Execution interrupted by user"
;
default:
return
"unkonwn error"
;
}
return
pten
::
backends
::
xpu
::
xpuGetErrorString
(
stat
);
}
inline
const
char
*
bkclGetErrorString
(
BKCLResult_t
stat
)
{
switch
(
stat
)
{
case
BKCL_SUCCESS
:
return
"BKCL_SUCCESS"
;
case
BKCL_INVALID_ARGUMENT
:
return
"BKCL_INVALID_ARGUMENT"
;
case
BKCL_RUNTIME_ERROR
:
return
"BKCL_RUNTIME_ERROR"
;
case
BKCL_SYSTEM_ERROR
:
return
"BKCL_SYSTEM_ERROR"
;
case
BKCL_INTERNAL_ERROR
:
return
"BKCL_INTERNAL_ERROR"
;
default:
return
"Unknown BKCL status"
;
}
return
pten
::
backends
::
xpu
::
bkclGetErrorString
(
stat
);
}
inline
const
char
*
xdnnGetErrorString
(
int
stat
)
{
switch
(
stat
)
{
case
xpu
::
Error_t
::
SUCCESS
:
return
"XDNN_SUCCESS"
;
case
xpu
::
Error_t
::
INVALID_PARAM
:
return
"XDNN_INVALID_PARAM"
;
case
xpu
::
Error_t
::
RUNTIME_ERROR
:
return
"XDNN_RUNTIME_ERROR"
;
case
xpu
::
Error_t
::
NO_ENOUGH_WORKSPACE
:
return
"XDNN_NO_ENOUGH_WORKSPACE"
;
case
xpu
::
Error_t
::
NOT_IMPLEMENT
:
return
"XDNN_NOT_IMPLEMENT"
;
default:
return
"Unknown XDNN status"
;
}
return
pten
::
backends
::
xpu
::
xdnnGetErrorString
(
stat
);
}
inline
std
::
string
build_xpu_error_msg
(
int
stat
)
{
std
::
string
msg
(
"XPU Error <"
+
std
::
to_string
(
stat
)
+
">, "
);
return
msg
+
xpuGetErrorString
(
stat
)
+
" "
;
return
pten
::
backends
::
xpu
::
build_xpu_error_msg
(
stat
);
}
inline
std
::
string
build_xpu_error_msg
(
BKCLResult_t
stat
)
{
std
::
string
msg
(
"BKCL Error, "
);
return
msg
+
bkclGetErrorString
(
stat
)
+
" "
;
return
pten
::
backends
::
xpu
::
build_xpu_error_msg
(
stat
);
}
inline
std
::
string
build_xpu_xdnn_error_msg
(
int
stat
,
std
::
string
msg
)
{
return
msg
+
" XDNN Error, "
+
xdnnGetErrorString
(
stat
)
+
" "
;
return
pten
::
backends
::
xpu
::
build_xpu_xdnn_error_msg
(
stat
,
msg
)
;
}
namespace
details
{
template
<
typename
T
>
struct
ExternalApiType
{};
#define DEFINE_EXTERNAL_API_TYPE(type, success_value) \
template <> \
struct ExternalApiType<type> { \
using Type = type; \
static constexpr Type kSuccess = success_value; \
}
DEFINE_EXTERNAL_API_TYPE
(
int
,
XPU_SUCCESS
);
DEFINE_EXTERNAL_API_TYPE
(
BKCLResult_t
,
BKCL_SUCCESS
);
#undef DEFINE_EXTERNAL_API_TYPE
}
// namespace details
#define PADDLE_ENFORCE_XPU_SUCCESS(COND) \
do { \
auto __cond__ = (COND); \
using __XPU_STATUS_TYPE__ = decltype(__cond__); \
constexpr auto __success_type__ = \
::paddle::platform::details::ExternalApiType< \
__XPU_STATUS_TYPE__>::kSuccess; \
if (UNLIKELY(__cond__ != __success_type__)) { \
auto __summary__ = paddle::platform::errors::External( \
::paddle::platform::build_xpu_error_msg(__cond__)); \
__THROW_ERROR_INTERNAL__(__summary__); \
} \
} while (0)
#define PADDLE_ENFORCE_XDNN_SUCCESS(COND, MSG) \
do { \
auto __cond__ = (COND); \
if (UNLIKELY(__cond__ != xpu::Error_t::SUCCESS)) { \
auto __summary__ = paddle::platform::errors::External( \
::paddle::platform::build_xpu_xdnn_error_msg(__cond__, MSG)); \
__THROW_ERROR_INTERNAL__(__summary__); \
} \
} while (0)
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/xpu/xpu_header.h
浏览文件 @
c1e5a393
...
...
@@ -15,42 +15,5 @@ limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU
#include <map>
#include <string>
#include <unordered_map>
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "xpu/runtime.h"
#include "xpu/runtime_ex.h"
#include "xpu/xdnn.h"
namespace
xpu
=
baidu
::
xpu
::
api
;
static
std
::
map
<
int
,
std
::
string
>
XPUAPIErrorMsg
=
{
{
xpu
::
Error_t
::
SUCCESS
,
"xpu api success"
},
{
xpu
::
Error_t
::
INVALID_PARAM
,
"xpu api invalid param"
},
{
xpu
::
Error_t
::
RUNTIME_ERROR
,
"xpu api runtime error"
},
{
xpu
::
Error_t
::
NO_ENOUGH_WORKSPACE
,
"xpu api no enough workspace"
}};
template
<
typename
T
>
class
XPUTypeTrait
{
public:
using
Type
=
T
;
};
template
<
>
class
XPUTypeTrait
<
paddle
::
platform
::
float16
>
{
public:
using
Type
=
float16
;
};
template
<
>
class
XPUTypeTrait
<
paddle
::
platform
::
bfloat16
>
{
public:
using
Type
=
bfloat16
;
};
#include "paddle/pten/backends/xpu/xpu_header.h"
#endif
paddle/fluid/platform/device/xpu/xpu_info.cc
浏览文件 @
c1e5a393
...
...
@@ -14,22 +14,14 @@ limitations under the License. */
#include <cstdlib>
#include <string>
#include "gflags/gflags.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/device/xpu/enforce_xpu.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/split.h"
PADDLE_DEFINE_EXPORTED_string
(
selected_xpus
,
""
,
"A list of device ids separated by comma, like: 0,1,2,3. "
"This option is useful when doing multi process training and "
"each process have only one device (XPU). If you want to use "
"all visible devices, set this to empty string. NOTE: the "
"reason of doing this is that we want to use P2P communication"
"between XPU devices, use XPU_VISIBLE_DEVICES can only use"
"share-memory only."
);
#include "paddle/pten/backends/xpu/xpu_info.h"
namespace
paddle
{
namespace
platform
{
...
...
@@ -37,101 +29,40 @@ namespace platform {
/**************************** Version Management **************************/
//! Get the version of XPU Driver
int
GetDriverVersion
()
{
uint32_t
driver_version_major
=
0
;
uint32_t
driver_version_minor
=
0
;
PADDLE_ENFORCE_XPU_SUCCESS
(
xpu_get_driver_version
(
&
driver_version_major
,
&
driver_version_minor
));
int
driver_version
=
driver_version_major
*
10
+
driver_version_minor
;
return
driver_version
;
}
int
GetDriverVersion
()
{
return
pten
::
backends
::
xpu
::
GetDriverVersion
();
}
//! Get the version of XPU Runtime
int
GetRuntimeVersion
()
{
uint32_t
rumtime_version_major
=
0
;
uint32_t
rumtime_version_minor
=
0
;
PADDLE_ENFORCE_XPU_SUCCESS
(
xpu_get_runtime_version
(
&
rumtime_version_major
,
&
rumtime_version_minor
));
int
runtime_version
=
rumtime_version_major
*
10
+
rumtime_version_minor
;
return
runtime_version
;
}
int
GetRuntimeVersion
()
{
return
pten
::
backends
::
xpu
::
GetRuntimeVersion
();
}
/**************************** Device Management **************************/
static
int
GetDeviceCountImpl
()
{
const
auto
*
xpu_visible_devices
=
std
::
getenv
(
"XPU_VISIBLE_DEVICES"
);
if
(
xpu_visible_devices
!=
nullptr
)
{
std
::
string
xpu_visible_devices_str
(
xpu_visible_devices
);
if
(
std
::
all_of
(
xpu_visible_devices_str
.
begin
(),
xpu_visible_devices_str
.
end
(),
[](
char
ch
)
{
return
ch
==
' '
;
}))
{
VLOG
(
2
)
<<
"XPU_VISIBLE_DEVICES is set to be empty. No XPU detected."
;
return
0
;
}
}
int
count
=
0
;
PADDLE_ENFORCE_XPU_SUCCESS
(
xpu_device_count
(
&
count
));
return
count
;
}
int
GetXPUDeviceCount
()
{
static
auto
dev_cnt
=
GetDeviceCountImpl
();
return
dev_cnt
;
}
int
GetXPUDeviceCount
()
{
return
pten
::
backends
::
xpu
::
GetXPUDeviceCount
();
}
int
GetXPUCurrentDeviceId
()
{
int
dev_id
;
PADDLE_ENFORCE_XPU_SUCCESS
(
xpu_current_device
(
&
dev_id
));
if
(
dev_id
>=
64
)
{
// if dev_id >= 64, the device is a simulator device, -64 to get real dev_id
dev_id
-=
64
;
}
return
dev_id
;
return
pten
::
backends
::
xpu
::
GetXPUCurrentDeviceId
();
}
void
SetXPUDeviceId
(
int
id
)
{
PADDLE_ENFORCE_LT
(
id
,
GetXPUDeviceCount
(),
platform
::
errors
::
InvalidArgument
(
"id must less than XPU count"
));
PADDLE_ENFORCE_XPU_SUCCESS
(
xpu_set_device
(
id
));
}
void
SetXPUDeviceId
(
int
id
)
{
pten
::
backends
::
xpu
::
SetXPUDeviceId
(
id
);
}
//! Get a list of device ids from environment variable or use all.
std
::
vector
<
int
>
GetXPUSelectedDevices
()
{
// use user specified XPUs in single-node multi-process mode.
std
::
vector
<
int
>
devices
;
if
(
!
FLAGS_selected_xpus
.
empty
())
{
auto
devices_str
=
paddle
::
string
::
Split
(
FLAGS_selected_xpus
,
','
);
for
(
auto
id
:
devices_str
)
{
devices
.
push_back
(
atoi
(
id
.
c_str
()));
}
}
else
{
int
count
=
GetXPUDeviceCount
();
for
(
int
i
=
0
;
i
<
count
;
++
i
)
{
devices
.
push_back
(
i
);
}
}
return
devices
;
return
pten
::
backends
::
xpu
::
GetXPUSelectedDevices
();
}
/**************************** Memory Management **************************/
void
MemcpySyncH2D
(
void
*
dst
,
const
void
*
src
,
size_t
count
,
const
platform
::
XPUPlace
&
dst_place
)
{
platform
::
XPUDeviceGuard
guard
(
dst_place
.
device
);
PADDLE_ENFORCE_XPU_SUCCESS
(
xpu_memcpy
(
dst
,
src
,
count
,
XPUMemcpyKind
::
XPU_HOST_TO_DEVICE
));
pten
::
backends
::
xpu
::
MemcpySyncH2D
(
dst
,
src
,
count
,
dst_place
);
}
void
MemcpySyncD2H
(
void
*
dst
,
const
void
*
src
,
size_t
count
,
const
platform
::
XPUPlace
&
src_place
)
{
platform
::
XPUDeviceGuard
guard
(
src_place
.
device
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
GetByPlace
(
src_place
);
dev_ctx
->
Wait
();
PADDLE_ENFORCE_XPU_SUCCESS
(
xpu_memcpy
(
dst
,
src
,
count
,
XPUMemcpyKind
::
XPU_DEVICE_TO_HOST
));
pten
::
backends
::
xpu
::
MemcpySyncD2H
(
dst
,
src
,
count
,
src_place
,
*
dev_ctx
);
}
// if src.device == dst.device and you need sync , after call this function,
...
...
@@ -139,33 +70,16 @@ void MemcpySyncD2H(void* dst, const void* src, size_t count,
void
MemcpySyncD2D
(
void
*
dst
,
const
platform
::
XPUPlace
&
dst_place
,
const
void
*
src
,
const
platform
::
XPUPlace
&
src_place
,
size_t
count
)
{
int
dev_id
=
GetXPUCurrentDeviceId
();
if
(
dst_place
.
device
==
dev_id
&&
src_place
.
device
==
dev_id
)
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
GetByPlace
(
src_place
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
xpu
::
copy
(
dev_ctx
->
x_context
(),
static_cast
<
const
int8_t
*>
(
src
),
static_cast
<
int8_t
*>
(
dst
),
count
),
"copy "
);
}
else
{
PADDLE_ENFORCE_XPU_SUCCESS
(
xpu_memcpy_peer
(
dst_place
.
device
,
dst
,
src_place
.
device
,
src
,
count
));
}
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
GetByPlace
(
src_place
);
pten
::
backends
::
xpu
::
MemcpySyncD2D
(
dst
,
dst_place
,
src
,
src_place
,
count
,
*
dev_ctx
);
}
/**************************** Others **************************/
XPUVersion
get_xpu_version
(
int
dev_id
)
{
uint64_t
v
=
0
;
PADDLE_ENFORCE_XPU_SUCCESS
(
xpu_device_get_attr
(
&
v
,
XPUATTR_MODEL
,
dev_id
));
if
(
v
==
K100
||
v
==
K200
)
{
VLOG
(
1
)
<<
"KUNLUN device "
<<
dev_id
<<
" is XPU1
\n
"
;
return
XPU1
;
}
else
{
VLOG
(
1
)
<<
"KUNLUN device "
<<
dev_id
<<
" is XPU2
\n
"
;
return
XPU2
;
}
pten
::
backends
::
xpu
::
XPUVersion
get_xpu_version
(
int
dev_id
)
{
return
pten
::
backends
::
xpu
::
get_xpu_version
(
dev_id
);
}
}
// namespace platform
...
...
paddle/fluid/platform/device/xpu/xpu_info.h
浏览文件 @
c1e5a393
...
...
@@ -13,6 +13,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include <vector>
#include "paddle/fluid/platform/place.h"
#include "paddle/pten/backends/xpu/xpu_info.h"
namespace
paddle
{
namespace
platform
{
...
...
@@ -50,31 +51,9 @@ void MemcpySyncD2D(void *dst, const platform::XPUPlace &dst_place,
const
void
*
src
,
const
platform
::
XPUPlace
&
src_place
,
size_t
count
);
class
XPUDeviceGuard
{
public:
explicit
inline
XPUDeviceGuard
(
int
dev_id
)
{
int
prev_id
=
platform
::
GetXPUCurrentDeviceId
();
if
(
prev_id
!=
dev_id
)
{
prev_id_
=
prev_id
;
platform
::
SetXPUDeviceId
(
dev_id
);
}
}
using
XPUDeviceGuard
=
pten
::
backends
::
xpu
::
XPUDeviceGuard
;
inline
~
XPUDeviceGuard
()
{
if
(
prev_id_
!=
-
1
)
{
platform
::
SetXPUDeviceId
(
prev_id_
);
}
}
XPUDeviceGuard
(
const
XPUDeviceGuard
&
o
)
=
delete
;
XPUDeviceGuard
&
operator
=
(
const
XPUDeviceGuard
&
o
)
=
delete
;
private:
int
prev_id_
{
-
1
};
};
enum
XPUVersion
{
XPU1
,
XPU2
};
XPUVersion
get_xpu_version
(
int
dev_id
);
pten
::
backends
::
xpu
::
XPUVersion
get_xpu_version
(
int
dev_id
);
}
// namespace platform
}
// namespace paddle
...
...
paddle/fluid/platform/device/xpu/xpu_op_list.cc
浏览文件 @
c1e5a393
...
...
@@ -24,7 +24,7 @@ namespace platform {
bool
is_xpu_support_op
(
const
std
::
string
&
op_name
,
const
pOpKernelType
&
type
)
{
auto
&
ops
=
get_kl1_ops
();
auto
v
=
get_xpu_version
(
type
.
place_
.
device
);
if
(
v
==
XPU2
)
{
if
(
v
==
pten
::
backends
::
xpu
::
XPUVersion
::
XPU2
)
{
ops
=
get_kl2_ops
();
}
...
...
@@ -74,10 +74,11 @@ bool is_in_xpu_black_list(const std::string& op_name) {
return
false
;
}
std
::
vector
<
vartype
::
Type
>
get_xpu_op_support_type
(
const
std
::
string
&
op_name
,
XPUVersion
version
)
{
std
::
vector
<
vartype
::
Type
>
get_xpu_op_support_type
(
const
std
::
string
&
op_name
,
pten
::
backends
::
xpu
::
XPUVersion
version
)
{
std
::
vector
<
vartype
::
Type
>
res
;
auto
&
ops
=
version
==
XPU1
?
get_kl1_ops
()
:
get_kl2_ops
();
auto
&
ops
=
version
==
pten
::
backends
::
xpu
::
XPUVersion
::
XPU1
?
get_kl1_ops
()
:
get_kl2_ops
();
if
(
ops
.
find
(
op_name
)
!=
ops
.
end
())
{
XPUKernelSet
&
type_set
=
ops
[
op_name
];
for
(
auto
&
item
:
type_set
)
{
...
...
@@ -87,9 +88,10 @@ std::vector<vartype::Type> get_xpu_op_support_type(const std::string& op_name,
return
res
;
}
XPUOpListMap
get_xpu_op_list
(
XPUVersion
version
)
{
XPUOpListMap
get_xpu_op_list
(
pten
::
backends
::
xpu
::
XPUVersion
version
)
{
XPUOpListMap
res
;
auto
&
ops
=
version
==
XPU1
?
get_kl1_ops
()
:
get_kl2_ops
();
auto
&
ops
=
version
==
pten
::
backends
::
xpu
::
XPUVersion
::
XPU1
?
get_kl1_ops
()
:
get_kl2_ops
();
for
(
auto
&
op
:
ops
)
{
std
::
vector
<
vartype
::
Type
>
op_vartypes
;
for
(
auto
&
item
:
op
.
second
)
{
...
...
paddle/fluid/platform/device/xpu/xpu_op_list.h
浏览文件 @
c1e5a393
...
...
@@ -27,9 +27,9 @@ using XPUOpListMap =
bool
is_xpu_support_op
(
const
std
::
string
&
op_name
,
const
pOpKernelType
&
type
);
bool
is_in_xpu_black_list
(
const
std
::
string
&
op_name
);
std
::
vector
<
vartype
::
Type
>
get_xpu_op_support_type
(
const
std
::
string
&
op_name
,
XPUVersion
version
);
XPUOpListMap
get_xpu_op_list
(
XPUVersion
version
);
std
::
vector
<
vartype
::
Type
>
get_xpu_op_support_type
(
const
std
::
string
&
op_name
,
pten
::
backends
::
xpu
::
XPUVersion
version
);
XPUOpListMap
get_xpu_op_list
(
pten
::
backends
::
xpu
::
XPUVersion
version
);
}
// namespace platform
}
// namespace paddle
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
c1e5a393
...
...
@@ -246,52 +246,14 @@ IPUDeviceContext::~IPUDeviceContext() {}
#endif
#ifdef PADDLE_WITH_XPU
XPUDeviceContext
::
XPUDeviceContext
()
{
context_
=
xpu
::
create_context
();
xpu_version_
=
get_xpu_version
(
place_
.
device
);
}
XPUDeviceContext
::
XPUDeviceContext
()
:
pten
::
XPUContext
()
{}
XPUDeviceContext
::~
XPUDeviceContext
()
{}
XPUDeviceContext
::
XPUDeviceContext
(
XPUPlace
place
)
:
place_
(
place
)
{
platform
::
XPUDeviceGuard
guard
(
place
.
device
);
XPUDeviceContext
::
XPUDeviceContext
(
XPUPlace
place
)
:
pten
::
XPUContext
(
place
)
{
LOG_FIRST_N
(
WARNING
,
1
)
<<
"Please NOTE: xpu device: "
<<
static_cast
<
int
>
(
place_
.
device
);
context_
=
xpu
::
create_context
();
const
int
MAX_XPU_NUM
=
16
;
static
void
*
l3ptrs
[
MAX_XPU_NUM
]
=
{
nullptr
};
int
l3_size
=
13.5
*
1024
*
1024
;
if
(
std
::
getenv
(
"XPU_PADDLE_L3_SIZE"
)
!=
nullptr
)
{
l3_size
=
atoi
(
std
::
getenv
(
"XPU_PADDLE_L3_SIZE"
));
}
auto
selected_xpus
=
GetXPUSelectedDevices
();
for
(
unsigned
int
i
=
0
;
i
<
selected_xpus
.
size
();
i
++
)
{
if
(
place
.
device
==
selected_xpus
[
i
])
{
if
(
l3ptrs
[
place
.
device
]
==
nullptr
)
{
xpu_malloc
(
static_cast
<
void
**>
(
&
l3ptrs
[
place
.
device
]),
l3_size
,
XPU_MEM_L3
);
}
if
(
l3ptrs
[
place
.
device
]
!=
nullptr
)
{
context_
->
_l3_mgr
.
set
(
l3ptrs
[
place
.
device
],
l3_size
);
VLOG
(
3
)
<<
"xpu place "
<<
place
.
device
<<
" set l3 size "
<<
l3_size
;
}
break
;
}
}
<<
static_cast
<
int
>
(
place
.
device
);
}
void
XPUDeviceContext
::
Wait
()
const
{
platform
::
SetXPUDeviceId
(
place_
.
device
);
xpu_wait
(
context_
->
xpu_stream
);
}
Place
XPUDeviceContext
::
GetPlace
()
const
{
return
place_
;
}
xpu
::
Context
*
XPUDeviceContext
::
x_context
()
const
{
return
context_
;
}
#endif
#ifdef PADDLE_WITH_ASCEND_CL
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
c1e5a393
...
...
@@ -78,6 +78,7 @@ struct GpuDevice;
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
#include "paddle/fluid/platform/device/xpu/xpu_info.h"
#include "paddle/pten/backends/xpu/xpu_context.h"
#endif
#ifdef PADDLE_WITH_ASCEND_CL
...
...
@@ -171,39 +172,12 @@ struct DefaultDeviceContextType<platform::MLUPlace>;
#ifdef PADDLE_WITH_XPU
namespace
xpu
=
baidu
::
xpu
::
api
;
class
XPUDeviceContext
:
public
Device
Context
{
class
XPUDeviceContext
:
public
pten
::
XPU
Context
{
public:
XPUDeviceContext
();
explicit
XPUDeviceContext
(
XPUPlace
place
);
virtual
~
XPUDeviceContext
();
Eigen
::
DefaultDevice
*
eigen_device
()
const
{
return
nullptr
;
}
XPUVersion
xpu_version
()
const
{
return
xpu_version_
;
}
Place
GetPlace
()
const
override
;
xpu
::
Context
*
x_context
()
const
;
/*! \brief Wait for all operations completion in the stream. */
void
Wait
()
const
override
;
#ifdef PADDLE_WITH_XPU_BKCL
/*! \brief Return bkcl context. */
BKCLContext_t
bkcl_context
()
const
{
return
bkcl_context_
;
}
/*! \brief Set bkcl context. */
void
set_bkcl_context
(
BKCLContext_t
context
)
{
bkcl_context_
=
context
;
}
#endif
private:
XPUPlace
place_
;
XPUVersion
xpu_version_
;
xpu
::
Context
*
context_
;
#ifdef PADDLE_WITH_XPU_BKCL
BKCLContext_t
bkcl_context_
;
#endif
// Need to be the same with other DeviceContext,
// Eventhough eigen_device_ is not used in XPU
std
::
unique_ptr
<
Eigen
::
DefaultDevice
>
eigen_device_
;
DISABLE_COPY_AND_ASSIGN
(
XPUDeviceContext
);
};
template
<
>
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
c1e5a393
...
...
@@ -1756,27 +1756,30 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"__repr__"
,
string
::
to_string
<
const
platform
::
XPUPlace
&>
)
.
def
(
"__str__"
,
string
::
to_string
<
const
platform
::
XPUPlace
&>
);
#ifdef PADDLE_WITH_XPU
py
::
enum_
<
p
latform
::
XPUVersion
>
(
m
,
"XPUVersion"
,
py
::
arithmetic
())
.
value
(
"XPU1"
,
p
latform
::
XPUVersion
::
XPU1
)
.
value
(
"XPU2"
,
p
latform
::
XPUVersion
::
XPU2
)
py
::
enum_
<
p
ten
::
backends
::
xpu
::
XPUVersion
>
(
m
,
"XPUVersion"
,
py
::
arithmetic
())
.
value
(
"XPU1"
,
p
ten
::
backends
::
xpu
::
XPUVersion
::
XPU1
)
.
value
(
"XPU2"
,
p
ten
::
backends
::
xpu
::
XPUVersion
::
XPU2
)
.
export_values
();
m
.
def
(
"get_xpu_device_count"
,
platform
::
GetXPUDeviceCount
);
m
.
def
(
"get_xpu_device_version"
,
[](
int
device_id
)
{
return
platform
::
get_xpu_version
(
device_id
);
});
m
.
def
(
"get_xpu_device_op_support_types"
,
[](
const
std
::
string
&
op_name
,
platform
::
XPUVersion
version
)
{
return
platform
::
get_xpu_op_support_type
(
op_name
,
version
);
});
m
.
def
(
"get_xpu_device_op_list"
,
[](
platform
::
XPUVersion
version
)
{
m
.
def
(
"get_xpu_device_op_support_types"
,
[](
const
std
::
string
&
op_name
,
pten
::
backends
::
xpu
::
XPUVersion
version
)
{
return
platform
::
get_xpu_op_support_type
(
op_name
,
version
);
});
m
.
def
(
"get_xpu_device_op_list"
,
[](
pten
::
backends
::
xpu
::
XPUVersion
version
)
{
return
platform
::
get_xpu_op_list
(
version
);
});
m
.
def
(
"is_float16_supported"
,
[](
const
platform
::
XPUPlace
&
place
)
->
bool
{
// XPUs with Compute Capability > xpu2 support float16 and bfloat16
return
platform
::
get_xpu_version
(
place
.
device
)
>
platform
::
XPUVersion
::
XPU1
;
return
platform
::
get_xpu_version
(
place
.
device
)
>
pten
::
backends
::
xpu
::
XPUVersion
::
XPU1
;
});
m
.
def
(
"is_bfloat16_supported"
,
[](
const
platform
::
XPUPlace
&
place
)
->
bool
{
// XPUs with Compute Capability > xpu2 support float16 and bfloat16
return
platform
::
get_xpu_version
(
place
.
device
)
>
platform
::
XPUVersion
::
XPU1
;
return
platform
::
get_xpu_version
(
place
.
device
)
>
pten
::
backends
::
xpu
::
XPUVersion
::
XPU1
;
});
#endif
...
...
paddle/pten/backends/CMakeLists.txt
浏览文件 @
c1e5a393
...
...
@@ -2,4 +2,12 @@ add_subdirectory(dynload)
add_subdirectory
(
cpu
)
cc_library
(
pten_context SRCS all_context.cc DEPS device_context
)
if
(
WITH_XPU
)
add_subdirectory
(
xpu
)
endif
()
cc_library
(
pten_context SRCS all_context.cc DEPS device_context cpu_context
)
if
(
WITH_XPU
)
add_dependencies
(
pten_context xpu_context
)
endif
()
paddle/pten/backends/cpu/cpu_context.cc
浏览文件 @
c1e5a393
...
...
@@ -18,16 +18,11 @@
// NOTE: The paddle framework should add WITH_EIGEN option to support compile
// without eigen.
#include "paddle/pten/core/device_context.h"
#include "unsupported/Eigen/CXX11/Tensor"
namespace
pten
{
struct
CPUContext
::
CPUImpl
{
Eigen
::
DefaultDevice
*
device_
{
nullptr
};
CPUContextResource
res_
;
CPUPlace
place_
;
CPUImpl
()
{
device_
=
new
Eigen
::
DefaultDevice
();
}
// Users need to manage external resources.
...
...
@@ -36,7 +31,7 @@ struct CPUContext::CPUImpl {
}
~
CPUImpl
()
{
if
(
res_
.
device
==
nullptr
)
{
if
(
res_
.
device
==
nullptr
&&
device_
!=
nullptr
)
{
delete
device_
;
device_
=
nullptr
;
}
...
...
@@ -56,27 +51,28 @@ struct CPUContext::CPUImpl {
}
Place
GetPlace
()
const
{
return
place_
;
}
Eigen
::
DefaultDevice
*
device_
{
nullptr
};
CPUContextResource
res_
;
CPUPlace
place_
;
};
CPUContext
::
CPUContext
()
:
DeviceContext
()
,
cpu_impl_
(
nullptr
)
{
CPUContext
::
CPUContext
()
:
DeviceContext
()
{
cpu_impl_
=
std
::
make_unique
<
CPUImpl
>
();
}
CPUContext
::
CPUContext
(
const
CPUContext
&
other
)
:
DeviceContext
(),
cpu_impl_
(
nullptr
)
{
CPUContext
::
CPUContext
(
const
CPUContext
&
other
)
:
DeviceContext
()
{
cpu_impl_
=
std
::
make_unique
<
CPUImpl
>
();
cpu_impl_
->
SetEigenDevice
(
other
.
eigen_device
());
}
CPUContext
::
CPUContext
(
CPUContext
&&
other
)
:
DeviceContext
(),
cpu_impl_
(
nullptr
)
{
CPUContext
::
CPUContext
(
CPUContext
&&
other
)
:
DeviceContext
()
{
cpu_impl_
=
std
::
move
(
other
.
cpu_impl_
);
}
CPUContext
::~
CPUContext
()
=
default
;
CPUContext
::
CPUContext
(
const
CPUContextResource
&
ctx_res
)
:
DeviceContext
(),
cpu_impl_
(
nullptr
)
{
CPUContext
::
CPUContext
(
const
CPUContextResource
&
ctx_res
)
:
DeviceContext
()
{
cpu_impl_
=
std
::
make_unique
<
CPUImpl
>
(
ctx_res
);
}
...
...
paddle/pten/backends/xpu/CMakeLists.txt
0 → 100644
浏览文件 @
c1e5a393
cc_library
(
pten_xpu_info SRCS xpu_info.cc DEPS enforce xpulib pten_place
)
cc_library
(
xpu_context SRCS xpu_context.cc DEPS pten_device_context pten_xpu_info
)
paddle/pten/backends/xpu/enforce_xpu.h
0 → 100644
浏览文件 @
c1e5a393
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/pten/backends/xpu/xpu_header.h"
#include "xpu/bkcl.h"
#include "paddle/fluid/platform/enforce.h"
namespace
pten
{
namespace
backends
{
namespace
xpu
{
// Maps an XPU runtime status code to a human-readable, statically allocated
// description.
//
// Note: the XPU runtime API returns plain int, not XPUError_t, so the
// parameter is an int.
//
// @param stat Status code returned by an XPU runtime call.
// @return Pointer to a string literal; never null. Codes without a dedicated
//         entry map to "unknown error".
inline const char* xpuGetErrorString(int stat) {
  switch (stat) {
    case XPU_SUCCESS:
      return "Success";
    case XPUERR_INVALID_DEVICE:
      return "Invalid XPU device";
    case XPUERR_UNINIT:
      return "XPU runtime not properly inited";
    case XPUERR_NOMEM:
      return "Device memory not enough";
    case XPUERR_NOCPUMEM:
      return "CPU memory not enough";
    case XPUERR_INVALID_PARAM:
      return "Invalid parameter";
    case XPUERR_NOXPUFUNC:
      return "Cannot get XPU Func";
    case XPUERR_LDSO:
      return "Error loading dynamic library";
    case XPUERR_LDSYM:
      return "Error loading func from dynamic library";
    case XPUERR_SIMULATOR:
      return "Error from XPU Simulator";
    case XPUERR_NOSUPPORT:
      return "Operation not supported";
    case XPUERR_ABNORMAL:
      return "Device abnormal due to previous error";
    case XPUERR_KEXCEPTION:
      return "Exception in kernel execution";
    case XPUERR_TIMEOUT:
      return "Kernel execution timed out";
    case XPUERR_BUSY:
      return "Resource busy";
    case XPUERR_USEAFCLOSE:
      return "Use a stream after closed";
    case XPUERR_UCECC:
      return "Uncorrectable ECC";
    case XPUERR_OVERHEAT:
      return "Overheat";
    case XPUERR_UNEXPECT:
      return "Execution error, reach unexpected control flow";
    case XPUERR_DEVRESET:
      return "Device is being reset, try again later";
    case XPUERR_HWEXCEPTION:
      return "Hardware module exception";
    case XPUERR_HBM_INIT:
      return "Error init HBM";
    case XPUERR_DEVINIT:
      return "Error init device";
    case XPUERR_PEERRESET:
      return "Device is being reset, try again later";
    case XPUERR_MAXDEV:
      return "Device count exceed limit";
    case XPUERR_NOIOC:
      return "Unknown IOCTL command";
    case XPUERR_DMATIMEOUT:
      return "DMA timed out, a reboot maybe needed";
    case XPUERR_DMAABORT:
      return "DMA aborted due to error, possibly wrong address or hardware "
             "state";
    case XPUERR_MCUUNINIT:
      return "Firmware not initialized";
    case XPUERR_OLDFW:
      return "Firmware version too old (<15), please update.";
    case XPUERR_PCIE:
      return "Error in PCIE";
    case XPUERR_FAULT:
      return "Error copy between kernel and user space";
    case XPUERR_INTERRUPTED:
      return "Execution interrupted by user";
    default:
      // Previously misspelled as "unkonwn error".
      return "unknown error";
  }
}
// Returns the symbolic name of a BKCL result code as a string literal.
// Any value without a dedicated entry yields "Unknown BKCL status".
inline const char* bkclGetErrorString(BKCLResult_t stat) {
  if (stat == BKCL_SUCCESS) {
    return "BKCL_SUCCESS";
  }
  if (stat == BKCL_INVALID_ARGUMENT) {
    return "BKCL_INVALID_ARGUMENT";
  }
  if (stat == BKCL_RUNTIME_ERROR) {
    return "BKCL_RUNTIME_ERROR";
  }
  if (stat == BKCL_SYSTEM_ERROR) {
    return "BKCL_SYSTEM_ERROR";
  }
  if (stat == BKCL_INTERNAL_ERROR) {
    return "BKCL_INTERNAL_ERROR";
  }
  return "Unknown BKCL status";
}
inline
const
char
*
xdnnGetErrorString
(
int
stat
)
{
switch
(
stat
)
{
case
baidu
::
xpu
::
api
::
Error_t
::
SUCCESS
:
return
"XDNN_SUCCESS"
;
case
baidu
::
xpu
::
api
::
Error_t
::
INVALID_PARAM
:
return
"XDNN_INVALID_PARAM"
;
case
baidu
::
xpu
::
api
::
Error_t
::
RUNTIME_ERROR
:
return
"XDNN_RUNTIME_ERROR"
;
case
baidu
::
xpu
::
api
::
Error_t
::
NO_ENOUGH_WORKSPACE
:
return
"XDNN_NO_ENOUGH_WORKSPACE"
;
case
baidu
::
xpu
::
api
::
Error_t
::
NOT_IMPLEMENT
:
return
"XDNN_NOT_IMPLEMENT"
;
default:
return
"Unknown XDNN status"
;
}
}
// Formats an XPU runtime status as "XPU Error <code>, <description> ".
inline std::string build_xpu_error_msg(int stat) {
  std::string text = "XPU Error <";
  text += std::to_string(stat);
  text += ">, ";
  text += xpuGetErrorString(stat);
  text += " ";
  return text;
}
// Formats a BKCL result as "BKCL Error, <symbolic name> ".
inline std::string build_xpu_error_msg(BKCLResult_t stat) {
  std::string text = "BKCL Error, ";
  text += bkclGetErrorString(stat);
  text += " ";
  return text;
}
// Appends " XDNN Error, <symbolic name> " to the caller-supplied prefix.
//
// @param stat Status code returned by a baidu::xpu::api call.
// @param msg  Caller context placed in front of the error text.
inline std::string build_xpu_xdnn_error_msg(int stat, std::string msg) {
  std::string text = msg;
  text += " XDNN Error, ";
  text += xdnnGetErrorString(stat);
  text += " ";
  return text;
}
namespace details {

// Trait describing an external API status type. The primary template is
// intentionally empty; only the specializations generated below provide
// `Type` and `kSuccess`, so using an unregistered status type fails to
// compile.
template <typename T>
struct ExternalApiType {};

// Generates a specialization of ExternalApiType for `type` whose `kSuccess`
// member holds the value that means "no error" for that API.
#define DEFINE_EXTERNAL_API_TYPE(type, success_value) \
  template <> \
  struct ExternalApiType<type> { \
    using Type = type; \
    static constexpr Type kSuccess = success_value; \
  }

// Status type int: success is XPU_SUCCESS.
DEFINE_EXTERNAL_API_TYPE(int, XPU_SUCCESS);
// Status type BKCLResult_t: success is BKCL_SUCCESS.
DEFINE_EXTERNAL_API_TYPE(BKCLResult_t, BKCL_SUCCESS);

// The helper macro is only needed for the registrations above.
#undef DEFINE_EXTERNAL_API_TYPE

}  // namespace details
// Evaluates COND exactly once and throws an External error if the result
// differs from the success value registered for its type in
// details::ExternalApiType (int or BKCLResult_t). The error text comes from
// the build_xpu_error_msg overload matching the status type.
#define PADDLE_ENFORCE_XPU_SUCCESS(COND) \
  do { \
    auto __cond__ = (COND); \
    using __XPU_STATUS_TYPE__ = decltype(__cond__); \
    constexpr auto __success_type__ = \
        ::pten::backends::xpu::details::ExternalApiType< \
            __XPU_STATUS_TYPE__>::kSuccess; \
    if (UNLIKELY(__cond__ != __success_type__)) { \
      auto __summary__ = paddle::platform::errors::External( \
          ::pten::backends::xpu::build_xpu_error_msg(__cond__)); \
      __THROW_ERROR_INTERNAL__(__summary__); \
    } \
  } while (0)
// Evaluates COND exactly once and throws an External error if the result is
// not baidu::xpu::api::Error_t::SUCCESS. MSG is passed to
// build_xpu_xdnn_error_msg and appears in front of the XDNN error text.
#define PADDLE_ENFORCE_XDNN_SUCCESS(COND, MSG) \
  do { \
    auto __cond__ = (COND); \
    if (UNLIKELY(__cond__ != baidu::xpu::api::Error_t::SUCCESS)) { \
      auto __summary__ = paddle::platform::errors::External( \
          ::pten::backends::xpu::build_xpu_xdnn_error_msg(__cond__, MSG)); \
      __THROW_ERROR_INTERNAL__(__summary__); \
    } \
  } while (0)
}
// namespace xpu
}
// namespace backends
}
// namespace pten
paddle/pten/backends/xpu/forwards.h
0 → 100644
浏览文件 @
c1e5a393
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// Forward-declares.
#pragma once
// Forward declaration of xpu context.
namespace baidu {
namespace xpu {
namespace api {

// Opaque context type; only pointers to it are used by includers of this
// header, so the full definition is not required here.
struct Context;

// Opaque handle type for a BKCL communicator.
using BKCLContext_t = void*;

}  // namespace api
}  // namespace xpu
}  // namespace baidu
paddle/pten/backends/xpu/xpu_context.cc
0 → 100644
浏览文件 @
c1e5a393
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/pten/backends/xpu/xpu_context.h"
#include <memory>
#include "paddle/pten/api/ext/exception.h"
#include "xpu/runtime.h"
#include "xpu/runtime_ex.h"
#include "xpu/xdnn.h"
namespace
xpu
=
baidu
::
xpu
::
api
;
namespace
pten
{
// Private implementation of XPUContext.
//
// Holds the xpu::Context used for computation, the place it is bound to, the
// detected XPU version and a non-owning BKCL communicator handle.
//
// Ownership rule used throughout: context_ is owned by this object (and
// destroyed in the destructor) only while res_.context is null. Once an
// external context has been registered in res_, the caller is responsible for
// its lifetime.
struct XPUContext::XPUImpl {
  // Attaches an L3 cache buffer to context_ when place_'s device is one of
  // the selected XPUs. One buffer per device id is allocated lazily and kept
  // for the lifetime of the process in a function-local static table.
  // The size defaults to 13.5 MiB and can be overridden through the
  // XPU_PADDLE_L3_SIZE environment variable.
  void SetL3Cache() {
    const int MAX_XPU_NUM = 16;
    static void* l3ptrs[MAX_XPU_NUM] = {nullptr};

    int l3_size = static_cast<int>(13.5 * 1024 * 1024);
    // Query the environment once instead of twice.
    const char* l3_size_env = std::getenv("XPU_PADDLE_L3_SIZE");
    if (l3_size_env != nullptr) {
      l3_size = atoi(l3_size_env);
    }

    // NOTE(review): widened to int so that comparisons, indexing and logging
    // all treat the id as a number even if GetDeviceId() returns a narrow
    // integer type -- confirm against Place.
    const int dev_id = place_.GetDeviceId();

    auto selected_xpus = backends::xpu::GetXPUSelectedDevices();
    for (const int selected : selected_xpus) {
      if (selected != dev_id) {
        continue;
      }
      // The table only has MAX_XPU_NUM slots; never index outside of it.
      if (dev_id < 0 || dev_id >= MAX_XPU_NUM) {
        break;
      }
      if (l3ptrs[dev_id] == nullptr) {
        // On failure the slot stays null and L3 is simply not configured.
        xpu_malloc(static_cast<void**>(&l3ptrs[dev_id]), l3_size, XPU_MEM_L3);
      }
      // context_ may be null when constructed from an empty
      // XPUContextResource; do not dereference it in that case.
      if (l3ptrs[dev_id] != nullptr && context_ != nullptr) {
        context_->_l3_mgr.set(l3ptrs[dev_id], l3_size);
        VLOG(3) << "xpu place " << dev_id << " set l3 size " << l3_size;
      }
      break;
    }
  }

  // Creates and owns a new context for the default-constructed place.
  // L3 cache is not configured on this path.
  XPUImpl() {
    context_ = xpu::create_context();
    xpu_version_ = backends::xpu::get_xpu_version(place_.device);
  }

  // Creates and owns a new context on the device of `place`, then configures
  // its L3 cache. The device is switched only for the duration of the
  // constructor.
  explicit XPUImpl(XPUPlace place) : place_(place) {
    backends::xpu::XPUDeviceGuard guard(place_.GetDeviceId());

    LOG_FIRST_N(WARNING, 1) << "Please NOTE: xpu device: "
                            << static_cast<int>(place_.device);

    context_ = xpu::create_context();
    xpu_version_ = backends::xpu::get_xpu_version(place_.device);
    SetL3Cache();
  }

  // Users need to manage external resources: the context inside `ctx_res` is
  // borrowed and is never destroyed by this object.
  explicit XPUImpl(const XPUContextResource& ctx_res,
                   const XPUPlace& place = XPUPlace(0))
      : res_(ctx_res), place_(place) {
    context_ = res_.context;
    xpu_version_ = backends::xpu::get_xpu_version(place_.device);
    SetL3Cache();
  }

  // Destroys context_ only if it was created by this object.
  ~XPUImpl() {
    if (res_.context == nullptr && context_ != nullptr) {
      xpu::destroy_context(context_);
      context_ = nullptr;
    }
  }

  Place GetPlace() const { return place_; }

  backends::xpu::XPUVersion GetXpuVersion() const { return xpu_version_; }

  // Returns the active context; fails the PD_CHECK if none is set.
  xpu::Context* GetXContext() const {
    PD_CHECK(context_ != nullptr, "the xpu context is nullptr.");
    return context_;
  }

  xpu::BKCLContext_t GetBkclContext() const { return bkcl_context_; }

  // Selects place_'s device and blocks until the context's stream is idle.
  void Wait() const {
    backends::xpu::SetXPUDeviceId(place_.GetDeviceId());
    PD_CHECK(context_ != nullptr, "the xpu context is nullptr.");
    xpu_wait(context_->xpu_stream);
  }

  // Replaces the active context with an externally managed one.
  // A null argument is ignored.
  void SetXContext(xpu::Context* context) {
    if (context == nullptr) {
      return;
    }
    // A context created by this object would otherwise be leaked here:
    // after res_.context is set the destructor no longer destroys anything.
    if (res_.context == nullptr && context_ != nullptr &&
        context_ != context) {
      xpu::destroy_context(context_);
    }
    res_.context = context;
    context_ = context;
  }

  void SetBkclContext(xpu::BKCLContext_t context) { bkcl_context_ = context; }

  // Externally supplied resources; res_.context != nullptr means context_ is
  // borrowed.
  XPUContextResource res_;
  XPUPlace place_;
  backends::xpu::XPUVersion xpu_version_;
  xpu::Context* context_{nullptr};
  // NOTE: Distributed communicator, distributed framework manages its
  // resources, XPUContext only holds references.
  xpu::BKCLContext_t bkcl_context_{nullptr};
};
// Default constructor: builds a default XPUImpl, which creates its own
// context.
XPUContext::XPUContext()
    : DeviceContext(), impl_(std::make_unique<XPUImpl>()) {}
// Builds the implementation for the given place.
XPUContext::XPUContext(const XPUPlace& place)
    : impl_(std::make_unique<XPUImpl>(place)) {}
// Copy constructor: the new object gets its own default XPUImpl and is then
// pointed at the same xpu::Context and BKCL handle as `other`. The underlying
// resources are shared, not duplicated.
XPUContext::XPUContext(const XPUContext& other)
    : DeviceContext(), impl_(std::make_unique<XPUImpl>()) {
  impl_->SetXContext(other.x_context());
  impl_->SetBkclContext(other.bkcl_context());
}
// Move constructor: takes over the implementation object of `other`, leaving
// `other` without one. The DeviceContext base is default-constructed.
XPUContext::XPUContext(XPUContext&& other)
    : DeviceContext(), impl_(std::move(other.impl_)) {}
// Defaulted here, in the translation unit where XPUImpl is defined, so the
// std::unique_ptr<XPUImpl> member can destroy a complete type.
XPUContext::~XPUContext() = default;
// Builds the implementation around externally managed resources.
XPUContext::XPUContext(const XPUContextResource& ctx_res)
    : DeviceContext(), impl_(std::make_unique<XPUImpl>(ctx_res)) {}
// Returns the place stored in the implementation object.
Place XPUContext::GetPlace() const { return impl_->GetPlace(); }
// Returns the XPU version recorded when the implementation was constructed.
backends::xpu::XPUVersion XPUContext::xpu_version() const {
  return impl_->GetXpuVersion();
}
// Returns the active xpu::Context. The implementation checks (PD_CHECK) that
// the context is not null.
xpu::Context* XPUContext::x_context() const { return impl_->GetXContext(); }
// Returns the BKCL handle previously stored with set_bkcl_context(), or the
// implementation's initial null handle if none was set.
xpu::BKCLContext_t XPUContext::bkcl_context() const {
  return impl_->GetBkclContext();
}
// Delegates to XPUImpl::Wait(), which selects this context's device and
// waits on the context's stream.
void XPUContext::Wait() const { impl_->Wait(); }
// Replaces the active xpu::Context with an externally managed one.
// A null `context` is ignored by the implementation.
void XPUContext::set_x_context(xpu::Context* context) {
  impl_->SetXContext(context);
}
// Stores the BKCL handle; the handle is only referenced, not owned.
void XPUContext::set_bkcl_context(xpu::BKCLContext_t context) {
  impl_->SetBkclContext(context);
}
}
// namespace pten
paddle/pten/backends/xpu/xpu_context.h
浏览文件 @
c1e5a393
...
...
@@ -14,13 +14,60 @@ limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU
#include <memory>
#include "paddle/pten/backends/xpu/forwards.h"
#include "paddle/pten/common/place.h"
#include "paddle/pten/core/device_context.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device_context.h"
#include "paddle/pten/backends/xpu/xpu_header.h"
#include "paddle/pten/backends/xpu/xpu_info.h"
namespace
xpu
=
baidu
::
xpu
::
api
;
namespace
pten
{
using
XPUContext
=
paddle
::
platform
::
XPUDeviceContext
;
}
// namespace pten
#endif // PADDLE_WITH_XPU
// Bundle of externally managed resources that can be handed to an XPUContext.
struct XPUContextResource {
  // Externally created context; null means "none supplied".
  xpu::Context* context{nullptr};
};
// Device context for XPU devices. All state lives in a private XPUImpl
// object held through impl_.
class XPUContext : public DeviceContext {
 public:
  // NOTE: DeviceContext hold resources. Used in training scenarios.
  XPUContext();

  // Creates a context for the given place.
  explicit XPUContext(const XPUPlace&);

  // NOTE: Share the same underlying resources, please ensure that resources are
  // not released.
  XPUContext(const XPUContext&);

  // Takes over the implementation object of the source context.
  XPUContext(XPUContext&&);

  virtual ~XPUContext();

  // Returns the place this context is bound to.
  Place GetPlace() const override;

  // Returns the XPU version recorded for this context.
  backends::xpu::XPUVersion xpu_version() const;

  // Returns the underlying xpu::Context.
  xpu::Context* x_context() const;

  // Return bkcl context.
  xpu::BKCLContext_t bkcl_context() const;

  // Wait for all operations completion in the stream.
  void Wait() const override;

 public:
  // NOTE: External users manage resources. Used in inference scenarios.
  explicit XPUContext(const XPUContextResource&);

  // Replaces the underlying xpu::Context with an externally managed one.
  void set_x_context(xpu::Context*);

  // Stores an externally managed BKCL handle.
  void set_bkcl_context(xpu::BKCLContext_t context);

 private:
  // Defined in xpu_context.cc.
  struct XPUImpl;
  std::unique_ptr<XPUImpl> impl_;
};
}
// namespace pten
paddle/pten/backends/xpu/xpu_header.h
0 → 100644
浏览文件 @
c1e5a393
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU
#include <map>
#include <string>
#include <unordered_map>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/pten/common/bfloat16.h"
#include "paddle/pten/common/float16.h"
#include "xpu/runtime.h"
#include "xpu/runtime_ex.h"
#include "xpu/xdnn.h"
namespace
xpu
=
baidu
::
xpu
::
api
;
// Lookup table from xpu::Error_t status values to short descriptions.
// Declared `static` in a header, so every translation unit that includes this
// header gets its own copy of the map.
static std::map<int, std::string> XPUAPIErrorMsg = {
    {xpu::Error_t::SUCCESS, "xpu api success"},
    {xpu::Error_t::INVALID_PARAM, "xpu api invalid param"},
    {xpu::Error_t::RUNTIME_ERROR, "xpu api runtime error"},
    {xpu::Error_t::NO_ENOUGH_WORKSPACE, "xpu api no enough workspace"}};
template
<
typename
T
>
class
XPUTypeTrait
{
public:
using
Type
=
T
;
};
// Specialization for pten::dtype::float16.
// NOTE(review): `float16` resolves to a type of that name visible at global
// scope -- presumably the half-precision type from the XPU SDK headers
// included above; confirm.
template <>
class XPUTypeTrait<pten::dtype::float16> {
 public:
  using Type = float16;
};
// Specialization for pten::dtype::bfloat16.
// NOTE(review): `bfloat16` resolves to a type of that name visible at global
// scope -- presumably provided by the XPU SDK headers included above; confirm.
template <>
class XPUTypeTrait<pten::dtype::bfloat16> {
 public:
  using Type = bfloat16;
};
#endif
paddle/pten/backends/xpu/xpu_info.cc
0 → 100644
浏览文件 @
c1e5a393
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/backends/xpu/xpu_info.h"
#include <algorithm>
#include <cstdlib>
#include <string>
#include "paddle/pten/backends/xpu/enforce_xpu.h"
#include "paddle/pten/backends/xpu/xpu_context.h"
#include "paddle/pten/backends/xpu/xpu_header.h"
#include "paddle/pten/common/place.h"
// TODO(wilber): The pten computing library requires a component to manage
// flags.
#include "paddle/fluid/platform/flags.h"
// Flag `selected_xpus`: comma-separated XPU device ids to use; empty means
// all visible devices. Read by GetXPUSelectedDevices().
// The adjacent string literals are concatenated by the compiler, so each
// fragment must end with a space to avoid running words together.
PADDLE_DEFINE_EXPORTED_string(
    selected_xpus,
    "",
    "A list of device ids separated by comma, like: 0,1,2,3. "
    "This option is useful when doing multi process training and "
    "each process have only one device (XPU). If you want to use "
    "all visible devices, set this to empty string. NOTE: the "
    "reason of doing this is that we want to use P2P communication "
    "between XPU devices, use XPU_VISIBLE_DEVICES can only use "
    "share-memory only.");
namespace
pten
{
class
XPUContext
;
namespace
backends
{
namespace
xpu
{
/**************************** Version Management **************************/
//! Get the version of XPU Driver, encoded as major * 10 + minor.
int GetDriverVersion() {
  uint32_t major = 0;
  uint32_t minor = 0;
  PADDLE_ENFORCE_XPU_SUCCESS(xpu_get_driver_version(&major, &minor));
  return major * 10 + minor;
}
//! Get the version of XPU Runtime, encoded as major * 10 + minor.
int GetRuntimeVersion() {
  uint32_t major = 0;
  uint32_t minor = 0;
  PADDLE_ENFORCE_XPU_SUCCESS(xpu_get_runtime_version(&major, &minor));
  return major * 10 + minor;
}
/**************************** Device Management **************************/
// Queries the number of XPU devices from the runtime.
// If XPU_VISIBLE_DEVICES is set and consists solely of spaces (including the
// empty string), no device is considered available and 0 is returned without
// querying the runtime.
static int GetDeviceCountImpl() {
  const char* visible_env = std::getenv("XPU_VISIBLE_DEVICES");
  if (visible_env != nullptr) {
    const std::string visible(visible_env);
    // No character other than ' ' present.
    if (visible.find_first_not_of(' ') == std::string::npos) {
      VLOG(2) << "XPU_VISIBLE_DEVICES is set to be empty. No XPU detected.";
      return 0;
    }
  }

  int device_total = 0;
  PADDLE_ENFORCE_XPU_SUCCESS(xpu_device_count(&device_total));
  return device_total;
}
// Returns the XPU device count. The value is computed by GetDeviceCountImpl()
// the first time this function is called and cached in a function-local
// static for all later calls.
int GetXPUDeviceCount() {
  static auto dev_cnt = GetDeviceCountImpl();
  return dev_cnt;
}
// Returns the id of the currently selected XPU device.
int GetXPUCurrentDeviceId() {
  int reported_id = 0;
  PADDLE_ENFORCE_XPU_SUCCESS(xpu_current_device(&reported_id));
  // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id
  return reported_id >= 64 ? reported_id - 64 : reported_id;
}
// Selects XPU device `id` for subsequent runtime calls.
// Raises InvalidArgument if `id` is not less than GetXPUDeviceCount(), and an
// External error if xpu_set_device fails. No lower-bound check is performed
// here.
void SetXPUDeviceId(int id) {
  PADDLE_ENFORCE_LT(id,
                    GetXPUDeviceCount(),
                    paddle::platform::errors::InvalidArgument(
                        "id must less than XPU count"));
  PADDLE_ENFORCE_XPU_SUCCESS(xpu_set_device(id));
}
// Splits `original` on `separator` and returns the non-empty pieces in order.
// Consecutive, leading and trailing separators therefore produce no entries.
static inline std::vector<std::string> Split(std::string const& original,
                                             char separator) {
  std::vector<std::string> pieces;
  std::string::size_type begin = 0;
  while (begin <= original.size()) {
    std::string::size_type end = original.find(separator, begin);
    if (end == std::string::npos) {
      end = original.size();
    }
    if (end > begin) {
      pieces.push_back(original.substr(begin, end - begin));
    }
    begin = end + 1;
  }
  return pieces;
}
//! Get a list of device ids from the `selected_xpus` flag or use all.
//
// When FLAGS_selected_xpus is non-empty (user specified XPUs in single-node
// multi-process mode) its comma-separated entries are converted with atoi.
// Otherwise the ids 0 .. GetXPUDeviceCount() - 1 are returned.
std::vector<int> GetXPUSelectedDevices() {
  std::vector<int> devices;
  if (!FLAGS_selected_xpus.empty()) {
    const auto devices_str = Split(FLAGS_selected_xpus, ',');
    devices.reserve(devices_str.size());
    // Iterate by reference: no need to copy every token.
    for (const auto& id : devices_str) {
      devices.push_back(atoi(id.c_str()));
    }
  } else {
    const int count = GetXPUDeviceCount();
    if (count > 0) {
      devices.reserve(count);
    }
    for (int i = 0; i < count; ++i) {
      devices.push_back(i);
    }
  }
  return devices;
}
/**************************** Memory Management **************************/
// Copies `count` bytes from host memory `src` to device memory `dst`.
// The device of `dst_place` is made current for the duration of the call
// (restored by XPUDeviceGuard on exit). Throws if xpu_memcpy fails.
void MemcpySyncH2D(void* dst,
                   const void* src,
                   size_t count,
                   const pten::XPUPlace& dst_place) {
  XPUDeviceGuard guard(dst_place.device);
  PADDLE_ENFORCE_XPU_SUCCESS(
      xpu_memcpy(dst, src, count, XPUMemcpyKind::XPU_HOST_TO_DEVICE));
}
// Copies `count` bytes from device memory `src` to host memory `dst`.
// The device of `src_place` is made current for the duration of the call, and
// dev_ctx.Wait() is called before the copy is issued. Throws if xpu_memcpy
// fails.
void MemcpySyncD2H(void* dst,
                   const void* src,
                   size_t count,
                   const pten::XPUPlace& src_place,
                   const pten::XPUContext& dev_ctx) {
  XPUDeviceGuard guard(src_place.GetDeviceId());
  dev_ctx.Wait();
  PADDLE_ENFORCE_XPU_SUCCESS(
      xpu_memcpy(dst, src, count, XPUMemcpyKind::XPU_DEVICE_TO_HOST));
}
// Copies `count` bytes between device buffers.
// When both places are the current device the copy goes through
// baidu::xpu::api::copy on dev_ctx's context; otherwise xpu_memcpy_peer is
// used.
// if src.device == dst.device and you need sync , after call this function,
// need to call xpu_wait()
void MemcpySyncD2D(void* dst,
                   const pten::XPUPlace& dst_place,
                   const void* src,
                   const pten::XPUPlace& src_place,
                   size_t count,
                   const pten::XPUContext& dev_ctx) {
  const int current_id = GetXPUCurrentDeviceId();
  const bool both_on_current =
      dst_place.device == current_id && src_place.device == current_id;

  if (!both_on_current) {
    PADDLE_ENFORCE_XPU_SUCCESS(xpu_memcpy_peer(
        dst_place.device, dst, src_place.device, src, count));
    return;
  }

  PADDLE_ENFORCE_XDNN_SUCCESS(
      baidu::xpu::api::copy(dev_ctx.x_context(),
                            static_cast<const int8_t*>(src),
                            static_cast<int8_t*>(dst),
                            count),
      "copy ");
}
/**************************** Others **************************/
// Classifies device `dev_id` by its XPUATTR_MODEL attribute: models K100 and
// K200 are reported as XPU1, every other model as XPU2.
XPUVersion get_xpu_version(int dev_id) {
  uint64_t model = 0;
  PADDLE_ENFORCE_XPU_SUCCESS(
      xpu_device_get_attr(&model, XPUATTR_MODEL, dev_id));

  const bool first_generation = (model == K100) || (model == K200);
  if (!first_generation) {
    VLOG(1) << "KUNLUN device " << dev_id << " is XPU2\n";
    return XPU2;
  }
  VLOG(1) << "KUNLUN device " << dev_id << " is XPU1\n";
  return XPU1;
}
}
// namespace xpu
}
// namespace backends
}
// namespace pten
paddle/pten/backends/xpu/xpu_info.h
0 → 100644
浏览文件 @
c1e5a393
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/pten/common/place.h"
namespace
pten
{
class
XPUContext
;
namespace
backends
{
namespace
xpu
{
/***** Version Management *****/
//! Get the version of XPU Driver
int
GetDriverVersion
();
//! Get the version of XPU Runtime
int
GetRuntimeVersion
();
/***** Device Management *****/
//! Get the total number of XPU devices in system.
int
GetXPUDeviceCount
();
//! Set the XPU device id for next execution.
void
SetXPUDeviceId
(
int
device_id
);
//! Get the current XPU device id in system.
int
GetXPUCurrentDeviceId
();
//! Get a list of device ids from environment variable or use all.
std
::
vector
<
int
>
GetXPUSelectedDevices
();
/***** Memory Management *****/
//! Copy memory from address src to dst synchronously.
void
MemcpySyncH2D
(
void
*
dst
,
const
void
*
src
,
size_t
count
,
const
pten
::
XPUPlace
&
dst_place
);
void
MemcpySyncD2H
(
void
*
dst
,
const
void
*
src
,
size_t
count
,
const
pten
::
XPUPlace
&
src_place
,
const
pten
::
XPUContext
&
dev_ctx
);
void
MemcpySyncD2D
(
void
*
dst
,
const
pten
::
XPUPlace
&
dst_place
,
const
void
*
src
,
const
pten
::
XPUPlace
&
src_place
,
size_t
count
,
const
pten
::
XPUContext
&
dev_ctx
);
// Scoped device switch: makes `dev_id` the current XPU device for the
// lifetime of the guard and switches back to the previously current device on
// destruction. If `dev_id` is already current, the guard does nothing.
class XPUDeviceGuard {
 public:
  explicit inline XPUDeviceGuard(int dev_id) {
    const int active_id = GetXPUCurrentDeviceId();
    if (active_id == dev_id) {
      return;
    }
    prev_id_ = active_id;
    SetXPUDeviceId(dev_id);
  }

  inline ~XPUDeviceGuard() {
    if (prev_id_ == -1) {
      return;
    }
    SetXPUDeviceId(prev_id_);
  }

  XPUDeviceGuard(const XPUDeviceGuard& o) = delete;
  XPUDeviceGuard& operator=(const XPUDeviceGuard& o) = delete;

 private:
  // Device to restore on destruction; -1 means no switch took place.
  int prev_id_{-1};
};
enum
XPUVersion
{
XPU1
,
XPU2
};
XPUVersion
get_xpu_version
(
int
dev_id
);
}
// namespace xpu
}
// namespace backends
}
// namespace pten
paddle/pten/core/device_context.cc
浏览文件 @
c1e5a393
...
...
@@ -13,28 +13,45 @@
// limitations under the License.
#include "paddle/pten/core/device_context.h"
#include "paddle/pten/api/ext/exception.h"
namespace
pten
{
struct
DeviceContext
::
Impl
{
Allocator
*
allocator_
{
nullptr
};
Impl
()
=
default
;
~
Impl
()
=
default
;
void
SetAllocator
(
Allocator
*
allocator
)
{
allocator_
=
allocator
;
}
void
SetDeviceAllocator
(
Allocator
*
allocator
)
{
device_allocator_
=
allocator
;
}
void
SetHostAllocator
(
Allocator
*
allocator
)
{
host_allocator_
=
allocator
;
}
const
Allocator
&
GetDeviceAllocator
()
const
{
PD_CHECK
(
device_allocator_
!=
nullptr
,
"the device_allocator is nullptr."
);
return
*
device_allocator_
;
}
const
Allocator
&
GetAllocator
()
const
{
return
*
allocator_
;
}
const
Allocator
&
GetHostAllocator
()
const
{
PD_CHECK
(
host_allocator_
!=
nullptr
,
"the host_allocator is nullptr."
);
return
*
host_allocator_
;
}
// TODO(Wilber): Add impl. It seems that tensorbase not have interface to
// communicate with allocator.
void
Alloc
(
TensorBase
*
tensor
)
{}
void
HostAlloc
(
TensorBase
*
tensor
)
{}
void
DeviceAlloc
(
TensorBase
*
tensor
)
{}
Allocator
*
device_allocator_
{
nullptr
};
Allocator
*
host_allocator_
{
nullptr
};
};
DeviceContext
::
DeviceContext
()
{
impl_
=
std
::
make_unique
<
Impl
>
();
}
// Copy constructor: the new context gets its own Impl that points at the same
// allocators as `other`. Allocators are shared, not duplicated.
DeviceContext::DeviceContext(const DeviceContext& other) {
  // impl_ is a std::unique_ptr and starts out null in this constructor; it
  // must be created before it is dereferenced below.
  impl_ = std::make_unique<Impl>();
  impl_->SetAllocator(const_cast<Allocator*>(&other.GetAllocator()));
  impl_->SetDeviceAllocator(
      const_cast<Allocator*>(&other.GetDeviceAllocator()));
  impl_->SetHostAllocator(const_cast<Allocator*>(&other.GetHostAllocator()));
}
DeviceContext
::
DeviceContext
(
DeviceContext
&&
other
)
{
...
...
@@ -43,14 +60,26 @@ DeviceContext::DeviceContext(DeviceContext&& other) {
DeviceContext
::~
DeviceContext
()
=
default
;
void
DeviceContext
::
SetAllocator
(
Allocator
*
allocator
)
{
impl_
->
SetAllocator
(
allocator
);
void
DeviceContext
::
SetHostAllocator
(
Allocator
*
allocator
)
{
impl_
->
SetHostAllocator
(
allocator
);
}
void
DeviceContext
::
SetDeviceAllocator
(
Allocator
*
allocator
)
{
impl_
->
SetDeviceAllocator
(
allocator
);
}
const
Allocator
&
DeviceContext
::
GetHostAllocator
()
const
{
return
impl_
->
GetHostAllocator
();
}
const
Allocator
&
DeviceContext
::
GetAllocator
()
const
{
return
impl_
->
GetAllocator
();
const
Allocator
&
DeviceContext
::
Get
Device
Allocator
()
const
{
return
impl_
->
Get
Device
Allocator
();
}
void
DeviceContext
::
Alloc
(
TensorBase
*
tensor
)
{
impl_
->
Alloc
(
tensor
);
}
void
DeviceContext
::
HostAlloc
(
TensorBase
*
tensor
)
{
impl_
->
HostAlloc
(
tensor
);
}
void
DeviceContext
::
DeviceAlloc
(
TensorBase
*
tensor
)
{
impl_
->
DeviceAlloc
(
tensor
);
}
}
// namespace pten
paddle/pten/core/device_context.h
浏览文件 @
c1e5a393
...
...
@@ -57,19 +57,38 @@ class DeviceContext {
*
* @param allocator
*/
void
SetAllocator
(
Allocator
*
);
void
Set
Device
Allocator
(
Allocator
*
);
/**
* @brief Get the const Allocator object.
* @brief Get the const device-related Allocator object.
*
* @return Allocator
*/
const
Allocator
&
GetAllocator
()
const
;
const
Allocator
&
Get
Device
Allocator
()
const
;
/**
* @brief Allocate memory for tensor.
* @brief Allocate
device
memory for tensor.
*/
void
Alloc
(
pten
::
TensorBase
*
);
void
DeviceAlloc
(
pten
::
TensorBase
*
);
/**
* @brief Set the host Allocator object.
*
* @param allocator
*/
void
SetHostAllocator
(
Allocator
*
);
/**
* @brief Get the const host Allocator object.
*
* @return Allocator
*/
const
Allocator
&
GetHostAllocator
()
const
;
/**
* @brief Allocate host memory for tensor.
*/
void
HostAlloc
(
pten
::
TensorBase
*
);
// TODO(wilber): Just for the convenience of migrating the code, it will be
// modified or removed later.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录