Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
0d51fcf1
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
0d51fcf1
编写于
7月 26, 2022
作者:
R
ronnywang
提交者:
GitHub
7月 26, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[CustomDevice] add blas_axpby api for gradient_accumulator (#44584)
上级
356ff436
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
245 addition
and
13 deletion
+245
-13
paddle/fluid/imperative/gradient_accumulator.cc
paddle/fluid/imperative/gradient_accumulator.cc
+13
-9
paddle/phi/backends/CMakeLists.txt
paddle/phi/backends/CMakeLists.txt
+1
-1
paddle/phi/backends/custom/custom_device.cc
paddle/phi/backends/custom/custom_device.cc
+46
-0
paddle/phi/backends/custom/custom_device_test.cc
paddle/phi/backends/custom/custom_device_test.cc
+48
-0
paddle/phi/backends/custom/fake_cpu_device.h
paddle/phi/backends/custom/fake_cpu_device.h
+13
-0
paddle/phi/backends/device_base.cc
paddle/phi/backends/device_base.cc
+12
-0
paddle/phi/backends/device_base.h
paddle/phi/backends/device_base.h
+10
-0
paddle/phi/backends/device_ext.h
paddle/phi/backends/device_ext.h
+13
-1
paddle/phi/backends/device_manager.cc
paddle/phi/backends/device_manager.cc
+75
-0
paddle/phi/backends/device_manager.h
paddle/phi/backends/device_manager.h
+14
-2
未找到文件。
paddle/fluid/imperative/gradient_accumulator.cc
浏览文件 @
0d51fcf1
...
...
@@ -39,6 +39,9 @@
#ifdef PADDLE_WITH_MLU
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
#include "paddle/phi/backends/device_manager.h"
#endif
namespace
paddle
{
namespace
imperative
{
...
...
@@ -189,10 +192,19 @@ class TensorAddFunctor
place
));
}
void
operator
()(
const
platform
::
CustomPlace
&
place
)
const
{
#ifdef PADDLE_WITH_CUSTOM_DEVICE
platform
::
CustomDeviceContext
*
ctx
=
dynamic_cast
<
platform
::
CustomDeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
phi
::
stream
::
Stream
stream
(
place
,
ctx
->
stream
());
auto
device
=
phi
::
DeviceManager
::
GetDeviceWithPlace
(
place
);
device
->
BlasAXPBY
<
T
>
(
stream
,
static_cast
<
size_t
>
(
numel_
),
1.
,
x_
,
1.
,
y_
);
#else
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode"
,
place
));
#endif
}
private:
...
...
@@ -351,15 +363,7 @@ void TensorAdd(const VarType& src, VarType* dst) {
return
;
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
if
(
platform
::
is_custom_place
(
place
))
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Gradient accumulation of data type (%s) on place (%s) is not "
"supported in imperative mode"
,
framework
::
DataTypeToString
(
data_type
),
place
));
}
#endif
#ifdef PADDLE_WITH_XPU
if
(
platform
::
is_xpu_place
(
place
))
{
if
(
data_type
==
framework
::
DataTypeTrait
<
float
>::
DataType
())
{
...
...
paddle/phi/backends/CMakeLists.txt
浏览文件 @
0d51fcf1
...
...
@@ -51,7 +51,7 @@ if(WITH_CUSTOM_DEVICE)
cc_test
(
custom_device_test
SRCS custom/custom_device_test.cc
DEPS phi_backends phi_device_context
)
DEPS phi_backends phi_device_context
gradient_accumulator
)
cc_test
(
capi_test
SRCS custom/capi_test.cc
...
...
paddle/phi/backends/custom/custom_device.cc
浏览文件 @
0d51fcf1
...
...
@@ -14,6 +14,8 @@
#include "paddle/fluid/platform/device/custom/enforce_custom.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/backends/callback_manager.h"
#include "paddle/phi/backends/device_base.h"
#include "paddle/phi/backends/device_guard.h"
...
...
@@ -608,6 +610,27 @@ class CustomDevice : public DeviceInterface {
#undef return_result
}
C_DataType
ToCDatatType
(
paddle
::
experimental
::
DataType
data_type
)
{
#define return_result(in, ret) \
case in: \
return C_DataType::ret
switch
(
data_type
)
{
return_result
(
paddle
::
experimental
::
DataType
::
FLOAT64
,
FLOAT64
);
return_result
(
paddle
::
experimental
::
DataType
::
FLOAT32
,
FLOAT32
);
return_result
(
paddle
::
experimental
::
DataType
::
FLOAT16
,
FLOAT16
);
return_result
(
paddle
::
experimental
::
DataType
::
INT64
,
INT64
);
return_result
(
paddle
::
experimental
::
DataType
::
INT32
,
INT32
);
return_result
(
paddle
::
experimental
::
DataType
::
INT16
,
INT16
);
return_result
(
paddle
::
experimental
::
DataType
::
INT8
,
INT8
);
default:
{
PADDLE_THROW
(
phi
::
errors
::
Unavailable
(
"DataType is not supported on %s."
,
Type
()));
return
C_DataType
::
UNDEFINED
;
}
}
#undef return_result
}
void
CCLGetUniqueId
(
ccl
::
CCLRootId
*
unique_id
)
override
{
CHECK_PTR
(
pimpl_
->
xccl_get_unique_id_size
);
CHECK_PTR
(
pimpl_
->
xccl_get_unique_id
);
...
...
@@ -771,6 +794,27 @@ class CustomDevice : public DeviceInterface {
reinterpret_cast
<
C_Stream
>
(
stream
.
raw_stream
())));
}
void
BlasAXPBY
(
size_t
dev_id
,
const
stream
::
Stream
&
stream
,
paddle
::
experimental
::
DataType
dtype
,
size_t
numel
,
float
alpha
,
void
*
x
,
float
beta
,
void
*
y
)
override
{
CHECK_PTR
(
pimpl_
->
blas_axpby
);
const
auto
device
=
&
devices_pool
[
dev_id
];
PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS
(
pimpl_
->
blas_axpby
(
device
,
reinterpret_cast
<
C_Stream
>
(
stream
.
raw_stream
()),
ToCDatatType
(
dtype
),
numel
,
alpha
,
x
,
beta
,
y
));
}
private:
inline
int
PlaceToIdNoCheck
(
const
Place
&
place
)
{
int
dev_id
=
place
.
GetDeviceId
();
...
...
@@ -877,6 +921,8 @@ bool ValidCustomCustomRuntimeParams(const CustomRuntimeParams* params) {
CHECK_INTERFACE
(
xccl_group_end
,
false
);
CHECK_INTERFACE
(
xccl_send
,
false
);
CHECK_INTERFACE
(
xccl_recv
,
false
);
CHECK_INTERFACE
(
blas_axpby
,
false
);
return
true
;
#undef CHECK_INTERFACE
}
...
...
paddle/phi/backends/custom/custom_device_test.cc
浏览文件 @
0d51fcf1
...
...
@@ -18,6 +18,8 @@
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/backends/custom/fake_cpu_device.h"
#include "paddle/phi/backends/device_manager.h"
...
...
@@ -237,6 +239,51 @@ void TestCustomCCL(const paddle::platform::Place& place) {
stream
);
}
void
TestBlasAPI
(
const
paddle
::
platform
::
Place
&
place
)
{
std
::
cout
<<
"TestBlasAPI on "
<<
place
<<
std
::
endl
;
if
(
paddle
::
platform
::
is_custom_place
(
place
)
==
false
)
{
return
;
}
auto
device
=
phi
::
DeviceManager
::
GetDeviceWithPlace
(
place
);
phi
::
stream
::
Stream
stream
(
place
,
nullptr
);
device
->
BlasAXPBY
<
float
>
(
stream
,
0
,
1.
,
nullptr
,
1.
,
nullptr
);
paddle
::
framework
::
Variable
var1
;
paddle
::
framework
::
Variable
var2
;
std
::
vector
<
float
>
src_data
(
10
,
1.0
);
std
::
vector
<
float
>
dst_data
(
10
,
0.0
);
std
::
vector
<
float
>
result
;
paddle
::
platform
::
CPUPlace
src_place
;
for
(
unsigned
int
i
=
0
;
i
<
10
;
i
++
)
{
result
.
emplace_back
(
src_data
[
i
]
+
dst_data
[
i
]);
}
std
::
vector
<
int64_t
>
dims
=
{
2
,
5
};
auto
*
src
=
var1
.
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
auto
*
dst
=
var2
.
GetMutable
<
paddle
::
framework
::
LoDTensor
>
();
src
->
Resize
(
phi
::
make_ddim
(
dims
));
dst
->
Resize
(
phi
::
make_ddim
(
dims
));
auto
*
src_mutable
=
src
->
mutable_data
<
float
>
(
place
);
auto
*
dst_mutable
=
dst
->
mutable_data
<
float
>
(
place
);
paddle
::
memory
::
Copy
(
place
,
src_mutable
,
src_place
,
src_data
.
data
(),
sizeof
(
float
)
*
src_data
.
size
());
paddle
::
memory
::
Copy
(
place
,
dst_mutable
,
src_place
,
dst_data
.
data
(),
sizeof
(
float
)
*
dst_data
.
size
());
paddle
::
imperative
::
TensorAdd
<
paddle
::
framework
::
Variable
>
(
var1
,
&
var2
);
paddle
::
framework
::
LoDTensor
rlt
;
paddle
::
platform
::
CPUPlace
rlt_place
;
paddle
::
framework
::
TensorCopySync
(
*
dst
,
rlt_place
,
&
rlt
);
}
TEST
(
CustomDevice
,
Tensor
)
{
InitDevice
();
auto
dev_types
=
phi
::
DeviceManager
::
GetAllDeviceTypes
();
...
...
@@ -251,6 +298,7 @@ TEST(CustomDevice, Tensor) {
TestTensorShareDataWith
(
place
);
TestTensorUtils
(
place
);
TestCustomCCL
(
place
);
TestBlasAPI
(
place
);
}
}
...
...
paddle/phi/backends/custom/fake_cpu_device.h
浏览文件 @
0d51fcf1
...
...
@@ -210,6 +210,17 @@ C_Status XcclRecv(void *recv_buf,
return
C_SUCCESS
;
}
C_Status
BlasAXPBY
(
const
C_Device
device
,
C_Stream
stream
,
C_DataType
dtype
,
size_t
numel
,
float
alpha
,
void
*
x
,
float
beta
,
void
*
y
)
{
return
C_SUCCESS
;
}
#define DEVICE_TYPE "FakeCPU"
#define SUB_DEVICE_TYPE "V100"
...
...
@@ -278,4 +289,6 @@ void InitFakeCPUDevice(CustomRuntimeParams *params) {
params
->
interface
->
xccl_reduce_scatter
=
XcclReduceScatter
;
params
->
interface
->
xccl_send
=
XcclSend
;
params
->
interface
->
xccl_recv
=
XcclRecv
;
params
->
interface
->
blas_axpby
=
BlasAXPBY
;
}
paddle/phi/backends/device_base.cc
浏览文件 @
0d51fcf1
...
...
@@ -355,6 +355,18 @@ void DeviceInterface::CCLRecv(void* recvbuf,
INTERFACE_UNIMPLEMENT
;
}
// blas
void
DeviceInterface
::
BlasAXPBY
(
size_t
dev_id
,
const
stream
::
Stream
&
stream
,
paddle
::
experimental
::
DataType
dtype
,
size_t
numel
,
float
alpha
,
void
*
x
,
float
beta
,
void
*
y
)
{
INTERFACE_UNIMPLEMENT
;
}
#undef INTERFACE_UNIMPLEMENT
}
// namespace phi
paddle/phi/backends/device_base.h
浏览文件 @
0d51fcf1
...
...
@@ -225,6 +225,16 @@ class DeviceInterface { // Driver / Runtime
const
ccl
::
CCLComm
&
ccl_comm
,
const
stream
::
Stream
&
stream
);
// blas
virtual
void
BlasAXPBY
(
size_t
dev_id
,
const
stream
::
Stream
&
stream
,
paddle
::
experimental
::
DataType
dtype
,
size_t
numel
,
float
alpha
,
void
*
x
,
float
beta
,
void
*
y
);
private:
const
std
::
string
type_
;
const
uint8_t
priority_
;
...
...
paddle/phi/backends/device_ext.h
浏览文件 @
0d51fcf1
...
...
@@ -635,7 +635,19 @@ struct C_DeviceInterface {
// other api //
///////////////
void
*
reserved_other_api
[
8
];
/**
* @brief y = alpha * x + beta * y
*
*/
C_Status
(
*
blas_axpby
)(
const
C_Device
device
,
C_Stream
stream
,
C_DataType
dtype
,
size_t
numel
,
float
alpha
,
void
*
x
,
float
beta
,
void
*
y
);
void
*
reserved_other_api
[
7
];
};
struct
CustomRuntimeVersion
{
...
...
paddle/phi/backends/device_manager.cc
浏览文件 @
0d51fcf1
...
...
@@ -14,6 +14,7 @@
#ifdef PADDLE_WITH_CUSTOM_DEVICE
#include "paddle/phi/backends/device_manager.h"
#include "paddle/phi/common/complex.h"
#if !defined(_WIN32)
#include <dirent.h>
...
...
@@ -135,6 +136,80 @@ void Device::MemorySet(void* ptr, uint8_t value, size_t size) {
impl_
->
MemorySet
(
dev_id_
,
ptr
,
value
,
size
);
}
template
<
typename
T
>
void
Device
::
BlasAXPBY
(
const
stream
::
Stream
&
stream
,
size_t
numel
,
float
alpha
,
const
T
*
x
,
float
beta
,
T
*
y
)
{
impl_
->
BlasAXPBY
(
dev_id_
,
stream
,
paddle
::
experimental
::
CppTypeToDataType
<
T
>::
Type
(),
numel
,
alpha
,
reinterpret_cast
<
void
*>
(
const_cast
<
T
*>
(
x
)),
beta
,
reinterpret_cast
<
void
*>
(
y
));
}
template
void
Device
::
BlasAXPBY
<
paddle
::
float16
>(
const
stream
::
Stream
&
stream
,
size_t
numel
,
float
alpha
,
const
paddle
::
float16
*
x
,
float
beta
,
paddle
::
float16
*
y
);
template
void
Device
::
BlasAXPBY
<
float
>(
const
stream
::
Stream
&
stream
,
size_t
numel
,
float
alpha
,
const
float
*
x
,
float
beta
,
float
*
y
);
template
void
Device
::
BlasAXPBY
<
double
>(
const
stream
::
Stream
&
stream
,
size_t
numel
,
float
alpha
,
const
double
*
x
,
float
beta
,
double
*
y
);
template
void
Device
::
BlasAXPBY
<
int8_t
>(
const
stream
::
Stream
&
stream
,
size_t
numel
,
float
alpha
,
const
int8_t
*
x
,
float
beta
,
int8_t
*
y
);
template
void
Device
::
BlasAXPBY
<
int16_t
>(
const
stream
::
Stream
&
stream
,
size_t
numel
,
float
alpha
,
const
int16_t
*
x
,
float
beta
,
int16_t
*
y
);
template
void
Device
::
BlasAXPBY
<
int32_t
>(
const
stream
::
Stream
&
stream
,
size_t
numel
,
float
alpha
,
const
int32_t
*
x
,
float
beta
,
int32_t
*
y
);
template
void
Device
::
BlasAXPBY
<
int64_t
>(
const
stream
::
Stream
&
stream
,
size_t
numel
,
float
alpha
,
const
int64_t
*
x
,
float
beta
,
int64_t
*
y
);
template
void
Device
::
BlasAXPBY
<
phi
::
dtype
::
complex
<
float
>
>
(
const
stream
::
Stream
&
stream
,
size_t
numel
,
float
alpha
,
const
phi
::
dtype
::
complex
<
float
>*
x
,
float
beta
,
phi
::
dtype
::
complex
<
float
>*
y
);
template
void
Device
::
BlasAXPBY
<
phi
::
dtype
::
complex
<
double
>
>
(
const
stream
::
Stream
&
stream
,
size_t
numel
,
float
alpha
,
const
phi
::
dtype
::
complex
<
double
>*
x
,
float
beta
,
phi
::
dtype
::
complex
<
double
>*
y
);
std
::
string
Device
::
Type
()
{
return
impl_
->
Type
();
}
static
phi
::
RWLock
_global_device_manager_rw_lock
;
...
...
paddle/phi/backends/device_manager.h
浏览文件 @
0d51fcf1
...
...
@@ -17,14 +17,16 @@
#include <unordered_map>
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/utils/rw_lock.h"
#include "paddle/phi/backends/c_comm_lib.h"
#include "paddle/phi/backends/device_base.h"
#include "paddle/phi/backends/device_ext.h"
#include "paddle/phi/backends/dynload/port.h"
#include "paddle/phi/backends/event.h"
#include "paddle/phi/backends/stream.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/utils/rw_lock.h"
namespace
phi
{
class
Device
final
{
...
...
@@ -106,6 +108,16 @@ class Device final {
void
MemorySet
(
void
*
ptr
,
uint8_t
value
,
size_t
size
);
// Blas
// ! y = alpha * x + beta * y
template
<
typename
T
>
void
BlasAXPBY
(
const
stream
::
Stream
&
stream
,
size_t
numel
,
float
alpha
,
const
T
*
x
,
float
beta
,
T
*
y
);
std
::
string
Type
();
private:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录