Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
033ef5e9
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
033ef5e9
编写于
7月 13, 2022
作者:
R
ronnywang
提交者:
GitHub
7月 13, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[CustomKernel] capi add eager mode support (#44164)
* [CustomKernel] add capi eager mode support * add ut * add capi test
上级
961d6cce
变更
12
展开全部
隐藏空白更改
内联
并排
Showing
12 changed file
with
675 addition
and
415 deletion
+675
-415
paddle/phi/backends/custom/CMakeLists.txt
paddle/phi/backends/custom/CMakeLists.txt
+4
-0
paddle/phi/backends/custom/capi_test.cc
paddle/phi/backends/custom/capi_test.cc
+78
-0
paddle/phi/capi/include/c_kernel_context.h
paddle/phi/capi/include/c_kernel_context.h
+20
-0
paddle/phi/capi/include/c_tensor.h
paddle/phi/capi/include/c_tensor.h
+4
-0
paddle/phi/capi/include/kernel_registry.h
paddle/phi/capi/include/kernel_registry.h
+123
-1
paddle/phi/capi/include/kernel_utils.h
paddle/phi/capi/include/kernel_utils.h
+213
-116
paddle/phi/capi/lib/c_kernel_context.cc
paddle/phi/capi/lib/c_kernel_context.cc
+85
-0
paddle/phi/capi/lib/c_tensor.cc
paddle/phi/capi/lib/c_tensor.cc
+15
-0
python/paddle/fluid/tests/custom_runtime/CMakeLists.txt
python/paddle/fluid/tests/custom_runtime/CMakeLists.txt
+2
-1
python/paddle/fluid/tests/custom_runtime/custom_cpu_runtime.cc
...n/paddle/fluid/tests/custom_runtime/custom_cpu_runtime.cc
+0
-215
python/paddle/fluid/tests/custom_runtime/custom_cpu_setup.py
python/paddle/fluid/tests/custom_runtime/custom_cpu_setup.py
+0
-82
python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py
...ddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py
+131
-0
未找到文件。
paddle/phi/backends/custom/CMakeLists.txt
浏览文件 @
033ef5e9
...
...
@@ -11,4 +11,8 @@ if(WITH_CUSTOM_DEVICE)
custom_device_test
SRCS custom_device_test.cc
DEPS device_manager device_context
)
cc_test
(
capi_test
SRCS capi_test.cc
DEPS phi_capi
)
endif
()
paddle/phi/backends/custom/capi_test.cc
0 → 100644
浏览文件 @
033ef5e9
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <cstring>
#include <string>
#include "paddle/phi/capi/all.h"
#ifndef UNUSED
#define UNUSED __attribute__((unused))
#endif
#include "paddle/phi/capi/capi.h"
TEST
(
CustomKernel
,
CAPI
)
{
std
::
string
str
=
"capi"
;
EXPECT_EQ
(
str
.
data
(),
PD_StringAttr
(
&
str
));
std
::
vector
<
int32_t
>
int32_vec
({
1
,
2
,
3
});
auto
int32_list
=
PD_ListInt32Attr
(
&
int32_vec
);
EXPECT_EQ
(
int32_list
.
data
,
int32_vec
.
data
());
EXPECT_EQ
(
int32_list
.
size
,
int32_vec
.
size
());
std
::
vector
<
int64_t
>
int64_vec
({
1
,
2
,
3
});
auto
int64_list
=
PD_ListInt64Attr
(
&
int64_vec
);
EXPECT_EQ
(
int64_list
.
data
,
int64_vec
.
data
());
EXPECT_EQ
(
int64_list
.
size
,
int64_vec
.
size
());
std
::
vector
<
float
>
float_vec
({
1
,
2
,
3
});
auto
float_list
=
PD_ListFloatAttr
(
&
float_vec
);
EXPECT_EQ
(
float_list
.
data
,
float_vec
.
data
());
EXPECT_EQ
(
float_list
.
size
,
float_vec
.
size
());
std
::
vector
<
double
>
double_vec
({
1
,
2
,
3
});
auto
double_list
=
PD_ListDoubleAttr
(
&
double_vec
);
EXPECT_EQ
(
double_list
.
data
,
double_vec
.
data
());
EXPECT_EQ
(
double_list
.
size
,
double_vec
.
size
());
std
::
vector
<
std
::
string
>
string_vec
{
"capi"
,
"api"
};
auto
string_list
=
PD_ListStringAttr
(
&
string_vec
);
auto
string_data
=
reinterpret_cast
<
void
**>
(
string_list
.
data
);
for
(
size_t
i
=
0
;
i
<
string_vec
.
size
();
++
i
)
{
EXPECT_EQ
(
string_data
[
i
],
string_vec
[
i
].
data
());
}
std
::
vector
<
bool
>
bool_vec
{
true
,
false
,
true
};
auto
bool_list
=
PD_ListBoolAttr
(
&
bool_vec
);
auto
bool_data
=
reinterpret_cast
<
uint8_t
*>
(
bool_list
.
data
);
for
(
size_t
i
=
0
;
i
<
bool_vec
.
size
();
++
i
)
{
EXPECT_EQ
(
bool_data
[
i
],
static_cast
<
uint8_t
>
(
bool_vec
[
i
]));
}
std
::
vector
<
float
*>
ptr_vec
;
for
(
size_t
i
=
0
;
i
<
float_vec
.
size
();
++
i
)
{
ptr_vec
.
push_back
(
&
float_vec
[
i
]);
}
auto
ptr_list
=
PD_TensorVectorToList
(
reinterpret_cast
<
PD_Tensor
*>
(
&
ptr_vec
));
EXPECT_EQ
(
ptr_list
.
data
,
ptr_vec
.
data
());
EXPECT_EQ
(
ptr_list
.
size
,
ptr_vec
.
size
());
}
int
main
(
int
argc
,
char
**
argv
)
{
::
testing
::
InitGoogleTest
(
&
argc
,
argv
);
return
RUN_ALL_TESTS
();
}
paddle/phi/capi/include/c_kernel_context.h
浏览文件 @
033ef5e9
...
...
@@ -87,6 +87,26 @@ PD_List PD_KernelContextListScalarAttrAt(PD_KernelContext *ctx, size_t index);
PD_Place
*
PD_KernelContextPlaceAttrAt
(
PD_KernelContext
*
ctx
,
size_t
index
);
const
char
*
PD_StringAttr
(
void
*
attr
);
PD_DataType
PD_DatatTypeAttr
(
void
*
attr
);
PD_DataLayout
PD_DatatLayoutAttr
(
void
*
attr
);
PD_List
PD_ListInt32Attr
(
void
*
attr
);
PD_List
PD_ListInt64Attr
(
void
*
attr
);
PD_List
PD_ListFloatAttr
(
void
*
attr
);
PD_List
PD_ListDoubleAttr
(
void
*
attr
);
PD_List
PD_ListScalarAttr
(
void
*
attr
);
PD_List
PD_ListStringAttr
(
void
*
attr
);
PD_List
PD_ListBoolAttr
(
void
*
attr
);
#ifdef __cplusplus
}
// extern "C"
#endif
...
...
paddle/phi/capi/include/c_tensor.h
浏览文件 @
033ef5e9
...
...
@@ -82,6 +82,10 @@ void PD_TensorShareLoDWith(PD_Tensor *dst,
const
PD_Tensor
*
src
,
PD_Status
*
status
);
PD_Tensor
*
PD_OptionalTensorGetPointer
(
PD_Tensor
*
tensor
);
PD_List
PD_TensorVectorToList
(
PD_Tensor
*
tensor
);
#ifdef __cplusplus
}
// extern "C"
#endif
...
...
paddle/phi/capi/include/kernel_registry.h
浏览文件 @
033ef5e9
...
...
@@ -19,7 +19,129 @@
namespace
phi
{
namespace
capi
{
// eager mode
inline
std
::
vector
<
phi
::
capi
::
DenseTensor
>
PD_TensorVector
(
PD_Tensor
*
tensor
)
{
std
::
vector
<
phi
::
capi
::
DenseTensor
>
ret
;
auto
list
=
PD_TensorVectorToList
(
tensor
);
auto
data
=
reinterpret_cast
<
PD_Tensor
**>
(
list
.
data
);
for
(
size_t
i
=
0
;
i
<
list
.
size
;
++
i
)
{
ret
.
emplace_back
(
data
[
i
]);
}
return
ret
;
}
inline
paddle
::
optional
<
phi
::
capi
::
DenseTensor
>
PD_OptionalTensor
(
PD_Tensor
*
tensor
)
{
auto
ptr
=
PD_OptionalTensorGetPointer
(
tensor
);
return
ptr
?
paddle
::
optional
<
phi
::
capi
::
DenseTensor
>
(
phi
::
capi
::
DenseTensor
(
ptr
))
:
paddle
::
optional
<
phi
::
capi
::
DenseTensor
>
(
paddle
::
none
);
}
template
<
typename
T
>
inline
T
PD_Attr
(
void
*
attr
)
{
return
*
reinterpret_cast
<
T
*>
(
attr
);
}
template
<
>
inline
std
::
string
PD_Attr
<
std
::
string
>
(
void
*
attr
)
{
return
PD_StringAttr
(
attr
);
}
template
<
>
inline
PD_DataType
PD_Attr
<
PD_DataType
>
(
void
*
attr
)
{
return
PD_DatatTypeAttr
(
attr
);
}
template
<
>
inline
PD_DataLayout
PD_Attr
<
PD_DataLayout
>
(
void
*
attr
)
{
return
PD_DatatLayoutAttr
(
attr
);
}
template
<
>
inline
std
::
vector
<
int32_t
>
PD_Attr
<
std
::
vector
<
int32_t
>>
(
void
*
attr
)
{
auto
list
=
PD_ListInt32Attr
(
attr
);
auto
data
=
reinterpret_cast
<
int32_t
*>
(
list
.
data
);
std
::
vector
<
int32_t
>
cc_list
(
data
,
data
+
list
.
size
);
return
cc_list
;
}
template
<
>
inline
std
::
vector
<
int64_t
>
PD_Attr
<
std
::
vector
<
int64_t
>>
(
void
*
attr
)
{
auto
list
=
PD_ListInt64Attr
(
attr
);
auto
data
=
reinterpret_cast
<
int64_t
*>
(
list
.
data
);
std
::
vector
<
int64_t
>
cc_list
(
data
,
data
+
list
.
size
);
return
cc_list
;
}
template
<
>
inline
std
::
vector
<
float
>
PD_Attr
<
std
::
vector
<
float
>>
(
void
*
attr
)
{
auto
list
=
PD_ListFloatAttr
(
attr
);
auto
data
=
reinterpret_cast
<
float
*>
(
list
.
data
);
std
::
vector
<
float
>
cc_list
(
data
,
data
+
list
.
size
);
return
cc_list
;
}
template
<
>
inline
std
::
vector
<
double
>
PD_Attr
<
std
::
vector
<
double
>>
(
void
*
attr
)
{
auto
list
=
PD_ListDoubleAttr
(
attr
);
auto
data
=
reinterpret_cast
<
double
*>
(
list
.
data
);
std
::
vector
<
double
>
cc_list
(
data
,
data
+
list
.
size
);
return
cc_list
;
}
template
<
>
inline
phi
::
capi
::
Scalar
PD_Attr
<
phi
::
capi
::
Scalar
>
(
void
*
attr
)
{
return
phi
::
capi
::
Scalar
(
reinterpret_cast
<
PD_Scalar
*>
(
attr
));
}
template
<
>
inline
phi
::
capi
::
IntArray
PD_Attr
<
phi
::
capi
::
IntArray
>
(
void
*
attr
)
{
return
phi
::
capi
::
IntArray
(
reinterpret_cast
<
PD_IntArray
*>
(
attr
));
}
template
<
>
inline
phi
::
capi
::
Place
PD_Attr
<
phi
::
capi
::
Place
>
(
void
*
attr
)
{
return
phi
::
capi
::
Place
(
reinterpret_cast
<
PD_Place
*>
(
attr
));
}
template
<
>
inline
std
::
vector
<
phi
::
capi
::
Scalar
>
PD_Attr
<
std
::
vector
<
phi
::
capi
::
Scalar
>>
(
void
*
attr
)
{
auto
c_list
=
PD_ListScalarAttr
(
attr
);
auto
data
=
reinterpret_cast
<
PD_Scalar
**>
(
c_list
.
data
);
std
::
vector
<
phi
::
capi
::
Scalar
>
list
;
for
(
size_t
i
=
0
;
i
<
c_list
.
size
;
++
i
)
{
list
.
emplace_back
(
data
[
i
]);
}
PD_DeletePointerList
(
c_list
);
return
list
;
}
template
<
>
inline
std
::
vector
<
std
::
string
>
PD_Attr
<
std
::
vector
<
std
::
string
>>
(
void
*
attr
)
{
auto
c_list
=
PD_ListStringAttr
(
attr
);
auto
data
=
reinterpret_cast
<
char
**>
(
c_list
.
data
);
std
::
vector
<
std
::
string
>
list
;
for
(
size_t
i
=
0
;
i
<
c_list
.
size
;
++
i
)
{
list
.
emplace_back
(
data
[
i
]);
}
PD_DeletePointerList
(
c_list
);
return
list
;
}
template
<
>
inline
std
::
vector
<
bool
>
PD_Attr
<
std
::
vector
<
bool
>>
(
void
*
attr
)
{
auto
c_list
=
PD_ListBoolAttr
(
attr
);
std
::
vector
<
bool
>
list
;
auto
data
=
reinterpret_cast
<
uint8_t
*>
(
c_list
.
data
);
for
(
size_t
i
=
0
;
i
<
c_list
.
size
;
++
i
)
{
list
[
i
]
=
static_cast
<
bool
>
(
data
[
i
]);
}
PD_DeleteUInt8List
(
c_list
);
return
list
;
}
//
inline
phi
::
capi
::
DeviceContext
PD_GetDeviceContext
(
PD_KernelContext
*
ctx
)
{
return
phi
::
capi
::
DeviceContext
(
PD_KernelContextGetDeviceContext
(
ctx
));
}
...
...
@@ -189,7 +311,7 @@ inline std::vector<phi::capi::Scalar> PD_AttrAt<std::vector<phi::capi::Scalar>>(
template
<
>
inline
std
::
vector
<
std
::
string
>
PD_AttrAt
<
std
::
vector
<
std
::
string
>>
(
PD_KernelContext
*
ctx
,
size_t
index
)
{
auto
c_list
=
PD_KernelContextListS
calar
AttrAt
(
ctx
,
index
);
auto
c_list
=
PD_KernelContextListS
tring
AttrAt
(
ctx
,
index
);
auto
data
=
reinterpret_cast
<
char
**>
(
c_list
.
data
);
std
::
vector
<
std
::
string
>
list
;
for
(
size_t
i
=
0
;
i
<
c_list
.
size
;
++
i
)
{
...
...
paddle/phi/capi/include/kernel_utils.h
浏览文件 @
033ef5e9
此差异已折叠。
点击以展开。
paddle/phi/capi/lib/c_kernel_context.cc
浏览文件 @
033ef5e9
...
...
@@ -220,4 +220,89 @@ PD_DataLayout PD_KernelContextDataLayoutAttrAt(PD_KernelContext* ctx,
kernel_context
->
AttrAt
<
phi
::
DataLayout
>
(
index
));
}
// eager
const
char
*
PD_StringAttr
(
void
*
attr
)
{
auto
*
str
=
reinterpret_cast
<
std
::
string
*>
(
attr
);
return
str
->
c_str
();
}
PD_DataType
PD_DatatTypeAttr
(
void
*
attr
)
{
auto
*
dtype
=
reinterpret_cast
<
phi
::
DataType
*>
(
attr
);
return
phi
::
capi
::
ToPDDataType
(
*
dtype
);
}
PD_DataLayout
PD_DatatLayoutAttr
(
void
*
attr
)
{
auto
*
layout
=
reinterpret_cast
<
phi
::
DataLayout
*>
(
attr
);
return
phi
::
capi
::
ToPDDataLayout
(
*
layout
);
}
PD_List
PD_ListInt32Attr
(
void
*
attr
)
{
PD_List
list
;
const
auto
&
cc_list
=
*
reinterpret_cast
<
std
::
vector
<
int32_t
>*>
(
attr
);
list
.
size
=
cc_list
.
size
();
list
.
data
=
const_cast
<
int32_t
*>
(
cc_list
.
data
());
return
list
;
}
PD_List
PD_ListInt64Attr
(
void
*
attr
)
{
PD_List
list
;
const
auto
&
cc_list
=
*
reinterpret_cast
<
std
::
vector
<
int64_t
>*>
(
attr
);
list
.
size
=
cc_list
.
size
();
list
.
data
=
const_cast
<
int64_t
*>
(
cc_list
.
data
());
return
list
;
}
PD_List
PD_ListFloatAttr
(
void
*
attr
)
{
PD_List
list
;
const
auto
&
cc_list
=
*
reinterpret_cast
<
std
::
vector
<
float
>*>
(
attr
);
list
.
size
=
cc_list
.
size
();
list
.
data
=
const_cast
<
float
*>
(
cc_list
.
data
());
return
list
;
}
PD_List
PD_ListDoubleAttr
(
void
*
attr
)
{
PD_List
list
;
const
auto
&
cc_list
=
*
reinterpret_cast
<
std
::
vector
<
double
>*>
(
attr
);
list
.
size
=
cc_list
.
size
();
list
.
data
=
const_cast
<
double
*>
(
cc_list
.
data
());
return
list
;
}
PD_List
PD_ListScalarAttr
(
void
*
attr
)
{
PD_List
list
;
const
auto
&
cc_list
=
*
reinterpret_cast
<
std
::
vector
<
phi
::
Scalar
>*>
(
attr
);
list
.
size
=
cc_list
.
size
();
auto
data
=
new
PD_Scalar
*
[
list
.
size
];
for
(
size_t
i
=
0
;
i
<
list
.
size
;
++
i
)
{
data
[
i
]
=
const_cast
<
PD_Scalar
*>
(
reinterpret_cast
<
const
PD_Scalar
*>
(
&
cc_list
[
i
]));
}
list
.
data
=
data
;
return
list
;
}
PD_List
PD_ListStringAttr
(
void
*
attr
)
{
PD_List
list
;
const
auto
&
cc_list
=
*
reinterpret_cast
<
std
::
vector
<
std
::
string
>*>
(
attr
);
list
.
size
=
cc_list
.
size
();
auto
data
=
new
char
*
[
list
.
size
];
for
(
size_t
i
=
0
;
i
<
list
.
size
;
++
i
)
{
data
[
i
]
=
const_cast
<
char
*>
(
cc_list
[
i
].
data
());
}
list
.
data
=
reinterpret_cast
<
void
*>
(
data
);
return
list
;
}
PD_List
PD_ListBoolAttr
(
void
*
attr
)
{
PD_List
list
;
const
auto
&
cc_list
=
*
reinterpret_cast
<
std
::
vector
<
bool
>*>
(
attr
);
list
.
size
=
cc_list
.
size
();
auto
data
=
reinterpret_cast
<
uint8_t
*>
(
new
uint8_t
[
cc_list
.
size
()]);
for
(
size_t
i
=
0
;
i
<
cc_list
.
size
();
++
i
)
{
data
[
i
]
=
static_cast
<
uint8_t
>
(
cc_list
[
i
]);
}
list
.
data
=
data
;
return
list
;
}
PD_REGISTER_CAPI
(
kernel_context
);
paddle/phi/capi/lib/c_tensor.cc
浏览文件 @
033ef5e9
...
...
@@ -299,4 +299,19 @@ void PD_TensorShareLoDWith(PD_Tensor* dst,
meta_dst
.
share_lod
(
meta_src
);
}
PD_Tensor
*
PD_OptionalTensorGetPointer
(
PD_Tensor
*
tensor
)
{
auto
cc_tensor
=
reinterpret_cast
<
paddle
::
optional
<
phi
::
DenseTensor
>*>
(
tensor
);
return
reinterpret_cast
<
PD_Tensor
*>
(
cc_tensor
->
get_ptr
());
}
PD_List
PD_TensorVectorToList
(
PD_Tensor
*
tensor
)
{
auto
cc_tensor
=
reinterpret_cast
<
std
::
vector
<
const
phi
::
DenseTensor
*>*>
(
tensor
);
PD_List
list
;
list
.
size
=
cc_tensor
->
size
();
list
.
data
=
cc_tensor
->
data
();
return
list
;
}
PD_REGISTER_CAPI
(
tensor
);
python/paddle/fluid/tests/custom_runtime/CMakeLists.txt
浏览文件 @
033ef5e9
if
(
WITH_CUSTOM_DEVICE
)
py_test
(
test_custom_device_data_loader SRCS test_custom_device_data_loader.py
)
py_test
(
test_custom_cpu_plugin SRCS test_custom_cpu_plugin.py
)
set_tests_properties
(
test_custom_cpu_plugin PROPERTIES TIMEOUT 120
)
endif
()
python/paddle/fluid/tests/custom_runtime/custom_cpu_runtime.cc
已删除
100644 → 0
浏览文件 @
961d6cce
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include "paddle/phi/backends/device_ext.h"
#define MEMORY_FRACTION 0.5f
C_Status
Init
()
{
return
C_SUCCESS
;
}
C_Status
InitDevice
(
const
C_Device
device
)
{
return
C_SUCCESS
;
}
C_Status
SetDevice
(
const
C_Device
device
)
{
return
C_SUCCESS
;
}
C_Status
GetDevice
(
const
C_Device
device
)
{
device
->
id
=
0
;
return
C_SUCCESS
;
}
C_Status
DestroyDevice
(
const
C_Device
device
)
{
return
C_SUCCESS
;
}
C_Status
Finalize
()
{
return
C_SUCCESS
;
}
C_Status
GetDevicesCount
(
size_t
*
count
)
{
*
count
=
1
;
return
C_SUCCESS
;
}
C_Status
GetDevicesList
(
size_t
*
devices
)
{
devices
[
0
]
=
0
;
return
C_SUCCESS
;
}
C_Status
MemCpy
(
const
C_Device
device
,
void
*
dst
,
const
void
*
src
,
size_t
size
)
{
memcpy
(
dst
,
src
,
size
);
return
C_SUCCESS
;
}
C_Status
AsyncMemCpy
(
const
C_Device
device
,
C_Stream
stream
,
void
*
dst
,
const
void
*
src
,
size_t
size
)
{
memcpy
(
dst
,
src
,
size
);
return
C_SUCCESS
;
}
C_Status
MemCpyP2P
(
const
C_Device
dst_device
,
const
C_Device
src_device
,
void
*
dst
,
const
void
*
src
,
size_t
size
)
{
memcpy
(
dst
,
src
,
size
);
return
C_SUCCESS
;
}
C_Status
AsyncMemCpyP2P
(
const
C_Device
dst_device
,
const
C_Device
src_device
,
C_Stream
stream
,
void
*
dst
,
const
void
*
src
,
size_t
size
)
{
memcpy
(
dst
,
src
,
size
);
return
C_SUCCESS
;
}
C_Status
Allocate
(
const
C_Device
device
,
void
**
ptr
,
size_t
size
)
{
auto
data
=
malloc
(
size
);
if
(
data
)
{
*
ptr
=
data
;
return
C_SUCCESS
;
}
else
{
*
ptr
=
nullptr
;
}
return
C_FAILED
;
}
C_Status
Deallocate
(
const
C_Device
device
,
void
*
ptr
,
size_t
size
)
{
free
(
ptr
);
return
C_SUCCESS
;
}
C_Status
CreateStream
(
const
C_Device
device
,
C_Stream
*
stream
)
{
stream
=
nullptr
;
return
C_SUCCESS
;
}
C_Status
DestroyStream
(
const
C_Device
device
,
C_Stream
stream
)
{
return
C_SUCCESS
;
}
C_Status
CreateEvent
(
const
C_Device
device
,
C_Event
*
event
)
{
return
C_SUCCESS
;
}
C_Status
RecordEvent
(
const
C_Device
device
,
C_Stream
stream
,
C_Event
event
)
{
return
C_SUCCESS
;
}
C_Status
DestroyEvent
(
const
C_Device
device
,
C_Event
event
)
{
return
C_SUCCESS
;
}
C_Status
SyncDevice
(
const
C_Device
device
)
{
return
C_SUCCESS
;
}
C_Status
SyncStream
(
const
C_Device
device
,
C_Stream
stream
)
{
return
C_SUCCESS
;
}
C_Status
SyncEvent
(
const
C_Device
device
,
C_Event
event
)
{
return
C_SUCCESS
;
}
C_Status
StreamWaitEvent
(
const
C_Device
device
,
C_Stream
stream
,
C_Event
event
)
{
return
C_SUCCESS
;
}
C_Status
VisibleDevices
(
size_t
*
devices
)
{
return
C_SUCCESS
;
}
C_Status
DeviceMemStats
(
const
C_Device
device
,
size_t
*
total_memory
,
size_t
*
free_memory
)
{
float
memusage
;
FILE
*
fp
;
char
buffer
[
1024
];
size_t
byte_read
;
char
*
pos
;
fp
=
fopen
(
"/proc/meminfo"
,
"r"
);
byte_read
=
fread
(
buffer
,
1
,
sizeof
(
buffer
),
fp
);
fclose
(
fp
);
buffer
[
byte_read
]
=
'\0'
;
pos
=
strstr
(
buffer
,
"MemTotal:"
);
sscanf
(
pos
,
"MemTotal: %lu kB"
,
total_memory
);
pos
=
strstr
(
pos
,
"MemFree:"
);
sscanf
(
pos
,
"MemFree: %lu kB"
,
free_memory
);
*
total_memory
=
*
total_memory
*
1024
;
*
free_memory
=
*
free_memory
*
1024
;
*
free_memory
=
*
free_memory
*
MEMORY_FRACTION
;
return
C_SUCCESS
;
}
C_Status
DeviceMinChunkSize
(
const
C_Device
device
,
size_t
*
size
)
{
*
size
=
512
;
return
C_SUCCESS
;
}
void
InitPlugin
(
CustomRuntimeParams
*
params
)
{
PADDLE_CUSTOM_RUNTIME_CHECK_VERSION
(
params
);
params
->
device_type
=
"custom_cpu"
;
params
->
sub_device_type
=
"v0.1"
;
memset
(
reinterpret_cast
<
void
*>
(
params
->
interface
),
0
,
sizeof
(
C_DeviceInterface
));
params
->
interface
->
initialize
=
Init
;
params
->
interface
->
finalize
=
Finalize
;
params
->
interface
->
init_device
=
InitDevice
;
params
->
interface
->
set_device
=
SetDevice
;
params
->
interface
->
get_device
=
GetDevice
;
params
->
interface
->
deinit_device
=
DestroyDevice
;
params
->
interface
->
create_stream
=
CreateStream
;
params
->
interface
->
destroy_stream
=
DestroyStream
;
params
->
interface
->
create_event
=
CreateEvent
;
params
->
interface
->
destroy_event
=
DestroyEvent
;
params
->
interface
->
record_event
=
RecordEvent
;
params
->
interface
->
synchronize_device
=
SyncDevice
;
params
->
interface
->
synchronize_stream
=
SyncStream
;
params
->
interface
->
synchronize_event
=
SyncEvent
;
params
->
interface
->
stream_wait_event
=
StreamWaitEvent
;
params
->
interface
->
memory_copy_h2d
=
MemCpy
;
params
->
interface
->
memory_copy_d2d
=
MemCpy
;
params
->
interface
->
memory_copy_d2h
=
MemCpy
;
params
->
interface
->
memory_copy_p2p
=
MemCpyP2P
;
params
->
interface
->
async_memory_copy_h2d
=
AsyncMemCpy
;
params
->
interface
->
async_memory_copy_d2d
=
AsyncMemCpy
;
params
->
interface
->
async_memory_copy_d2h
=
AsyncMemCpy
;
params
->
interface
->
async_memory_copy_p2p
=
AsyncMemCpyP2P
;
params
->
interface
->
device_memory_allocate
=
Allocate
;
params
->
interface
->
host_memory_allocate
=
Allocate
;
params
->
interface
->
unified_memory_allocate
=
Allocate
;
params
->
interface
->
device_memory_deallocate
=
Deallocate
;
params
->
interface
->
host_memory_deallocate
=
Deallocate
;
params
->
interface
->
unified_memory_deallocate
=
Deallocate
;
params
->
interface
->
get_device_count
=
GetDevicesCount
;
params
->
interface
->
get_device_list
=
GetDevicesList
;
params
->
interface
->
device_memory_stats
=
DeviceMemStats
;
params
->
interface
->
device_min_chunk_size
=
DeviceMinChunkSize
;
}
python/paddle/fluid/tests/custom_runtime/custom_cpu_setup.py
已删除
100644 → 0
浏览文件 @
961d6cce
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
site
from
paddle.fluid
import
core
from
distutils.sysconfig
import
get_python_lib
from
distutils.core
import
setup
,
Extension
from
setuptools.command.build_ext
import
build_ext
# refer: https://note.qidong.name/2018/03/setup-warning-strict-prototypes
# Avoid a gcc warning below:
# cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid
# for C/ObjC but not for C++
class
BuildExt
(
build_ext
):
def
build_extensions
(
self
):
if
'-Wstrict-prototypes'
in
self
.
compiler
.
compiler_so
:
self
.
compiler
.
compiler_so
.
remove
(
'-Wstrict-prototypes'
)
super
(
BuildExt
,
self
).
build_extensions
()
# cc flags
paddle_extra_compile_args
=
[
'-std=c++14'
,
'-shared'
,
'-fPIC'
,
'-Wno-parentheses'
,
'-DPADDLE_WITH_CUSTOM_KERNEL'
,
'-DPADDLE_WITH_CUSTOM_DEVICE'
,
]
if
core
.
is_compiled_with_npu
():
paddle_extra_compile_args
+=
[
'-D_GLIBCXX_USE_CXX11_ABI=0'
]
# include path
site_packages_path
=
site
.
getsitepackages
()
include_dirs
=
list
(
map
(
lambda
path
:
os
.
path
.
join
(
path
,
'paddle'
,
'include'
),
site_packages_path
))
# include path third_party
compile_third_party_path
=
os
.
path
.
join
(
os
.
environ
[
'PADDLE_ROOT'
],
'build/third_party'
)
include_dirs
+=
[
os
.
path
.
join
(
compile_third_party_path
,
'boost/src/extern_boost'
),
# boost
os
.
path
.
join
(
compile_third_party_path
,
'install/gflags/include'
),
# gflags
os
.
path
.
join
(
compile_third_party_path
,
'install/glog/include'
),
# glog
]
# libs path
library_dirs
=
list
(
map
(
lambda
path
:
os
.
path
.
join
(
path
,
'paddle'
,
'fluid'
),
site_packages_path
))
# libs
libs
=
[
':core_avx.so'
]
if
not
core
.
has_avx_core
and
core
.
has_noavx_core
:
libs
=
[
':core_noavx.so'
]
custom_cpu_plugin_so
=
Extension
(
'custom_cpu_runtime'
,
sources
=
[
'custom_cpu_runtime.cc'
],
include_dirs
=
include_dirs
,
library_dirs
=
library_dirs
,
libraries
=
libs
,
extra_compile_args
=
paddle_extra_compile_args
)
setup
(
name
=
'custom_kernel_dot'
,
version
=
'1.0'
,
description
=
'custom kernel fot compiling'
,
cmdclass
=
{
'build_ext'
:
BuildExt
},
ext_modules
=
[
custom_cpu_plugin_so
])
python/paddle/fluid/tests/custom_runtime/test_custom_
device_data_loader
.py
→
python/paddle/fluid/tests/custom_runtime/test_custom_
cpu_plugin
.py
浏览文件 @
033ef5e9
...
...
@@ -19,24 +19,29 @@ import unittest
import
numpy
as
np
class
TestCustom
DeviceDataLoader
(
unittest
.
TestCase
):
class
TestCustom
CPUPlugin
(
unittest
.
TestCase
):
def
setUp
(
self
):
# compile so and set to current path
cur_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
# --inplace to place output so file to current dir
cmd
=
'cd {} && {} custom_cpu_setup.py build_ext --inplace'
.
format
(
cur_dir
,
sys
.
executable
)
cmd
=
'rm -rf PaddleCustomDevice && git clone https://github.com/PaddlePaddle/PaddleCustomDevice.git && cd PaddleCustomDevice/backends/custom_cpu && mkdir build && cd build && cmake .. && make -j8'
os
.
system
(
cmd
)
# set environment for loading and registering compiled custom kernels
# only valid in current process
os
.
environ
[
'CUSTOM_DEVICE_ROOT'
]
=
cur_dir
os
.
environ
[
'CUSTOM_DEVICE_ROOT'
]
=
os
.
path
.
join
(
cur_dir
,
'PaddleCustomDevice/backends/custom_cpu/build'
)
def
test_custom_device_dataloader
(
self
):
import
paddle
with
paddle
.
fluid
.
framework
.
_test_eager_guard
():
self
.
_test_custom_device_dataloader
()
self
.
_test_custom_device_dataloader
()
def
_test_custom_device_dataloader
(
self
):
import
paddle
paddle
.
set_device
(
'custom_cpu'
)
dataset
=
paddle
.
vision
.
datasets
.
MNIST
(
mode
=
'test'
,
...
...
@@ -55,6 +60,66 @@ class TestCustomDeviceDataLoader(unittest.TestCase):
self
.
assertTrue
(
label
.
place
.
is_custom_place
())
break
def
test_custom_device_mnist
(
self
):
import
paddle
with
paddle
.
fluid
.
framework
.
_test_eager_guard
():
self
.
_test_custom_device_mnist
()
self
.
_test_custom_device_mnist
()
def
_test_custom_device_mnist
(
self
):
import
paddle
class
MNIST
(
paddle
.
nn
.
Layer
):
def
__init__
(
self
):
super
(
MNIST
,
self
).
__init__
()
self
.
shape
=
1
*
28
*
28
self
.
size
=
10
self
.
output_weight
=
self
.
create_parameter
(
[
self
.
shape
,
self
.
size
])
self
.
accuracy
=
paddle
.
metric
.
Accuracy
()
def
forward
(
self
,
inputs
,
label
=
None
):
x
=
paddle
.
reshape
(
inputs
,
shape
=
[
-
1
,
self
.
shape
])
x
=
paddle
.
matmul
(
x
,
self
.
output_weight
)
x
=
paddle
.
nn
.
functional
.
softmax
(
x
)
if
label
is
not
None
:
self
.
accuracy
.
reset
()
correct
=
self
.
accuracy
.
compute
(
x
,
label
)
self
.
accuracy
.
update
(
correct
)
acc
=
self
.
accuracy
.
accumulate
()
return
x
,
acc
else
:
return
x
paddle
.
set_device
(
'custom_cpu'
)
dataset
=
paddle
.
vision
.
datasets
.
MNIST
(
mode
=
'train'
,
transform
=
paddle
.
vision
.
transforms
.
Compose
(
[
paddle
.
vision
.
transforms
.
ToTensor
()]))
loader
=
paddle
.
io
.
DataLoader
(
dataset
,
batch_size
=
64
,
num_workers
=
1
,
shuffle
=
True
)
mnist
=
MNIST
()
sgd
=
paddle
.
optimizer
.
SGD
(
learning_rate
=
0.01
,
parameters
=
mnist
.
parameters
())
data
=
next
(
loader
())
img
=
data
[
0
]
label
=
data
[
1
]
label_int32
=
paddle
.
cast
(
label
,
'int32'
)
pred
,
acc
=
mnist
(
img
,
label_int32
)
avg_loss
=
paddle
.
nn
.
functional
.
cross_entropy
(
pred
,
label_int32
)
avg_loss
.
backward
()
sgd
.
step
()
sgd
.
clear_grad
()
self
.
assertTrue
(
pred
.
place
.
is_custom_place
())
def
tearDown
(
self
):
del
os
.
environ
[
'CUSTOM_DEVICE_ROOT'
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录