Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
a0dc361c
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a0dc361c
编写于
7月 05, 2022
作者:
R
ronnywang
提交者:
GitHub
7月 05, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Dataloader add custom device support (#44013)
* Dataloader add custom device support * update test=document_fix
上级
29b55009
变更
14
隐藏空白更改
内联
并排
Showing
14 changed files
with
509 additions
and
7 deletions
+509
-7
paddle/fluid/memory/memcpy.cc
paddle/fluid/memory/memcpy.cc
+22
-0
paddle/fluid/operators/reader/buffered_reader.cc
paddle/fluid/operators/reader/buffered_reader.cc
+76
-0
paddle/fluid/operators/reader/buffered_reader.h
paddle/fluid/operators/reader/buffered_reader.h
+11
-1
paddle/phi/backends/device_guard.h
paddle/phi/backends/device_guard.h
+4
-0
paddle/phi/backends/device_manager.cc
paddle/phi/backends/device_manager.cc
+4
-2
paddle/phi/backends/event.cc
paddle/phi/backends/event.cc
+6
-2
paddle/phi/backends/event.h
paddle/phi/backends/event.h
+1
-0
paddle/phi/backends/stream.cc
paddle/phi/backends/stream.cc
+5
-2
python/paddle/fluid/tests/CMakeLists.txt
python/paddle/fluid/tests/CMakeLists.txt
+1
-0
python/paddle/fluid/tests/custom_runtime/CMakeLists.txt
python/paddle/fluid/tests/custom_runtime/CMakeLists.txt
+3
-0
python/paddle/fluid/tests/custom_runtime/__init__.py
python/paddle/fluid/tests/custom_runtime/__init__.py
+13
-0
python/paddle/fluid/tests/custom_runtime/custom_cpu_runtime.cc
...n/paddle/fluid/tests/custom_runtime/custom_cpu_runtime.cc
+215
-0
python/paddle/fluid/tests/custom_runtime/custom_cpu_setup.py
python/paddle/fluid/tests/custom_runtime/custom_cpu_setup.py
+82
-0
python/paddle/fluid/tests/custom_runtime/test_custom_device_data_loader.py
...id/tests/custom_runtime/test_custom_device_data_loader.py
+66
-0
未找到文件。
paddle/fluid/memory/memcpy.cc
浏览文件 @
a0dc361c
...
...
@@ -1442,6 +1442,28 @@ void Copy<phi::Place, phi::Place>(phi::Place dst_place,
return
Copy
(
place_dst
,
dst
,
place_src
,
src
,
num
);
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
else
if
(
src_place
.
GetType
()
==
phi
::
AllocationType
::
CPU
&&
// NOLINT
dst_place
.
GetType
()
==
phi
::
AllocationType
::
CUSTOM
)
{
platform
::
CustomPlace
place_dst
(
dst_place
.
GetDeviceType
(),
dst_place
.
GetDeviceId
());
platform
::
CPUPlace
place_src
;
return
Copy
(
place_dst
,
dst
,
place_src
,
src
,
num
,
nullptr
);
}
else
if
(
src_place
.
GetType
()
==
phi
::
AllocationType
::
CUSTOM
&&
dst_place
.
GetType
()
==
phi
::
AllocationType
::
CPU
)
{
platform
::
CustomPlace
place_src
(
src_place
.
GetDeviceType
(),
src_place
.
GetDeviceId
());
platform
::
CPUPlace
place_dst
;
return
Copy
(
place_dst
,
dst
,
place_src
,
src
,
num
,
nullptr
);
}
else
if
(
src_place
.
GetType
()
==
phi
::
AllocationType
::
CUSTOM
&&
dst_place
.
GetType
()
==
phi
::
AllocationType
::
CUSTOM
)
{
platform
::
CustomPlace
place_src
(
src_place
.
GetDeviceType
(),
src_place
.
GetDeviceId
());
platform
::
CustomPlace
place_dst
(
dst_place
.
GetDeviceType
(),
dst_place
.
GetDeviceId
());
return
Copy
(
place_dst
,
dst
,
place_src
,
src
,
num
,
nullptr
);
}
#endif
}
// NOTE: Only for (CPUPlace) -> (CPUPlace and PinnedPlace).
...
...
paddle/fluid/operators/reader/buffered_reader.cc
浏览文件 @
a0dc361c
...
...
@@ -19,6 +19,9 @@
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/phi/backends/device_guard.h"
#include "paddle/phi/backends/device_manager.h"
namespace
paddle
{
namespace
operators
{
namespace
reader
{
...
...
@@ -105,11 +108,30 @@ BufferedReader::BufferedReader(
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
if
(
platform
::
is_custom_place
(
place_
))
{
auto
stream
=
((
platform
::
CustomDeviceContext
*
)(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
)))
->
stream
();
custom_device_compute_stream_
=
std
::
make_shared
<
phi
::
stream
::
Stream
>
(
place_
,
stream
);
custom_device_events_
.
resize
(
buffer_size
);
for
(
auto
&
event
:
custom_device_events_
)
{
event
=
std
::
make_shared
<
phi
::
event
::
Event
>
();
event
->
Init
(
place_
);
}
custom_device_stream_
=
std
::
make_shared
<
phi
::
stream
::
Stream
>
();
custom_device_stream_
->
Init
(
place_
);
}
#endif
cpu_buffer_
.
resize
(
buffer_size
);
cuda_buffer_
.
resize
(
buffer_size
);
npu_buffer_
.
resize
(
buffer_size
);
mlu_buffer_
.
resize
(
buffer_size
);
xpu_buffer_
.
resize
(
buffer_size
);
custom_device_buffer_
.
resize
(
buffer_size
);
ReadTillBufferFullAsync
();
}
...
...
@@ -410,6 +432,58 @@ void BufferedReader::ReadAsync(size_t i) {
platform
::
XPUStreamSync
(
stream_
.
get
());
}
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
if
(
platform
::
is_custom_place
(
place_
))
{
TensorVec
&
custom_device
=
custom_device_buffer_
[
i
];
if
(
custom_device
.
empty
())
{
custom_device
.
resize
(
cpu
.
size
());
}
else
{
PADDLE_ENFORCE_EQ
(
custom_device
.
size
(),
cpu
.
size
(),
platform
::
errors
::
InvalidArgument
(
"Input tensor number on CustomDevice and CPU "
"devices are not matched. "
"The number on CustomDevice is %d, on CPU is %d"
,
custom_device
.
size
(),
cpu
.
size
()));
}
std
::
vector
<
void
*>
custom_device_ptrs
;
custom_device_ptrs
.
reserve
(
cpu
.
size
());
for
(
size_t
i
=
0
;
i
<
cpu
.
size
();
++
i
)
{
custom_device
[
i
].
Resize
(
cpu
[
i
].
dims
());
custom_device
[
i
].
set_layout
(
cpu
[
i
].
layout
());
custom_device_ptrs
.
emplace_back
(
custom_device
[
i
].
mutable_data
(
place_
,
cpu
[
i
].
type
()));
}
phi
::
DeviceManager
::
SetDevice
(
place_
);
phi
::
DeviceManager
::
GetDeviceWithPlace
(
place_
)
->
RecordEvent
(
custom_device_events_
[
i
].
get
(),
custom_device_compute_stream_
.
get
());
phi
::
DeviceManager
::
GetDeviceWithPlace
(
place_
)
->
StreamWaitEvent
(
custom_device_stream_
.
get
(),
custom_device_events_
[
i
].
get
());
platform
::
RecordEvent
record_event
(
"BufferedReader:MemoryCopy"
,
platform
::
TracerEventType
::
UserDefined
,
1
);
for
(
size_t
i
=
0
;
i
<
cpu
.
size
();
++
i
)
{
auto
cpu_place
=
cpu
[
i
].
place
();
auto
cpu_ptr
=
cpu
[
i
].
data
();
auto
custom_device_ptr
=
custom_device_ptrs
[
i
];
auto
size
=
cpu
[
i
].
numel
()
*
paddle
::
framework
::
DataTypeSize
(
cpu
[
i
].
dtype
());
if
((
platform
::
is_custom_place
(
cpu_place
)))
{
memory
::
Copy
(
place_
,
custom_device_ptr
,
cpu_place
,
cpu_ptr
,
size
);
custom_device_stream_
->
Synchronize
();
}
else
{
memory
::
Copy
(
place_
,
custom_device_ptr
,
cpu_place
,
cpu_ptr
,
size
);
}
custom_device
[
i
].
set_lod
(
cpu
[
i
].
lod
());
}
custom_device_stream_
->
Synchronize
();
}
#endif
return
i
;
}));
}
...
...
@@ -449,6 +523,8 @@ void BufferedReader::ReadNextImpl(std::vector<framework::LoDTensor> *out) {
*
out
=
std
::
move
(
mlu_buffer_
[
i
]);
}
else
if
(
platform
::
is_xpu_place
(
place_
))
{
*
out
=
std
::
move
(
xpu_buffer_
[
i
]);
}
else
if
(
platform
::
is_custom_place
(
place_
))
{
*
out
=
std
::
move
(
custom_device_buffer_
[
i
]);
}
else
{
*
out
=
std
::
move
(
cpu_buffer_
[
i
]);
}
...
...
paddle/fluid/operators/reader/buffered_reader.h
浏览文件 @
a0dc361c
...
...
@@ -37,7 +37,10 @@
#include "paddle/fluid/platform/device/xpu/xpu_info.h"
#include "paddle/fluid/platform/device/xpu/xpu_resource_pool.h"
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
#include "paddle/phi/backends/event.h"
#include "paddle/phi/backends/stream.h"
#endif
namespace
paddle
{
namespace
operators
{
namespace
reader
{
...
...
@@ -82,6 +85,7 @@ class BufferedReader : public framework::DecoratedReader {
std
::
vector
<
TensorVec
>
npu_buffer_
;
std
::
vector
<
TensorVec
>
mlu_buffer_
;
std
::
vector
<
TensorVec
>
xpu_buffer_
;
std
::
vector
<
TensorVec
>
custom_device_buffer_
;
size_t
prev_pos_
{
-
1UL
};
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
gpuStream_t
compute_stream_
;
...
...
@@ -106,6 +110,12 @@ class BufferedReader : public framework::DecoratedReader {
std
::
shared_ptr
<
platform
::
XpuStreamObject
>
stream_
;
std
::
vector
<
std
::
shared_ptr
<
platform
::
XpuEventObject
>>
events_
;
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
std
::
shared_ptr
<
phi
::
stream
::
Stream
>
custom_device_compute_stream_
;
std
::
shared_ptr
<
phi
::
stream
::
Stream
>
custom_device_stream_
;
std
::
vector
<
std
::
shared_ptr
<
phi
::
event
::
Event
>>
custom_device_events_
;
#endif
};
}
// namespace reader
...
...
paddle/phi/backends/device_guard.h
浏览文件 @
a0dc361c
...
...
@@ -13,6 +13,8 @@
// limitations under the License.
#pragma once
#ifdef PADDLE_WITH_CUSTOM_DEVICE
#include "paddle/phi/backends/device_manager.h"
namespace
phi
{
...
...
@@ -44,3 +46,5 @@ class DeviceGuard {
};
}
// namespace phi
#endif
paddle/phi/backends/device_manager.cc
浏览文件 @
a0dc361c
...
...
@@ -394,8 +394,10 @@ DeviceManager& DeviceManager::Instance() {
}
void
DeviceManager
::
Clear
()
{
Instance
().
device_map_
.
clear
();
Instance
().
device_impl_map_
.
clear
();
// TODO(wangran16): fix coredump when using npu plugin
// Instance().device_map_.clear();
// Instance().device_impl_map_.clear();
}
std
::
vector
<
std
::
string
>
ListAllLibraries
(
const
std
::
string
&
library_dir
)
{
...
...
paddle/phi/backends/event.cc
浏览文件 @
a0dc361c
...
...
@@ -35,7 +35,11 @@ Event::~Event() { Destroy(); }
bool
Event
::
Init
(
const
Place
&
place
,
Flag
flags
)
{
place_
=
place
;
DeviceGuard
guard
(
place_
);
device_
=
phi
::
DeviceManager
::
GetDeviceWithPlace
(
place
);
// note(wangran16): bind device to the current thread. fix npu plugin null
// context bug.
phi
::
DeviceManager
::
SetDevice
(
place_
);
device_
->
CreateEvent
(
this
,
flags
);
VLOG
(
3
)
<<
"Init Event: "
<<
event_
<<
", place: "
<<
place_
<<
", flag:"
<<
static_cast
<
int
>
(
flags
);
...
...
@@ -45,7 +49,7 @@ bool Event::Init(const Place& place, Flag flags) {
void
Event
::
Destroy
()
{
if
(
own_data_
)
{
DeviceGuard
guard
(
place_
);
phi
::
DeviceManager
::
SetDevice
(
place_
);
device_
->
DestroyEvent
(
this
);
own_data_
=
false
;
}
...
...
paddle/phi/backends/event.h
浏览文件 @
a0dc361c
...
...
@@ -36,6 +36,7 @@ class Event {
Interprocess
=
0x4
,
};
Event
()
=
default
;
// For compatible
Event
(
const
Place
&
place
,
event_t
event
);
~
Event
();
...
...
paddle/phi/backends/stream.cc
浏览文件 @
a0dc361c
...
...
@@ -40,7 +40,10 @@ bool Stream::Init(const Place& place,
const
Flag
&
flag
)
{
place_
=
place
;
device_
=
phi
::
DeviceManager
::
GetDeviceWithPlace
(
place
);
DeviceGuard
guard
(
place_
);
// note(wangran16): bind device to the current thread. fix npu plugin null
// context bug.
phi
::
DeviceManager
::
SetDevice
(
place_
);
device_
->
CreateStream
(
this
,
priority
,
flag
);
callback_manager_
.
reset
(
new
CallbackManager
(
this
));
...
...
@@ -80,7 +83,7 @@ void Stream::WaitCallback() const { callback_manager_->Wait(); }
void
Stream
::
Destroy
()
{
if
(
own_data_
)
{
DeviceGuard
guard
(
place_
);
phi
::
DeviceManager
::
SetDevice
(
place_
);
device_
->
DestroyStream
(
this
);
own_data_
=
false
;
}
...
...
python/paddle/fluid/tests/CMakeLists.txt
浏览文件 @
a0dc361c
...
...
@@ -12,5 +12,6 @@ add_subdirectory(unittests)
add_subdirectory
(
book
)
add_subdirectory
(
custom_op
)
add_subdirectory
(
custom_kernel
)
add_subdirectory
(
custom_runtime
)
set_tests_properties
(
test_beam_search_decoder PROPERTIES TIMEOUT 120
)
python/paddle/fluid/tests/custom_runtime/CMakeLists.txt
0 → 100644
浏览文件 @
a0dc361c
# Register the custom-device DataLoader test only when WITH_CUSTOM_DEVICE is
# enabled.
if(WITH_CUSTOM_DEVICE)
  py_test(test_custom_device_data_loader
          SRCS test_custom_device_data_loader.py)
endif()
python/paddle/fluid/tests/custom_runtime/__init__.py
0 → 100644
浏览文件 @
a0dc361c
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
python/paddle/fluid/tests/custom_runtime/custom_cpu_runtime.cc
0 → 100644
浏览文件 @
a0dc361c
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include "paddle/phi/backends/device_ext.h"
#define MEMORY_FRACTION 0.5f
// Runtime initialization hook. No-op: always reports success.
C_Status Init() { return C_SUCCESS; }
// Per-device initialization hook. No-op: `device` is ignored and success is
// always reported.
C_Status InitDevice(const C_Device device) { return C_SUCCESS; }
// Device selection hook. No-op: `device` is ignored and success is always
// reported.
C_Status SetDevice(const C_Device device) { return C_SUCCESS; }
// Current-device query hook. Stores id 0 into `device` and reports success.
C_Status GetDevice(const C_Device device) {
  device->id = 0;
  return C_SUCCESS;
}
// Per-device teardown hook. No-op: `device` is ignored and success is always
// reported.
C_Status DestroyDevice(const C_Device device) { return C_SUCCESS; }
// Runtime finalization hook. No-op: always reports success.
C_Status Finalize() { return C_SUCCESS; }
// Device-count query hook. Always reports exactly one device through `count`.
C_Status GetDevicesCount(size_t *count) {
  *count = 1;
  return C_SUCCESS;
}
// Device-list query hook. Writes device id 0 into the first slot of `devices`
// and reports success.
C_Status GetDevicesList(size_t *devices) {
  *devices = 0;
  return C_SUCCESS;
}
// Synchronous copy hook. Copies `size` bytes from `src` to `dst` with memcpy;
// `device` is ignored. Always reports success.
C_Status MemCpy(const C_Device device,
                void *dst,
                const void *src,
                size_t size) {
  std::memcpy(dst, src, size);
  return C_SUCCESS;
}
// "Asynchronous" copy hook. The copy is performed immediately with memcpy;
// `device` and `stream` are ignored. Always reports success.
C_Status AsyncMemCpy(const C_Device device,
                     C_Stream stream,
                     void *dst,
                     const void *src,
                     size_t size) {
  std::memcpy(dst, src, size);
  return C_SUCCESS;
}
// Peer-to-peer copy hook. Copies `size` bytes from `src` to `dst` with memcpy;
// both device arguments are ignored. Always reports success.
C_Status MemCpyP2P(const C_Device dst_device,
                   const C_Device src_device,
                   void *dst,
                   const void *src,
                   size_t size) {
  std::memcpy(dst, src, size);
  return C_SUCCESS;
}
// "Asynchronous" peer-to-peer copy hook. The copy is performed immediately
// with memcpy; the device arguments and `stream` are ignored. Always reports
// success.
C_Status AsyncMemCpyP2P(const C_Device dst_device,
                        const C_Device src_device,
                        C_Stream stream,
                        void *dst,
                        const void *src,
                        size_t size) {
  std::memcpy(dst, src, size);
  return C_SUCCESS;
}
// Allocation hook backed by malloc; `device` is ignored.
//
// `*ptr` receives the malloc result (nullptr when malloc fails). Returns
// C_SUCCESS when a non-null block was obtained and C_FAILED otherwise.
C_Status Allocate(const C_Device device, void **ptr, size_t size) {
  void *const block = malloc(size);
  *ptr = block;
  return block != nullptr ? C_SUCCESS : C_FAILED;
}
// Deallocation hook. Releases `ptr` with free; `device` and `size` are
// ignored. Always reports success.
C_Status Deallocate(const C_Device device, void *ptr, size_t size) {
  free(ptr);
  return C_SUCCESS;
}
// Stream creation hook. This runtime has no real streams, so the caller's
// handle is set to nullptr and success is reported; `device` is ignored.
C_Status CreateStream(const C_Device device, C_Stream *stream) {
  // Write through the out-parameter. Assigning to `stream` itself would only
  // change the local copy of the pointer and leave the caller's handle
  // uninitialized.
  *stream = nullptr;
  return C_SUCCESS;
}
// Stream destruction hook. No-op: both arguments are ignored and success is
// always reported.
C_Status DestroyStream(const C_Device device, C_Stream stream) {
  return C_SUCCESS;
}
// Event creation hook. This runtime has no real events, so the caller's
// handle is set to nullptr and success is reported; `device` is ignored.
C_Status CreateEvent(const C_Device device, C_Event *event) {
  // Give the out-parameter a defined value so the caller never observes an
  // uninitialized handle.
  *event = nullptr;
  return C_SUCCESS;
}
// Event record hook. No-op: all arguments are ignored and success is always
// reported.
C_Status RecordEvent(const C_Device device, C_Stream stream, C_Event event) {
  return C_SUCCESS;
}
// Event destruction hook. No-op: both arguments are ignored and success is
// always reported.
C_Status DestroyEvent(const C_Device device, C_Event event) {
  return C_SUCCESS;
}
// Device synchronization hook. No-op: `device` is ignored and success is
// always reported.
C_Status SyncDevice(const C_Device device) { return C_SUCCESS; }
// Stream synchronization hook. No-op: both arguments are ignored and success
// is always reported.
C_Status SyncStream(const C_Device device, C_Stream stream) {
  return C_SUCCESS;
}
// Event synchronization hook. No-op: both arguments are ignored and success
// is always reported.
C_Status SyncEvent(const C_Device device, C_Event event) {
  return C_SUCCESS;
}
// Stream-waits-on-event hook. No-op: all arguments are ignored and success is
// always reported.
C_Status StreamWaitEvent(const C_Device device,
                         C_Stream stream,
                         C_Event event) {
  return C_SUCCESS;
}
// Visible-devices query hook. No-op: `devices` is left untouched and success
// is always reported.
C_Status VisibleDevices(size_t *devices) { return C_SUCCESS; }
// Memory statistics hook, answered from the host's /proc/meminfo; `device` is
// ignored.
//
// On success, `*total_memory` receives the MemTotal value converted from kB to
// bytes and `*free_memory` receives the MemFree value converted to bytes and
// scaled by MEMORY_FRACTION. Returns C_FAILED, leaving both outputs untouched,
// when /proc/meminfo cannot be opened or either field cannot be parsed from
// the portion of the file that was read.
C_Status DeviceMemStats(const C_Device device,
                        size_t *total_memory,
                        size_t *free_memory) {
  FILE *fp = fopen("/proc/meminfo", "r");
  if (fp == nullptr) {
    return C_FAILED;
  }

  char buffer[1024];
  // Read at most sizeof(buffer) - 1 bytes so the terminator written below
  // always lands inside the array, even when the file is longer than the
  // buffer.
  const size_t byte_read = fread(buffer, 1, sizeof(buffer) - 1, fp);
  fclose(fp);
  buffer[byte_read] = '\0';

  // "%lu" stores an unsigned long, which is not guaranteed to be the same type
  // as size_t, so parse into locals of the matching type first.
  unsigned long total_kb = 0;  // NOLINT
  unsigned long free_kb = 0;   // NOLINT

  const char *pos = strstr(buffer, "MemTotal:");
  if (pos == nullptr || sscanf(pos, "MemTotal: %lu kB", &total_kb) != 1) {
    return C_FAILED;
  }
  pos = strstr(pos, "MemFree:");
  if (pos == nullptr || sscanf(pos, "MemFree: %lu kB", &free_kb) != 1) {
    return C_FAILED;
  }

  // Values are parsed in kB; convert to bytes.
  *total_memory = static_cast<size_t>(total_kb) * 1024;
  *free_memory = static_cast<size_t>(free_kb) * 1024;
  // Only advertise a fraction of the free host memory as available.
  *free_memory = *free_memory * MEMORY_FRACTION;
  return C_SUCCESS;
}
// Minimum-chunk-size query hook. Always reports 512 through `size`; `device`
// is ignored.
C_Status DeviceMinChunkSize(const C_Device device, size_t *size) {
  constexpr size_t kMinChunkSize = 512;
  *size = kMinChunkSize;
  return C_SUCCESS;
}
// Plugin entry point: fills in `params` for the "custom_cpu" runtime.
//
// After the version-check macro, sets the device type to "custom_cpu" and the
// sub device type to "v0.1", zeroes the whole interface table, and then
// installs the callbacks defined in this file. Interface entries that are not
// assigned below stay zeroed.
void InitPlugin(CustomRuntimeParams *params) {
  PADDLE_CUSTOM_RUNTIME_CHECK_VERSION(params);
  params->device_type = "custom_cpu";
  params->sub_device_type = "v0.1";

  auto *iface = params->interface;
  memset(reinterpret_cast<void *>(iface), 0, sizeof(C_DeviceInterface));

  // Runtime and device lifecycle.
  iface->initialize = Init;
  iface->finalize = Finalize;
  iface->init_device = InitDevice;
  iface->set_device = SetDevice;
  iface->get_device = GetDevice;
  iface->deinit_device = DestroyDevice;

  // Streams and events.
  iface->create_stream = CreateStream;
  iface->destroy_stream = DestroyStream;
  iface->create_event = CreateEvent;
  iface->destroy_event = DestroyEvent;
  iface->record_event = RecordEvent;

  // Synchronization.
  iface->synchronize_device = SyncDevice;
  iface->synchronize_stream = SyncStream;
  iface->synchronize_event = SyncEvent;
  iface->stream_wait_event = StreamWaitEvent;

  // Memory copies.
  iface->memory_copy_h2d = MemCpy;
  iface->memory_copy_d2d = MemCpy;
  iface->memory_copy_d2h = MemCpy;
  iface->memory_copy_p2p = MemCpyP2P;
  iface->async_memory_copy_h2d = AsyncMemCpy;
  iface->async_memory_copy_d2d = AsyncMemCpy;
  iface->async_memory_copy_d2h = AsyncMemCpy;
  iface->async_memory_copy_p2p = AsyncMemCpyP2P;

  // Allocation and deallocation.
  iface->device_memory_allocate = Allocate;
  iface->host_memory_allocate = Allocate;
  iface->unified_memory_allocate = Allocate;
  iface->device_memory_deallocate = Deallocate;
  iface->host_memory_deallocate = Deallocate;
  iface->unified_memory_deallocate = Deallocate;

  // Device enumeration and memory information.
  iface->get_device_count = GetDevicesCount;
  iface->get_device_list = GetDevicesList;
  iface->device_memory_stats = DeviceMemStats;
  iface->device_min_chunk_size = DeviceMinChunkSize;
}
python/paddle/fluid/tests/custom_runtime/custom_cpu_setup.py
0 → 100644
浏览文件 @
a0dc361c
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
site
from
paddle.fluid
import
core
from
distutils.sysconfig
import
get_python_lib
from
distutils.core
import
setup
,
Extension
from
setuptools.command.build_ext
import
build_ext
# refer: https://note.qidong.name/2018/03/setup-warning-strict-prototypes
# Avoid a gcc warning below:
# cc1plus: warning: command line option ‘-Wstrict-prototypes’ is valid
# for C/ObjC but not for C++
class BuildExt(build_ext):
    """``build_ext`` command that drops ``-Wstrict-prototypes`` before building.

    The flag is removed from the compiler command line (when present) so that
    compiling the C++ sources does not emit the gcc warning about an option
    that is valid for C/ObjC but not for C++.
    """

    def build_extensions(self):
        compiler_flags = self.compiler.compiler_so
        if '-Wstrict-prototypes' in compiler_flags:
            compiler_flags.remove('-Wstrict-prototypes')
        super().build_extensions()
# Compiler flags used to build the custom CPU runtime plugin.
paddle_extra_compile_args = [
    '-std=c++14',
    '-shared',
    '-fPIC',
    '-Wno-parentheses',
    '-DPADDLE_WITH_CUSTOM_KERNEL',
    '-DPADDLE_WITH_CUSTOM_DEVICE',
]
if core.is_compiled_with_npu():
    paddle_extra_compile_args += ['-D_GLIBCXX_USE_CXX11_ABI=0']

# Include paths: the `paddle/include` directory under every site-packages dir.
site_packages_path = site.getsitepackages()
include_dirs = [
    os.path.join(path, 'paddle', 'include') for path in site_packages_path
]

# Third-party include paths, located under $PADDLE_ROOT/build/third_party.
# PADDLE_ROOT must be set in the environment; a missing value raises KeyError.
compile_third_party_path = os.path.join(os.environ['PADDLE_ROOT'],
                                        'build/third_party')
include_dirs += [
    os.path.join(compile_third_party_path, 'boost/src/extern_boost'),  # boost
    os.path.join(compile_third_party_path, 'install/gflags/include'),  # gflags
    os.path.join(compile_third_party_path, 'install/glog/include'),  # glog
]

# Library search paths: the `paddle/fluid` directory under every
# site-packages dir.
library_dirs = [
    os.path.join(path, 'paddle', 'fluid') for path in site_packages_path
]

# Link against the core library, falling back to the no-AVX build when the
# AVX core is not available but the no-AVX one is.
libs = [':core_avx.so']
if not core.has_avx_core and core.has_noavx_core:
    libs = [':core_noavx.so']

custom_cpu_plugin_so = Extension('custom_cpu_runtime',
                                 sources=['custom_cpu_runtime.cc'],
                                 include_dirs=include_dirs,
                                 library_dirs=library_dirs,
                                 libraries=libs,
                                 extra_compile_args=paddle_extra_compile_args)

# The distribution metadata describes this plugin; the built module name is
# determined by the Extension above and is unchanged.
setup(name='custom_cpu_runtime',
      version='1.0',
      description='custom cpu runtime plugin for compiling',
      cmdclass={'build_ext': BuildExt},
      ext_modules=[custom_cpu_plugin_so])
python/paddle/fluid/tests/custom_runtime/test_custom_device_data_loader.py
0 → 100644
浏览文件 @
a0dc361c
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
import
site
import
unittest
import
numpy
as
np
class TestCustomDeviceDataLoader(unittest.TestCase):
    """Checks that DataLoader batches are placed on the custom device."""

    def setUp(self):
        """Build the custom CPU runtime plugin and point Paddle at it."""
        import subprocess

        # compile so and set to current path
        cur_dir = os.path.dirname(os.path.abspath(__file__))

        # --inplace to place output so file to current dir.
        # Run without a shell (so paths containing spaces work) and raise
        # CalledProcessError if the build fails, instead of continuing with a
        # missing plugin.
        subprocess.check_call(
            [sys.executable, 'custom_cpu_setup.py', 'build_ext', '--inplace'],
            cwd=cur_dir)

        # set environment for loading and registering compiled custom kernels
        # only valid in current process
        os.environ['CUSTOM_DEVICE_ROOT'] = cur_dir

    def test_custom_device_dataloader(self):
        """The first batch's image and label must live on a custom place."""
        # Imported here so that CUSTOM_DEVICE_ROOT is set before paddle loads.
        import paddle

        paddle.set_device('custom_cpu')
        transforms = paddle.vision.transforms
        dataset = paddle.vision.datasets.MNIST(
            mode='test',
            transform=transforms.Compose([
                transforms.CenterCrop(20),
                transforms.RandomResizedCrop(14),
                transforms.Normalize(),
                transforms.ToTensor()
            ]))
        loader = paddle.io.DataLoader(dataset,
                                      batch_size=32,
                                      num_workers=1,
                                      shuffle=True)
        # Only the first batch is inspected.
        for image, label in loader:
            self.assertTrue(image.place.is_custom_place())
            self.assertTrue(label.place.is_custom_place())
            break

    def tearDown(self):
        """Remove the environment variable set in setUp."""
        del os.environ['CUSTOM_DEVICE_ROOT']
if __name__ == '__main__':
    # only support Linux now
    if os.name == 'nt' or sys.platform.startswith('darwin'):
        # sys.exit() rather than the site-provided exit() helper, which is
        # meant for interactive use and is absent when site is not loaded.
        sys.exit()
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录