Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
d972de56
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
d972de56
编写于
3月 31, 2023
作者:
陈
陈沧夜
提交者:
GitHub
3月 31, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
删除paddle/fluid/platform/device/mlu目录 (#52382)
上级
fb276f23
变更
15
显示空白变更内容
内联
并排
Showing
15 changed file
with
0 addition
and
2039 deletion
+0
-2039
paddle/fluid/platform/device/mlu/CMakeLists.txt
paddle/fluid/platform/device/mlu/CMakeLists.txt
+0
-29
paddle/fluid/platform/device/mlu/cncl_helper.h
paddle/fluid/platform/device/mlu/cncl_helper.h
+0
-57
paddle/fluid/platform/device/mlu/device_context.cc
paddle/fluid/platform/device/mlu/device_context.cc
+0
-86
paddle/fluid/platform/device/mlu/device_context.h
paddle/fluid/platform/device/mlu/device_context.h
+0
-186
paddle/fluid/platform/device/mlu/device_context_allocator.h
paddle/fluid/platform/device/mlu/device_context_allocator.h
+0
-162
paddle/fluid/platform/device/mlu/device_context_test.cc
paddle/fluid/platform/device/mlu/device_context_test.cc
+0
-82
paddle/fluid/platform/device/mlu/enforce.h
paddle/fluid/platform/device/mlu/enforce.h
+0
-167
paddle/fluid/platform/device/mlu/enforce_test.cc
paddle/fluid/platform/device/mlu/enforce_test.cc
+0
-72
paddle/fluid/platform/device/mlu/mlu_collective_helper.cc
paddle/fluid/platform/device/mlu/mlu_collective_helper.cc
+0
-190
paddle/fluid/platform/device/mlu/mlu_info.cc
paddle/fluid/platform/device/mlu/mlu_info.cc
+0
-464
paddle/fluid/platform/device/mlu/mlu_info.h
paddle/fluid/platform/device/mlu/mlu_info.h
+0
-190
paddle/fluid/platform/device/mlu/mlu_resource_pool.cc
paddle/fluid/platform/device/mlu/mlu_resource_pool.cc
+0
-105
paddle/fluid/platform/device/mlu/mlu_resource_pool.h
paddle/fluid/platform/device/mlu/mlu_resource_pool.h
+0
-64
paddle/fluid/platform/device/mlu/mlu_stream.cc
paddle/fluid/platform/device/mlu/mlu_stream.cc
+0
-84
paddle/fluid/platform/device/mlu/mlu_stream.h
paddle/fluid/platform/device/mlu/mlu_stream.h
+0
-101
未找到文件。
paddle/fluid/platform/device/mlu/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
fb276f23
# Build rules for the MLU platform layer. Nothing below is processed unless
# the build was configured with WITH_MLU.
if(NOT WITH_MLU)
  return()
endif()

# Test for the MLU enforce helpers.
cc_test(mlu_enforce_test SRCS enforce_test.cc)

# Device information library.
cc_library(
  mlu_info
  SRCS mlu_info.cc
  DEPS enforce glog malloc monitor neuware_lib)

# Stream wrapper library.
cc_library(
  mlu_stream
  SRCS mlu_stream.cc
  DEPS mlu_info stream_callback_manager eigen3 ${MKLDNN_CTX_DEPS})

# Device context library and its test.
cc_library(
  mlu_device_context
  SRCS device_context.cc
  DEPS mlu_stream)
cc_test(
  mlu_device_context_test
  SRCS device_context_test.cc
  DEPS mlu_device_context)

# Collective communication helper.
cc_library(
  mlu_collective_helper
  SRCS mlu_collective_helper.cc
  DEPS mlu_stream mlu_info)

# Resource pool library.
cc_library(
  mlu_resource_pool
  SRCS mlu_resource_pool.cc
  DEPS mlu_info)
paddle/fluid/platform/device/mlu/cncl_helper.h
已删除
100644 → 0
浏览文件 @
fb276f23
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_CNCL
#include <cncl.h>
#include <stdio.h>
#include <memory>
#include <string>
#include <thread> // NOLINT
#include <typeindex>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/mlu/enforce.h"
#include "paddle/fluid/platform/float16.h"
namespace
paddle
{
namespace
platform
{
/**
 * Map a framework VarType to the corresponding CNCL data type.
 *
 * Supported inputs are FP32, FP16, INT32, INT16, INT8 and UINT8; every other
 * value raises an Unimplemented error through PADDLE_THROW.
 */
inline cnclDataType_t ToCNCLDataType(framework::proto::VarType::Type type) {
  switch (type) {
    case framework::proto::VarType::FP32:
      return cnclFloat32;
    case framework::proto::VarType::FP16:
      return cnclFloat16;
    case framework::proto::VarType::INT32:
      return cnclInt32;
    case framework::proto::VarType::INT16:
      return cnclInt16;
    case framework::proto::VarType::INT8:
      return cnclInt8;
    case framework::proto::VarType::UINT8:
      return cnclUint8;
    default:
      PADDLE_THROW(platform::errors::Unimplemented(
          "This datatype in cncl is not supported."));
  }
}
}
// namespace platform
}
// namespace paddle
#endif
paddle/fluid/platform/device/mlu/device_context.cc
已删除
100644 → 0
浏览文件 @
fb276f23
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_MLU
#include "paddle/fluid/platform/device/mlu/device_context.h"
#endif
namespace
paddle
{
namespace
platform
{
#ifdef PADDLE_WITH_MLU
// Out-of-class definitions of MLUDeviceContext's static thread_local members:
// the per-thread map from device context to its MLUContext override, and the
// mutex that ResetThreadContext locks while updating that map.
thread_local std::unordered_map<const MLUDeviceContext*,
                                std::shared_ptr<MLUContext>>
    MLUDeviceContext::thread_ctx_;

thread_local std::mutex MLUDeviceContext::ctx_mtx_;
// Builds a context for `place`: creates an MLUStream with the requested
// priority, then creates the CNNL and MLU-OP handles (both Init* helpers bind
// their handle to the stream's raw queue).
MLUContext::MLUContext(const MLUPlace& place, const int priority)
    : place_(place) {
  // Keep the target device selected while the stream and handles are created.
  MLUDeviceGuard guard(place_.device);
  stream_.reset(new stream::MLUStream(place_, priority));
  InitCNNLContext();
  InitMLUOPContext();
}
// Destroys the CNNL and MLU-OP handles while the owning device is selected.
MLUContext::~MLUContext() {
  MLUDeviceGuard guard(place_.device);
  DestoryCNNLContext();
  DestoryMLUOPContext();
}
// Queries capability/version information for the device behind `place`, logs
// it once (LOG_FIRST_N with N = 1), and creates the default MLUContext.
MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
  const auto dev_id = place_.device;
  MLUDeviceGuard guard(dev_id);

  compute_capability_ = GetMLUComputeCapability(dev_id);
  driver_version_ = GetMLUDriverVersion(dev_id);
  runtime_version_ = GetMLURuntimeVersion(dev_id);
  cnnl_version_ = GetMLUCnnlVersion(dev_id);
  mluOp_version_ = GetMLUOpVersion(dev_id);

  // Compute capability is printed as <v/10>.<v%10>; each version number is
  // printed as <v/10000>.<(v/100)%100>.<v%100>.
  LOG_FIRST_N(WARNING, 1)
      << "Please NOTE: device: " << static_cast<int>(dev_id)
      << ", MLU Compute Capability: " << compute_capability_ / 10 << "."
      << compute_capability_ % 10
      << ", Driver API Version: " << driver_version_ / 10000 << "."
      << (driver_version_ / 100) % 100 << "." << driver_version_ % 100
      << ", Runtime API Version: " << runtime_version_ / 10000 << "."
      << (runtime_version_ / 100) % 100 << "." << runtime_version_ % 100
      << ", Cnnl API Version: " << cnnl_version_ / 10000 << "."
      << (cnnl_version_ / 100) % 100 << "." << cnnl_version_ % 100
      << ", MluOp API Version: " << mluOp_version_ / 10000 << "."
      << (mluOp_version_ / 100) % 100 << "." << mluOp_version_ % 100;

  default_ctx_.reset(new MLUContext(place_));
}
// Nothing to release explicitly; members clean up after themselves.
MLUDeviceContext::~MLUDeviceContext() = default;
// Returns the place this device context was constructed with.
const Place& MLUDeviceContext::GetPlace() const { return place_; }
// Forwards to Wait() on the stream of the currently active context.
void MLUDeviceContext::Wait() const {
  const auto ctx = context();
  ctx->Stream()->Wait();
}
// Returns the compute capability value cached by the constructor.
int MLUDeviceContext::GetComputeCapability() const {
  return compute_capability_;
}
// Returns the CNNL handle held by the currently active context.
mluCnnlHandle MLUDeviceContext::cnnl_handle() const {
  const auto ctx = context();
  return ctx->CnnlHandle();
}
// Returns the MLU-OP handle held by the currently active context.
mluOpHandle MLUDeviceContext::mluOp_handle() const {
  const auto ctx = context();
  return ctx->MluOpHandle();
}
// Returns the raw stream of the currently active context.
mluStream MLUDeviceContext::stream() const {
  const auto ctx = context();
  return ctx->RawStream();
}
#endif
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/mlu/device_context.h
已删除
100644 → 0
浏览文件 @
fb276f23
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_MLU
#include <mutex>
#include "paddle/fluid/platform/device/mlu/enforce.h"
#include "paddle/fluid/platform/device/mlu/mlu_stream.h"
#include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_CNCL
#include <cncl.h>
#endif
namespace
Eigen
{
struct
DefaultDevice
;
struct
GpuDevice
;
}
// namespace Eigen
namespace
paddle
{
namespace
platform
{
/**
 * Bundles the per-context MLU resources: the place, a stream, and the CNNL
 * and MLU-OP handles bound to that stream's raw queue.
 *
 * The handles are value-initialized to nullptr so that a default-constructed
 * MLUContext can be destroyed safely: the Destory* helpers only call the
 * library destroy functions for non-null handles.
 */
class MLUContext {
 public:
  MLUContext() = default;
  explicit MLUContext(const MLUPlace& place, const int priority = 0);

  ~MLUContext();

  /*! \brief Return the place this context was created for. */
  const MLUPlace& Place() const { return place_; }

  /*! \brief Return the (possibly empty) Eigen device holder. */
  const std::unique_ptr<Eigen::DefaultDevice>& EigenDevice() const {
    return eigen_device_;
  }

  /*! \brief Return the owned stream holder. */
  const std::unique_ptr<stream::MLUStream>& Stream() const { return stream_; }

  /*!
   * \brief Replace the owned stream with new_stream_ptr.
   * \return The previously owned stream; ownership passes to the caller.
   */
  stream::MLUStream* SetStream(stream::MLUStream* new_stream_ptr) {
    auto* old_stream_ptr = stream_.release();
    stream_.reset(new_stream_ptr);
    return old_stream_ptr;
  }

  /*! \brief Return the raw stream of the owned MLUStream. */
  const mluStream& RawStream() { return stream_->raw_stream(); }

  /*! \brief Return the CNNL handle. */
  const mluCnnlHandle& CnnlHandle() const { return cnnl_handle_; }

  /*! \brief Return the MLU-OP handle. */
  const mluOpHandle& MluOpHandle() const { return mluOp_handle_; }

 private:
  // Create the CNNL handle and attach it to the raw stream.
  void InitCNNLContext() {
    PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreate(&cnnl_handle_));
    PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetQueue(cnnl_handle_, RawStream()));
  }

  // Create the MLU-OP handle and attach it to the raw stream.
  void InitMLUOPContext() {
    PADDLE_ENFORCE_MLU_SUCCESS(mluOpCreate(&mluOp_handle_));
    PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetQueue(mluOp_handle_, RawStream()));
  }

  // Destroy the CNNL handle if one exists, then reset it to nullptr.
  void DestoryCNNLContext() {
    if (cnnl_handle_) {
      PADDLE_ENFORCE_MLU_SUCCESS(cnnlDestroy(cnnl_handle_));
    }
    cnnl_handle_ = nullptr;
  }

  // Destroy the MLU-OP handle if one exists, then reset it to nullptr.
  void DestoryMLUOPContext() {
    if (mluOp_handle_) {
      PADDLE_ENFORCE_MLU_SUCCESS(mluOpDestroy(mluOp_handle_));
    }
    mluOp_handle_ = nullptr;
  }

  MLUPlace place_;
  std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
  std::unique_ptr<stream::MLUStream> stream_;
  // Must start out null: the destructor path inspects these even when the
  // defaulted constructor was used and no Init* helper ever ran.
  mluCnnlHandle cnnl_handle_{nullptr};
  mluOpHandle mluOp_handle_{nullptr};

  DISABLE_COPY_AND_ASSIGN(MLUContext);
};
/**
 * DeviceContext implementation for an MLUPlace.
 *
 * Holds a default MLUContext plus an optional per-thread override stored in a
 * static thread_local map keyed by the device context pointer. context()
 * returns the calling thread's override when one exists, otherwise the
 * default context.
 */
class MLUDeviceContext
    : public DeviceContext,
      public phi::TypeInfoTraits<DeviceContext, MLUDeviceContext> {
 public:
  explicit MLUDeviceContext(MLUPlace place);
  virtual ~MLUDeviceContext();

  /*! \brief This context exposes no Eigen device; always returns nullptr. */
  Eigen::DefaultDevice* eigen_device() const { return nullptr; }

  const Place& GetPlace() const override;

  int GetComputeCapability() const;

  /*! \brief Wait for all operations completion in the stream. */
  void Wait() const override;

  /*! \brief Return cnnl handle in the device context. */
  mluCnnlHandle cnnl_handle() const;

  /*! \brief Return mluOp handle in the device context. */
  mluOpHandle mluOp_handle() const;

  /*! \brief Return mlu stream in the device context. */
  mluStream stream() const;

#ifdef PADDLE_WITH_CNCL
  /*! \brief Return cncl communicators. */
  cnclComm_t cncl_comm() const { return cncl_comm_; }

  /*! \brief Set cncl communicators. */
  void set_cncl_comm(cnclComm_t comm) { cncl_comm_ = comm; }
#endif

  /*! \brief Forward to RecordEvent on the active context's stream. */
  template <typename Callback>
  void RecordEvent(mluEventHandle ev, Callback callback) const {
    return context()->Stream()->RecordEvent(ev, callback);
  }

  /*! \brief Forward to AddCallback on the active context's stream. */
  template <typename Callback>
  void AddStreamCallback(Callback&& callback) const {
    return context()->Stream()->AddCallback(callback);
  }

  /*! \brief Forward to WaitCallback on the active context's stream. */
  void WaitStreamCallback() const {
    return context()->Stream()->WaitCallback();
  }

  /*! \brief Replace the default context with a new one of given priority. */
  void ResetDefaultContext(const int priority) {
    default_ctx_.reset(new MLUContext(place_, priority));
  }

  /*! \brief Install a new context of given priority for the calling thread. */
  void ResetThreadContext(const int priority) {
    std::lock_guard<std::mutex> guard(ctx_mtx_);
    thread_ctx_[this].reset(new MLUContext(place_, priority));
  }

  /*!
   * \brief Return the calling thread's context override if present,
   *        otherwise the default context.
   */
  std::shared_ptr<MLUContext> context() const {
    // Single lookup instead of count() followed by at().
    const auto it = thread_ctx_.find(this);
    if (it == thread_ctx_.end()) {
      return default_ctx_;
    }
    return it->second;
  }

  static const char* name() { return "MLUDeviceContext"; }

 private:
  int compute_capability_;
  int driver_version_;
  int runtime_version_;
  int cnnl_version_;
  int mluOp_version_;

  MLUPlace place_;
  std::shared_ptr<MLUContext> default_ctx_;

  // The thread_local static variable will be released before the
  // global static variable, so avoid using it in dtor.
  static thread_local std::unordered_map<const MLUDeviceContext*,
                                         std::shared_ptr<MLUContext>>
      thread_ctx_;
  // NOTE(review): this mutex is itself thread_local, so it cannot serialize
  // different threads against each other -- confirm that is intended.
  static thread_local std::mutex ctx_mtx_;

#ifdef PADDLE_WITH_CNCL
  cnclComm_t cncl_comm_{nullptr};
#endif

  DISABLE_COPY_AND_ASSIGN(MLUDeviceContext);
};
// Declares MLUDeviceContext as the default device context type for MLUPlace.
template <>
struct DefaultDeviceContextType<platform::MLUPlace> {
  typedef MLUDeviceContext TYPE;
};
#endif
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/mlu/device_context_allocator.h
已删除
100644 → 0
浏览文件 @
fb276f23
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/device/mlu/device_context.h"
#include "paddle/fluid/platform/device/mlu/mlu_info.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
platform
{
class
MLUDeviceContext
;
}
// namespace platform
namespace
memory
{
namespace
allocation
{
/**
* MLUDeviceContextAllocation is a wrapper of the underbeneath allocation.
* MLUDeviceContextAllocation adds a MLU stream callback for the underbeneath
* allocation so that MLUDeviceContextAllocation can be used in a MLU stream
* which deletes allocation in the callback.
*/
class
MLUDeviceContextAllocation
:
public
Allocation
{
public:
explicit
MLUDeviceContextAllocation
(
AllocationPtr
allocation
)
:
Allocation
(
allocation
->
ptr
(),
allocation
->
size
(),
allocation
->
place
()),
underlying_allocation_
(
std
::
move
(
allocation
))
{}
~
MLUDeviceContextAllocation
()
{
PADDLE_ENFORCE_NOT_NULL
(
dev_ctx_
,
platform
::
errors
::
PreconditionNotMet
(
"Device context is not set for MLUDeviceContextAllocation"
));
auto
*
p_allocation
=
underlying_allocation_
.
release
();
VLOG
(
4
)
<<
"Adding callback to delete MLUDeviceContextAllocation at "
<<
p_allocation
;
dev_ctx_
->
AddStreamCallback
([
p_allocation
]
{
VLOG
(
4
)
<<
"Delete MLUDeviceContextAllocation at "
<<
p_allocation
;
Allocator
::
AllocationDeleter
(
p_allocation
);
});
}
void
SetMLUDeviceContext
(
const
platform
::
MLUDeviceContext
*
dev_ctx
)
{
dev_ctx_
=
dev_ctx
;
}
private:
AllocationPtr
underlying_allocation_
;
const
platform
::
MLUDeviceContext
*
dev_ctx_
{
nullptr
};
};
/**
 * MLUDeviceContextAllocator hands out MLUDeviceContextAllocation objects for
 * one MLUPlace. Each allocation is followed by placing the allocator's own
 * notifier on the default stream and waiting on it, so the memory can later
 * be released from a stream callback.
 */
class MLUDeviceContextAllocator : public Allocator {
 public:
  explicit MLUDeviceContextAllocator(platform::MLUPlace place,
                                     mluStream default_stream)
      : place_(place), default_stream_(default_stream) {
    platform::MLUDeviceGuard guard(place_.device);
    PADDLE_ENFORCE_MLU_SUCCESS(cnrtNotifierCreate(&event_));
  }

  ~MLUDeviceContextAllocator() {
    // Nothing to destroy if the notifier was never created.
    if (!event_) {
      return;
    }
    platform::MLUDeviceGuard guard(place_.device);
    PADDLE_ENFORCE_MLU_SUCCESS(cnrtNotifierDestroy(event_));
  }

 protected:
  phi::Allocation* AllocateImpl(size_t size) override {
    PADDLE_ENFORCE_NOT_NULL(
        default_stream_,
        platform::errors::PreconditionNotMet(
            "Default stream is not set for MLUDeviceContextAllocator"));
    platform::MLUDeviceGuard guard(place_.device);

    // NOTE(review): if either notifier call below throws, this raw pointer is
    // not released -- confirm whether that path matters.
    auto* wrapped = new MLUDeviceContextAllocation(memory::Alloc(place_, size));

    // Wait for the event on stream
    PADDLE_ENFORCE_MLU_SUCCESS(cnrtPlaceNotifier(event_, default_stream_));
    PADDLE_ENFORCE_MLU_SUCCESS(cnrtWaitNotifier(event_));
    return wrapped;
  }

  void FreeImpl(phi::Allocation* allocation) override { delete allocation; }

 private:
  platform::MLUPlace place_;
  mluEventHandle event_{nullptr};
  mluStream default_stream_{nullptr};
};
/**
 * MLUDeviceContextAllocatorPool is a singleton that maps each selected
 * MLUPlace to a shared MLUDeviceContextAllocator. Alloc() looks up the
 * allocator for a device context's place, allocates from it, and tags the
 * resulting allocation with that device context.
 */
class MLUDeviceContextAllocatorPool {
 public:
  /*! \brief Return the process-wide pool instance. */
  static MLUDeviceContextAllocatorPool& Instance() {
    static MLUDeviceContextAllocatorPool pool;
    return pool;
  }

  /*!
   * \brief Allocate `size` bytes through the allocator registered for
   *        dev_ctx's place; raises NotFound if none is registered.
   */
  AllocationPtr Alloc(const platform::MLUDeviceContext& dev_ctx, size_t size) {
    auto iter = allocators_.find(dev_ctx.GetPlace());
    PADDLE_ENFORCE_NE(
        iter,
        allocators_.end(),
        platform::errors::NotFound("No allocator found for MLUPlace."));

    AllocationPtr allocation = iter->second->Allocate(size);
    auto* tagged = static_cast<MLUDeviceContextAllocation*>(allocation.get());
    tagged->SetMLUDeviceContext(&dev_ctx);
    return allocation;
  }

 private:
  // Registers one allocator per selected MLU device, bound to the stream of
  // that device's context in the global DeviceContextPool.
  MLUDeviceContextAllocatorPool() {
    const std::vector<int> devices = platform::GetMLUSelectedDevices();
    for (int dev_id : devices) {
      auto place = platform::MLUPlace(dev_id);
      auto compute_stream =
          platform::DeviceContextPool::Instance().GetByPlace(place)->stream();
      auto allocator =
          std::make_shared<MLUDeviceContextAllocator>(place, compute_stream);
      allocators_.insert(std::make_pair(place, allocator));
    }
  }

  std::map<platform::MLUPlace, std::shared_ptr<MLUDeviceContextAllocator>>
      allocators_;
};
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/platform/device/mlu/device_context_test.cc
已删除
100644 → 0
浏览文件 @
fb276f23
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device/mlu/device_context.h"
#include <vector>
#include "glog/logging.h"
#include "gtest/gtest.h"
// Every visible MLU device must yield a device context whose active
// MLUContext is non-null.
TEST(Device, Init) {
  using paddle::platform::MLUContext;
  using paddle::platform::MLUDeviceContext;
  using paddle::platform::MLUPlace;

  int count = paddle::platform::GetMLUDeviceCount();
  for (int i = 0; i < count; i++) {
    // Automatic storage: a fatal ASSERT returns from the test body, which
    // would have skipped a trailing `delete` and leaked the context.
    // Braces avoid the most-vexing-parse on `T x(MLUPlace(i))`.
    MLUDeviceContext device_context{MLUPlace(i)};
    std::shared_ptr<MLUContext> ctx = device_context.context();
    ASSERT_NE(nullptr, ctx);
  }
}
// Every visible MLU device must expose a non-null CNNL handle.
TEST(Device, MLUDeviceContext) {
  using paddle::mluCnnlHandle;
  using paddle::platform::MLUDeviceContext;
  using paddle::platform::MLUPlace;

  int count = paddle::platform::GetMLUDeviceCount();
  for (int i = 0; i < count; i++) {
    // Automatic storage so a failing fatal ASSERT cannot leak the context;
    // braces avoid the most-vexing-parse.
    MLUDeviceContext device_context{MLUPlace(i)};
    mluCnnlHandle mlu_handle = device_context.cnnl_handle();
    ASSERT_NE(nullptr, mlu_handle);
  }
}
// Every visible MLU device must expose a non-null raw stream.
TEST(Device, MLUStream) {
  using paddle::mluStream;
  using paddle::platform::MLUDeviceContext;
  using paddle::platform::MLUPlace;

  int count = paddle::platform::GetMLUDeviceCount();
  for (int i = 0; i < count; i++) {
    // Automatic storage so a failing fatal ASSERT cannot leak the context;
    // braces avoid the most-vexing-parse.
    MLUDeviceContext device_context{MLUPlace(i)};
    mluStream mlu_stream = device_context.stream();
    ASSERT_NE(nullptr, mlu_stream);
  }
}
// The pool must return the same CPU context on repeated lookups and a
// non-null context for every visible MLU device.
TEST(Device, DeviceContextPool) {
  using paddle::platform::CPUPlace;
  using paddle::platform::DeviceContextPool;
  using paddle::platform::MLUPlace;

  DeviceContextPool& pool = DeviceContextPool::Instance();
  auto cpu_dev_ctx1 = pool.Get(CPUPlace());
  auto cpu_dev_ctx2 = pool.Get(CPUPlace());
  ASSERT_EQ(cpu_dev_ctx2, cpu_dev_ctx1);

  // The never-populated `mlu_places` vector and the using-declarations that
  // only it needed were removed.
  int count = paddle::platform::GetMLUDeviceCount();
  for (int i = 0; i < count; ++i) {
    auto dev_ctx = pool.Get(MLUPlace(i));
    ASSERT_NE(dev_ctx, nullptr);
  }
}
paddle/fluid/platform/device/mlu/enforce.h
已删除
100644 → 0
浏览文件 @
fb276f23
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_MLU
#include "paddle/fluid/platform/device/mlu/mlu_info.h"
#endif // PADDLE_WITH_MLU
#ifdef PADDLE_WITH_MLU
DECLARE_int64
(
gpu_allocator_retry_time
);
#endif
namespace
paddle
{
namespace
platform
{
#ifdef PADDLE_WITH_MLU
// Compile-time traits for the status enums understood by the MLU enforce
// macros: each registered type exposes its success value and a type string.
namespace details {

// Primary template is empty; only the specializations generated below carry
// kSuccess / kTypeString.
template <typename T>
struct MLUStatusType {};

#define DEFINE_MLU_STATUS_TYPE(type, success_value, proto_type) \
  template <>                                                   \
  struct MLUStatusType<type> {                                  \
    using Type = type;                                          \
    static constexpr Type kSuccess = success_value;             \
    static constexpr const char* kTypeString = #proto_type;     \
  }

DEFINE_MLU_STATUS_TYPE(cnrtStatus, cnrtSuccess, CNRT);
DEFINE_MLU_STATUS_TYPE(cnnlStatus, CNNL_STATUS_SUCCESS, CNNL);
DEFINE_MLU_STATUS_TYPE(mluOpStatus, MLUOP_STATUS_SUCCESS, MLUOP);
DEFINE_MLU_STATUS_TYPE(cnStatus, CN_SUCCESS, CN);
#ifdef PADDLE_WITH_CNCL
DEFINE_MLU_STATUS_TYPE(cnclStatus, CNCL_RET_SUCCESS, CNCL);
#endif

}  // namespace details
/*************** CNRT ERROR ***************/
// True when the CNRT status is anything other than cnrtSuccess.
inline bool is_error(cnrtStatus e) { return !(e == cnrtSuccess); }
// Formats a CNRT status as "MLU CNRT error(<code>), <name>: <description>".
inline std::string build_mlu_error_msg(cnrtStatus e) {
  std::ostringstream text;
  text << "MLU CNRT error(" << e << "), ";
  text << cnrtGetErrorName(e) << ": " << cnrtGetErrorStr(e);
  return text.str();
}
/*************** CNNL ERROR ***************/
// True when the CNNL status is anything other than CNNL_STATUS_SUCCESS.
inline bool is_error(cnnlStatus stat) { return !(stat == CNNL_STATUS_SUCCESS); }
// Formats a CNNL status as "MLU CNNL error(<code>), <description>. ".
inline std::string build_mlu_error_msg(cnnlStatus stat) {
  std::ostringstream text;
  text << "MLU CNNL error(" << stat << "), ";
  text << cnnlGetErrorString(stat) << ". ";
  return text.str();
}
/*************** MLU OP ERROR ***************/
// True when the MLU-OP status is anything other than MLUOP_STATUS_SUCCESS.
inline bool is_error(mluOpStatus stat) {
  return !(stat == MLUOP_STATUS_SUCCESS);
}
// Formats an MLU-OP status as "MLU OP error(<code>), <description>. ".
inline std::string build_mlu_error_msg(mluOpStatus stat) {
  std::ostringstream text;
  text << "MLU OP error(" << stat << "), ";
  text << mluOpGetErrorString(stat) << ". ";
  return text.str();
}
/*************** CN API ERROR ***************/
// True when the CN driver status is anything other than CN_SUCCESS.
inline bool is_error(cnStatus stat) { return !(stat == CN_SUCCESS); }
// Formats a CN driver status as
// "MLU CN error(<code>), <name> : <description>. ".
//
// The name/description pointers start out null and are only streamed when the
// lookup functions filled them in. Previously they were uninitialized, so an
// unrecognized status could stream an indeterminate pointer.
inline std::string build_mlu_error_msg(cnStatus stat) {
  const char* error_name = nullptr;
  const char* error_string = nullptr;
  cnGetErrorName(stat, &error_name);
  cnGetErrorString(stat, &error_string);

  std::ostringstream sout;
  sout << "MLU CN error(" << static_cast<int>(stat) << "), "
       << (error_name != nullptr ? error_name : "unknown error name") << " : "
       << (error_string != nullptr ? error_string : "unknown error string")
       << ". ";
  return sout.str();
}
/*************** CNCL ERROR ***************/
#ifdef PADDLE_WITH_CNCL
// True when the CNCL status is anything other than CNCL_RET_SUCCESS.
inline bool is_error(cnclStatus e) { return !(e == CNCL_RET_SUCCESS); }
// Formats a CNCL status as "MLU CNCL error(<code>), <description>. ".
inline std::string build_mlu_error_msg(cnclStatus e) {
  std::ostringstream text;
  text << "MLU CNCL error(" << e << "), ";
  text << cnclGetErrorStr(e) << ". ";
  return text.str();
}
#endif
// Evaluates COND exactly once and compares the result against the kSuccess
// value registered in details::MLUStatusType for COND's status type. On
// mismatch it builds an External error from build_mlu_error_msg and throws
// it via __THROW_ERROR_INTERNAL__. Wrapped in do/while(0) so it behaves as a
// single statement.
#define PADDLE_ENFORCE_MLU_SUCCESS(COND) \
do { \
auto __cond__ = (COND); \
using __MLU_STATUS_TYPE__ = decltype(__cond__); \
constexpr auto __success_type__ = \
::paddle::platform::details::MLUStatusType< \
__MLU_STATUS_TYPE__>::kSuccess; \
if (UNLIKELY(__cond__ != __success_type__)) { \
auto __summary__ = ::paddle::platform::errors::External( \
::paddle::platform::build_mlu_error_msg(__cond__)); \
__THROW_ERROR_INTERNAL__(__summary__); \
} \
} while (0)
// Checks cnrtGetLastError() and, if it is not cnrtSuccess, throws a Fatal
// error whose message names OP and includes the formatted CNRT error text.
// The errors namespace is fully qualified, as in the sibling macros, so the
// macro also expands correctly outside namespace paddle.
#define PADDLE_ENFORCE_MLU_LAUNCH_SUCCESS(OP)                      \
  do {                                                             \
    auto res = cnrtGetLastError();                                 \
    if (UNLIKELY(res != cnrtSuccess)) {                            \
      auto msg = ::paddle::platform::build_mlu_error_msg(res);     \
      PADDLE_THROW(::paddle::platform::errors::Fatal(              \
          "CNRT error after kernel (%s): %s", OP, msg));           \
    }                                                              \
  } while (0)
// Sleeps for roughly `milliseconds`. Durations below one second go through
// usleep; longer ones are truncated to whole seconds and go through sleep.
inline void retry_sleep(unsigned milliseconds) {
  if (milliseconds >= 1000) {
    // clip to sleep in seconds because we can not and don't have to
    // sleep for exact milliseconds
    sleep(milliseconds / 1000);
    return;
  }
  // usleep argument must be less than 1,000,000. Reference:
  // https://pubs.opengroup.org/onlinepubs/7908799/xsh/usleep.html
  usleep(milliseconds * 1000);
}
// Like PADDLE_ENFORCE_MLU_SUCCESS, but re-evaluates COND on failure: up to
// five evaluations in total, sleeping FLAGS_gpu_allocator_retry_time
// milliseconds between attempts. If the last attempt still fails, an External
// error is thrown. retry_sleep is fully qualified so the macro also expands
// correctly outside namespace paddle::platform.
#define PADDLE_RETRY_MLU_SUCCESS(COND)                                    \
  do {                                                                    \
    auto __cond__ = (COND);                                               \
    int retry_count = 1;                                                  \
    using __MLU_STATUS_TYPE__ = decltype(__cond__);                       \
    constexpr auto __success_type__ =                                     \
        ::paddle::platform::details::MLUStatusType<                       \
            __MLU_STATUS_TYPE__>::kSuccess;                               \
    while (UNLIKELY(__cond__ != __success_type__) && retry_count < 5) {   \
      ::paddle::platform::retry_sleep(FLAGS_gpu_allocator_retry_time);    \
      __cond__ = (COND);                                                  \
      ++retry_count;                                                      \
    }                                                                     \
    if (UNLIKELY(__cond__ != __success_type__)) {                         \
      auto __summary__ = ::paddle::platform::errors::External(            \
          ::paddle::platform::build_mlu_error_msg(__cond__));             \
      __THROW_ERROR_INTERNAL__(__summary__);                              \
    }                                                                     \
  } while (0)
#undef DEFINE_MLU_STATUS_TYPE
#endif // PADDLE_WITH_MLU
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/mlu/enforce_test.cc
已删除
100644 → 0
浏览文件 @
fb276f23
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device/mlu/enforce.h"
#include <list>
#include "gtest/gtest.h"
#ifdef PADDLE_WITH_MLU
// Returns true if PADDLE_ENFORCE_MLU_SUCCESS accepts `value` without
// throwing. `msg` is accepted for signature symmetry and is not used.
template <typename T>
bool CheckMluStatusSuccess(T value, const std::string& msg = "success") {
  PADDLE_ENFORCE_MLU_SUCCESS(value);
  return true;
}
template
<
typename
T
>
bool
CheckMluStatusFailure
(
T
value
,
const
std
::
string
&
msg
)
{
try
{
PADDLE_ENFORCE_MLU_SUCCESS
(
value
);
return
false
;
}
catch
(
paddle
::
platform
::
EnforceNotMet
&
error
)
{
std
::
string
ex_msg
=
error
.
what
();
std
::
cout
<<
ex_msg
<<
std
::
endl
;
return
ex_msg
.
find
(
msg
)
!=
std
::
string
::
npos
;
}
}
// Exercises PADDLE_ENFORCE_MLU_SUCCESS for each supported status family:
// the success value must pass and selected failure values must throw with
// the expected text in the message.
TEST(mlu_enforce, mlu_success) {
  // CNRT
  EXPECT_TRUE(CheckMluStatusSuccess(cnrtSuccess));
  EXPECT_TRUE(CheckMluStatusFailure(cnrtErrorArgsInvalid, "invalid argument"));
  EXPECT_TRUE(CheckMluStatusFailure(cnrtErrorMemcpyDirectionInvalid,
                                    "invalid memcpy direction"));
  EXPECT_TRUE(
      CheckMluStatusFailure(cnrtErrorDeviceInvalid, "invalid device ordinal"));

  // CNNL
  EXPECT_TRUE(CheckMluStatusSuccess(CNNL_STATUS_SUCCESS));
  EXPECT_TRUE(CheckMluStatusFailure(CNNL_STATUS_NOT_INITIALIZED, "CNNL error"));
  EXPECT_TRUE(CheckMluStatusFailure(CNNL_STATUS_ALLOC_FAILED, "CNNL error"));
  EXPECT_TRUE(CheckMluStatusFailure(CNNL_STATUS_BAD_PARAM, "CNNL error"));
  EXPECT_TRUE(CheckMluStatusFailure(CNNL_STATUS_INTERNAL_ERROR, "CNNL error"));

  // CN driver API
  EXPECT_TRUE(CheckMluStatusSuccess(CN_SUCCESS));
  EXPECT_TRUE(CheckMluStatusFailure(
      CN_ERROR_NOT_READY,
      "Asynchronous operations issued previously not completed yet"));
  EXPECT_TRUE(
      CheckMluStatusFailure(CN_ERROR_NOT_INITIALIZED, "initialization error"));
  EXPECT_TRUE(
      CheckMluStatusFailure(CN_ERROR_INVALID_VALUE, "invalid argument"));
  EXPECT_TRUE(CheckMluStatusFailure(CN_MEMORY_ERROR_OUT_OF_MEMORY,
                                    "device has no memory to alloc"));

#ifdef PADDLE_WITH_CNCL
  // CNCL
  EXPECT_TRUE(CheckMluStatusSuccess(CNCL_RET_SUCCESS));
  EXPECT_TRUE(CheckMluStatusFailure(CNCL_RET_ERR_INTERNAL, "CNCL error"));
  EXPECT_TRUE(CheckMluStatusFailure(CNCL_RET_ERR_NULL_POINTER, "CNCL error"));
  EXPECT_TRUE(CheckMluStatusFailure(CNCL_RET_ERR_INIT, "CNCL error"));
  EXPECT_TRUE(CheckMluStatusFailure(CNCL_RET_ERR_NOT_INIT, "CNCL error"));
  EXPECT_TRUE(CheckMluStatusFailure(CNCL_RET_ERR_REINIT, "CNCL error"));
  EXPECT_TRUE(
      CheckMluStatusFailure(CNCL_RET_ERR_INVALID_VERSION, "CNCL error"));
#endif
}
#endif
paddle/fluid/platform/device/mlu/mlu_collective_helper.cc
已删除
100644 → 0
浏览文件 @
fb276f23
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if defined(PADDLE_WITH_CNCL)
#include <utility>
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/mlu/enforce.h"
namespace
paddle
{
namespace
platform
{
// Concrete CNCLComm: stores the ring/rank bookkeeping for one CNCL
// communicator and owns the MLUDeviceContext it is bound to.
//
// All scalar members are value-initialized so that a partially configured
// object is still safe to destroy (the destructor only frees `comm_` when it
// is non-null; previously `comm_` was indeterminate until set_comm() ran).
class CNCLCommImpl : public CNCLComm {
 public:
  void set_ring_id(int ring_id) { ring_id_ = ring_id; }
  int ring_id() const override { return ring_id_; }

  void set_nranks(int nranks) { nranks_ = nranks; }
  int nranks() const override { return nranks_; }

  void set_rank(int rank) { rank_ = rank; }
  int rank() const override { return rank_; }

  // Dereferences dev_ctx_, so set_dev_ctx() must have been called first.
  int device_id() const override { return dev_ctx_->GetPlace().device; }

  void set_comm(cnclComm_t comm) { comm_ = comm; }
  cnclComm_t comm() const override { return comm_; }

  // Dereferences dev_ctx_, so set_dev_ctx() must have been called first.
  mluStream stream() const override { return dev_ctx_->stream(); }

  // Takes ownership of the device context.
  void set_dev_ctx(std::unique_ptr<MLUDeviceContext>&& dev_ctx) {
    dev_ctx_ = std::move(dev_ctx);
  }
  MLUDeviceContext* dev_context() const override { return dev_ctx_.get(); }

  // Frees the communicator if one was assigned.
  ~CNCLCommImpl() {
    if (comm_) {
      PADDLE_ENFORCE_MLU_SUCCESS(cnclFreeComm(comm_));
    }
  }

 private:
  int ring_id_{0};
  int nranks_{0};
  int rank_{0};
  cnclComm_t comm_{};
  std::unique_ptr<MLUDeviceContext> dev_ctx_;
};
// Creates one CNCL communicator for `rank` on device `dev_id`, registers it
// under `ring_id` via AssignCNCLComm, and returns the registered wrapper.
//
// Validates that cncl_id is non-null, nranks > 1, 0 <= rank < nranks and
// dev_id >= 0 before touching the device. On first use it also registers an
// atexit hook that releases all communicators.
CNCLComm* CNCLCommContext::CreateComm(
    cnclCliqueId* cncl_id, int nranks, int rank, int dev_id, int ring_id) {
  PADDLE_ENFORCE_NOT_NULL(cncl_id,
                          platform::errors::InvalidArgument(
                              "The cncl unique id should not be null."));
  PADDLE_ENFORCE_GT(
      nranks,
      1,
      platform::errors::InvalidArgument(
          "Expected nranks > 1. But received nranks is %d.", nranks));
  PADDLE_ENFORCE_GE(rank,
                    0,
                    platform::errors::InvalidArgument(
                        "Expected rank >= 0. But received rank is %d.", rank));
  PADDLE_ENFORCE_LT(
      rank,
      nranks,
      platform::errors::InvalidArgument(
          "Expected rank < nranks. But received rank is %d, nranks is %d.",
          rank,
          nranks));
  PADDLE_ENFORCE_GE(
      dev_id,
      0,
      platform::errors::InvalidArgument(
          "Expected dev_id >= 0. But received dev_id is %d.", dev_id));

  // Single-communicator initialization: one device, one rank.
  cnclComm_t raw_comm;
  int devices[] = {dev_id};
  int ranks[] = {rank};
  SetMLUDeviceId(dev_id);
  PADDLE_ENFORCE_MLU_SUCCESS(
      cnclInitComms(&raw_comm, 1, devices, ranks, nranks, cncl_id));

  auto* registered = AssignCNCLComm(raw_comm, nranks, rank, dev_id, ring_id);

  VLOG(1) << "cncl communicator of rank " << rank << " in ring " << ring_id
          << " has been created on device " << dev_id;

  // Register the process-exit cleanup exactly once.
  const auto install_exit_hook = []() {
    std::atexit([]() { CNCLCommContext::Instance().ReleaseCNCLComms(); });
  };
  std::call_once(once_flag_, install_exit_hook);

  return registered;
}
void
CNCLCommContext
::
CreateAllCNCLComms
(
const
std
::
vector
<
int
>&
dev_ids
,
int
ring_id
)
{
PADDLE_ENFORCE_GT
(
dev_ids
.
size
(),
0
,
platform
::
errors
::
InvalidArgument
(
"Expected the size of dev_ids > 0. But "
"received the size of dev_ids is %d."
,
dev_ids
.
size
()));
const
int
kDevices
=
dev_ids
.
size
();
cnclComm_t
comms
[
kDevices
];
int
*
rank_list
=
new
int
[
kDevices
];
for
(
int
i
=
0
;
i
<
kDevices
;
i
++
)
{
rank_list
[
i
]
=
i
;
}
cnclCliqueId
clique_id
;
PADDLE_ENFORCE_MLU_SUCCESS
(
cnclGetCliqueId
(
&
clique_id
));
PADDLE_ENFORCE_MLU_SUCCESS
(
cnclInitComms
(
comms
,
dev_ids
.
size
(),
dev_ids
.
data
(),
rank_list
,
dev_ids
.
size
(),
&
clique_id
));
PADDLE_ENFORCE_EQ
(
comm_map_
.
count
(
ring_id
),
0
,
platform
::
errors
::
InvalidArgument
(
"Expected comm_map_.count(ring_id) = 0. But received "
"comm_map_.count(ring_id) is %d."
,
comm_map_
.
count
(
ring_id
)));
for
(
size_t
i
=
0
;
i
<
dev_ids
.
size
();
++
i
)
{
AssignCNCLComm
(
comms
[
i
],
dev_ids
.
size
(),
i
,
dev_ids
[
i
],
ring_id
);
VLOG
(
1
)
<<
"cncl communicator of rank "
<<
i
<<
" in ring "
<<
ring_id
<<
" has been created on device "
<<
dev_ids
[
i
];
}
std
::
call_once
(
once_flag_
,
[]()
{
std
::
atexit
([]()
{
CNCLCommContext
::
Instance
().
ReleaseCNCLComms
();
});
});
delete
[]
rank_list
;
}
// Wraps `comm` in a CNCLCommImpl bound to a fresh MLUDeviceContext for
// `dev_id`, stores it in comm_map_[ring_id][dev_id], and returns the stored
// wrapper. If an entry for (ring_id, dev_id) already exists, the existing
// entry is kept and returned (the new wrapper is destroyed).
//
// For ring 0 the communicator is additionally installed on the pooled
// device context of `dev_id`.
//
// The map is updated under a scoped lock and the new wrapper is held by a
// unique_ptr throughout, so an exception cannot leave the mutex locked or
// leak the wrapper; the returned pointer is read while the lock is held.
CNCLComm* CNCLCommContext::AssignCNCLComm(
    cnclComm_t comm, int nranks, int rank, int dev_id, int ring_id) {
  std::unique_ptr<MLUDeviceContext> dev_ctx(
      new MLUDeviceContext(MLUPlace(dev_id)));

  std::unique_ptr<CNCLCommImpl> c(new CNCLCommImpl);
  c->set_ring_id(ring_id);
  c->set_nranks(nranks);
  c->set_rank(rank);
  c->set_comm(comm);
  c->set_dev_ctx(std::move(dev_ctx));

  CNCLComm* assigned = nullptr;
  {
    std::lock_guard<decltype(comm_map_mutex_)> guard(comm_map_mutex_);
    if (comm_map_.count(ring_id) == 0) {
      comm_map_.emplace(ring_id, std::map<int, std::unique_ptr<CNCLComm>>());
    }
    auto& dev2comm = comm_map_[ring_id];
    assigned =
        dev2comm.emplace(dev_id, std::unique_ptr<CNCLComm>(std::move(c)))
            .first->second.get();
  }

  if (ring_id == 0) {
    auto* pooled_ctx = static_cast<platform::MLUDeviceContext*>(
        platform::DeviceContextPool::Instance().Get(
            platform::MLUPlace(dev_id)));
    pooled_ctx->set_cncl_comm(comm);
  }

  return assigned;
}
// Destroys every registered communicator wrapper. The map entries themselves
// are kept; only the owned objects are reset.
void CNCLCommContext::ReleaseCNCLComms() {
  for (auto& ring_entry : comm_map_) {
    auto& dev2comm = ring_entry.second;
    for (auto& dev_entry : dev2comm) {
      dev_entry.second.reset();
    }
  }
}
}
// namespace platform
}
// namespace paddle
#endif
paddle/fluid/platform/device/mlu/mlu_info.cc
已删除
100644 → 0
浏览文件 @
fb276f23
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device/mlu/mlu_info.h"
#include <mutex>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/device/mlu/enforce.h"
#include "paddle/fluid/platform/lock_guard_ptr.h"
#include "paddle/fluid/platform/monitor.h"
#include "paddle/fluid/string/split.h"
// Flags defined elsewhere. Despite the "gpu" in their names, this file reads
// them to size MLU allocations (see MLUAllocSize) and to set the per-device
// memory limit (see RecordedMLUMallocHelper::Instance).
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
DECLARE_uint64
(
initial_gpu_memory_in_mb
);
DECLARE_uint64
(
reallocate_gpu_memory_in_mb
);
DECLARE_uint64
(
gpu_memory_limit_mb
);
// Fraction of the currently available MLU memory that
// MLUAvailableMemToAlloc() holds back and does not hand out.
constexpr
static
float
fraction_reserve_mlu_memory
=
0.05
f
;
// Comma-separated list of MLU device ids to use; empty means all visible
// devices (see GetMLUSelectedDevices).
// Adjacent literals are concatenated verbatim, so each fragment must carry
// its own trailing space (two fragments previously ran words together).
PADDLE_DEFINE_EXPORTED_string(
    selected_mlus,
    "",
    "A list of device ids separated by comma, like: 0,1,2,3. "
    "This option is useful when doing multi process training and "
    "each process have only one device (MLU). If you want to use "
    "all visible devices, set this to empty string. NOTE: the "
    "reason of doing this is that we want to use P2P communication "
    "between MLU devices, use MLU_VISIBLE_DEVICES can only use "
    "share-memory only.");
USE_MLU_MEM_STAT
;
namespace
paddle
{
namespace
platform
{
static
int
GetMLUDeviceCountImpl
()
{
int
x
,
y
,
z
;
// When cnrtDriverGetVersion is executed, the device is initialized,
// no longer needs to call cnrtInit().
cnrtStatus
stat
=
cnrtDriverGetVersion
(
&
x
,
&
y
,
&
z
);
if
(
stat
!=
cnrtSuccess
)
{
VLOG
(
2
)
<<
"MLU Driver Version can't be detected. No MLU driver!"
;
return
0
;
}
const
auto
*
mlu_visible_devices
=
std
::
getenv
(
"MLU_VISIBLE_DEVICES"
);
if
(
mlu_visible_devices
!=
nullptr
)
{
std
::
string
mlu_visible_devices_str
(
mlu_visible_devices
);
if
(
std
::
all_of
(
mlu_visible_devices_str
.
begin
(),
mlu_visible_devices_str
.
end
(),
[](
char
ch
)
{
return
ch
==
' '
;
}))
{
VLOG
(
2
)
<<
"MLU_VISIBLE_DEVICES is set to be "
"empty. No MLU detected."
;
return
0
;
}
}
int
count
;
PADDLE_ENFORCE_MLU_SUCCESS
(
cnDeviceGetCount
(
&
count
));
return
count
;
}
// Returns the MLU device count. The value is computed on the first call and
// cached for the rest of the process lifetime.
int GetMLUDeviceCount() {
  static const int cached_count = GetMLUDeviceCountImpl();
  return cached_count;
}
// Returns the device ids to use: the ids listed in FLAGS_selected_mlus when
// that flag is non-empty, otherwise 0 .. GetMLUDeviceCount()-1.
//
// Flag entries are parsed with atoi, so a non-numeric entry yields 0.
std::vector<int> GetMLUSelectedDevices() {
  // use user specified MLUs in single-node multi-process mode.
  std::vector<int> devices;
  if (!FLAGS_selected_mlus.empty()) {
    auto devices_str = paddle::string::Split(FLAGS_selected_mlus, ',');
    // Iterate by const reference: the original copied every string.
    for (const auto& id : devices_str) {
      devices.push_back(atoi(id.c_str()));
    }
  } else {
    const int count = GetMLUDeviceCount();
    if (count > 0) {
      devices.reserve(count);
    }
    for (int i = 0; i < count; ++i) {
      devices.push_back(i);
    }
  }
  return devices;
}
void
CheckDeviceId
(
int
id
)
{
PADDLE_ENFORCE_LT
(
id
,
GetMLUDeviceCount
(),
platform
::
errors
::
InvalidArgument
(
"Device id must be less than MLU count, "
"but received id is: %d. MLU count is: %d."
,
id
,
GetMLUDeviceCount
()));
}
// Returns the driver version reported by cnrtDriverGetVersion, packed as
// first * 10000 + second * 100 + third (presumably major/minor/patch).
// `id` is only validated; the query itself takes no device argument.
int GetMLUDriverVersion(int id) {
  CheckDeviceId(id);
  int major, minor, patch;
  PADDLE_ENFORCE_MLU_SUCCESS(cnrtDriverGetVersion(&major, &minor, &patch));
  const int packed = major * 10000 + minor * 100 + patch;
  return packed;
}
// Returns the runtime library version reported by cnrtGetLibVersion, packed
// as first * 10000 + second * 100 + third (presumably major/minor/patch).
// `id` is only validated; the query itself takes no device argument.
int GetMLURuntimeVersion(int id) {
  CheckDeviceId(id);
  int major, minor, patch;
  PADDLE_ENFORCE_MLU_SUCCESS(cnrtGetLibVersion(&major, &minor, &patch));
  const int packed = major * 10000 + minor * 100 + patch;
  return packed;
}
// Returns the CNNL library version reported by cnnlGetLibVersion, packed as
// first * 10000 + second * 100 + third (presumably major/minor/patch).
// `id` is only validated; the query itself takes no device argument.
int GetMLUCnnlVersion(int id) {
  CheckDeviceId(id);
  int major, minor, patch;
  cnnlGetLibVersion(&major, &minor, &patch);
  const int packed = major * 10000 + minor * 100 + patch;
  return packed;
}
// Returns the mluOp library version reported by mluOpGetLibVersion, packed as
// first * 10000 + second * 100 + third (presumably major/minor/patch).
// `id` is only validated; the query itself takes no device argument.
int GetMLUOpVersion(int id) {
  CheckDeviceId(id);
  int major, minor, patch;
  mluOpGetLibVersion(&major, &minor, &patch);
  const int packed = major * 10000 + minor * 100 + patch;
  return packed;
}
// Returns the device id reported by cnrtGetDevice; raises if the call fails.
int GetMLUCurrentDeviceId() {
  int current_id;
  PADDLE_ENFORCE_MLU_SUCCESS(cnrtGetDevice(&current_id));
  return current_id;
}
// Validates `id` with CheckDeviceId, then selects that device through
// cnrtSetDevice (wrapped in the retrying enforce macro).
void SetMLUDeviceId(int id) {
  CheckDeviceId(id);
  PADDLE_RETRY_MLU_SUCCESS(cnrtSetDevice(id));
}
void
GetMLUDeviceHandle
(
int
device_ordinal
,
mluDeviceHandle
*
device
)
{
cnStatus
res
=
cnDeviceGet
(
device
,
device_ordinal
);
if
(
res
!=
CN_SUCCESS
)
{
VLOG
(
2
)
<<
"failed to get handle of MLU Device."
;
}
PADDLE_ENFORCE_MLU_SUCCESS
(
res
);
}
int
GetMLUComputeCapability
(
int
id
)
{
CheckDeviceId
(
id
);
mluDeviceHandle
device
;
GetMLUDeviceHandle
(
id
,
&
device
);
int
major
,
minor
;
cnStatus
major_stat
=
cnDeviceGetAttribute
(
&
major
,
CN_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR
,
device
);
cnStatus
minor_stat
=
cnDeviceGetAttribute
(
&
minor
,
CN_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR
,
device
);
PADDLE_ENFORCE_MLU_SUCCESS
(
major_stat
);
PADDLE_ENFORCE_MLU_SUCCESS
(
minor_stat
);
return
major
*
10
+
minor
;
}
// Fills `*available` and `*total` for the current device using
// RecordedMLUMemGetInfo; the "actual" figures it also reports are discarded.
void MLUMemoryUsage(size_t* available, size_t* total) {
  size_t raw_available, raw_total;
  const int current_dev = platform::GetMLUCurrentDeviceId();
  RecordedMLUMemGetInfo(
      available, total, &raw_available, &raw_total, current_dev);
}
// Returns how much memory may be allocated on the current device: the
// available amount minus a reserve of fraction_reserve_mlu_memory of it.
// Returns 0 when that amount is smaller than MLUMinChunkSize().
size_t MLUAvailableMemToAlloc() {
  size_t total = 0;
  size_t available = 0;
  MLUMemoryUsage(&available, &total);

  const size_t reserving =
      static_cast<size_t>(fraction_reserve_mlu_memory * available);
  size_t usable = available - reserving;

  // If available size is less than minimum chunk size, no usable memory exists
  const size_t min_chunk_size = MLUMinChunkSize();
  if (usable < min_chunk_size) {
    usable = 0;
  }

  VLOG(10) << "MLU usage " << ((total - available) >> 20) << "M/"
           << (total >> 20) << "M, " << (usable >> 20)
           << "M available to allocate";
  return usable;
}
// Returns the larger of the initial and the re-allocation sizes.
size_t MLUMaxAllocSize() {
  const size_t init_size = MLUInitAllocSize();
  const size_t realloc_size = MLUReallocSize();
  return std::max(init_size, realloc_size);
}
// Computes the size of an initial (`realloc` == false) or follow-up
// (`realloc` == true) allocation.
//
// If the corresponding *_gpu_memory_in_mb flag is non-zero it is used (in
// MiB); otherwise the size is FLAGS_fraction_of_gpu_memory_to_use times the
// memory available to allocate. Raises ResourceExhausted when nothing is
// available or when the computed size exceeds what is available.
static size_t MLUAllocSize(bool realloc) {
  const size_t allocatable = MLUAvailableMemToAlloc();
  PADDLE_ENFORCE_GT(
      allocatable,
      0,
      platform::errors::ResourceExhausted("Not enough available MLU memory."));

  // If FLAGS_initial_gpu_memory_in_mb is 0, then initial memory will be
  // allocated by fraction
  const size_t requested_mb = realloc ? FLAGS_reallocate_gpu_memory_in_mb
                                      : FLAGS_initial_gpu_memory_in_mb;
  // Kept as a single conditional expression: both arms share one common
  // type before the result is converted to size_t.
  const size_t alloc_bytes =
      (requested_mb > 0ul ? requested_mb << 20
                          : allocatable * FLAGS_fraction_of_gpu_memory_to_use);
  PADDLE_ENFORCE_GE(
      allocatable,
      alloc_bytes,
      platform::errors::ResourceExhausted("Not enough available MLU memory."));

  VLOG(10) << "Alloc size is " << (alloc_bytes >> 20)
           << " MiB, is it Re-alloc: " << realloc;
  return alloc_bytes;
}
// Size of the first allocation (see MLUAllocSize).
size_t MLUInitAllocSize() {
  constexpr bool kIsRealloc = false;
  return MLUAllocSize(kIsRealloc);
}
// Size of each follow-up allocation (see MLUAllocSize).
size_t MLUReallocSize() {
  constexpr bool kIsRealloc = true;
  return MLUAllocSize(kIsRealloc);
}
// Returns MLUMaxAllocSize(), logging it (in MiB) at verbosity 10.
size_t MLUMaxChunkSize() {
  const size_t chunk_limit = MLUMaxAllocSize();
  VLOG(10) << "Max chunk size " << (chunk_limit >> 20) << "M";
  return chunk_limit;
}
// Enqueues a device-to-host copy of `num` from `src` to `dst` on `stream`.
void MLUMemcpyD2HAsync(void* dst,
                       const void* src,
                       size_t num,
                       mluStream stream) {
  // The runtime API takes a non-const source pointer.
  void* const mutable_src = const_cast<void*>(src);
  PADDLE_ENFORCE_MLU_SUCCESS(
      cnrtMemcpyAsync(dst, mutable_src, num, stream, cnrtMemcpyDevToHost));
}
// Performs a device-to-host copy of `num` from `src` to `dst` via cnrtMemcpy.
void MLUMemcpyD2HSync(void* dst, const void* src, size_t num) {
  // The runtime API takes a non-const source pointer.
  void* const mutable_src = const_cast<void*>(src);
  PADDLE_ENFORCE_MLU_SUCCESS(
      cnrtMemcpy(dst, mutable_src, num, cnrtMemcpyDevToHost));
}
// Enqueues a host-to-device copy of `num` from `src` to `dst` on `stream`.
void MLUMemcpyH2DAsync(void* dst,
                       const void* src,
                       size_t num,
                       mluStream stream) {
  // The runtime API takes a non-const source pointer.
  void* const mutable_src = const_cast<void*>(src);
  PADDLE_ENFORCE_MLU_SUCCESS(
      cnrtMemcpyAsync(dst, mutable_src, num, stream, cnrtMemcpyHostToDev));
}
// Performs a host-to-device copy of `num` from `src` to `dst` via cnrtMemcpy.
void MLUMemcpyH2DSync(void* dst, const void* src, size_t num) {
  // The runtime API takes a non-const source pointer.
  void* const mutable_src = const_cast<void*>(src);
  PADDLE_ENFORCE_MLU_SUCCESS(
      cnrtMemcpy(dst, mutable_src, num, cnrtMemcpyHostToDev));
}
// Enqueues a device-to-device copy of `num` from `src` to `dst` on `stream`.
void MLUMemcpyD2DAsync(void* dst,
                       const void* src,
                       size_t num,
                       mluStream stream) {
  // The runtime API takes a non-const source pointer.
  void* const mutable_src = const_cast<void*>(src);
  PADDLE_ENFORCE_MLU_SUCCESS(
      cnrtMemcpyAsync(dst, mutable_src, num, stream, cnrtMemcpyDevToDev));
}
// Performs a device-to-device copy of `num` from `src` to `dst` via
// cnrtMemcpy.
void MLUMemcpyD2DSync(void* dst, const void* src, size_t num) {
  // The runtime API takes a non-const source pointer.
  void* const mutable_src = const_cast<void*>(src);
  PADDLE_ENFORCE_MLU_SUCCESS(
      cnrtMemcpy(dst, mutable_src, num, cnrtMemcpyDevToDev));
}
// Enqueues on `stream` a copy of `num` from `src` on device `src_device` to
// `dst` on device `dst_device`.
void MLUMemcpyPeerAsync(void* dst,
                        int dst_device,
                        const void* src,
                        int src_device,
                        size_t num,
                        mluStream stream) {
  // The runtime API takes a non-const source pointer.
  void* const mutable_src = const_cast<void*>(src);
  PADDLE_ENFORCE_MLU_SUCCESS(cnrtMemcpyPeerAsync(
      dst, dst_device, mutable_src, src_device, num, stream));
}
// Copies `num` from `src` on device `src_device` to `dst` on device
// `dst_device` via cnrtMemcpyPeer.
void MLUMemcpyPeerSync(void* dst,
                       int dst_device,
                       const void* src,
                       int src_device,
                       size_t num) {
  // The runtime API takes a non-const source pointer.
  void* const mutable_src = const_cast<void*>(src);
  PADDLE_ENFORCE_MLU_SUCCESS(
      cnrtMemcpyPeer(dst, dst_device, mutable_src, src_device, num));
}
// Enqueues cnrtMemsetAsync(dst, value, count) on `stream`; raises on failure.
void MLUMemsetAsync(void* dst, int value, size_t count, mluStream stream) {
  const auto memset_status = cnrtMemsetAsync(dst, value, count, stream);
  PADDLE_ENFORCE_MLU_SUCCESS(memset_status);
}
// Calls cnrtQueueSync on `stream`; raises on failure.
void MLUStreamSync(mluStream stream) {
  const auto sync_status = cnrtQueueSync(stream);
  PADDLE_ENFORCE_MLU_SUCCESS(sync_status);
}
// Raises if `*status`, or the runtime's last recorded error, is anything
// other than success or out-of-memory.
//
// On normal return `*status` has been overwritten: it holds cnrtSuccess
// (an out-of-memory value is rewritten to success before each check, and
// the second check uses the value fetched from cnrtGetLastError()).
static void RaiseNonOutOfMemoryError(cnrtStatus* status) {
  // Maps out-of-memory to success, then enforces.
  const auto enforce_ignoring_oom = [status]() {
    if (*status == cnrtErrorNoMem) {
      *status = cnrtSuccess;
    }
    PADDLE_ENFORCE_MLU_SUCCESS(*status);
  };

  enforce_ignoring_oom();
  *status = cnrtGetLastError();
  enforce_ignoring_oom();
}
// Per-device allocation helper that optionally enforces a memory limit.
//
// One instance exists per device (created lazily by Instance()). When the
// limit is non-zero ("recording" is enabled) the helper tracks the number of
// bytes handed out in cur_size_ and refuses allocations that would exceed the
// limit. When the limit is zero nothing is tracked and RecordedSize() stays 0.
class RecordedMLUMallocHelper {
 private:
  explicit RecordedMLUMallocHelper(int dev_id, uint64_t limit_size = 0)
      : dev_id_(dev_id), limit_size_(limit_size) {
    // The mutex is only needed when a limit has to be enforced.
    if (NeedRecord()) {
      mtx_.reset(new std::mutex());
    }
  }

  DISABLE_COPY_AND_ASSIGN(RecordedMLUMallocHelper);

 public:
  // Returns the helper for `dev_id`, creating the helpers for all devices on
  // first use with a limit of FLAGS_gpu_memory_limit_mb MiB each.
  // Raises OutOfRange if dev_id is negative or not a known device.
  static RecordedMLUMallocHelper* Instance(int dev_id) {
    std::call_once(once_flag_, [] {
      int dev_cnt = GetMLUDeviceCount();
      instances_.reserve(dev_cnt);
      for (int i = 0; i < dev_cnt; ++i) {
        instances_.emplace_back(
            new RecordedMLUMallocHelper(i, FLAGS_gpu_memory_limit_mb << 20));
      }
    });

    PADDLE_ENFORCE_GE(
        dev_id,
        0,
        platform::errors::OutOfRange(
            "Device id must be not less than 0, but got %d.", dev_id));
    PADDLE_ENFORCE_LT(
        dev_id,
        instances_.size(),
        platform::errors::OutOfRange("Device id %d exceeds mlu card number %d.",
                                     dev_id,
                                     instances_.size()));
    return instances_[dev_id].get();
  }

  /**
   * Try to allocate `size` mlu memory. Only cnrtErrorNoMem
   * or cnrtSuccess would be returned, and the cnrtGetLastError() flag
   * would be clear.
   */
  cnrtStatus Malloc(void** ptr, size_t size) {
    LockGuardPtr<std::mutex> lock(mtx_);
    if (UNLIKELY(NeedRecord() && cur_size_.load() + size > limit_size_)) {
      return cnrtErrorNoMem;
    }

    MLUDeviceGuard guard(dev_id_);
    auto result = cnrtMalloc(ptr, size);
    if (result == cnrtSuccess) {
      // Mirror Free(): only track bytes when recording is enabled, so the
      // counter cannot grow without ever shrinking when there is no limit.
      if (NeedRecord()) {
        cur_size_.fetch_add(size);
      }
      STAT_INT_ADD("STAT_mlu" + std::to_string(dev_id_) + "_mem_size", size);
      return cnrtSuccess;
    } else {
      RaiseNonOutOfMemoryError(&result);
      // Non out of memory error would be raised inside
      // RaiseNonOutOfMemoryError.
      // Therefore, we can return cnrtErrorNoMem directly here.
      return cnrtErrorNoMem;
    }
  }

  /**
   * Free mlu memory. Usually, free is not allowed to raise error.
   * If it does raise error, the process should be crashed.
   */
  void Free(void* ptr, size_t size) {
    MLUDeviceGuard guard(dev_id_);
    auto err = cnrtFree(ptr);
    PADDLE_ENFORCE_MLU_SUCCESS(err);
    if (NeedRecord()) {
      cur_size_.fetch_sub(size);
    }
    STAT_INT_SUB("STAT_mlu" + std::to_string(dev_id_) + "_mem_size", size);
  }

  // Queries device memory. `actual_*` receive the values reported by the
  // runtime; `avail`/`total` receive the same values capped by the limit
  // when recording is enabled. Returns true iff the limit is below the
  // actual total.
  bool GetMemInfo(size_t* avail,
                  size_t* total,
                  size_t* actual_avail,
                  size_t* actual_total) {
    {
      MLUDeviceGuard guard(dev_id_);
      auto result = cnrtMemGetInfo(actual_avail, actual_total);
      if (result != cnrtSuccess) {
        *actual_avail = 0;
      }
      RaiseNonOutOfMemoryError(&result);
    }

    if (NeedRecord()) {
      std::lock_guard<std::mutex> guard(*mtx_);
      *avail = std::min(*actual_avail, limit_size_ - cur_size_.load());
      *total = std::min(*actual_total, limit_size_);
      return *total < *actual_total;
    } else {
      *avail = *actual_avail;
      *total = *actual_total;
      return false;
    }
  }

  // True when a non-zero limit was configured.
  inline bool NeedRecord() const { return limit_size_ != 0; }

  // Bytes currently tracked; stays 0 when recording is disabled.
  uint64_t RecordedSize() const { return cur_size_.load(); }

  uint64_t LimitSize() const { return limit_size_; }

 private:
  const int dev_id_;
  const uint64_t limit_size_;
  std::atomic<uint64_t> cur_size_{0};

  // Null when recording is disabled.
  mutable std::unique_ptr<std::mutex> mtx_;

  static std::once_flag once_flag_;
  static std::vector<std::unique_ptr<RecordedMLUMallocHelper>> instances_;
};
// NOLINT
// Out-of-class definitions of the static members declared in
// RecordedMLUMallocHelper: the once-flag guarding lazy creation and the
// per-device helper instances filled in by Instance().
std
::
once_flag
RecordedMLUMallocHelper
::
once_flag_
;
std
::
vector
<
std
::
unique_ptr
<
RecordedMLUMallocHelper
>>
RecordedMLUMallocHelper
::
instances_
;
// Allocates `size` on device `dev_id` through that device's
// RecordedMLUMallocHelper and returns its status.
cnrtStatus RecordedMLUMalloc(void** ptr, size_t size, int dev_id) {
  auto* helper = RecordedMLUMallocHelper::Instance(dev_id);
  return helper->Malloc(ptr, size);
}
// Frees `p` (of `size`) on device `dev_id` through that device's
// RecordedMLUMallocHelper.
void RecordedMLUFree(void* p, size_t size, int dev_id) {
  auto* helper = RecordedMLUMallocHelper::Instance(dev_id);
  helper->Free(p, size);
}
// Forwards to RecordedMLUMallocHelper::GetMemInfo for device `dev_id` and
// returns its result.
bool RecordedMLUMemGetInfo(size_t* avail,
                           size_t* total,
                           size_t* actual_avail,
                           size_t* actual_total,
                           int dev_id) {
  auto* helper = RecordedMLUMallocHelper::Instance(dev_id);
  return helper->GetMemInfo(avail, total, actual_avail, actual_total);
}
// Returns RecordedSize() of the helper for device `dev_id`.
uint64_t RecordedMLUMallocSize(int dev_id) {
  const auto* helper = RecordedMLUMallocHelper::Instance(dev_id);
  return helper->RecordedSize();
}
// Returns NeedRecord() of the helper for device `dev_id`, i.e. whether a
// non-zero memory limit is configured.
bool IsMLUMallocRecorded(int dev_id) {
  const auto* helper = RecordedMLUMallocHelper::Instance(dev_id);
  return helper->NeedRecord();
}
// Calls memory::Release for every device returned by
// GetMLUSelectedDevices().
void EmptyCache(void) {
  const std::vector<int> selected = GetMLUSelectedDevices();
  for (const int dev_id : selected) {
    memory::Release(MLUPlace(dev_id));
  }
}
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/mlu/mlu_info.h
已删除
100644 → 0
浏览文件 @
fb276f23
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_MLU
#include <cn_api.h>
#include <cnnl.h>
#include <cnpapi.h>
#include <cnpapi_cndrv_id.h>
#include <cnrt.h>
#include <mlu_op.h>
#ifdef PADDLE_WITH_CNCL
#include <cncl.h>
#endif
#include <vector>
#include "paddle/phi/backends/mlu/mlu_info.h"
namespace
paddle
{
using
cnStatus
=
CNresult
;
using
cnrtStatus
=
cnrtRet_t
;
using
cnnlStatus
=
cnnlStatus_t
;
using
mluOpStatus
=
mluOpStatus_t
;
#ifdef PADDLE_WITH_CNCL
using
cnclStatus
=
cnclResult_t
;
#endif
using
mluStream
=
cnrtQueue_t
;
using
mluCnnlHandle
=
cnnlHandle_t
;
using
mluOpHandle
=
mluOpHandle_t
;
using
mluEventHandle
=
cnrtNotifier_t
;
using
mluDeviceHandle
=
CNdev
;
namespace
platform
{
//! Get the driver version of the ith MLU.
int
GetMLUDriverVersion
(
int
id
);
//! Get the runtime version of the ith MLU.
int
GetMLURuntimeVersion
(
int
id
);
//! Get the cnnl version of the ith MLU.
int
GetMLUCnnlVersion
(
int
id
);
//! Get the mluOp version of the ith MLU.
int
GetMLUOpVersion
(
int
id
);
//! Get the total number of MLU devices in system.
int
GetMLUDeviceCount
();
//! Get the device ids listed in FLAGS_selected_mlus, or all devices if it is empty.
std
::
vector
<
int
>
GetMLUSelectedDevices
();
//! Get the current MLU device id in system.
int
GetMLUCurrentDeviceId
();
//! Set the MLU device id for next execution.
void
SetMLUDeviceId
(
int
device_id
);
//! Get the device handle of the MLU with the given ordinal.
void
GetMLUDeviceHandle
(
int
device_ordinal
,
mluDeviceHandle
*
device
);
//! Get the compute capability of the ith MLU (format: major * 10 + minor)
int
GetMLUComputeCapability
(
int
id
);
//! Get the memory usage of current MLU device.
void
MLUMemoryUsage
(
size_t
*
available
,
size_t
*
total
);
//! Get the memory available to allocate, i.e. the available MLU memory
//! minus the reserved portion.
size_t
MLUAvailableMemToAlloc
();
//! Get the maximum allocation size of current MLU device.
size_t
MLUMaxAllocSize
();
//! Get the initial allocation size of current MLU device.
size_t
MLUInitAllocSize
();
//! Get the re-allocation size of current MLU device.
size_t
MLUReallocSize
();
using
phi
::
backends
::
mlu
::
MLUMinChunkSize
;
//! Get the maximum chunk size for MLU buddy allocator.
size_t
MLUMaxChunkSize
();
//! Copy memory from address device to host asynchronously.
void
MLUMemcpyD2HAsync
(
void
*
dst
,
const
void
*
src
,
size_t
num
,
mluStream
stream
);
//! Copy memory from address device to host synchronously.
void
MLUMemcpyD2HSync
(
void
*
dst
,
const
void
*
src
,
size_t
num
);
//! Copy memory from address host to device asynchronously.
void
MLUMemcpyH2DAsync
(
void
*
dst
,
const
void
*
src
,
size_t
num
,
mluStream
stream
);
//! Copy memory from address host to device synchronously.
void
MLUMemcpyH2DSync
(
void
*
dst
,
const
void
*
src
,
size_t
num
);
//! Copy memory from address device to device asynchronously in a single device.
void
MLUMemcpyD2DAsync
(
void
*
dst
,
const
void
*
src
,
size_t
num
,
mluStream
stream
);
//! Copy memory from address device to device synchronously in a single device.
void
MLUMemcpyD2DSync
(
void
*
dst
,
const
void
*
src
,
size_t
num
);
//! Copy memory from one device to another device asynchronously.
void
MLUMemcpyPeerAsync
(
void
*
dst
,
int
dst_place
,
const
void
*
src
,
int
src_place
,
size_t
num
,
mluStream
stream
);
//! Copy memory from one device to another device synchronously.
void
MLUMemcpyPeerSync
(
void
*
dst
,
int
dst_place
,
const
void
*
src
,
int
src_place
,
size_t
num
);
//! Asynchronously fill the memory at dst with value, using count as the size.
void
MLUMemsetAsync
(
void
*
dst
,
int
value
,
size_t
count
,
mluStream
stream
);
//! Blocks until stream has completed all operations.
void
MLUStreamSync
(
mluStream
stream
);
//! MLUMalloc with recorded info
cnrtStatus
RecordedMLUMalloc
(
void
**
ptr
,
size_t
size
,
int
dev_id
);
//! MLUFree with recorded info
void
RecordedMLUFree
(
void
*
p
,
size_t
size
,
int
dev_id
);
//! Get available and total mlu memory with considering limitation
bool
RecordedMLUMemGetInfo
(
size_t
*
avail
,
size_t
*
total
,
size_t
*
actual_avail
,
size_t
*
actual_total
,
int
dev_id
);
//! Get recorded mluMalloc size. If record is disabled, return 0.
uint64_t
RecordedMLUMallocSize
(
int
dev_id
);
bool
IsMLUMallocRecorded
(
int
dev_id
);
//! Empty idle cached memory held by the allocator.
void
EmptyCache
(
void
);
// Scoped device switch: selects `dev_id` on construction and, if that
// changed the current device, switches back on destruction.
// prev_id_ == -1 means no switch happened and nothing is restored.
class MLUDeviceGuard {
 public:
  explicit inline MLUDeviceGuard(int dev_id) {
    const int current = platform::GetMLUCurrentDeviceId();
    if (current == dev_id) {
      return;  // already on the requested device
    }
    prev_id_ = current;
    platform::SetMLUDeviceId(dev_id);
  }

  inline ~MLUDeviceGuard() {
    if (prev_id_ == -1) {
      return;
    }
    platform::SetMLUDeviceId(prev_id_);
  }

  MLUDeviceGuard(const MLUDeviceGuard& o) = delete;
  MLUDeviceGuard& operator=(const MLUDeviceGuard& o) = delete;

 private:
  int prev_id_{-1};
};
}
// namespace platform
}
// namespace paddle
#endif
paddle/fluid/platform/device/mlu/mlu_resource_pool.cc
已删除
100644 → 0
浏览文件 @
fb276f23
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#if defined(PADDLE_WITH_MLU)
#include "paddle/fluid/platform/device/mlu/mlu_resource_pool.h"
namespace
paddle
{
namespace
platform
{
// Builds one stream ResourcePool per MLU device. Each pool creates streams
// with cnrtQueueCreate and destroys them with cnrtQueueDestroy, selecting
// the owning device first.
MluStreamResourcePool::MluStreamResourcePool() {
  int dev_cnt = platform::GetMLUDeviceCount();
  pool_.reserve(dev_cnt);
  for (int dev_idx = 0; dev_idx < dev_cnt; ++dev_idx) {
    auto creator = [dev_idx] {
      platform::SetMLUDeviceId(dev_idx);
      // Start from null: the create status is not checked here, so without
      // this an indeterminate handle would be returned if creation failed.
      mluStream stream = nullptr;
      cnrtQueueCreate(&stream);
      return stream;
    };

    auto deleter = [dev_idx](mluStream stream) {
      platform::SetMLUDeviceId(dev_idx);
      cnrtQueueDestroy(stream);
    };

    pool_.emplace_back(ResourcePool<MluStreamObject>::Create(creator, deleter));
  }
}
// Returns the process-wide pool, constructed on first call.
MluStreamResourcePool& MluStreamResourcePool::Instance() {
  static MluStreamResourcePool singleton;
  return singleton;
}
// Hands out a stream from the pool of device `dev_idx`.
// Raises InvalidArgument if dev_idx < 0 and OutOfRange if it is not a
// pooled device.
std::shared_ptr<MluStreamObject> MluStreamResourcePool::New(int dev_idx) {
  PADDLE_ENFORCE_GE(
      dev_idx,
      0,
      platform::errors::InvalidArgument(
          "The dev_idx should be not less than 0, but got %d.", dev_idx));
  PADDLE_ENFORCE_LT(
      dev_idx,
      pool_.size(),
      platform::errors::OutOfRange(
          "The dev_idx should be less than device count %d, but got %d.",
          pool_.size(),
          dev_idx));
  auto& device_pool = pool_[dev_idx];
  return device_pool->New();
}
// Builds one event ResourcePool per MLU device. Each pool creates events
// with cnrtNotifierCreate and destroys them with cnrtNotifierDestroy,
// selecting the owning device first.
MluEventResourcePool::MluEventResourcePool() {
  int dev_cnt = platform::GetMLUDeviceCount();
  pool_.reserve(dev_cnt);
  for (int dev_idx = 0; dev_idx < dev_cnt; ++dev_idx) {
    auto creator = [dev_idx] {
      platform::SetMLUDeviceId(dev_idx);
      // Start from null: the create status is not checked here, so without
      // this an indeterminate handle would be returned if creation failed.
      mluEventHandle event = nullptr;
      cnrtNotifierCreate(&event);
      return event;
    };

    auto deleter = [dev_idx](mluEventHandle event) {
      platform::SetMLUDeviceId(dev_idx);
      cnrtNotifierDestroy(event);
    };

    pool_.emplace_back(ResourcePool<MluEventObject>::Create(creator, deleter));
  }
}
// Returns the process-wide pool, constructed on first call.
MluEventResourcePool& MluEventResourcePool::Instance() {
  static MluEventResourcePool singleton;
  return singleton;
}
// Hands out an event from the pool of device `dev_idx`.
// Raises InvalidArgument if dev_idx < 0 and OutOfRange if it is not a
// pooled device.
std::shared_ptr<MluEventObject> MluEventResourcePool::New(int dev_idx) {
  PADDLE_ENFORCE_GE(
      dev_idx,
      0,
      platform::errors::InvalidArgument(
          "The dev_idx should be not less than 0, but got %d.", dev_idx));
  PADDLE_ENFORCE_LT(
      dev_idx,
      pool_.size(),
      platform::errors::OutOfRange(
          "The dev_idx should be less than device count %d, but got %d.",
          pool_.size(),
          dev_idx));
  auto& device_pool = pool_[dev_idx];
  return device_pool->New();
}
}
// namespace platform
}
// namespace paddle
#endif
paddle/fluid/platform/device/mlu/mlu_resource_pool.h
已删除
100644 → 0
浏览文件 @
fb276f23
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#if defined(PADDLE_WITH_MLU)
#include <memory>
#include <type_traits>
#include <vector>
#include "paddle/fluid/platform/device/mlu/mlu_info.h"
#include "paddle/fluid/platform/resource_pool.h"
namespace
paddle
{
namespace
platform
{
using
MluStreamObject
=
std
::
remove_pointer
<
mluStream
>::
type
;
using
MluEventObject
=
std
::
remove_pointer
<
mluEventHandle
>::
type
;
// Singleton holding one stream ResourcePool per MLU device.
class MluStreamResourcePool {
 public:
  // Access the single instance.
  static MluStreamResourcePool& Instance();

  // Obtain a stream belonging to device `dev_idx`.
  std::shared_ptr<MluStreamObject> New(int dev_idx);

 private:
  MluStreamResourcePool();
  DISABLE_COPY_AND_ASSIGN(MluStreamResourcePool);

 private:
  // Indexed by device id.
  std::vector<std::shared_ptr<ResourcePool<MluStreamObject>>> pool_;
};
// Singleton holding one event ResourcePool per MLU device.
class MluEventResourcePool {
 public:
  // Access the single instance.
  static MluEventResourcePool& Instance();

  // Obtain an event belonging to device `dev_idx`.
  std::shared_ptr<MluEventObject> New(int dev_idx);

 private:
  MluEventResourcePool();
  DISABLE_COPY_AND_ASSIGN(MluEventResourcePool);

 private:
  // Indexed by device id.
  std::vector<std::shared_ptr<ResourcePool<MluEventObject>>> pool_;
};
}
// namespace platform
}
// namespace paddle
#endif
paddle/fluid/platform/device/mlu/mlu_stream.cc
已删除
100644 → 0
浏览文件 @
fb276f23
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device/mlu/mlu_stream.h"
#include "paddle/fluid/platform/device/mlu/device_context.h"
namespace
paddle
{
namespace
platform
{
namespace
stream
{
bool
MLUStream
::
Init
(
const
MLUPlace
&
place
,
const
int
priority
)
{
PADDLE_ENFORCE_EQ
(
is_mlu_place
(
place
),
true
,
platform
::
errors
::
InvalidArgument
(
"MLU stream must be created using mlu place."
));
place_
=
place
;
MLUDeviceGuard
guard
(
place_
.
device
);
PADDLE_ENFORCE_MLU_SUCCESS
(
cnrtQueueCreate
(
&
stream_
));
callback_manager_
.
reset
(
new
StreamCallbackManager
<
mluStream
>
(
stream_
));
VLOG
(
3
)
<<
"MLUStream Init stream: "
<<
stream_
;
return
true
;
}
// Waits for outstanding work and callbacks, then destroys the queue.
//
// Does nothing when there is no queue (never initialized, or already
// destroyed). Without this guard a default-constructed stream would
// dereference the null callback manager, and the destructor's second
// Destroy() after an explicit one would sync a null queue.
void MLUStream::Destroy() {
  if (!stream_) {
    return;
  }
  MLUDeviceGuard guard(place_.device);
  Wait();
  WaitCallback();
  PADDLE_ENFORCE_MLU_SUCCESS(cnrtQueueDestroy(stream_));
  stream_ = nullptr;
}
// Calls cnrtQueueSync on this stream's queue; raises on failure.
void MLUStream::Wait() const {
  const auto sync_status = cnrtQueueSync(stream_);
  PADDLE_ENFORCE_MLU_SUCCESS(sync_status);
}
// Returns the stream held by the pooled device context of `deviceId`.
// Passing -1 selects the current device. When built without MLU support
// this raises Unavailable.
MLUStream* get_current_mlu_stream(int deviceId) {
#ifdef PADDLE_WITH_MLU
  if (deviceId == -1) {
    deviceId = platform::GetMLUCurrentDeviceId();
  }
  auto& ctx_pool = platform::DeviceContextPool::Instance();
  platform::Place target = MLUPlace(deviceId);
  auto* mlu_ctx =
      static_cast<platform::MLUDeviceContext*>(ctx_pool.Get(target));
  return mlu_ctx->context()->Stream().get();
#else
  PADDLE_THROW(platform::errors::Unavailable(
      "Paddle is not compiled with MLU. Cannot visit mlu current stream."));
  return nullptr;
#endif
}
// Installs `stream` on the pooled device context of the stream's own place
// and returns whatever that context's SetStream returns. When built without
// MLU support this raises Unavailable.
MLUStream* set_current_mlu_stream(MLUStream* stream) {
#ifdef PADDLE_WITH_MLU
  auto& target = stream->GetPlace();
  auto& ctx_pool = platform::DeviceContextPool::Instance();
  auto* mlu_ctx =
      static_cast<platform::MLUDeviceContext*>(ctx_pool.Get(target));
  return mlu_ctx->context()->SetStream(stream);
#else
  PADDLE_THROW(platform::errors::Unavailable(
      "Paddle is not compiled with MLU. Cannot visit mlu current stream."));
  return nullptr;
#endif
}
}
// namespace stream
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/device/mlu/mlu_stream.h
已删除
100644 → 0
浏览文件 @
fb276f23
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
#include <memory>
#include <utility>

#include "paddle/fluid/platform/device/mlu/enforce.h"
#include "paddle/fluid/platform/device/mlu/mlu_info.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/stream_callback_manager.h"
namespace
paddle
{
namespace
platform
{
namespace
stream
{
#ifdef PADDLE_WITH_MLU
// Wrapper around a raw MLU queue handle (`mluStream`) tied to one MLUPlace.
// The destructor calls Destroy(); copy construction and assignment are
// disabled through DISABLE_COPY_AND_ASSIGN.
class MLUStream final {
 public:
  // Leaves stream_ as nullptr and callback_manager_ empty. Presumably Init()
  // is what fills them in; its definition is not part of this header.
  MLUStream() = default;

  // Forwards to Init(place, priority). The bool returned by Init() is not
  // inspected here.
  explicit MLUStream(const MLUPlace& place, const int priority = 0) {
    Init(place, priority);
  }

  virtual ~MLUStream() { Destroy(); }

  // Defined out of line.
  bool Init(const MLUPlace& place, const int priority = 0);

  // Hands `callback` to the callback manager. The argument is perfectly
  // forwarded so that an rvalue callable is moved rather than copied.
  // Dereferences callback_manager_ unconditionally, so the manager must have
  // been created beforehand.
  template <typename Callback>
  void AddCallback(Callback&& callback) const {
    callback_manager_->AddCallback(std::forward<Callback>(callback));
  }

  // Invokes `callback` first, then records `event` on this stream.
  template <typename Callback>
  void RecordEvent(mluEventHandle event, Callback callback) const {
    callback();
    RecordEvent(event);
  }

  // Places the notifier `event` on the wrapped queue via cnPlaceNotifier.
  void RecordEvent(mluEventHandle event) const {
    PADDLE_ENFORCE_MLU_SUCCESS(cnPlaceNotifier(event, stream_));
  }

  // Calls cnWaitNotifier(event).
  // NOTE(review): stream_ is not passed to this call, so the wait is not
  // expressed in terms of this particular queue - confirm that is intended.
  void WaitEvent(mluEventHandle event) const {
    PADDLE_ENFORCE_MLU_SUCCESS(cnWaitNotifier(event));
  }

  // Defined out of line.
  void Wait() const;

  // Waits on the callback manager, if there is one. A default-constructed
  // MLUStream has an empty callback_manager_, and Destroy() (run by the
  // destructor) calls this function, so an empty manager is tolerated
  // instead of being dereferenced.
  void WaitCallback() const {
    if (callback_manager_) {
      callback_manager_->Wait();
    }
  }

  // Read-only access to the wrapped handle.
  const mluStream& raw_stream() const { return stream_; }

  // Defined out of line.
  void Destroy();

  // Polls the queue with cnrtQueueQuery.
  // Returns true for cnrtSuccess and false for cnrtErrorNotReady. Any other
  // status is passed to PADDLE_ENFORCE_MLU_SUCCESS (presumably raising an
  // error); false is returned if that macro returns normally.
  bool Query() const {
    cnrtStatus stat = cnrtQueueQuery(stream_);
    if (stat == cnrtSuccess) {
      return true;
    }
    if (stat == cnrtErrorNotReady) {
      return false;
    }
    PADDLE_ENFORCE_MLU_SUCCESS(stat);
    return false;
  }

  // Calls cnrtQueueSync on the wrapped handle and checks the status.
  void Synchronize() const {
    PADDLE_ENFORCE_MLU_SUCCESS(cnrtQueueSync(stream_));
  }

  // The place stored in this object.
  const MLUPlace& GetPlace() const { return place_; }

 private:
  MLUPlace place_;
  mluStream stream_{nullptr};
  int priority_{0};
  std::unique_ptr<StreamCallbackManager<mluStream>> callback_manager_;

  DISABLE_COPY_AND_ASSIGN(MLUStream);
};
// Returns the MLUStream attached to the device context of MLU device
// `deviceId`. The definition treats -1 as "use the device reported by
// GetMLUCurrentDeviceId()"; there is no default argument, so callers pass -1
// explicitly. The pointer comes from Stream().get() on the device context, so
// it is presumably not owned by the caller - confirm before deleting it.
// Only declared when PADDLE_WITH_MLU is defined.
MLUStream
*
get_current_mlu_stream
(
int
deviceId
);
// Passes `stream` to SetStream() on the MLUDeviceContext registered for the
// stream's own place (stream->GetPlace()) and returns that call's result.
// The definition reads the place through `stream`, so the pointer is expected
// to be valid.
// Only declared when PADDLE_WITH_MLU is defined.
MLUStream
*
set_current_mlu_stream
(
MLUStream
*
stream
);
#endif
}
// namespace stream
}
// namespace platform
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录