PaddlePaddle / Paddle

Commit 3e1e482b

[MLU] fluid: add mluop (#46429)

Authored by cifar10 on Sep 26, 2022; committed via GitHub on Sep 26, 2022.
Parent: b0ec8efb
Showing 11 changed files with 434 additions and 47 deletions (+434 -47).
Changed files:

  cmake/neuware.cmake                                                   +3   -1
  paddle/fluid/operators/mlu/mlu_baseop.cc                              +209 -11
  paddle/fluid/operators/mlu/mlu_baseop.h                               +121 -0
  paddle/fluid/platform/device/mlu/device_context.cc                    +10  -1
  paddle/fluid/platform/device/mlu/device_context.h                     +19  -0
  paddle/fluid/platform/device/mlu/enforce.h                            +10  -0
  paddle/fluid/platform/device/mlu/mlu_info.cc                          +7   -0
  paddle/fluid/platform/device/mlu/mlu_info.h                           +7   -1
  python/paddle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py   +19  -20
  python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py     +5   -0
  tools/dockerfile/Dockerfile.mlu                                       +24  -13
cmake/neuware.cmake

@@ -15,12 +15,14 @@ set(NEUWARE_LIB_DIR ${NEUWARE_HOME}/lib64)
 include_directories(${NEUWARE_INCLUDE_DIR})
 set(CNNL_LIB ${NEUWARE_LIB_DIR}/libcnnl.so)
+set(MLUOP_LIB ${NEUWARE_LIB_DIR}/libmluops.so)
 set(CNRT_LIB ${NEUWARE_LIB_DIR}/libcnrt.so)
 set(CNDRV_LIB ${NEUWARE_LIB_DIR}/libcndrv.so)
 set(CNPAPI_LIB ${NEUWARE_LIB_DIR}/libcnpapi.so)
 generate_dummy_static_lib(LIB_NAME "neuware_lib" GENERATOR "neuware.cmake")
-set(NEUWARE_LIB_DEPS ${CNNL_LIB} ${CNRT_LIB} ${CNDRV_LIB} ${CNPAPI_LIB})
+set(NEUWARE_LIB_DEPS ${CNNL_LIB} ${MLUOP_LIB} ${CNRT_LIB} ${CNDRV_LIB} ${CNPAPI_LIB})
 if(WITH_CNCL)
   message(STATUS "Compile with CNCL!")
 ...
paddle/fluid/operators/mlu/mlu_baseop.cc

@@ -256,6 +256,186 @@ MLUCnnlTensorDesc::~MLUCnnlTensorDesc() {
   }
 }
 
+class MLUOpTensorDescPool {
+ public:
+  mluOpTensorDescriptor_t Pop() {
+    mluOpTensorDescriptor_t raw_desc;
+    if (q_.try_dequeue(raw_desc)) {
+      return raw_desc;
+    } else {
+      mluOpCreateTensorDescriptor(&raw_desc);
+      return raw_desc;
+    }
+  }
+
+  void Recycle(mluOpTensorDescriptor_t desc) {
+    mluOpResetTensorDescriptor(desc);
+    q_.enqueue(desc);
+  }
+
+  ~MLUOpTensorDescPool() {
+    auto size = q_.size_approx();
+    if (size > 0) {
+      std::vector<mluOpTensorDescriptor_t> vec(size);
+      q_.try_dequeue_bulk(vec.data(), size);
+      for (auto desc : vec) {
+        mluOpDestroyTensorDescriptor(desc);
+      }
+    }
+  }
+
+ private:
+  moodycamel::ConcurrentQueue<mluOpTensorDescriptor_t> q_;
+};
+
+static MLUOpTensorDescPool g_mluop_tensor_desc_pool;
+
+MLUOpTensorDesc& MLUOpTensorDesc::operator=(MLUOpTensorDesc&& rhs) {
+  if (raw_tensor_desc) {
+    g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
+  }
+  raw_tensor_desc = rhs.raw_tensor_desc;
+  rhs.raw_tensor_desc = nullptr;
+  return *this;
+}
+
+MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
+                                 const int dim_sizes[],
+                                 const mluOpDataType_t tensor_dtype) {
+  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
+  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
+      raw_tensor_desc, MLUOP_LAYOUT_ARRAY, tensor_dtype, tensor_dim, dim_sizes));
+}
+
+MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
+                                 const int dim_sizes[],
+                                 const mluOpDataType_t tensor_dtype,
+                                 const mluOpTensorLayout_t layout) {
+  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
+  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
+      raw_tensor_desc, layout, tensor_dtype, tensor_dim, dim_sizes));
+}
+
+MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
+                                 const int dim_sizes[],
+                                 const mluOpDataType_t tensor_dtype,
+                                 int position)
+    : MLUOpTensorDesc(tensor_dim, dim_sizes, tensor_dtype) {
+  PADDLE_ENFORCE_MLU_SUCCESS(
+      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
+}
+
+MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
+                                 const int64_t dim_sizes[],
+                                 const mluOpDataType_t tensor_dtype) {
+  std::vector<int> dim_sizes_int32(tensor_dim);
+  std::vector<int64_t>::const_iterator int64_cbegin(dim_sizes);
+  std::vector<int64_t>::const_iterator int64_cend(dim_sizes + tensor_dim);
+  std::transform(int64_cbegin, int64_cend, dim_sizes_int32.begin(),
+                 &CheckedNarrowing<int64_t, int>);
+  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
+  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
+                                                      MLUOP_LAYOUT_ARRAY,
+                                                      tensor_dtype,
+                                                      tensor_dim,
+                                                      dim_sizes_int32.data()));
+}
+
+MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
+                                 const int64_t dim_sizes[],
+                                 const mluOpDataType_t tensor_dtype,
+                                 const mluOpTensorLayout_t layout) {
+  std::vector<int> dim_sizes_int32(tensor_dim);
+  std::vector<int64_t>::const_iterator int64_cbegin(dim_sizes);
+  std::vector<int64_t>::const_iterator int64_cend(dim_sizes + tensor_dim);
+  std::transform(int64_cbegin, int64_cend, dim_sizes_int32.begin(),
+                 &CheckedNarrowing<int64_t, int>);
+  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
+  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
+                                                      layout,
+                                                      tensor_dtype,
+                                                      tensor_dim,
+                                                      dim_sizes_int32.data()));
+}
+
+MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
+                                 const int64_t dim_sizes[],
+                                 const mluOpDataType_t tensor_dtype,
+                                 int position) {
+  std::vector<int> dim_sizes_int32(tensor_dim);
+  std::vector<int64_t>::const_iterator int64_cbegin(dim_sizes);
+  std::vector<int64_t>::const_iterator int64_cend(dim_sizes + tensor_dim);
+  std::transform(int64_cbegin, int64_cend, dim_sizes_int32.begin(),
+                 &CheckedNarrowing<int64_t, int>);
+  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
+  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
+                                                      MLUOP_LAYOUT_ARRAY,
+                                                      tensor_dtype,
+                                                      tensor_dim,
+                                                      dim_sizes_int32.data()));
+  PADDLE_ENFORCE_MLU_SUCCESS(
+      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
+}
+
+MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
+                                 const mluOpTensorLayout_t layout,
+                                 const mluOpDataType_t tensor_dtype) {
+  auto dims = phi::vectorize<int>(tensor.dims());
+  int tensor_dim = dims.size();
+  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
+  if (tensor_dim == 0) {
+    int scalar_dims[1] = {1};
+    PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
+        raw_tensor_desc, layout, tensor_dtype, 1, scalar_dims));
+  } else {
+    std::vector<int> tensor_dim_sizes_int(dims.begin(), dims.end());
+    PADDLE_ENFORCE_MLU_SUCCESS(
+        mluOpSetTensorDescriptor(raw_tensor_desc,
+                                 layout,
+                                 tensor_dtype,
+                                 tensor_dim,
+                                 tensor_dim_sizes_int.data()));
+  }
+}
+
+MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor)
+    : MLUOpTensorDesc(
+          tensor, MLUOP_LAYOUT_ARRAY, ToMluOpDataType(tensor.dtype())) {}
+
+MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
+                                 mluOpTensorLayout_t layout,
+                                 const mluOpDataType_t tensor_dtype,
+                                 int position)
+    : MLUOpTensorDesc(tensor, layout, tensor_dtype) {
+  PADDLE_ENFORCE_MLU_SUCCESS(
+      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
+}
+
+MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
+                                 mluOpTensorLayout_t layout,
+                                 const mluOpDataType_t tensor_dtype,
+                                 int position,
+                                 float scale)
+    : MLUOpTensorDesc(tensor, layout, tensor_dtype) {
+  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptorPositionAndScale(
+      raw_tensor_desc, position, scale));
+}
+
+MLUOpTensorDesc::~MLUOpTensorDesc() {
+  if (raw_tensor_desc) {
+    g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
+  }
+}
+
 MLUCnnlActivationDesc::MLUCnnlActivationDesc(
     const cnnlActivationMode_t act_mode, const float ceof) {
   PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateActivationDescriptor(&active_desc_));
 ...

@@ -1563,17 +1743,35 @@ MLURNNDesc::~MLURNNDesc() {
                           void* indices_out) {
   cnnlHandle_t handle = GetHandleFromCTX(ctx);
 
-  PADDLE_ENFORCE_MLU_SUCCESS(cnnlTopKTensor(handle,
-                                            input_desc,
-                                            input,
-                                            k,
-                                            dim,
-                                            largest,
-                                            sorted,
-                                            values_output_desc,
-                                            values_out,
-                                            indices_output_desc,
-                                            indices_out));
+  size_t workspace_size;
+  PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetTopKTensorWorkspaceSize(handle,
+                                                            input_desc,
+                                                            k,
+                                                            dim,
+                                                            largest,
+                                                            values_output_desc,
+                                                            indices_output_desc,
+                                                            &workspace_size));
+
+  auto& dev_ctx = GetDevCtxFromCTX(ctx);
+  Tensor workspace = ctx.AllocateTmpTensor<int8_t, MLUDeviceContext>(
+      {static_cast<int64_t>(workspace_size)}, dev_ctx);
+  void* workspace_ptr = workspace.mutable_data(ctx.GetPlace());
+
+  PADDLE_ENFORCE_MLU_SUCCESS(cnnlTopKTensor_v3(handle,
+                                               input_desc,
+                                               input,
+                                               k,
+                                               dim,
+                                               largest,
+                                               sorted,
+                                               false /*lower_index_first*/,
+                                               workspace_ptr,
+                                               workspace_size,
+                                               values_output_desc,
+                                               values_out,
+                                               indices_output_desc,
+                                               indices_out));
 }
 
 /* static */ void MLUCnnl::StridedSlice(
 ...
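
The descriptor pool mirrors the existing cnnl descriptor handling in this file: Pop() reuses a recycled mluOpTensorDescriptor_t when the lock-free queue has one and only calls mluOpCreateTensorDescriptor when it is empty, and every MLUOpTensorDesc returns its raw descriptor to the pool on destruction. A minimal usage sketch follows; the kernel itself and the final mluOp call are hypothetical, only MLUOpTensorDesc, ToMluOpDataType and GetMLUOpHandleFromCTX come from this commit, and the code assumes it lives in namespace paddle::operators where mlu_baseop.h places these helpers.

// Sketch only, not part of the commit.
#include "paddle/fluid/operators/mlu/mlu_baseop.h"

void SketchedMluOpKernel(const ExecutionContext& ctx) {
  auto* x = ctx.Input<Tensor>("X");
  auto* out = ctx.Output<Tensor>("Out");
  out->mutable_data<float>(ctx.GetPlace());

  // Each MLUOpTensorDesc pops a raw descriptor from g_mluop_tensor_desc_pool
  // and recycles it again in its destructor.
  MLUOpTensorDesc x_desc(*x);  // MLUOP_LAYOUT_ARRAY, dtype taken from x
  MLUOpTensorDesc out_desc(*out, MLUOP_LAYOUT_ARRAY, ToMluOpDataType(out->dtype()));

  // Per-device mluOp handle, added in mlu_baseop.h / device_context.h below.
  mluOpHandle_t handle = GetMLUOpHandleFromCTX(ctx);
  // ... pass handle, x_desc.get() and out_desc.get() to an mluOp kernel ...
}
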
paddle/fluid/operators/mlu/mlu_baseop.h

@@ -16,6 +16,7 @@ limitations under the License. */
 #include <cn_api.h>
 #include <cnnl.h>
 #include <concurrentqueue.h>
+#include <mlu_op.h>
 #include <string>
 #include <vector>
 ...

@@ -138,6 +139,54 @@ inline cnnlDataType_t ToCnnlDataType() {
   return ToCnnlDataType(type);
 }
 
+inline mluOpDataType_t ToMluOpDataType(
+    const paddle::experimental::DataType& dtype) {
+  mluOpDataType_t type = MLUOP_DTYPE_FLOAT;
+  switch (dtype) {
+    case DataType::FLOAT16:
+      type = MLUOP_DTYPE_HALF;
+      break;
+    case DataType::FLOAT32:
+      type = MLUOP_DTYPE_FLOAT;
+      break;
+    case DataType::FLOAT64:
+      type = MLUOP_DTYPE_DOUBLE;
+      break;
+    case DataType::INT8:
+      type = MLUOP_DTYPE_INT8;
+      break;
+    case DataType::INT16:
+      type = MLUOP_DTYPE_INT16;
+      break;
+    case DataType::INT32:
+      type = MLUOP_DTYPE_INT32;
+      break;
+    case DataType::INT64:
+      type = MLUOP_DTYPE_INT64;
+      break;
+    case DataType::BOOL:
+      type = MLUOP_DTYPE_BOOL;
+      break;
+    case DataType::UINT8:
+      type = MLUOP_DTYPE_UINT8;
+      break;
+    default:
+      break;
+  }
+  return type;
+}
+
+inline mluOpDataType_t ToMluOpDataType(
+    const paddle::framework::proto::VarType::Type& type) {
+  return ToMluOpDataType(framework::TransToPhiDataType(type));
+}
+
+template <typename T>
+inline mluOpDataType_t ToMluOpDataType() {
+  auto type = framework::ToDataType(std::type_index(typeid(T)));
+  return ToMluOpDataType(type);
+}
+
 // Converts (via narrowing) a type T value to a type U, and checks that the
 // value has no value change due to the conversion.
 template <typename WideT, typename NarrowT>
 ...

@@ -152,6 +201,10 @@ inline static cnnlHandle_t GetHandleFromCTX(const ExecutionContext& ctx) {
   return ctx.template device_context<MLUDeviceContext>().cnnl_handle();
 }
 
+inline static mluOpHandle_t GetMLUOpHandleFromCTX(const ExecutionContext& ctx) {
+  return ctx.template device_context<MLUDeviceContext>().mluOp_handle();
+}
+
 inline static const MLUDeviceContext& GetDevCtxFromCTX(
     const ExecutionContext& ctx) {
   return ctx.template device_context<MLUDeviceContext>();
 ...

@@ -281,6 +334,74 @@ class MLUCnnlTensorDesc {
   cnnlTensorDescriptor_t raw_tensor_desc = nullptr;
 };
 
+class MLUOpTensorDesc {
+ public:
+  MLUOpTensorDesc() {}
+
+  // SE_DISALLOW_COPY_AND_ASSIGN
+  MLUOpTensorDesc(const MLUOpTensorDesc& desc) = delete;
+  MLUOpTensorDesc& operator=(const MLUOpTensorDesc&) = delete;
+
+  MLUOpTensorDesc(MLUOpTensorDesc&& rhs)
+      : raw_tensor_desc(rhs.raw_tensor_desc) {
+    rhs.raw_tensor_desc = nullptr;
+  }
+
+  MLUOpTensorDesc& operator=(MLUOpTensorDesc&& rhs);
+
+  MLUOpTensorDesc(const int tensor_dim,
+                  const int dim_sizes[],
+                  const mluOpDataType_t tensor_dtype);
+
+  MLUOpTensorDesc(const int tensor_dim,
+                  const int dim_sizes[],
+                  const mluOpDataType_t tensor_dtype,
+                  const mluOpTensorLayout_t layout);
+
+  MLUOpTensorDesc(const int tensor_dim,
+                  const int dim_sizes[],
+                  const mluOpDataType_t tensor_dtype,
+                  int position);
+
+  MLUOpTensorDesc(const int tensor_dim,
+                  const int64_t dim_sizes[],
+                  const mluOpDataType_t tensor_dtype);
+
+  MLUOpTensorDesc(const int tensor_dim,
+                  const int64_t dim_sizes[],
+                  const mluOpDataType_t tensor_dtype,
+                  const mluOpTensorLayout_t layout);
+
+  MLUOpTensorDesc(const int tensor_dim,
+                  const int64_t dim_sizes[],
+                  const mluOpDataType_t tensor_dtype,
+                  int position);
+
+  MLUOpTensorDesc(const Tensor& tensor,
+                  const mluOpTensorLayout_t layout,
+                  const mluOpDataType_t tensor_dtype);
+
+  explicit MLUOpTensorDesc(const Tensor& tensor);
+
+  MLUOpTensorDesc(const Tensor& tensor,
+                  mluOpTensorLayout_t layout,
+                  const mluOpDataType_t tensor_dtype,
+                  int position);
+
+  MLUOpTensorDesc(const Tensor& tensor,
+                  mluOpTensorLayout_t layout,
+                  const mluOpDataType_t tensor_dtype,
+                  int position,
+                  float scale);
+
+  ~MLUOpTensorDesc();
+
+  const mluOpTensorDescriptor_t get() const { return raw_tensor_desc; }
+
+ private:
+  mluOpTensorDescriptor_t raw_tensor_desc = nullptr;
+};
+
 class MLUCnnlActivationDesc {
  public:
   MLUCnnlActivationDesc(const MLUCnnlActivationDesc& desc) = delete;
 ...
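
The new ToMluOpDataType overloads sit next to the existing ToCnnlDataType family, so the same mluOp dtype can be resolved from a phi DataType, a legacy VarType, or a C++ template parameter; dtypes the switch does not handle fall back to MLUOP_DTYPE_FLOAT, since the default case only breaks. A small illustrative snippet, assuming it is compiled inside namespace paddle::operators (nothing below is part of the commit):

// Illustrative only: three spellings of the same dtype query.
void DtypeSketch() {
  mluOpDataType_t a = ToMluOpDataType<float>();  // MLUOP_DTYPE_FLOAT
  mluOpDataType_t b =
      ToMluOpDataType(paddle::experimental::DataType::FLOAT16);  // MLUOP_DTYPE_HALF
  mluOpDataType_t c =
      ToMluOpDataType(paddle::framework::proto::VarType::INT32);  // MLUOP_DTYPE_INT32
}
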
paddle/fluid/platform/device/mlu/device_context.cc

@@ -28,11 +28,13 @@ MLUContext::MLUContext(const MLUPlace& place, const int priority) {
   MLUDeviceGuard guard(place_.device);
   stream_.reset(new stream::MLUStream(place_, priority));
   InitCNNLContext();
+  InitMLUOPContext();
 }
 
 MLUContext::~MLUContext() {
   MLUDeviceGuard guard(place_.device);
   DestoryCNNLContext();
+  DestoryMLUOPContext();
 }
 
 MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
 ...

@@ -41,6 +43,7 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
   driver_version_ = GetMLUDriverVersion(place_.device);
   runtime_version_ = GetMLURuntimeVersion(place_.device);
   cnnl_version_ = GetMLUCnnlVersion(place_.device);
+  mluOp_version_ = GetMLUOpVersion(place_.device);
 
   LOG_FIRST_N(WARNING, 1)
       << "Please NOTE: device: " << static_cast<int>(place_.device)
 ...

@@ -51,7 +54,9 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
       << ", Runtime API Version: " << runtime_version_ / 10000 << "."
       << (runtime_version_ / 100) % 100 << "." << runtime_version_ % 100
       << ", Cnnl API Version: " << cnnl_version_ / 10000 << "."
-      << (cnnl_version_ / 100) % 100 << "." << cnnl_version_ % 100;
+      << (cnnl_version_ / 100) % 100 << "." << cnnl_version_ % 100
+      << ", MluOp API Version: " << mluOp_version_ / 10000 << "."
+      << (mluOp_version_ / 100) % 100 << "." << mluOp_version_ % 100;
 
   default_ctx_.reset(new MLUContext(place_));
 }
 ...

@@ -70,6 +75,10 @@ mluCnnlHandle MLUDeviceContext::cnnl_handle() const {
   return context()->CnnlHandle();
 }
 
+mluOpHandle MLUDeviceContext::mluOp_handle() const {
+  return context()->MluOpHandle();
+}
+
 mluStream MLUDeviceContext::stream() const { return context()->RawStream(); }
 
 #endif
 ...
paddle/fluid/platform/device/mlu/device_context.h

@@ -53,12 +53,19 @@ class MLUContext {
   const mluCnnlHandle& CnnlHandle() const { return cnnl_handle_; }
 
+  const mluOpHandle& MluOpHandle() const { return mluOp_handle_; }
+
  private:
   void InitCNNLContext() {
     PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreate(&cnnl_handle_));
     PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetQueue(cnnl_handle_, RawStream()));
   }
 
+  void InitMLUOPContext() {
+    PADDLE_ENFORCE_MLU_SUCCESS(mluOpCreate(&mluOp_handle_));
+    PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetQueue(mluOp_handle_, RawStream()));
+  }
+
   void DestoryCNNLContext() {
     if (cnnl_handle_) {
       PADDLE_ENFORCE_MLU_SUCCESS(cnnlDestroy(cnnl_handle_));
 ...

@@ -66,10 +73,18 @@ class MLUContext {
     cnnl_handle_ = nullptr;
   }
 
+  void DestoryMLUOPContext() {
+    if (mluOp_handle_) {
+      PADDLE_ENFORCE_MLU_SUCCESS(mluOpDestroy(mluOp_handle_));
+    }
+    mluOp_handle_ = nullptr;
+  }
+
   MLUPlace place_;
   std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
   std::unique_ptr<stream::MLUStream> stream_;
   mluCnnlHandle cnnl_handle_;
+  mluOpHandle mluOp_handle_;
 
   DISABLE_COPY_AND_ASSIGN(MLUContext);
 };
 ...

@@ -89,6 +104,9 @@ class MLUDeviceContext : public DeviceContext {
   /*! \brief Return cnnl handle in the device context. */
   mluCnnlHandle cnnl_handle() const;
 
+  /*! \brief Return mluOp handle in the device context. */
+  mluOpHandle mluOp_handle() const;
+
   /*! \brief Return mlu stream in the device context. */
   mluStream stream() const;
 ...

@@ -135,6 +153,7 @@ class MLUDeviceContext : public DeviceContext {
   int driver_version_;
   int runtime_version_;
   int cnnl_version_;
+  int mluOp_version_;
   MLUPlace place_;
   std::shared_ptr<MLUContext> default_ctx_;
 ...
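
With these additions the mluOp handle is plumbed exactly like the cnnl one: MLUContext creates the raw handle in InitMLUOPContext(), binds it to the same queue as RawStream(), and MLUDeviceContext::mluOp_handle() forwards to MLUContext::MluOpHandle(). A short sketch of the chain a kernel-side helper such as GetMLUOpHandleFromCTX walks (illustrative only; the surrounding ctx is assumed and not part of this diff):

// `ctx` is an ExecutionContext inside an operator kernel.
auto& dev_ctx = ctx.template device_context<paddle::platform::MLUDeviceContext>();
mluOpHandle handle = dev_ctx.mluOp_handle();  // -> context()->MluOpHandle() -> mluOp_handle_
// Work issued through this handle shares the queue bound by
// mluOpSetQueue(mluOp_handle_, RawStream()) in InitMLUOPContext().
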
paddle/fluid/platform/device/mlu/enforce.h

@@ -41,6 +41,7 @@ struct MLUStatusType {};
 DEFINE_MLU_STATUS_TYPE(cnrtStatus, cnrtSuccess, CNRT);
 DEFINE_MLU_STATUS_TYPE(cnnlStatus, CNNL_STATUS_SUCCESS, CNNL);
+DEFINE_MLU_STATUS_TYPE(mluOpStatus, MLUOP_STATUS_SUCCESS, MLUOP);
 DEFINE_MLU_STATUS_TYPE(cnStatus, CN_SUCCESS, CN);
 #ifdef PADDLE_WITH_CNCL
 DEFINE_MLU_STATUS_TYPE(cnclStatus, CNCL_RET_SUCCESS, CNCL);
 ...

@@ -68,6 +69,15 @@ inline std::string build_mlu_error_msg(cnnlStatus stat) {
   return sout.str();
 }
 
+/*************** MLU OP ERROR ***************/
+inline bool is_error(mluOpStatus stat) { return stat != MLUOP_STATUS_SUCCESS; }
+
+inline std::string build_mlu_error_msg(mluOpStatus stat) {
+  std::ostringstream sout;
+  sout << "MLU OP error(" << stat << "), " << mluOpGetErrorString(stat) << ". ";
+  return sout.str();
+}
+
 /*************** CN API ERROR ***************/
 inline bool is_error(cnStatus stat) { return stat != CN_SUCCESS; }
 ...
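
With the mluOpStatus specialization and the is_error/build_mlu_error_msg overloads in place, the existing PADDLE_ENFORCE_MLU_SUCCESS macro accepts mluOp return codes directly; the descriptor and context code earlier in this diff relies on that. For example (the two calls are lifted from InitMLUOPContext() above):

PADDLE_ENFORCE_MLU_SUCCESS(mluOpCreate(&mluOp_handle_));
PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetQueue(mluOp_handle_, RawStream()));
// A failing call raises an error whose text comes from build_mlu_error_msg:
//   "MLU OP error(<stat>), <mluOpGetErrorString(stat)>. "
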
paddle/fluid/platform/device/mlu/mlu_info.cc

@@ -126,6 +126,13 @@ int GetMLUCnnlVersion(int id) {
   return x * 10000 + y * 100 + z;
 }
 
+int GetMLUOpVersion(int id) {
+  CheckDeviceId(id);
+  int x, y, z;
+  mluOpGetLibVersion(&x, &y, &z);
+  return x * 10000 + y * 100 + z;
+}
+
 int GetMLUCurrentDeviceId() {
   int device_id;
   PADDLE_ENFORCE_MLU_SUCCESS(cnrtGetDevice(&device_id));
 ...
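
GetMLUOpVersion packs the major/minor/patch numbers reported by mluOpGetLibVersion into one int, exactly as the existing driver/runtime/cnnl helpers do, and the device-context log shown earlier unpacks it with the same arithmetic. A worked example, assuming the mluops 0.2.0 release referenced in the Dockerfile below:

// 0.2.0 packs as 0 * 10000 + 2 * 100 + 0 = 200.
int v = 200;                  // what GetMLUOpVersion(id) would return for 0.2.0
int major = v / 10000;        // 0
int minor = (v / 100) % 100;  // 2
int patch = v % 100;          // 0  -> logged as "MluOp API Version: 0.2.0"
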
paddle/fluid/platform/device/mlu/mlu_info.h

@@ -16,10 +16,11 @@ limitations under the License. */
 #ifdef PADDLE_WITH_MLU
 #include <cn_api.h>
-#include <cndrv_id.h>
 #include <cnnl.h>
 #include <cnpapi.h>
+#include <cnpapi_cndrv_id.h>
 #include <cnrt.h>
+#include <mlu_op.h>
 #ifdef PADDLE_WITH_CNCL
 #include <cncl.h>
 #endif
 ...

@@ -30,11 +31,13 @@ namespace paddle {
 using cnStatus = CNresult;
 using cnrtStatus = cnrtRet_t;
 using cnnlStatus = cnnlStatus_t;
+using mluOpStatus = mluOpStatus_t;
 #ifdef PADDLE_WITH_CNCL
 using cnclStatus = cnclResult_t;
 #endif
 using mluStream = cnrtQueue_t;
 using mluCnnlHandle = cnnlHandle_t;
+using mluOpHandle = mluOpHandle_t;
 using mluEventHandle = cnrtNotifier_t;
 using mluDeviceHandle = CNdev;
 ...

@@ -49,6 +52,9 @@ int GetMLURuntimeVersion(int id);
 //! Get the cnnl version of the ith MLU.
 int GetMLUCnnlVersion(int id);
 
+//! Get the mluOp version of the ith MLU.
+int GetMLUOpVersion(int id);
+
 //! Get the total number of MLU devices in system.
 int GetMLUDeviceCount();
 ...
python/paddle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py

@@ -186,16 +186,15 @@ class TestGridSamplerOp(OpTest):
         self.mode = "bilinear"
 
 
-class Case1(TestGridSamplerOp):
-
-    def initTestCase(self):
-        self.x_shape = (2, 3, 5, 6)
-        self.grid_shape = (2, 8, 9, 2)
-        self.theta_shape = (2, 2, 3)
-        self.align_corners = True
-        self.padding_mode = "zeros"
-        self.mode = "bilinear"
+# TODO(fwg): Test this case when cnnl support align_corners = True.
+# class Case1(TestGridSamplerOp):
+#
+#     def initTestCase(self):
+#         self.x_shape = (2, 3, 5, 6)
+#         self.grid_shape = (2, 8, 9, 2)
+#         self.theta_shape = (2, 2, 3)
+#         self.align_corners = True
+#         self.padding_mode = "zeros"
+#         self.mode = "bilinear"
 
 
 class LargeInputCase(TestGridSamplerOp):
 ...

@@ -209,16 +208,16 @@ class LargeInputCase(TestGridSamplerOp):
         self.mode = "bilinear"
 
 
-class Case2(LargeInputCase):
-
-    def initTestCase(self):
-        self.x_shape = (2, 3, 128, 128)
-        self.grid_shape = (2, 130, 130, 2)
-        self.theta_shape = (2, 2, 3)
-        self.align_corners = True
-        self.padding_mode = "zeros"
-        self.mode = "bilinear"
+# TODO(fwg): Test this case when cnnl support align_corners = True.
+# class Case2(LargeInputCase):
+#
+#     def initTestCase(self):
+#         self.x_shape = (2, 3, 128, 128)
+#         self.grid_shape = (2, 130, 130, 2)
+#         self.theta_shape = (2, 2, 3)
+#         self.align_corners = True
+#         self.padding_mode = "zeros"
+#         self.mode = "bilinear"
 
 
 if __name__ == "__main__":
     unittest.main()
python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py

@@ -152,6 +152,11 @@ class TestKeepDim8DReduce(TestMLUReduceSumOp):
         self.axis = (3, 4, 5)
         self.keep_dim = True
 
+    def test_check_grad(self):
+        self.check_grad_with_place(self.place, ['X'],
+                                   'Out',
+                                   max_relative_error=0.03)
+
 
 class TestReduceAll(TestMLUReduceSumOp):
 ...
tools/dockerfile/Dockerfile.mlu

 # A image for building paddle binaries
-# Update CNTOOLKIT_VERSION, CNNL_VERSION and CNCL_VERSION if using other versions
+# Update CNTOOLKIT_VERSION, CNNL_VERSION, CNCL_VERSION and MLUOPS_VERSION if using other versions
 #
 # Build:
-# - CNTOOLKIT_VERSION 2.8.5
-# - CNNL_VERSION 1.10.5
-# - CNCL_VERSION 1.1.2
+# - CNTOOLKIT_VERSION 3.0.2-1
+# - CNNL_VERSION 1.13.0-1
+# - CNCL_VERSION 1.2.1-1
+# - MLUOPS_VERSION 0.2.0-1
 #
 # Download three packages from FTP (need to connect cambricon AE to get FTP url)
-# - cntoolkit_2.8.5.ubuntu18.04_amd64.deb
-# - cnnl_1.10.5.ubuntu18.04_amd64.deb
-# - cncl_1.1.2.ubuntu18.04_amd64.deb
+# - cntoolkit_3.0.2-1.ubuntu18.04_amd64.deb
+# - cnnl_1.13.0-1.ubuntu18.04_amd64.deb
+# - cncl_1.2.1-1.ubuntu18.04_amd64.deb
+# - mluops_0.2.0-1.ubuntu18.04_amd64.deb
 # copy them to current directory first, then run build commands
 #
 # For example:
 ...

@@ -19,11 +21,13 @@
 # (get cntoolkit pkg)
 # (get cnnl pkg)
 # (get cncl pkg)
+# (get mluops pkg)
 #
 # docker build -f Dockerfile.mlu \
-#   --build-arg CNTOOLKIT_VERSION=2.8.5 \
-#   --build-arg CNNL_VERSION=1.10.5 \
-#   --build-arg CNCL_VERSION=1.1.2 \
+#   --build-arg CNTOOLKIT_VERSION=3.0.2-1 \
+#   --build-arg CNNL_VERSION=1.13.0-1 \
+#   --build-arg CNCL_VERSION=1.2.1-1 \
+#   --build-arg MLUOPS_VERSION=0.2.0-1 \
 #   -t paddlepaddle/paddle:latest-dev-mlu .
 #
 # without mlu device:
 ...

@@ -40,12 +44,14 @@ MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
 ENV WITH_GPU=OFF
 
-ARG CNTOOLKIT_VERSION=2.8.5
-ARG CNNL_VERSION=1.10.5
-ARG CNCL_VERSION=1.1.2
+ARG CNTOOLKIT_VERSION=3.0.2-1
+ARG CNNL_VERSION=1.13.0-1
+ARG CNCL_VERSION=1.2.1-1
+ARG MLUOPS_VERSION=0.2.0-1
 ARG CNTOOLKIT_PKG=cntoolkit_$CNTOOLKIT_VERSION.ubuntu18.04_amd64.deb
 ARG CNNL_PKG=cnnl_$CNNL_VERSION.ubuntu18.04_amd64.deb
 ARG CNCL_PKG=cncl_$CNCL_VERSION.ubuntu18.04_amd64.deb
+ARG MLUOPS_PKG=mluops_$MLUOPS_VERSION.ubuntu18.04_amd64.deb
 
 # install cntoolkit
 COPY $CNTOOLKIT_PKG ./
 ...

@@ -67,6 +73,11 @@ COPY $CNCL_PKG ./
 RUN dpkg -i $CNCL_PKG && \
     rm -f $CNCL_PKG
 
+# install mluops
+COPY $MLUOPS_PKG ./
+RUN dpkg -i $MLUOPS_PKG && \
+    rm -f $MLUOPS_PKG
+
 # Clean
 RUN apt-get clean -y
 ...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录