Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
3e1e482b
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3e1e482b
编写于
9月 26, 2022
作者:
C
cifar10
提交者:
GitHub
9月 26, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[MLU] fluid: add mluop (#46429)
上级
b0ec8efb
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
434 addition
and
47 deletion
+434
-47
cmake/neuware.cmake
cmake/neuware.cmake
+3
-1
paddle/fluid/operators/mlu/mlu_baseop.cc
paddle/fluid/operators/mlu/mlu_baseop.cc
+209
-11
paddle/fluid/operators/mlu/mlu_baseop.h
paddle/fluid/operators/mlu/mlu_baseop.h
+121
-0
paddle/fluid/platform/device/mlu/device_context.cc
paddle/fluid/platform/device/mlu/device_context.cc
+10
-1
paddle/fluid/platform/device/mlu/device_context.h
paddle/fluid/platform/device/mlu/device_context.h
+19
-0
paddle/fluid/platform/device/mlu/enforce.h
paddle/fluid/platform/device/mlu/enforce.h
+10
-0
paddle/fluid/platform/device/mlu/mlu_info.cc
paddle/fluid/platform/device/mlu/mlu_info.cc
+7
-0
paddle/fluid/platform/device/mlu/mlu_info.h
paddle/fluid/platform/device/mlu/mlu_info.h
+7
-1
python/paddle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py
...dle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py
+19
-20
python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py
...addle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py
+5
-0
tools/dockerfile/Dockerfile.mlu
tools/dockerfile/Dockerfile.mlu
+24
-13
未找到文件。
cmake/neuware.cmake
浏览文件 @
3e1e482b
...
...
@@ -15,12 +15,14 @@ set(NEUWARE_LIB_DIR ${NEUWARE_HOME}/lib64)
include_directories
(
${
NEUWARE_INCLUDE_DIR
}
)
set
(
CNNL_LIB
${
NEUWARE_LIB_DIR
}
/libcnnl.so
)
set
(
MLUOP_LIB
${
NEUWARE_LIB_DIR
}
/libmluops.so
)
set
(
CNRT_LIB
${
NEUWARE_LIB_DIR
}
/libcnrt.so
)
set
(
CNDRV_LIB
${
NEUWARE_LIB_DIR
}
/libcndrv.so
)
set
(
CNPAPI_LIB
${
NEUWARE_LIB_DIR
}
/libcnpapi.so
)
generate_dummy_static_lib
(
LIB_NAME
"neuware_lib"
GENERATOR
"neuware.cmake"
)
set
(
NEUWARE_LIB_DEPS
${
CNNL_LIB
}
${
CNRT_LIB
}
${
CNDRV_LIB
}
${
CNPAPI_LIB
}
)
set
(
NEUWARE_LIB_DEPS
${
CNNL_LIB
}
${
MLUOP_LIB
}
${
CNRT_LIB
}
${
CNDRV_LIB
}
${
CNPAPI_LIB
}
)
if
(
WITH_CNCL
)
message
(
STATUS
"Compile with CNCL!"
)
...
...
paddle/fluid/operators/mlu/mlu_baseop.cc
浏览文件 @
3e1e482b
...
...
@@ -256,6 +256,186 @@ MLUCnnlTensorDesc::~MLUCnnlTensorDesc() {
}
}
class
MLUOpTensorDescPool
{
public:
mluOpTensorDescriptor_t
Pop
()
{
mluOpTensorDescriptor_t
raw_desc
;
if
(
q_
.
try_dequeue
(
raw_desc
))
{
return
raw_desc
;
}
else
{
mluOpCreateTensorDescriptor
(
&
raw_desc
);
return
raw_desc
;
}
}
void
Recycle
(
mluOpTensorDescriptor_t
desc
)
{
mluOpResetTensorDescriptor
(
desc
);
q_
.
enqueue
(
desc
);
}
~
MLUOpTensorDescPool
()
{
auto
size
=
q_
.
size_approx
();
if
(
size
>
0
)
{
std
::
vector
<
mluOpTensorDescriptor_t
>
vec
(
size
);
q_
.
try_dequeue_bulk
(
vec
.
data
(),
size
);
for
(
auto
desc
:
vec
)
{
mluOpDestroyTensorDescriptor
(
desc
);
}
}
}
private:
moodycamel
::
ConcurrentQueue
<
mluOpTensorDescriptor_t
>
q_
;
};
static
MLUOpTensorDescPool
g_mluop_tensor_desc_pool
;
// Move assignment: returns the descriptor currently held (if any) to the
// pool, then takes over the descriptor of `rhs`, leaving `rhs` empty.
MLUOpTensorDesc& MLUOpTensorDesc::operator=(MLUOpTensorDesc&& rhs) {
  // Without this guard a self-move would recycle the descriptor and then
  // null out the only pointer to it, silently emptying the object.
  if (this == &rhs) {
    return *this;
  }
  if (raw_tensor_desc) {
    g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
  }
  raw_tensor_desc = rhs.raw_tensor_desc;
  rhs.raw_tensor_desc = nullptr;
  return *this;
}
// Builds a descriptor for `tensor_dim` dimensions of sizes `dim_sizes` with
// element type `tensor_dtype`, using MLUOP_LAYOUT_ARRAY as the layout.
// Forwards to the overload that takes an explicit layout.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int dim_sizes[],
                                 const mluOpDataType_t tensor_dtype)
    : MLUOpTensorDesc(tensor_dim, dim_sizes, tensor_dtype,
                      MLUOP_LAYOUT_ARRAY) {}
// Builds a descriptor for `tensor_dim` dimensions of sizes `dim_sizes` with
// element type `tensor_dtype` and the caller-supplied `layout`.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 const mluOpTensorLayout_t layout) {
  // Take a descriptor from the file-local pool, then fill it in.
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
      raw_tensor_desc, layout, tensor_dtype, tensor_dim, dim_sizes));
}
// Builds the descriptor through the (tensor_dim, dim_sizes, tensor_dtype)
// overload and then applies `position` with
// mluOpSetTensorDescriptorPosition.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 int position)
    : MLUOpTensorDesc(tensor_dim, dim_sizes, tensor_dtype) {
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
}
// Builds a descriptor from 64-bit dimension sizes, using MLUOP_LAYOUT_ARRAY
// as the layout. Every size is converted to int through
// CheckedNarrowing<int64_t, int> before it is handed to mluOp.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int64_t dim_sizes[],
                                 const mluOpDataType_t tensor_dtype) {
  std::vector<int> dim_sizes_int32(tensor_dim);
  // Iterate over the raw pointers directly: constructing a
  // std::vector<int64_t>::const_iterator from a pointer is not portable
  // across standard library implementations.
  std::transform(dim_sizes,
                 dim_sizes + tensor_dim,
                 dim_sizes_int32.begin(),
                 &CheckedNarrowing<int64_t, int>);
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptor(raw_tensor_desc,
                               MLUOP_LAYOUT_ARRAY,
                               tensor_dtype,
                               tensor_dim,
                               dim_sizes_int32.data()));
}
// Builds a descriptor from 64-bit dimension sizes with the caller-supplied
// `layout`. Every size is converted to int through
// CheckedNarrowing<int64_t, int> before it is handed to mluOp.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int64_t dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 const mluOpTensorLayout_t layout) {
  std::vector<int> dim_sizes_int32(tensor_dim);
  // Iterate over the raw pointers directly: constructing a
  // std::vector<int64_t>::const_iterator from a pointer is not portable
  // across standard library implementations.
  std::transform(dim_sizes,
                 dim_sizes + tensor_dim,
                 dim_sizes_int32.begin(),
                 &CheckedNarrowing<int64_t, int>);
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
                                                      layout,
                                                      tensor_dtype,
                                                      tensor_dim,
                                                      dim_sizes_int32.data()));
}
// Builds a descriptor from 64-bit dimension sizes (MLUOP_LAYOUT_ARRAY
// layout) and then applies `position` with
// mluOpSetTensorDescriptorPosition.
//
// The descriptor itself is set up by the (tensor_dim, int64 dim_sizes,
// tensor_dtype) overload, matching how the int-sized overload with
// `position` is written. Because construction is delegated, the destructor
// runs if setting the position throws, so the pooled descriptor is recycled
// instead of being lost.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int64_t dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 int position)
    : MLUOpTensorDesc(tensor_dim, dim_sizes, tensor_dtype) {
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
}
// Builds a descriptor whose shape is taken from `tensor.dims()`, with the
// given `layout` and `tensor_dtype`. A tensor with zero dimensions is
// described as a one-dimensional tensor of size 1.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
                                 const mluOpTensorLayout_t layout,
                                 const mluOpDataType_t tensor_dtype) {
  auto shape = phi::vectorize<int>(tensor.dims());
  int rank = shape.size();
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  if (rank != 0) {
    std::vector<int> shape_as_int(shape.begin(), shape.end());
    PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
        raw_tensor_desc, layout, tensor_dtype, rank, shape_as_int.data()));
  } else {
    // Zero-dimensional tensor: describe it as shape {1}.
    int unit_shape[1] = {1};
    PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
        raw_tensor_desc, layout, tensor_dtype, 1, unit_shape));
  }
}
// Builds a descriptor for `tensor` using MLUOP_LAYOUT_ARRAY and the mluOp
// data type that ToMluOpDataType derives from `tensor.dtype()`.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor)
    : MLUOpTensorDesc(
          tensor, MLUOP_LAYOUT_ARRAY, ToMluOpDataType(tensor.dtype())) {}
// Builds the descriptor through the (tensor, layout, tensor_dtype) overload
// and then applies `position` with mluOpSetTensorDescriptorPosition.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
                                 mluOpTensorLayout_t layout,
                                 const mluOpDataType_t tensor_dtype,
                                 int position)
    : MLUOpTensorDesc(tensor, layout, tensor_dtype) {
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
}
// Builds the descriptor through the (tensor, layout, tensor_dtype) overload
// and then applies `position` and `scale` together with
// mluOpSetTensorDescriptorPositionAndScale.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
                                 mluOpTensorLayout_t layout,
                                 const mluOpDataType_t tensor_dtype,
                                 int position,
                                 float scale)
    : MLUOpTensorDesc(tensor, layout, tensor_dtype) {
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptorPositionAndScale(
      raw_tensor_desc, position, scale));
}
// Returns the held descriptor to the file-local pool. An object that holds
// no descriptor has nothing to give back.
MLUOpTensorDesc::~MLUOpTensorDesc() {
  if (!raw_tensor_desc) {
    return;
  }
  g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
}
MLUCnnlActivationDesc
::
MLUCnnlActivationDesc
(
const
cnnlActivationMode_t
act_mode
,
const
float
ceof
)
{
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlCreateActivationDescriptor
(
&
active_desc_
));
...
...
@@ -1563,17 +1743,35 @@ MLURNNDesc::~MLURNNDesc() {
void
*
indices_out
)
{
cnnlHandle_t
handle
=
GetHandleFromCTX
(
ctx
);
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlTopKTensor
(
handle
,
input_desc
,
input
,
k
,
dim
,
largest
,
sorted
,
values_output_desc
,
values_out
,
indices_output_desc
,
indices_out
));
size_t
workspace_size
;
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlGetTopKTensorWorkspaceSize
(
handle
,
input_desc
,
k
,
dim
,
largest
,
values_output_desc
,
indices_output_desc
,
&
workspace_size
));
auto
&
dev_ctx
=
GetDevCtxFromCTX
(
ctx
);
Tensor
workspace
=
ctx
.
AllocateTmpTensor
<
int8_t
,
MLUDeviceContext
>
(
{
static_cast
<
int64_t
>
(
workspace_size
)},
dev_ctx
);
void
*
workspace_ptr
=
workspace
.
mutable_data
(
ctx
.
GetPlace
());
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlTopKTensor_v3
(
handle
,
input_desc
,
input
,
k
,
dim
,
largest
,
sorted
,
false
/*lower_index_first*/
,
workspace_ptr
,
workspace_size
,
values_output_desc
,
values_out
,
indices_output_desc
,
indices_out
));
}
/* static */
void
MLUCnnl
::
StridedSlice
(
...
...
paddle/fluid/operators/mlu/mlu_baseop.h
浏览文件 @
3e1e482b
...
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include <cn_api.h>
#include <cnnl.h>
#include <concurrentqueue.h>
#include <mlu_op.h>
#include <string>
#include <vector>
...
...
@@ -138,6 +139,54 @@ inline cnnlDataType_t ToCnnlDataType() {
return
ToCnnlDataType
(
type
);
}
// Maps a Paddle DataType to the corresponding mluOp data type. Any dtype
// without a case below yields MLUOP_DTYPE_FLOAT.
inline mluOpDataType_t ToMluOpDataType(
    const paddle::experimental::DataType& dtype) {
  switch (dtype) {
    case DataType::FLOAT16:
      return MLUOP_DTYPE_HALF;
    case DataType::FLOAT32:
      return MLUOP_DTYPE_FLOAT;
    case DataType::FLOAT64:
      return MLUOP_DTYPE_DOUBLE;
    case DataType::INT8:
      return MLUOP_DTYPE_INT8;
    case DataType::INT16:
      return MLUOP_DTYPE_INT16;
    case DataType::INT32:
      return MLUOP_DTYPE_INT32;
    case DataType::INT64:
      return MLUOP_DTYPE_INT64;
    case DataType::BOOL:
      return MLUOP_DTYPE_BOOL;
    case DataType::UINT8:
      return MLUOP_DTYPE_UINT8;
    default:
      // Fallback for every dtype not listed above.
      return MLUOP_DTYPE_FLOAT;
  }
}
// Converts a framework proto VarType::Type to an mluOp data type by first
// translating it with framework::TransToPhiDataType.
inline mluOpDataType_t ToMluOpDataType(
    const paddle::framework::proto::VarType::Type& type) {
  const auto phi_dtype = framework::TransToPhiDataType(type);
  return ToMluOpDataType(phi_dtype);
}
// Returns the mluOp data type for the C++ type T: T is looked up with
// framework::ToDataType and the result is converted by ToMluOpDataType.
template <typename T>
inline mluOpDataType_t ToMluOpDataType() {
  return ToMluOpDataType(framework::ToDataType(std::type_index(typeid(T))));
}
// Converts (via narrowing) a value of type WideT to type NarrowT, and checks
// that the value is unchanged by the conversion.
template
<
typename
WideT
,
typename
NarrowT
>
...
...
@@ -152,6 +201,10 @@ inline static cnnlHandle_t GetHandleFromCTX(const ExecutionContext& ctx) {
return
ctx
.
template
device_context
<
MLUDeviceContext
>().
cnnl_handle
();
}
// Returns the mluOp handle of the MLUDeviceContext attached to `ctx`.
inline static mluOpHandle_t GetMLUOpHandleFromCTX(const ExecutionContext& ctx) {
  const auto& mlu_dev_ctx = ctx.template device_context<MLUDeviceContext>();
  return mlu_dev_ctx.mluOp_handle();
}
inline
static
const
MLUDeviceContext
&
GetDevCtxFromCTX
(
const
ExecutionContext
&
ctx
)
{
return
ctx
.
template
device_context
<
MLUDeviceContext
>();
...
...
@@ -281,6 +334,74 @@ class MLUCnnlTensorDesc {
cnnlTensorDescriptor_t
raw_tensor_desc
=
nullptr
;
};
// Move-only owner of an mluOpTensorDescriptor_t. A default-constructed or
// moved-from object holds nullptr.
class MLUOpTensorDesc {
 public:
  // Creates an empty object that holds no descriptor.
  MLUOpTensorDesc() = default;

  // Copying is disabled.
  MLUOpTensorDesc(const MLUOpTensorDesc& desc) = delete;
  MLUOpTensorDesc& operator=(const MLUOpTensorDesc&) = delete;

  // Takes over the descriptor of `rhs` and leaves `rhs` empty. Only pointer
  // values are exchanged, so this cannot throw.
  MLUOpTensorDesc(MLUOpTensorDesc&& rhs) noexcept
      : raw_tensor_desc(rhs.raw_tensor_desc) {
    rhs.raw_tensor_desc = nullptr;
  }

  MLUOpTensorDesc& operator=(MLUOpTensorDesc&& rhs);

  // Overloads taking int dimension sizes: without a layout, with an explicit
  // layout, and with a position.
  MLUOpTensorDesc(const int tensor_dim,
                  const int dim_sizes[],
                  const mluOpDataType_t tensor_dtype);

  MLUOpTensorDesc(const int tensor_dim,
                  const int dim_sizes[],
                  const mluOpDataType_t tensor_dtype,
                  const mluOpTensorLayout_t layout);

  MLUOpTensorDesc(const int tensor_dim,
                  const int dim_sizes[],
                  const mluOpDataType_t tensor_dtype,
                  int position);

  // The same three overloads for int64_t dimension sizes.
  MLUOpTensorDesc(const int tensor_dim,
                  const int64_t dim_sizes[],
                  const mluOpDataType_t tensor_dtype);

  MLUOpTensorDesc(const int tensor_dim,
                  const int64_t dim_sizes[],
                  const mluOpDataType_t tensor_dtype,
                  const mluOpTensorLayout_t layout);

  MLUOpTensorDesc(const int tensor_dim,
                  const int64_t dim_sizes[],
                  const mluOpDataType_t tensor_dtype,
                  int position);

  // Overloads that describe a Tensor, optionally with an explicit layout and
  // data type, a position, or a position and a scale.
  MLUOpTensorDesc(const Tensor& tensor,
                  const mluOpTensorLayout_t layout,
                  const mluOpDataType_t tensor_dtype);

  explicit MLUOpTensorDesc(const Tensor& tensor);

  MLUOpTensorDesc(const Tensor& tensor,
                  mluOpTensorLayout_t layout,
                  const mluOpDataType_t tensor_dtype,
                  int position);

  MLUOpTensorDesc(const Tensor& tensor,
                  mluOpTensorLayout_t layout,
                  const mluOpDataType_t tensor_dtype,
                  int position,
                  float scale);

  ~MLUOpTensorDesc();

  // Returns the held descriptor (nullptr when the object is empty).
  const mluOpTensorDescriptor_t get() const { return raw_tensor_desc; }

 private:
  mluOpTensorDescriptor_t raw_tensor_desc = nullptr;
};
class
MLUCnnlActivationDesc
{
public:
MLUCnnlActivationDesc
(
const
MLUCnnlActivationDesc
&
desc
)
=
delete
;
...
...
paddle/fluid/platform/device/mlu/device_context.cc
浏览文件 @
3e1e482b
...
...
@@ -28,11 +28,13 @@ MLUContext::MLUContext(const MLUPlace& place, const int priority) {
MLUDeviceGuard
guard
(
place_
.
device
);
stream_
.
reset
(
new
stream
::
MLUStream
(
place_
,
priority
));
InitCNNLContext
();
InitMLUOPContext
();
}
// Tears down the CNNL context and then the mluOp context while a
// MLUDeviceGuard built from this context's device is in scope.
MLUContext::~MLUContext() {
  MLUDeviceGuard guard(place_.device);

  DestoryCNNLContext();
  DestoryMLUOPContext();
}
MLUDeviceContext
::
MLUDeviceContext
(
MLUPlace
place
)
:
place_
(
place
)
{
...
...
@@ -41,6 +43,7 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
driver_version_
=
GetMLUDriverVersion
(
place_
.
device
);
runtime_version_
=
GetMLURuntimeVersion
(
place_
.
device
);
cnnl_version_
=
GetMLUCnnlVersion
(
place_
.
device
);
mluOp_version_
=
GetMLUOpVersion
(
place_
.
device
);
LOG_FIRST_N
(
WARNING
,
1
)
<<
"Please NOTE: device: "
<<
static_cast
<
int
>
(
place_
.
device
)
...
...
@@ -51,7 +54,9 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
<<
", Runtime API Version: "
<<
runtime_version_
/
10000
<<
"."
<<
(
runtime_version_
/
100
)
%
100
<<
"."
<<
runtime_version_
%
100
<<
", Cnnl API Version: "
<<
cnnl_version_
/
10000
<<
"."
<<
(
cnnl_version_
/
100
)
%
100
<<
"."
<<
cnnl_version_
%
100
;
<<
(
cnnl_version_
/
100
)
%
100
<<
"."
<<
cnnl_version_
%
100
<<
", MluOp API Version: "
<<
mluOp_version_
/
10000
<<
"."
<<
(
mluOp_version_
/
100
)
%
100
<<
"."
<<
mluOp_version_
%
100
;
default_ctx_
.
reset
(
new
MLUContext
(
place_
));
}
...
...
@@ -70,6 +75,10 @@ mluCnnlHandle MLUDeviceContext::cnnl_handle() const {
return
context
()
->
CnnlHandle
();
}
// Returns the mluOp handle owned by the context that context() yields.
mluOpHandle MLUDeviceContext::mluOp_handle() const {
  const auto& mlu_ctx = context();
  return mlu_ctx->MluOpHandle();
}
// Returns the raw stream of the context that context() yields.
mluStream MLUDeviceContext::stream() const {
  const auto& mlu_ctx = context();
  return mlu_ctx->RawStream();
}
#endif
...
...
paddle/fluid/platform/device/mlu/device_context.h
浏览文件 @
3e1e482b
...
...
@@ -53,12 +53,19 @@ class MLUContext {
// Read-only access to the stored CNNL handle.
const mluCnnlHandle& CnnlHandle() const {
  return cnnl_handle_;
}
// Read-only access to the stored mluOp handle.
const mluOpHandle& MluOpHandle() const {
  return mluOp_handle_;
}
private:
// Creates the CNNL handle, then binds it to the queue returned by
// RawStream(). Either step failing is reported through
// PADDLE_ENFORCE_MLU_SUCCESS.
void InitCNNLContext() {
  // Step 1: create the handle.
  PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreate(&cnnl_handle_));
  // Step 2: attach the handle to this context's queue.
  PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetQueue(cnnl_handle_, RawStream()));
}
// Creates the mluOp handle, then binds it to the queue returned by
// RawStream(). Either step failing is reported through
// PADDLE_ENFORCE_MLU_SUCCESS.
void InitMLUOPContext() {
  // Step 1: create the handle.
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpCreate(&mluOp_handle_));
  // Step 2: attach the handle to this context's queue.
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetQueue(mluOp_handle_, RawStream()));
}
void
DestoryCNNLContext
()
{
if
(
cnnl_handle_
)
{
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlDestroy
(
cnnl_handle_
));
...
...
@@ -66,10 +73,18 @@ class MLUContext {
cnnl_handle_
=
nullptr
;
}
// Destroys the mluOp handle when one is held, and always leaves the member
// set to nullptr afterwards.
void DestoryMLUOPContext() {
  if (mluOp_handle_ != nullptr) {
    PADDLE_ENFORCE_MLU_SUCCESS(mluOpDestroy(mluOp_handle_));
  }
  mluOp_handle_ = nullptr;
}
MLUPlace
place_
;
std
::
unique_ptr
<
Eigen
::
DefaultDevice
>
eigen_device_
;
std
::
unique_ptr
<
stream
::
MLUStream
>
stream_
;
mluCnnlHandle
cnnl_handle_
;
mluOpHandle
mluOp_handle_
;
DISABLE_COPY_AND_ASSIGN
(
MLUContext
);
};
...
...
@@ -89,6 +104,9 @@ class MLUDeviceContext : public DeviceContext {
/*! \brief Return cnnl handle in the device context. */
mluCnnlHandle
cnnl_handle
()
const
;
/*! \brief Return mluOp handle in the device context. */
mluOpHandle
mluOp_handle
()
const
;
/*! \brief Return mlu stream in the device context. */
mluStream
stream
()
const
;
...
...
@@ -135,6 +153,7 @@ class MLUDeviceContext : public DeviceContext {
int
driver_version_
;
int
runtime_version_
;
int
cnnl_version_
;
int
mluOp_version_
;
MLUPlace
place_
;
std
::
shared_ptr
<
MLUContext
>
default_ctx_
;
...
...
paddle/fluid/platform/device/mlu/enforce.h
浏览文件 @
3e1e482b
...
...
@@ -41,6 +41,7 @@ struct MLUStatusType {};
DEFINE_MLU_STATUS_TYPE
(
cnrtStatus
,
cnrtSuccess
,
CNRT
);
DEFINE_MLU_STATUS_TYPE
(
cnnlStatus
,
CNNL_STATUS_SUCCESS
,
CNNL
);
DEFINE_MLU_STATUS_TYPE
(
mluOpStatus
,
MLUOP_STATUS_SUCCESS
,
MLUOP
);
DEFINE_MLU_STATUS_TYPE
(
cnStatus
,
CN_SUCCESS
,
CN
);
#ifdef PADDLE_WITH_CNCL
DEFINE_MLU_STATUS_TYPE
(
cnclStatus
,
CNCL_RET_SUCCESS
,
CNCL
);
...
...
@@ -68,6 +69,15 @@ inline std::string build_mlu_error_msg(cnnlStatus stat) {
return
sout
.
str
();
}
/*************** MLU OP ERROR ***************/
// True when `stat` is anything other than MLUOP_STATUS_SUCCESS.
inline bool is_error(mluOpStatus stat) {
  const bool succeeded = (stat == MLUOP_STATUS_SUCCESS);
  return !succeeded;
}
// Formats an mluOp status as "MLU OP error(<stat>), <error string>. ", where
// the error string comes from mluOpGetErrorString.
inline std::string build_mlu_error_msg(mluOpStatus stat) {
  std::ostringstream message;
  message << "MLU OP error(" << stat << "), ";
  message << mluOpGetErrorString(stat) << ". ";
  return message.str();
}
/*************** CN API ERROR ***************/
// True when `stat` is anything other than CN_SUCCESS.
inline bool is_error(cnStatus stat) {
  const bool succeeded = (stat == CN_SUCCESS);
  return !succeeded;
}
...
...
paddle/fluid/platform/device/mlu/mlu_info.cc
浏览文件 @
3e1e482b
...
...
@@ -126,6 +126,13 @@ int GetMLUCnnlVersion(int id) {
return
x
*
10000
+
y
*
100
+
z
;
}
// Returns the mluOp library version packed into one integer as
// x * 10000 + y * 100 + z, where x, y and z are the three values reported by
// mluOpGetLibVersion. `id` is validated with CheckDeviceId first.
int GetMLUOpVersion(int id) {
  CheckDeviceId(id);
  // Zero-initialise so the result is well defined even if the library call
  // leaves a component untouched.
  int x = 0;
  int y = 0;
  int z = 0;
  mluOpGetLibVersion(&x, &y, &z);
  return x * 10000 + y * 100 + z;
}
int
GetMLUCurrentDeviceId
()
{
int
device_id
;
PADDLE_ENFORCE_MLU_SUCCESS
(
cnrtGetDevice
(
&
device_id
));
...
...
paddle/fluid/platform/device/mlu/mlu_info.h
浏览文件 @
3e1e482b
...
...
@@ -16,10 +16,11 @@ limitations under the License. */
#ifdef PADDLE_WITH_MLU
#include <cn_api.h>
#include <cndrv_id.h>
#include <cnnl.h>
#include <cnpapi.h>
#include <cnpapi_cndrv_id.h>
#include <cnrt.h>
#include <mlu_op.h>
#ifdef PADDLE_WITH_CNCL
#include <cncl.h>
#endif
...
...
@@ -30,11 +31,13 @@ namespace paddle {
using
cnStatus
=
CNresult
;
using
cnrtStatus
=
cnrtRet_t
;
using
cnnlStatus
=
cnnlStatus_t
;
using
mluOpStatus
=
mluOpStatus_t
;
#ifdef PADDLE_WITH_CNCL
using
cnclStatus
=
cnclResult_t
;
#endif
using
mluStream
=
cnrtQueue_t
;
using
mluCnnlHandle
=
cnnlHandle_t
;
using
mluOpHandle
=
mluOpHandle_t
;
using
mluEventHandle
=
cnrtNotifier_t
;
using
mluDeviceHandle
=
CNdev
;
...
...
@@ -49,6 +52,9 @@ int GetMLURuntimeVersion(int id);
//! Get the cnnl version of the ith MLU.
int
GetMLUCnnlVersion
(
int
id
);
//! Get the mluOp version of the ith MLU.
int
GetMLUOpVersion
(
int
id
);
//! Get the total number of MLU devices in system.
int
GetMLUDeviceCount
();
...
...
python/paddle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py
浏览文件 @
3e1e482b
...
...
@@ -186,16 +186,15 @@ class TestGridSamplerOp(OpTest):
self
.
mode
=
"bilinear"
# TODO(fwg): Test this case when cnnl support align_corners = True.
# class Case1(TestGridSamplerOp):
#
# def initTestCase(self):
# self.x_shape = (2, 3, 5, 6)
# self.grid_shape = (2, 8, 9, 2)
# self.theta_shape = (2, 2, 3)
# self.align_corners = True
# self.padding_mode = "zeros"
# self.mode = "bilinear"
class
Case1
(
TestGridSamplerOp
):
def
initTestCase
(
self
):
self
.
x_shape
=
(
2
,
3
,
5
,
6
)
self
.
grid_shape
=
(
2
,
8
,
9
,
2
)
self
.
theta_shape
=
(
2
,
2
,
3
)
self
.
align_corners
=
True
self
.
padding_mode
=
"zeros"
self
.
mode
=
"bilinear"
class
LargeInputCase
(
TestGridSamplerOp
):
...
...
@@ -209,16 +208,16 @@ class LargeInputCase(TestGridSamplerOp):
self
.
mode
=
"bilinear"
# TODO(fwg): Test this case when cnnl support align_corners = True.
# class Case2(LargeInputCase):
#
# def initTestCase(self):
# self.x_shape = (2, 3, 128, 128
)
# self.grid_shape = (2, 130, 130, 2
)
# self.theta_shape = (2, 2, 3)
# self.align_corners = True
# self.padding_mode = "zeros
"
# self.mode = "bilinear"
class
Case2
(
LargeInputCase
):
def
initTestCase
(
self
):
self
.
x_shape
=
(
2
,
3
,
128
,
128
)
self
.
grid_shape
=
(
2
,
130
,
130
,
2
)
self
.
theta_shape
=
(
2
,
2
,
3
)
self
.
align_corners
=
True
self
.
padding_mode
=
"zeros"
self
.
mode
=
"bilinear
"
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py
浏览文件 @
3e1e482b
...
...
@@ -152,6 +152,11 @@ class TestKeepDim8DReduce(TestMLUReduceSumOp):
self
.
axis
=
(
3
,
4
,
5
)
self
.
keep_dim
=
True
def
test_check_grad
(
self
):
self
.
check_grad_with_place
(
self
.
place
,
[
'X'
],
'Out'
,
max_relative_error
=
0.03
)
class
TestReduceAll
(
TestMLUReduceSumOp
):
...
...
tools/dockerfile/Dockerfile.mlu
浏览文件 @
3e1e482b
# An image for building paddle binaries
# Update CNTOOLKIT_VERSION, CNNL_VERSION
and CNCL
_VERSION if using other versions
# Update CNTOOLKIT_VERSION, CNNL_VERSION
, CNCL_VERSION and MLUOPS
_VERSION if using other versions
#
# Build:
# - CNTOOLKIT_VERSION 2.8.5
# - CNNL_VERSION 1.10.5
# - CNCL_VERSION 1.1.2
# - CNTOOLKIT_VERSION 3.0.2-1
# - CNNL_VERSION 1.13.0-1
# - CNCL_VERSION 1.2.1-1
# - MLUOPS_VERSION 0.2.0-1
#
# Download four packages from FTP (contact Cambricon AE to get the FTP URL)
# - cntoolkit_2.8.5.ubuntu18.04_amd64.deb
# - cnnl_1.10.5.ubuntu18.04_amd64.deb
# - cncl_1.1.2.ubuntu18.04_amd64.deb
# - cntoolkit_3.0.2-1.ubuntu18.04_amd64.deb
# - cnnl_1.13.0-1.ubuntu18.04_amd64.deb
# - cncl_1.2.1-1.ubuntu18.04_amd64.deb
# - mluops_0.2.0-1.ubuntu18.04_amd64.deb
# copy them to current directory first, then run build commands
#
# For example:
...
...
@@ -19,11 +21,13 @@
# (get cntoolkit pkg)
# (get cnnl pkg)
# (get cncl pkg)
# (get mluops pkg)
#
# docker build -f Dockerfile.mlu \
# --build-arg CNTOOLKIT_VERSION=2.8.5 \
# --build-arg CNNL_VERSION=1.10.5 \
# --build-arg CNCL_VERSION=1.1.2 \
# --build-arg CNTOOLKIT_VERSION=3.0.2-1 \
# --build-arg CNNL_VERSION=1.13.0-1 \
# --build-arg CNCL_VERSION=1.2.1-1 \
# --build-arg MLUOPS_VERSION=0.2.0-1 \
# -t paddlepaddle/paddle:latest-dev-mlu .
#
# without mlu device:
...
...
@@ -40,12 +44,14 @@ MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ENV WITH_GPU=OFF
ARG CNTOOLKIT_VERSION=2.8.5
ARG CNNL_VERSION=1.10.5
ARG CNCL_VERSION=1.1.2
ARG CNTOOLKIT_VERSION=3.0.2-1
ARG CNNL_VERSION=1.13.0-1
ARG CNCL_VERSION=1.2.1-1
ARG MLUOPS_VERSION=0.2.0-1
ARG CNTOOLKIT_PKG=cntoolkit_$CNTOOLKIT_VERSION.ubuntu18.04_amd64.deb
ARG CNNL_PKG=cnnl_$CNNL_VERSION.ubuntu18.04_amd64.deb
ARG CNCL_PKG=cncl_$CNCL_VERSION.ubuntu18.04_amd64.deb
ARG MLUOPS_PKG=mluops_$MLUOPS_VERSION.ubuntu18.04_amd64.deb
# install cntoolkit
COPY $CNTOOLKIT_PKG ./
...
...
@@ -67,6 +73,11 @@ COPY $CNCL_PKG ./
RUN dpkg -i $CNCL_PKG && \
rm -f $CNCL_PKG
# install mluops
COPY $MLUOPS_PKG ./
RUN dpkg -i $MLUOPS_PKG && \
rm -f $MLUOPS_PKG
# Clean
RUN apt-get clean -y
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录